Merge amd-staging into amd-master 20230914
Signed-off-by: Hao Zhou <Hao.Zhou@amd.com> Change-Id: I408a62826faff38d319b0d7ef08767223b3b327f
This commit is contained in:
@@ -884,14 +884,28 @@ struct metrics_table_header_t {
|
||||
#define RSMI_GPU_METRICS_API_FORMAT_VER 1
|
||||
// The content version increments when gpu_metrics is extended with new and/or
|
||||
// existing field sizes are changed.
|
||||
|
||||
/**
|
||||
* @brief The GPU metrics version 1
|
||||
*/
|
||||
#define RSMI_GPU_METRICS_API_CONTENT_VER_1 1
|
||||
/**
|
||||
* @brief The GPU metrics version 2
|
||||
*/
|
||||
#define RSMI_GPU_METRICS_API_CONTENT_VER_2 2
|
||||
/**
|
||||
* @brief The GPU metrics version 3
|
||||
*/
|
||||
#define RSMI_GPU_METRICS_API_CONTENT_VER_3 3
|
||||
|
||||
// This should match NUM_HBM_INSTANCES
|
||||
/**
|
||||
* @brief This should match NUM_HBM_INSTANCES
|
||||
*/
|
||||
#define RSMI_NUM_HBM_INSTANCES 4
|
||||
|
||||
// Unit conversion factor for HBM temperatures
|
||||
/**
|
||||
* @brief Unit conversion factor for HBM temperatures
|
||||
*/
|
||||
#define CENTRIGRADE_TO_MILLI_CENTIGRADE 1000
|
||||
|
||||
typedef struct {
|
||||
@@ -2230,7 +2244,7 @@ rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent);
|
||||
* If the function returns RSMI_STATUS_SUCCESS, the counter will be set in the value field of
|
||||
* the rsmi_utilization_counter_t.
|
||||
*
|
||||
* @param[in] count The size of @utilization_counters array.
|
||||
* @param[in] count The size of utilization_counters array.
|
||||
*
|
||||
* @param[inout] timestamp The timestamp when the counter is retrieved. Resolution: 1 ns.
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
@@ -2392,7 +2406,7 @@ rsmi_status_t rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind,
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
*
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_gpu_reset(int32_t dv_ind);
|
||||
rsmi_status_t rsmi_dev_gpu_reset(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief This function retrieves the voltage/frequency curve information
|
||||
@@ -2626,7 +2640,7 @@ rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind,
|
||||
*
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level_t perf_lvl);
|
||||
rsmi_dev_perf_level_set(uint32_t dv_ind, rsmi_dev_perf_level_t perf_lvl);
|
||||
|
||||
/**
|
||||
* @brief Set the PowerPlay performance level associated with the device with
|
||||
@@ -2692,7 +2706,7 @@ rsmi_dev_perf_level_set_v1(uint32_t dv_ind, rsmi_dev_perf_level_t perf_lvl);
|
||||
* @retval ::RSMI_STATUS_PERMISSION function requires root access
|
||||
*
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od);
|
||||
rsmi_status_t rsmi_dev_overdrive_level_set(uint32_t dv_ind, uint32_t od);
|
||||
|
||||
/**
|
||||
* @brief Set the overdrive percent associated with the device with provided
|
||||
@@ -3340,7 +3354,7 @@ rsmi_compute_process_gpus_get(uint32_t pid, uint32_t *dv_indices,
|
||||
* @brief Get the info of a process on a specific device.
|
||||
*
|
||||
* @details Given a process id @p pid, a @p dv_ind, this function will
|
||||
* write the process information for @p pid on the device, if available, to
|
||||
* write the process information for pid on the device, if available, to
|
||||
* the memory pointed to by @p proc.
|
||||
*
|
||||
* @param[in] pid The process id of the process for which the gpu
|
||||
@@ -3348,7 +3362,7 @@ rsmi_compute_process_gpus_get(uint32_t pid, uint32_t *dv_indices,
|
||||
*
|
||||
* @param[in] dv_ind the index of the device on which the process is running.
|
||||
*
|
||||
* @param[inout] procs a pointer to memory provided by the caller to which
|
||||
* @param[inout] proc a pointer to memory provided by the caller to which
|
||||
* process information will be written.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call
|
||||
@@ -3540,7 +3554,7 @@ rsmi_topo_get_link_type(uint32_t dv_ind_src, uint32_t dv_ind_dst,
|
||||
*
|
||||
* @details Given a source device index @p dv_ind_src and
|
||||
* a destination device index @p dv_ind_dst, and a pointer to a
|
||||
* bool @accessible, this function will write the P2P connection status
|
||||
* bool @p accessible, this function will write the P2P connection status
|
||||
* between the device @p dv_ind_src and @p dv_ind_dst to the memory
|
||||
* pointed to by @p accessible.
|
||||
*
|
||||
|
||||
@@ -90,7 +90,7 @@
|
||||
/* This group of macros is used to facilitate checking of support for rsmi_dev*
|
||||
* "getter" functions. When the return buffer is set to nullptr, the macro will
|
||||
* check the previously gathered device support data to see if the function,
|
||||
* with possible variants (e.g., memory types, firware types,...) and
|
||||
* with possible variants (e.g., memory types, firmware types,...) and
|
||||
* subvariants (e.g. monitors/sensors) are supported.
|
||||
*/
|
||||
// This macro assumes dev already available
|
||||
|
||||
@@ -118,6 +118,10 @@ GetProcessGPUs(uint32_t pid, std::unordered_set<uint64_t> *gpu_count);
|
||||
int
|
||||
ReadKFDDeviceProperties(uint32_t dev_id, std::vector<std::string> *retVec);
|
||||
|
||||
int read_node_properties(uint32_t node, std::string property_name,
|
||||
uint64_t *val);
|
||||
int get_gpu_id(uint32_t node, uint64_t *gpu_id);
|
||||
|
||||
} // namespace smi
|
||||
} // namespace amd
|
||||
|
||||
|
||||
@@ -113,7 +113,8 @@ class RocmSMI {
|
||||
uint64_t *weight);
|
||||
int get_node_index(uint32_t dv_ind, uint32_t *node_ind);
|
||||
const RocmSMI_env_vars& getEnv(void);
|
||||
void printEnvVarInfo(void);
|
||||
std::string getRSMIEnvVarInfo(void);
|
||||
void debugRSMIEnvVarInfo();
|
||||
bool isLoggingOn(void);
|
||||
uint32_t getLogSetting(void);
|
||||
static const std::map<amd::smi::DevInfoTypes, std::string> devInfoTypesStrings;
|
||||
|
||||
@@ -99,13 +99,17 @@ GetDevBinaryBlob(amd::smi::DevInfoTypes type,
|
||||
rsmi_status_t ErrnoToRsmiStatus(int err);
|
||||
std::string getRSMIStatusString(rsmi_status_t ret);
|
||||
std::tuple<bool, std::string, std::string, std::string, std::string,
|
||||
std::string, std::string, std::string, std::string>
|
||||
std::string, std::string, std::string, std::string,
|
||||
std::string, std::string, std::string>
|
||||
getSystemDetails(void);
|
||||
void logSystemDetails(void);
|
||||
rsmi_status_t getBDFString(uint64_t bdf_id, std::string& bfd_str);
|
||||
void logHexDump(const char *desc, const void *addr, const size_t len,
|
||||
size_t perLine);
|
||||
bool isSystemBigEndian();
|
||||
std::string getBuildType();
|
||||
std::string getMyLibPath();
|
||||
int subDirectoryCountInPath(const std::string path);
|
||||
template <typename T>
|
||||
std::string print_int_as_hex(T i, bool showHexNotation=true) {
|
||||
std::stringstream ss;
|
||||
|
||||
+1
-1
@@ -72,7 +72,7 @@ target_include_directories(${OAM_EXAMPLE_EXE} PRIVATE ${OAM_INC_LIST})
|
||||
target_link_libraries(${OAM_EXAMPLE_EXE} ${OAM_TARGET})
|
||||
add_library(${OAM_TARGET} ${CMN_SRC_LIST} ${OAM_SRC_LIST}
|
||||
${CMN_INC_LIST} ${OAM_INC_LIST})
|
||||
target_link_libraries(${OAM_TARGET} pthread rt)
|
||||
target_link_libraries(${OAM_TARGET} pthread rt dl)
|
||||
target_include_directories(${OAM_TARGET} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include ${COMMON_PROJ_ROOT}/common/shared_mutex)
|
||||
|
||||
|
||||
+115
-77
@@ -173,10 +173,12 @@ def formatMatrixToJSON(deviceList, matrix, metricName):
|
||||
printSysLog(metricName.format(deviceList[row_indx], deviceList[col_ind]), valueStr)
|
||||
|
||||
|
||||
def getBus(device):
|
||||
def getBus(device, silent=False):
|
||||
""" Return the bus identifier of a given device
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is off.
|
||||
"""
|
||||
bdfid = c_uint64(0)
|
||||
ret = rocmsmi.rsmi_dev_pci_id_get(device, byref(bdfid))
|
||||
@@ -188,16 +190,18 @@ def getBus(device):
|
||||
function = bdfid.value & 0x7
|
||||
|
||||
pic_id = '{:04X}:{:02X}:{:02X}.{:0X}'.format(domain, bus, device, function)
|
||||
if rsmi_ret_ok(ret, device, 'get_pci_id'):
|
||||
if rsmi_ret_ok(ret, device, 'get_pci_id', silent):
|
||||
return pic_id
|
||||
|
||||
|
||||
def getFanSpeed(device):
|
||||
def getFanSpeed(device, silent=True):
|
||||
""" Return a tuple with the fan speed (value,%) for a specified device,
|
||||
or (None,None) if either current fan speed or max fan speed cannot be
|
||||
obtained
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is on.
|
||||
"""
|
||||
fanLevel = c_int64()
|
||||
fanMax = c_int64()
|
||||
@@ -209,7 +213,7 @@ def getFanSpeed(device):
|
||||
/sys/class/drm/cardX/device/hwmon/hwmonX/pwmX
|
||||
"""
|
||||
ret = rocmsmi.rsmi_dev_fan_speed_get(device, sensor_ind, byref(fanLevel))
|
||||
if rsmi_ret_ok(ret, device, 'get_fan_speed', True):
|
||||
if rsmi_ret_ok(ret, device, 'get_fan_speed', silent):
|
||||
fl = fanLevel.value
|
||||
last_ret = ret
|
||||
|
||||
@@ -217,7 +221,7 @@ def getFanSpeed(device):
|
||||
/sys/class/drm/cardX/device/hwmon/hwmonX/pwmX
|
||||
"""
|
||||
ret = rocmsmi.rsmi_dev_fan_speed_max_get(device, sensor_ind, byref(fanMax))
|
||||
if rsmi_ret_ok(ret, device, 'get_fan_max_speed', True):
|
||||
if rsmi_ret_ok(ret, device, 'get_fan_max_speed', silent):
|
||||
fm = fanMax.value
|
||||
|
||||
""" In case we had an error before, we don't overwrite it with a
|
||||
@@ -232,59 +236,67 @@ def getFanSpeed(device):
|
||||
return (last_ret, fl, round((float(fl) / float(fm)) * 100, 2))
|
||||
|
||||
|
||||
def getGpuUse(device):
|
||||
def getGpuUse(device, silent=False):
|
||||
""" Return the current GPU usage as a percentage
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is off.
|
||||
"""
|
||||
percent = c_uint32()
|
||||
ret = rocmsmi.rsmi_dev_busy_percent_get(device, byref(percent))
|
||||
if rsmi_ret_ok(ret, device, 'GPU Utilization '):
|
||||
if rsmi_ret_ok(ret, device, 'GPU Utilization ', silent):
|
||||
return percent.value
|
||||
return -1
|
||||
|
||||
|
||||
def getId(device):
|
||||
def getId(device, silent=False):
|
||||
""" Return the hexadecimal value of a device's ID
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is off.
|
||||
"""
|
||||
dv_id = c_short()
|
||||
ret = rocmsmi.rsmi_dev_id_get(device, byref(dv_id))
|
||||
if rsmi_ret_ok(ret, device, 'get_device_id'):
|
||||
if rsmi_ret_ok(ret, device, 'get_device_id', silent):
|
||||
return hex(dv_id.value)
|
||||
|
||||
|
||||
def getRev(device):
|
||||
def getRev(device, silent=False):
|
||||
""" Return the hexadecimal value of a device's Revision
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is off.
|
||||
"""
|
||||
dv_rev = c_short()
|
||||
ret = rocmsmi.rsmi_dev_revision_get(device, byref(dv_rev))
|
||||
if rsmi_ret_ok(ret, device, 'get_device_rev'):
|
||||
if rsmi_ret_ok(ret, device, 'get_device_rev', silent):
|
||||
return hex(dv_rev.value)
|
||||
|
||||
|
||||
def getMaxPower(device):
|
||||
def getMaxPower(device, silent=False):
|
||||
""" Return the maximum power cap of a given device
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is off.
|
||||
"""
|
||||
power_cap = c_uint64()
|
||||
ret = rocmsmi.rsmi_dev_power_cap_get(device, 0, byref(power_cap))
|
||||
if rsmi_ret_ok(ret, device, 'get_power_cap'):
|
||||
if rsmi_ret_ok(ret, device, 'get_power_cap', silent):
|
||||
return power_cap.value / 1000000
|
||||
return -1
|
||||
|
||||
|
||||
def getMemInfo(device, memType, quiet=False):
|
||||
def getMemInfo(device, memType, silent=False):
|
||||
""" Returns a tuple of (memory_used, memory_total) of
|
||||
the requested memory type usage for the device specified
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param type: [vram|vis_vram|gtt] Memory type to return
|
||||
@param quiet=Turn on to silience error output
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is off,
|
||||
which exposes any issue accessing the different
|
||||
memory types.
|
||||
@@ -300,11 +312,11 @@ def getMemInfo(device, memType, quiet=False):
|
||||
memTotal = None
|
||||
|
||||
ret = rocmsmi.rsmi_dev_memory_usage_get(device, memory_type_l.index(memType), byref(memoryUse))
|
||||
if rsmi_ret_ok(ret, device, 'get_memory_usage_' + str(memType), quiet):
|
||||
if rsmi_ret_ok(ret, device, 'get_memory_usage_' + str(memType), silent):
|
||||
memUsed = memoryUse.value
|
||||
|
||||
ret = rocmsmi.rsmi_dev_memory_total_get(device, memory_type_l.index(memType), byref(memoryTot))
|
||||
if rsmi_ret_ok(ret, device, 'get_memory_total_' + str(memType), quiet):
|
||||
if rsmi_ret_ok(ret, device, 'get_memory_total_' + str(memType), silent):
|
||||
memTotal = memoryTot.value
|
||||
return (memUsed, memTotal)
|
||||
|
||||
@@ -334,14 +346,16 @@ def getProcessName(pid):
|
||||
return pName
|
||||
|
||||
|
||||
def getPerfLevel(device):
|
||||
def getPerfLevel(device, silent=False):
|
||||
""" Return the current performance level of a given device
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is off.
|
||||
"""
|
||||
perf = rsmi_dev_perf_level_t()
|
||||
ret = rocmsmi.rsmi_dev_perf_level_get(device, byref(perf))
|
||||
if rsmi_ret_ok(ret, device, 'get_perf_level'):
|
||||
if rsmi_ret_ok(ret, device, 'get_perf_level', silent):
|
||||
return perf_level_string(perf.value)
|
||||
return 'N/A'
|
||||
|
||||
@@ -369,42 +383,48 @@ def getPidList():
|
||||
return
|
||||
|
||||
|
||||
def getPower(device):
|
||||
def getPower(device, silent=False):
|
||||
""" Return the current power level of a given device
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is off.
|
||||
"""
|
||||
power = c_uint32()
|
||||
ret = rocmsmi.rsmi_dev_power_ave_get(device, 0, byref(power))
|
||||
if rsmi_ret_ok(ret, device, 'get_power_avg'):
|
||||
if rsmi_ret_ok(ret, device, 'get_power_avg', silent):
|
||||
return power.value / 1000000
|
||||
return 'N/A'
|
||||
|
||||
|
||||
def getRasEnablement(device, block):
|
||||
def getRasEnablement(device, block, silent=True):
|
||||
""" Return RAS enablement state for a given device
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param block: RAS block identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is on.
|
||||
"""
|
||||
state = rsmi_ras_err_state_t()
|
||||
ret = rocmsmi.rsmi_dev_ecc_status_get(device, rsmi_gpu_block_d[block], byref(state))
|
||||
|
||||
if rsmi_ret_ok(ret, device, 'get_ecc_status_' + str(block), True):
|
||||
if rsmi_ret_ok(ret, device, 'get_ecc_status_' + str(block), silent):
|
||||
return rsmi_ras_err_stale_machine[state.value].upper()
|
||||
return 'N/A'
|
||||
|
||||
|
||||
def getTemp(device, sensor):
|
||||
def getTemp(device, sensor, silent=True):
|
||||
""" Display the current temperature from a given device's sensor
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param sensor: Temperature sensor identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is on.
|
||||
"""
|
||||
temp = c_int64(0)
|
||||
metric = rsmi_temperature_metric_t.RSMI_TEMP_CURRENT
|
||||
ret = rocmsmi.rsmi_dev_temp_metric_get(c_uint32(device), temp_type_lst.index(sensor), metric, byref(temp))
|
||||
if rsmi_ret_ok(ret, device, 'get_temp_metric' + str(sensor), True):
|
||||
if rsmi_ret_ok(ret, device, 'get_temp_metric' + str(sensor), silent):
|
||||
return temp.value / 1000
|
||||
return 'N/A'
|
||||
|
||||
@@ -428,52 +448,60 @@ def findFirstAvailableTemp(device):
|
||||
continue
|
||||
return (ret_temp_type, ret_temp)
|
||||
|
||||
def getVbiosVersion(device):
|
||||
def getVbiosVersion(device, silent=False):
|
||||
""" Returns the VBIOS version for a given device
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is off.
|
||||
"""
|
||||
vbios = create_string_buffer(256)
|
||||
ret = rocmsmi.rsmi_dev_vbios_version_get(device, vbios, 256)
|
||||
if ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
|
||||
return "Unsupported"
|
||||
elif rsmi_ret_ok(ret, device):
|
||||
elif rsmi_ret_ok(ret, device, silent=silent):
|
||||
return vbios.value.decode()
|
||||
|
||||
|
||||
def getVersion(deviceList, component):
|
||||
def getVersion(deviceList, component, silent=False):
|
||||
""" Return the software version for the specified component
|
||||
|
||||
@param deviceList: List of DRM devices (can be a single-item list)
|
||||
@param component: Component (currently only driver)
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is off.
|
||||
"""
|
||||
ver_str = create_string_buffer(256)
|
||||
ret = rocmsmi.rsmi_version_str_get(component, ver_str, 256)
|
||||
if rsmi_ret_ok(ret, None, 'get_version_str_' + str(component)):
|
||||
if rsmi_ret_ok(ret, None, 'get_version_str_' + str(component), silent):
|
||||
return ver_str.value.decode()
|
||||
return None
|
||||
|
||||
|
||||
def getComputePartition(device):
|
||||
def getComputePartition(device, silent=True):
|
||||
""" Return the current compute partition of a given device
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is on.
|
||||
"""
|
||||
currentComputePartition = create_string_buffer(256)
|
||||
ret = rocmsmi.rsmi_dev_compute_partition_get(device, currentComputePartition, 256)
|
||||
if rsmi_ret_ok(ret, device, 'get_compute_partition', silent=True) and currentComputePartition.value.decode():
|
||||
if rsmi_ret_ok(ret, device, 'get_compute_partition', silent) and currentComputePartition.value.decode():
|
||||
return str(currentComputePartition.value.decode())
|
||||
return "N/A"
|
||||
|
||||
|
||||
def getMemoryPartition(device):
|
||||
def getMemoryPartition(device, silent=True):
|
||||
""" Return the current memory partition of a given device
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is on.
|
||||
"""
|
||||
currentNPSMode = create_string_buffer(256)
|
||||
ret = rocmsmi.rsmi_dev_nps_mode_get(device, currentNPSMode, 256)
|
||||
if rsmi_ret_ok(ret, device, 'get_NPS_mode', silent=True) and currentNPSMode.value.decode():
|
||||
if rsmi_ret_ok(ret, device, 'get_NPS_mode', silent) and currentNPSMode.value.decode():
|
||||
return str(currentNPSMode.value.decode())
|
||||
return "N/A"
|
||||
|
||||
@@ -1610,10 +1638,13 @@ def showAllConcise(deviceList):
|
||||
MAX_ALL_CONCISE_WIDTH = 100
|
||||
appWidth_temp = appWidth
|
||||
appWidth = MAX_ALL_CONCISE_WIDTH
|
||||
silent = True
|
||||
|
||||
printLogSpacer(' Concise Info ')
|
||||
deviceList.sort()
|
||||
(temp_type, _) = findFirstAvailableTemp(deviceList[0])
|
||||
temp_type = '(' + temp_type_lst[0] + ')'
|
||||
if len(deviceList) >= 1:
|
||||
(temp_type, _) = findFirstAvailableTemp(deviceList[0])
|
||||
available_temp_type = temp_type.lower()
|
||||
available_temp_type = available_temp_type.replace('(', '')
|
||||
available_temp_type = available_temp_type.replace(')', '')
|
||||
@@ -1635,9 +1666,9 @@ def showAllConcise(deviceList):
|
||||
values = {}
|
||||
degree_sign = u'\N{DEGREE SIGN}'
|
||||
for device in deviceList:
|
||||
gpu_dev_product_info = getDevProductInfo(device)
|
||||
gpu_dev_product_info = getDevProductInfo(device, silent)
|
||||
gpu_dev_product_info_names = list(gpu_dev_product_info[device])
|
||||
temp_val = str(getTemp(device, available_temp_type))
|
||||
temp_val = str(getTemp(device, available_temp_type, silent))
|
||||
if temp_val != 'N/A':
|
||||
temp_val += degree_sign + 'C'
|
||||
avgPwr = str(getPower(device))
|
||||
@@ -1645,26 +1676,25 @@ def showAllConcise(deviceList):
|
||||
avgPwr += 'W'
|
||||
else:
|
||||
avgPwr = 'N/A'
|
||||
combined_partition = (getMemoryPartition(device) + ", "
|
||||
+ getComputePartition(device))
|
||||
concise = True
|
||||
sclk = showCurrentClocks([device], 'sclk', concise)
|
||||
mclk = showCurrentClocks([device], 'mclk', concise)
|
||||
(retCode, fanLevel, fanSpeed) = getFanSpeed(device)
|
||||
combined_partition = (getMemoryPartition(device, silent) + ", "
|
||||
+ getComputePartition(device, silent))
|
||||
sclk = showCurrentClocks([device], 'sclk', concise=silent)
|
||||
mclk = showCurrentClocks([device], 'mclk', concise=silent)
|
||||
(retCode, fanLevel, fanSpeed) = getFanSpeed(device, silent)
|
||||
fan = str(fanSpeed) + '%'
|
||||
if getPerfLevel(device) != -1:
|
||||
perf = getPerfLevel(device)
|
||||
if getPerfLevel(device, silent) != -1:
|
||||
perf = getPerfLevel(device, silent)
|
||||
else:
|
||||
perf = 'Unsupported'
|
||||
if getMaxPower(device) != -1:
|
||||
pwrCap = str(getMaxPower(device)) + 'W'
|
||||
if getMaxPower(device, silent) != -1:
|
||||
pwrCap = str(getMaxPower(device, silent)) + 'W'
|
||||
else:
|
||||
pwrCap = 'Unsupported'
|
||||
if getGpuUse(device) != -1:
|
||||
gpu_busy = str(getGpuUse(device)) + '%'
|
||||
if getGpuUse(device, silent) != -1:
|
||||
gpu_busy = str(getGpuUse(device, silent)) + '%'
|
||||
else:
|
||||
gpu_busy = 'Unsupported'
|
||||
vram_used, vram_total = getMemInfo(device, 'vram', True)
|
||||
vram_used, vram_total = getMemInfo(device, 'vram', silent)
|
||||
mem_use_pct = 0
|
||||
if vram_used is None:
|
||||
mem_use_pct='Unsupported'
|
||||
@@ -1698,7 +1728,7 @@ def showAllConcise(deviceList):
|
||||
for device in deviceList:
|
||||
printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in
|
||||
zip(range(len(max_widths)), values['card%s' % (str(device))])), None)
|
||||
gpu_dev_product_info = getDevProductInfo(device)
|
||||
gpu_dev_product_info = getDevProductInfo(device, silent)
|
||||
gpu_dev_product_info_names = list(gpu_dev_product_info[device])
|
||||
if (len(gpu_dev_product_info_names) > 1):
|
||||
printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in
|
||||
@@ -1722,19 +1752,20 @@ def showAllConciseHw(deviceList):
|
||||
header = ['GPU', 'DID', 'DREV', 'GFX RAS', 'SDMA RAS', 'UMC RAS', 'VBIOS', 'BUS']
|
||||
head_widths = [len(head) + 2 for head in header]
|
||||
values = {}
|
||||
silent = True
|
||||
for device in deviceList:
|
||||
gpuid = getId(device)
|
||||
gpuid = getId(device, silent)
|
||||
if str(gpuid).startswith('0x'):
|
||||
gpuid = str(gpuid)[2:]
|
||||
gpurev = getRev(device)
|
||||
gpurev = getRev(device, silent)
|
||||
if str(gpurev).startswith('0x'):
|
||||
gpurev = str(gpurev)[2:]
|
||||
|
||||
gfxRas = getRasEnablement(device, 'GFX')
|
||||
sdmaRas = getRasEnablement(device, 'SDMA')
|
||||
umcRas = getRasEnablement(device, 'UMC')
|
||||
vbios = getVbiosVersion(device)
|
||||
bus = getBus(device)
|
||||
gfxRas = getRasEnablement(device, 'GFX', silent)
|
||||
sdmaRas = getRasEnablement(device, 'SDMA', silent)
|
||||
umcRas = getRasEnablement(device, 'UMC', silent)
|
||||
vbios = getVbiosVersion(device, silent)
|
||||
bus = getBus(device, silent)
|
||||
values['card%s' % (str(device))] = [device, gpuid, gpurev, gfxRas, sdmaRas, umcRas, vbios, bus]
|
||||
val_widths = {}
|
||||
for device in deviceList:
|
||||
@@ -1829,8 +1860,8 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False):
|
||||
if concise: # in case function is used for concise output, no need to print.
|
||||
return '{:.0f}Mhz'.format(fr)
|
||||
printLog(device, '{} clock level'.format(clk_defined), '{} ({:.0f}Mhz)'.format(levl, fr))
|
||||
else:
|
||||
printErrLog(device, '%s clock is unsupported' % (clk_defined))
|
||||
elif not concise:
|
||||
logging.debug('{} clock is unsupported on device[{}]'.format(clk_defined, device))
|
||||
|
||||
else: # if clk is not defined, will display all current clk
|
||||
for clk_type in sorted(rsmi_clk_names_dict):
|
||||
@@ -1847,7 +1878,7 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False):
|
||||
printLog(device, '%s clock level:' % (clk_type), levl)
|
||||
else:
|
||||
printLog(device, '%s clock level: %s' % (clk_type, levl), '(%sMhz)' % (str(fr)[:-2]))
|
||||
else:
|
||||
elif not concise:
|
||||
logging.debug('{} clock is unsupported on device[{}]'.format(clk_type, device))
|
||||
# pcie clocks
|
||||
if rocmsmi.rsmi_dev_pci_bandwidth_get(device, None) == 1:
|
||||
@@ -1860,9 +1891,10 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False):
|
||||
fr = '{:.1f}GT/s x{}'.format(bw.transfer_rate.frequency[current_f] / 1000000000,
|
||||
bw.lanes[current_f])
|
||||
printLog(device, 'pcie clock level', '{} ({})'.format(current_f, fr))
|
||||
else:
|
||||
logging.debug('PCIe clock is unsupported on device[{}]'.format(device))
|
||||
printLogSpacer()
|
||||
elif not concise:
|
||||
logging.debug('{} clock is unsupported on device[{}]'.format('PCIe', device))
|
||||
if not concise:
|
||||
printLogSpacer()
|
||||
|
||||
|
||||
def showCurrentFans(deviceList):
|
||||
@@ -2419,47 +2451,51 @@ def showProductName(deviceList):
|
||||
printLogSpacer()
|
||||
|
||||
|
||||
def getDevProductInfo(device):
|
||||
def getDevProductInfo(device, silent=False):
|
||||
""" Show the requested product name for the device requested
|
||||
|
||||
@param device: Device we want to get the info for
|
||||
@param silent=Turn on to silence error output
|
||||
(you plan to handle manually). Default is off.
|
||||
"""
|
||||
|
||||
# Retrieve card vendor
|
||||
MAX_BUFF_SIZE = 256
|
||||
MAX_DESC_SIZE = 20
|
||||
device_info = "N/A"
|
||||
device_series = "N/A"
|
||||
device_model = "N/A"
|
||||
gpu_revision = "N/A"
|
||||
device_list = {}
|
||||
vendor = create_string_buffer(MAX_BUFF_SIZE)
|
||||
ret = rocmsmi.rsmi_dev_vendor_name_get(device, vendor, MAX_BUFF_SIZE)
|
||||
# Only continue if GPU vendor is AMD
|
||||
if rsmi_ret_ok(ret, device, 'get_vendor_name') and isAmdDevice(device):
|
||||
if rsmi_ret_ok(ret, device, 'get_vendor_name', silent) and isAmdDevice(device):
|
||||
# Retrieve the device series
|
||||
series = create_string_buffer(MAX_BUFF_SIZE)
|
||||
ret = rocmsmi.rsmi_dev_name_get(device, series, MAX_BUFF_SIZE)
|
||||
if rsmi_ret_ok(ret, device, 'get_name'):
|
||||
if rsmi_ret_ok(ret, device, 'get_name', silent):
|
||||
try:
|
||||
device_series = series.value.decode()
|
||||
except UnicodeDecodeError:
|
||||
device_series = "N/A"
|
||||
printErrLog(device, "Unable to read card series")
|
||||
if not silent:
|
||||
printErrLog(device, "Unable to read card series")
|
||||
|
||||
# Retrieve the device model
|
||||
model = create_string_buffer(MAX_BUFF_SIZE)
|
||||
ret = rocmsmi.rsmi_dev_subsystem_name_get(device, model, MAX_BUFF_SIZE)
|
||||
if rsmi_ret_ok(ret, device, 'get_subsystem_name'):
|
||||
if rsmi_ret_ok(ret, device, 'get_subsystem_name', silent):
|
||||
try:
|
||||
device_model = model.value.decode()
|
||||
device_model = padHexValue(device_model, 4)
|
||||
except UnicodeDecodeError:
|
||||
device_model = "N/A"
|
||||
printErrLog(device, "Unable to read device model")
|
||||
if not silent:
|
||||
printErrLog(device, "Unable to read device model")
|
||||
|
||||
try:
|
||||
gpu_revision = padHexValue(getRev(device), 2)
|
||||
except Exception as exc:
|
||||
gpu_revision = "N/A"
|
||||
printErrLog(device, "Unable to read card revision %s" % (exc))
|
||||
if not silent:
|
||||
printErrLog(device, "Unable to read card revision %s" % (exc))
|
||||
|
||||
device_series_str = str(device_series[:MAX_DESC_SIZE])
|
||||
device_series_str = device_series_str.ljust(MAX_DESC_SIZE, ' ')
|
||||
@@ -2805,7 +2841,9 @@ def getGraphColor(percentage):
|
||||
|
||||
def showTempGraph(deviceList):
|
||||
deviceList.sort()
|
||||
(temp_type, temp_value) = findFirstAvailableTemp(deviceList[0])
|
||||
temp_type = '(' + temp_type_lst[0] + ')'
|
||||
if len(deviceList) >= 1:
|
||||
(temp_type, _) = findFirstAvailableTemp(deviceList[0])
|
||||
printLogSpacer(' Temperature Graph ' + temp_type + ' ')
|
||||
temp_type = temp_type.lower()
|
||||
temp_type = temp_type.replace('(', '')
|
||||
@@ -3396,7 +3434,7 @@ def rsmi_ret_ok(my_ret, device=None, metric=None, silent=False):
|
||||
@param my_ret: Return of RSMI call (rocm_smi_lib API)
|
||||
@param metric: Parameter of GPU currently being analyzed
|
||||
@param silent: Echo verbose error response.
|
||||
True siliences err output, False does not silience err output (default).
|
||||
True silences err output, False does not silence err output (default).
|
||||
"""
|
||||
global RETCODE
|
||||
global PRINT_JSON
|
||||
@@ -3413,8 +3451,8 @@ def rsmi_ret_ok(my_ret, device=None, metric=None, silent=False):
|
||||
if err_str.value is not None:
|
||||
returnString += '%s\t' % (err_str.value.decode())
|
||||
if not PRINT_JSON:
|
||||
logging.debug('%s', returnString)
|
||||
if not silent:
|
||||
logging.debug('%s', returnString)
|
||||
if my_ret in rsmi_status_verbose_err_out:
|
||||
printLog(device, metric + ", " + rsmi_status_verbose_err_out[my_ret], None)
|
||||
RETCODE = my_ret
|
||||
|
||||
@@ -80,7 +80,7 @@ add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc")
|
||||
target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
|
||||
add_library(${ROCM_SMI_TARGET} ${CMN_SRC_LIST} ${SMI_SRC_LIST}
|
||||
${CMN_INC_LIST} ${SMI_INC_LIST})
|
||||
target_link_libraries(${ROCM_SMI_TARGET} pthread rt)
|
||||
target_link_libraries(${ROCM_SMI_TARGET} pthread rt dl)
|
||||
target_include_directories(${ROCM_SMI_TARGET} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR} ${COMMON_PROJ_ROOT}/common/shared_mutex)
|
||||
|
||||
|
||||
+277
-352
File diff soppresso perché troppo grande
Carica Diff
+13
-13
@@ -41,20 +41,20 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <unistd.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
@@ -164,8 +164,7 @@ GetSupportedEventGroups(uint32_t dev_num, dev_evt_grp_set_t *supported_grps) {
|
||||
}
|
||||
// /sys/bus/event_source/devices/<hw block>_<instance>/type
|
||||
Event::Event(rsmi_event_type_t event, uint32_t dev_ind) :
|
||||
event_type_(event), prev_cntr_val_(0) {
|
||||
fd_ = -1;
|
||||
event_type_(event), fd_(-1), prev_cntr_val_(0) {
|
||||
rsmi_event_group_t grp = EvtGrpFromEvtID(event);
|
||||
assert(grp != RSMI_EVNT_GRP_INVALID); // This should have failed before now
|
||||
|
||||
@@ -398,10 +397,11 @@ readn(int fd, void *buf, size_t n) {
|
||||
return static_cast<ssize_t>(n - left);
|
||||
}
|
||||
if (bytes < 0) {
|
||||
if (errno == EINTR) /* read got interrupted */
|
||||
if (errno == EINTR) {
|
||||
/* read got interrupted */
|
||||
continue;
|
||||
else
|
||||
return -errno;
|
||||
}
|
||||
return -errno;
|
||||
}
|
||||
|
||||
left -= static_cast<size_t>(bytes);
|
||||
|
||||
+45
-52
@@ -43,30 +43,28 @@
|
||||
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <assert.h>
|
||||
#include <sys/stat.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <fstream>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_kfd.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
#include "shared_mutex.h" // NOLINT
|
||||
|
||||
@@ -689,7 +687,7 @@ int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr) {
|
||||
int ret;
|
||||
std::ostringstream ss;
|
||||
|
||||
fs.rdbuf()->pubsetbuf(0,0);
|
||||
fs.rdbuf()->pubsetbuf(nullptr,0);
|
||||
ret = openSysfsFileStream(type, &fs, valStr.c_str());
|
||||
if (ret != 0) {
|
||||
ss << "Could not write device info string (" << valStr
|
||||
@@ -856,7 +854,7 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type,
|
||||
retVec->push_back(line);
|
||||
}
|
||||
|
||||
if (retVec->size() == 0) {
|
||||
if (retVec->empty()) {
|
||||
ss << "Read devInfoMultiLineStr for DevInfoType ("
|
||||
<< RocmSMI::devInfoTypesStrings.at(type) << ")"
|
||||
<< ", but contained no string lines";
|
||||
@@ -864,13 +862,13 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type,
|
||||
return 0;
|
||||
}
|
||||
// Remove any *trailing* empty (whitespace) lines
|
||||
while (retVec->size() != 0 &&
|
||||
while (!retVec->empty() &&
|
||||
retVec->back().find_first_not_of(" \t\n\v\f\r") == std::string::npos) {
|
||||
retVec->pop_back();
|
||||
}
|
||||
|
||||
// allow logging output of multiline strings
|
||||
for (auto l: *retVec) {
|
||||
for (const auto& l: *retVec) {
|
||||
allLines += "\n" + l;
|
||||
}
|
||||
|
||||
@@ -905,10 +903,10 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) {
|
||||
ret = readDevInfoStr(type, &tempStr);
|
||||
RET_IF_NONZERO(ret);
|
||||
|
||||
if (tempStr == "") {
|
||||
if (tempStr.empty()) {
|
||||
return EINVAL;
|
||||
}
|
||||
tmp_val = std::stoi(tempStr, 0, 16);
|
||||
tmp_val = std::stoi(tempStr, nullptr, 16);
|
||||
if (tmp_val < 0) {
|
||||
return EINVAL;
|
||||
}
|
||||
@@ -930,10 +928,10 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) {
|
||||
case kDevXGMIError:
|
||||
ret = readDevInfoStr(type, &tempStr);
|
||||
RET_IF_NONZERO(ret);
|
||||
if (tempStr == "") {
|
||||
if (tempStr.empty()) {
|
||||
return EINVAL;
|
||||
}
|
||||
*val = std::stoul(tempStr, 0);
|
||||
*val = std::stoul(tempStr, nullptr);
|
||||
break;
|
||||
|
||||
case kDevUniqueId:
|
||||
@@ -960,10 +958,10 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) {
|
||||
case kDevFwVersionVcn:
|
||||
ret = readDevInfoStr(type, &tempStr);
|
||||
RET_IF_NONZERO(ret);
|
||||
if (tempStr == "") {
|
||||
if (tempStr.empty()) {
|
||||
return EINVAL;
|
||||
}
|
||||
*val = std::stoul(tempStr, 0, 16);
|
||||
*val = std::stoul(tempStr, nullptr, 16);
|
||||
break;
|
||||
|
||||
case kDevGpuReset:
|
||||
@@ -1100,13 +1098,9 @@ void Device::DumpSupportedFunctions(void) {
|
||||
}
|
||||
|
||||
void Device::fillSupportedFuncs(void) {
|
||||
if (supported_funcs_.size() != 0) {
|
||||
if (!supported_funcs_.empty()) {
|
||||
return;
|
||||
}
|
||||
if (monitor() == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::map<const char *, dev_depends_t>::const_iterator it =
|
||||
kDevFuncDependsMap.begin();
|
||||
std::string dev_rt = path_ + "/device";
|
||||
@@ -1140,7 +1134,7 @@ void Device::fillSupportedFuncs(void) {
|
||||
std::vector<DevInfoTypes>::const_iterator var =
|
||||
it->second.variants.begin();
|
||||
|
||||
if (it->second.variants.size() == 0) {
|
||||
if (it->second.variants.empty()) {
|
||||
supported_funcs_[it->first] = nullptr;
|
||||
it++;
|
||||
continue;
|
||||
@@ -1156,13 +1150,15 @@ void Device::fillSupportedFuncs(void) {
|
||||
(*supported_variants)[kDevInfoVarTypeToRSMIVariant.at(*var)] = nullptr;
|
||||
}
|
||||
|
||||
if ((*supported_variants).size() > 0) {
|
||||
if (!(*supported_variants).empty()) {
|
||||
supported_funcs_[it->first] = supported_variants;
|
||||
}
|
||||
|
||||
it++;
|
||||
}
|
||||
monitor()->fillSupportedFuncs(&supported_funcs_);
|
||||
if (monitor() != nullptr) {
|
||||
monitor()->fillSupportedFuncs(&supported_funcs_);
|
||||
}
|
||||
// DumpSupportedFunctions();
|
||||
}
|
||||
|
||||
@@ -1202,35 +1198,32 @@ bool Device::DeviceAPISupported(std::string name, uint64_t variant,
|
||||
|
||||
if (sub_variant == RSMI_DEFAULT_VARIANT) {
|
||||
return true;
|
||||
} else { // sub_variant != RSMI_DEFAULT_VARIANT
|
||||
// if variant is != RSMI_DEFAULT_VARIANT, we should not have a nullptr
|
||||
assert(var_it->second != nullptr);
|
||||
}
|
||||
// sub_variant != RSMI_DEFAULT_VARIANT
|
||||
// if variant is != RSMI_DEFAULT_VARIANT, we should not have a nullptr
|
||||
assert(var_it->second != nullptr);
|
||||
|
||||
return subvariant_match(&(var_it->second), sub_variant);
|
||||
}
|
||||
} else { // variant == RSMI_DEFAULT_VARIANT
|
||||
if (func_it->second != nullptr) {
|
||||
var_it = func_it->second->find(variant);
|
||||
}
|
||||
if (sub_variant == RSMI_DEFAULT_VARIANT) {
|
||||
return true;
|
||||
} else { // sub_variant != RSMI_DEFAULT_VARIANT
|
||||
if (func_it->second == nullptr) {
|
||||
return false;
|
||||
}
|
||||
return subvariant_match(&(var_it->second), sub_variant);
|
||||
}
|
||||
return subvariant_match(&(var_it->second), sub_variant);
|
||||
}
|
||||
assert(false); // We should not reach here
|
||||
|
||||
return false;
|
||||
// variant == RSMI_DEFAULT_VARIANT
|
||||
if (func_it->second != nullptr) {
|
||||
var_it = func_it->second->find(variant);
|
||||
}
|
||||
if (sub_variant == RSMI_DEFAULT_VARIANT) {
|
||||
return true;
|
||||
}
|
||||
// sub_variant != RSMI_DEFAULT_VARIANT
|
||||
if (func_it->second == nullptr) {
|
||||
return false;
|
||||
}
|
||||
return subvariant_match(&(var_it->second), sub_variant);
|
||||
}
|
||||
|
||||
rsmi_status_t Device::restartAMDGpuDriver(void) {
|
||||
REQUIRE_ROOT_ACCESS
|
||||
bool restartSuccessful = true;
|
||||
bool success = false;
|
||||
std::string out = "";
|
||||
std::string out;
|
||||
bool wasGdmServiceActive = false;
|
||||
|
||||
// sudo systemctl is-active gdm
|
||||
|
||||
+14
-16
@@ -41,23 +41,22 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <regex> // NOLINT
|
||||
#include <vector>
|
||||
#include <pthread.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <regex> // NOLINT
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "rocm_smi/rocm_smi_common.h" // Should go before rocm_smi.h
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
#include "rocm_smi/rocm_smi_monitor.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
@@ -150,7 +149,7 @@ void log_gpu_metrics(const metrics_table_header_t *gpu_metrics_table_header,
|
||||
const rsmi_gpu_metrics_v_1_2 *rsmi_gpu_metrics_v_1_2,
|
||||
const rsmi_gpu_metrics_v_1_3 *gpu_metrics_v_1_3,
|
||||
const rsmi_gpu_metrics_t *rsmi_gpu_metrics) {
|
||||
if (RocmSMI::getInstance().isLoggingOn() == false) {
|
||||
if (!RocmSMI::getInstance().isLoggingOn()) {
|
||||
return;
|
||||
}
|
||||
std::ostringstream ss;
|
||||
@@ -170,9 +169,8 @@ void log_gpu_metrics(const metrics_table_header_t *gpu_metrics_table_header,
|
||||
}
|
||||
if (rsmi_gpu_metrics == nullptr) {
|
||||
return;
|
||||
} else {
|
||||
// do nothing - continue
|
||||
}
|
||||
|
||||
ss
|
||||
/* Common Header */
|
||||
<< print_unsigned_hex_and_int(
|
||||
@@ -365,7 +363,7 @@ static rsmi_status_t GetGPUMetricsFormat1(uint32_t dv_ind,
|
||||
}
|
||||
|
||||
#define ASSIGN_DATA_FIELD(FIELD, SRC) \
|
||||
data->FIELD = SRC->FIELD;
|
||||
data->FIELD = (SRC)->FIELD;
|
||||
|
||||
#define ASSIGN_COMMON_FORMATS(SRC) \
|
||||
ASSIGN_DATA_FIELD(common_header, (SRC)) \
|
||||
|
||||
+19
-21
@@ -41,20 +41,19 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <sys/stat.h>
|
||||
#include <dirent.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <fstream>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_io_link.h"
|
||||
|
||||
@@ -161,7 +160,7 @@ static int ReadLinkProperties(uint32_t node_indx, uint32_t link_indx,
|
||||
retVec->push_back(line);
|
||||
}
|
||||
|
||||
if (retVec->size() == 0) {
|
||||
if (retVec->empty()) {
|
||||
fs.close();
|
||||
return 0;
|
||||
}
|
||||
@@ -182,7 +181,7 @@ static int DiscoverLinks(std::map<std::pair<uint32_t, uint32_t>,
|
||||
if (links == nullptr) {
|
||||
return EINVAL;
|
||||
}
|
||||
assert(links->size() == 0);
|
||||
assert(links->empty());
|
||||
|
||||
links->clear();
|
||||
|
||||
@@ -229,8 +228,8 @@ static int DiscoverLinks(std::map<std::pair<uint32_t, uint32_t>,
|
||||
}
|
||||
|
||||
link_indx = static_cast<uint32_t>(std::stoi(dentry_io_link->d_name));
|
||||
link = std::shared_ptr<IOLink>(new IOLink(node_indx, link_indx,
|
||||
directory));
|
||||
link = std::make_shared<IOLink>(node_indx, link_indx,
|
||||
directory);
|
||||
|
||||
link->Initialize();
|
||||
|
||||
@@ -273,7 +272,7 @@ static int DiscoverLinksPerNode(uint32_t node_indx, std::map<uint32_t,
|
||||
if (links == nullptr) {
|
||||
return EINVAL;
|
||||
}
|
||||
assert(links->size() == 0);
|
||||
assert(links->empty());
|
||||
|
||||
links->clear();
|
||||
|
||||
@@ -297,8 +296,8 @@ static int DiscoverLinksPerNode(uint32_t node_indx, std::map<uint32_t,
|
||||
}
|
||||
|
||||
link_indx = static_cast<uint32_t>(std::stoi(dentry->d_name));
|
||||
link = std::shared_ptr<IOLink>(new IOLink(node_indx, link_indx,
|
||||
directory));
|
||||
link = std::make_shared<IOLink>(node_indx, link_indx,
|
||||
directory);
|
||||
|
||||
link->Initialize();
|
||||
|
||||
@@ -323,16 +322,15 @@ int DiscoverP2PLinksPerNode(uint32_t node_indx, std::map<uint32_t,
|
||||
return DiscoverLinksPerNode(node_indx, links, P2P_LINK_DIRECTORY);
|
||||
}
|
||||
|
||||
IOLink::~IOLink() {
|
||||
}
|
||||
IOLink::~IOLink() = default;
|
||||
|
||||
int IOLink::ReadProperties(void) {
|
||||
int ret;
|
||||
|
||||
std::vector<std::string> propVec;
|
||||
|
||||
assert(properties_.size() == 0);
|
||||
if (properties_.size() > 0) {
|
||||
assert(properties_.empty());
|
||||
if (!properties_.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -347,8 +345,8 @@ int IOLink::ReadProperties(void) {
|
||||
uint64_t val_int; // Assume all properties are unsigned integers for now
|
||||
std::istringstream fs;
|
||||
|
||||
for (uint32_t i = 0; i < propVec.size(); ++i) {
|
||||
fs.str(propVec[i]);
|
||||
for (const auto & i : propVec) {
|
||||
fs.str(i);
|
||||
fs >> key_str;
|
||||
fs >> val_int;
|
||||
|
||||
|
||||
+107
-21
@@ -41,28 +41,29 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <dirent.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <fstream>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "rocm_smi/rocm_smi_io_link.h"
|
||||
#include "rocm_smi/rocm_smi_kfd.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
@@ -195,7 +196,7 @@ int ReadKFDDeviceProperties(uint32_t kfd_node_id,
|
||||
retVec->push_back(line);
|
||||
}
|
||||
|
||||
if (retVec->size() == 0) {
|
||||
if (retVec->empty()) {
|
||||
fs.close();
|
||||
return ENOENT;
|
||||
}
|
||||
@@ -517,7 +518,7 @@ int DiscoverKFDNodes(std::map<uint64_t, std::shared_ptr<KFDNode>> *nodes) {
|
||||
if (nodes == nullptr) {
|
||||
return EINVAL;
|
||||
}
|
||||
assert(nodes->size() == 0);
|
||||
assert(nodes->empty());
|
||||
|
||||
nodes->clear();
|
||||
|
||||
@@ -548,7 +549,7 @@ int DiscoverKFDNodes(std::map<uint64_t, std::shared_ptr<KFDNode>> *nodes) {
|
||||
continue;
|
||||
}
|
||||
|
||||
node = std::shared_ptr<KFDNode>(new KFDNode(node_indx));
|
||||
node = std::make_shared<KFDNode>(node_indx);
|
||||
|
||||
node->Initialize();
|
||||
|
||||
@@ -596,16 +597,15 @@ int DiscoverKFDNodes(std::map<uint64_t, std::shared_ptr<KFDNode>> *nodes) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
KFDNode::~KFDNode() {
|
||||
}
|
||||
KFDNode::~KFDNode() = default;
|
||||
|
||||
int KFDNode::ReadProperties(void) {
|
||||
int ret;
|
||||
|
||||
std::vector<std::string> propVec;
|
||||
|
||||
assert(properties_.size() == 0);
|
||||
if (properties_.size() > 0) {
|
||||
assert(properties_.empty());
|
||||
if (!properties_.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -620,8 +620,8 @@ int KFDNode::ReadProperties(void) {
|
||||
uint64_t val_int; // Assume all properties are unsigned integers for now
|
||||
std::istringstream fs;
|
||||
|
||||
for (uint32_t i = 0; i < propVec.size(); ++i) {
|
||||
fs.str(propVec[i]);
|
||||
for (const auto & i : propVec) {
|
||||
fs.str(i);
|
||||
fs >> key_str;
|
||||
fs >> val_int;
|
||||
|
||||
@@ -776,20 +776,30 @@ KFDNode::get_io_link_bandwidth(uint32_t node_to, uint64_t *max_bandwidth,
|
||||
// /sys/class/kfd/kfd/topology/nodes/*/mem_banks/*/properties
|
||||
// size_in_bytes 68702699520
|
||||
int KFDNode::get_total_memory(uint64_t* total) {
|
||||
if (total == nullptr) return EINVAL;
|
||||
std::ostringstream ss;
|
||||
if (total == nullptr) {
|
||||
return EINVAL;
|
||||
}
|
||||
*total = 0;
|
||||
|
||||
std::string f_path = kKFDNodesPathRoot;
|
||||
f_path += "/";
|
||||
f_path += std::to_string(node_indx_);
|
||||
f_path += "/mem_banks";
|
||||
int subDirCount = subDirectoryCountInPath(f_path);
|
||||
ss << __PRETTY_FUNCTION__ << " | [before loop] Within " << f_path
|
||||
<< " has subdirectory count = " << std::to_string(subDirCount);
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
auto kfd_node_dir = opendir(f_path.c_str());
|
||||
if (kfd_node_dir == nullptr) {
|
||||
return errno;
|
||||
}
|
||||
auto dentry = readdir(kfd_node_dir);
|
||||
while (dentry != nullptr) {
|
||||
while (dentry != nullptr && subDirCount > 0) {
|
||||
ss << __PRETTY_FUNCTION__ << " | [inside loop] Within " << f_path
|
||||
<< " has subdirectory count = " << std::to_string(subDirCount);
|
||||
LOG_DEBUG(ss);
|
||||
if (dentry->d_name[0] == '.') {
|
||||
dentry = readdir(kfd_node_dir);
|
||||
continue;
|
||||
@@ -823,6 +833,7 @@ int KFDNode::get_total_memory(uint64_t* total) {
|
||||
}
|
||||
}
|
||||
} // end loop for lines in property file
|
||||
subDirCount--;
|
||||
} // end loop for mem_bank directory
|
||||
|
||||
if (closedir(kfd_node_dir)) {
|
||||
@@ -863,5 +874,80 @@ int KFDNode::get_used_memory(uint64_t* used) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// /sys/class/kfd/kfd/topology/nodes/*/properties
|
||||
int read_node_properties(uint32_t node, std::string property_name,
|
||||
uint64_t *val) {
|
||||
std::ostringstream ss;
|
||||
int retVal = EINVAL;
|
||||
if (property_name.empty() || val == nullptr) {
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | Issue: Could not read node #" << std::to_string(node)
|
||||
<< ", property_name is empty or *val is nullptr "
|
||||
<< " | return = " << std::to_string(retVal)
|
||||
<< " | ";
|
||||
LOG_DEBUG(ss);
|
||||
return retVal;
|
||||
}
|
||||
std::shared_ptr<KFDNode> myNode = std::shared_ptr<KFDNode>(new KFDNode(node));
|
||||
myNode->Initialize();
|
||||
if (KFDNodeSupported(node)) {
|
||||
retVal = myNode->get_property_value(property_name, val);
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | Successfully read node #" << std::to_string(node)
|
||||
<< " for property_name = " << property_name
|
||||
<< " | Data (" << property_name << ") * val = "
|
||||
<< std::to_string(*val)
|
||||
<< " | return = " << std::to_string(retVal)
|
||||
<< " | ";
|
||||
LOG_DEBUG(ss);
|
||||
} else {
|
||||
retVal = 1;
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | Issue: Could not read node #" << std::to_string(node)
|
||||
<< ", KFD node was an unsupported node."
|
||||
<< " | return = " << std::to_string(retVal)
|
||||
<< " | ";
|
||||
LOG_ERROR(ss);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
// /sys/class/kfd/kfd/topology/nodes/*/gpu_id
|
||||
int get_gpu_id(uint32_t node, uint64_t *gpu_id) {
|
||||
std::ostringstream ss;
|
||||
int retVal = EINVAL;
|
||||
if (gpu_id == nullptr) {
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | Issue: Could not read node #" << std::to_string(node)
|
||||
<< ", gpu_id is a nullptr "
|
||||
<< " | return = " << std::to_string(retVal)
|
||||
<< " | ";
|
||||
LOG_DEBUG(ss);
|
||||
return retVal;
|
||||
}
|
||||
std::shared_ptr<KFDNode> myNode = std::shared_ptr<KFDNode>(new KFDNode(node));
|
||||
myNode->Initialize();
|
||||
if (KFDNodeSupported(node)) {
|
||||
retVal = ReadKFDGpuId(node, gpu_id);
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | Successfully read node #" << std::to_string(node)
|
||||
<< " for gpu_id"
|
||||
<< " | Data (gpu_id) *gpu_id = "
|
||||
<< std::to_string(*gpu_id)
|
||||
<< " | return = " << std::to_string(retVal)
|
||||
<< " | ";
|
||||
LOG_DEBUG(ss);
|
||||
} else {
|
||||
retVal = 1;
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | Issue: Could not read node #" << std::to_string(node)
|
||||
<< ", KFD node was an unsupported node."
|
||||
<< " | return = " << std::to_string(retVal)
|
||||
<< " | ";
|
||||
LOG_ERROR(ss);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
} // namespace smi
|
||||
} // namespace amd
|
||||
|
||||
+11
-12
@@ -55,7 +55,7 @@
|
||||
* be printed, unless RSMI_LOGGING is enabled.
|
||||
*
|
||||
* BUFFER log type should be used while logging raw buffer or raw messages
|
||||
* Having direct interface as well as C++ Singleton inface. Can use
|
||||
* Having direct interface as well as C++ Singleton iface. Can use
|
||||
* whatever interface fits your needs.
|
||||
*/
|
||||
|
||||
@@ -70,7 +70,6 @@
|
||||
// Code Specific Header Files(s)
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
|
||||
@@ -117,7 +116,7 @@ void Logger::logIntoFile(std::string& data) {
|
||||
if(!m_File.is_open()) {
|
||||
initialize_resources();
|
||||
if (!m_File.is_open()) {
|
||||
std::cout << "WARNING: re-initializing resources was unsuccessfull."
|
||||
std::cout << "WARNING: re-initializing resources was unsuccessful."
|
||||
<<" Unable to print the following message." << std::endl;
|
||||
logOnConsole(data);
|
||||
unlock();
|
||||
@@ -164,7 +163,7 @@ void Logger::error(const char* text) throw() {
|
||||
// By default, logging is disabled
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
if (m_loggingIsOn == false) {
|
||||
if (!m_loggingIsOn) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -198,7 +197,7 @@ void Logger::alarm(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
if (m_loggingIsOn == false) {
|
||||
if (!m_loggingIsOn) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -232,7 +231,7 @@ void Logger::always(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
if (m_loggingIsOn == false) {
|
||||
if (!m_loggingIsOn) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -270,7 +269,7 @@ void Logger::buffer(const char* text) throw() {
|
||||
if(!m_File.is_open()) {
|
||||
initialize_resources();
|
||||
if (!m_File.is_open()) {
|
||||
std::cout << "WARNING: re-initializing resources was unsuccessfull."
|
||||
std::cout << "WARNING: re-initializing resources was unsuccessful."
|
||||
<<" Unable to print the following message." << std::endl;
|
||||
std::string txtStr(text);
|
||||
std::cout << txtStr << std::endl;
|
||||
@@ -300,7 +299,7 @@ void Logger::info(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
if (m_loggingIsOn == false) {
|
||||
if (!m_loggingIsOn) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -334,7 +333,7 @@ void Logger::trace(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
if (m_loggingIsOn == false) {
|
||||
if (!m_loggingIsOn) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -368,7 +367,7 @@ void Logger::debug(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
if (m_loggingIsOn == false) {
|
||||
if (!m_loggingIsOn) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -426,7 +425,7 @@ void Logger::enableFileLogging() {
|
||||
|
||||
// Returns a string of details on current log settings
|
||||
std::string Logger::getLogSettings() {
|
||||
std::string logSettings = "";
|
||||
std::string logSettings;
|
||||
|
||||
if (m_File.is_open()) {
|
||||
logSettings += "OpenStatus = File (" + logFileName + ") is open";
|
||||
@@ -490,7 +489,7 @@ void Logger::initialize_resources() {
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
m_loggingIsOn = amd::smi::RocmSMI::getInstance().isLoggingOn();
|
||||
if (m_loggingIsOn == false) {
|
||||
if (!m_loggingIsOn) {
|
||||
return;
|
||||
}
|
||||
m_File.open(logFileName.c_str(), std::ios::out | std::ios::app);
|
||||
|
||||
+183
-72
@@ -39,25 +39,26 @@
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <dirent.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <functional>
|
||||
#include <dirent.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cerrno>
|
||||
#include <unordered_map>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
@@ -284,7 +285,8 @@ static uint32_t ConstructBDFID(std::string path, uint64_t *bdfid) {
|
||||
|
||||
// We are looking for the last element in the path that has the form
|
||||
// XXXX:XX:XX.X, where X is a hex integer (lower case is expected)
|
||||
std::size_t slash_i, end_i;
|
||||
std::size_t slash_i;
|
||||
std::size_t end_i;
|
||||
std::string tmp;
|
||||
|
||||
std::string tpath_str(tpath);
|
||||
@@ -331,9 +333,9 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
|
||||
GetEnvVariables();
|
||||
// To help debug env variable issues
|
||||
// printEnvVarInfo();
|
||||
// debugRSMIEnvVarInfo();
|
||||
|
||||
while (std::string(kAMDMonitorTypes[i]) != "") {
|
||||
while (!std::string(kAMDMonitorTypes[i]).empty()) {
|
||||
amd_monitor_types_.insert(kAMDMonitorTypes[i]);
|
||||
++i;
|
||||
}
|
||||
@@ -347,12 +349,12 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
}
|
||||
|
||||
uint64_t bdfid;
|
||||
for (uint32_t i = 0; i < devices_.size(); ++i) {
|
||||
if (ConstructBDFID(devices_[i]->path(), &bdfid) != 0) {
|
||||
for (auto & device : devices_) {
|
||||
if (ConstructBDFID(device->path(), &bdfid) != 0) {
|
||||
std::cerr << "Failed to construct BDFID." << std::endl;
|
||||
ret = 1;
|
||||
} else {
|
||||
devices_[i]->set_bdfid(bdfid);
|
||||
device->set_bdfid(bdfid);
|
||||
}
|
||||
}
|
||||
if (ret != 0) {
|
||||
@@ -388,7 +390,7 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
uint64_t bdfid = (*dev_iter)->bdfid();
|
||||
if (tmp_map.find(bdfid) == tmp_map.end()) {
|
||||
ss << __PRETTY_FUNCTION__ << " | removing device = "
|
||||
<< (*dev_iter)->path();
|
||||
<< (*dev_iter)->path() << "; bdfid = " << std::to_string(bdfid);
|
||||
dev_iter = devices_.erase(dev_iter);
|
||||
LOG_DEBUG(ss);
|
||||
continue;
|
||||
@@ -443,8 +445,7 @@ RocmSMI::RocmSMI(uint64_t flags) : init_options_(flags),
|
||||
kfd_notif_evt_fh_(-1), kfd_notif_evt_fh_refcnt_(0) {
|
||||
}
|
||||
|
||||
RocmSMI::~RocmSMI() {
|
||||
}
|
||||
RocmSMI::~RocmSMI() = default;
|
||||
|
||||
RocmSMI& RocmSMI::getInstance(uint64_t flags) {
|
||||
// Assume c++11 or greater. static objects will be created by only 1 thread
|
||||
@@ -493,7 +494,7 @@ static inline std::unordered_set<uint32_t> GetEnvVarUIntegerSets(
|
||||
if(ev_str == nullptr) { return returnSet; }
|
||||
std::string stringEnv = ev_str;
|
||||
|
||||
if (stringEnv.empty() == false) {
|
||||
if (!stringEnv.empty()) {
|
||||
// parse out values by commas
|
||||
std::string parsedVal;
|
||||
std::istringstream ev_str_ss(stringEnv);
|
||||
@@ -548,48 +549,54 @@ uint32_t RocmSMI::getLogSetting() {
|
||||
return this->env_vars_.logging_on;
|
||||
}
|
||||
|
||||
void RocmSMI::printEnvVarInfo(void) {
|
||||
std::cout << __PRETTY_FUNCTION__ << " | env_vars_.debug_output_bitfield = "
|
||||
<< ((env_vars_.debug_output_bitfield == 0) ? "<undefined>"
|
||||
: std::to_string(env_vars_.debug_output_bitfield))
|
||||
<< std::endl;
|
||||
std::cout << __PRETTY_FUNCTION__ << " | env_vars_.path_DRM_root_override = "
|
||||
<< ((env_vars_.path_DRM_root_override == nullptr)
|
||||
? "<undefined>" : env_vars_.path_DRM_root_override)
|
||||
<< std::endl;
|
||||
std::cout << __PRETTY_FUNCTION__ << " | env_vars_.path_HWMon_root_override = "
|
||||
<< ((env_vars_.path_HWMon_root_override == nullptr)
|
||||
? "<undefined>" : env_vars_.path_HWMon_root_override)
|
||||
<< std::endl;
|
||||
std::cout << __PRETTY_FUNCTION__ << " | env_vars_.path_power_root_override = "
|
||||
<< ((env_vars_.path_power_root_override == nullptr)
|
||||
? "<undefined>" : env_vars_.path_power_root_override)
|
||||
<< std::endl;
|
||||
std::cout << __PRETTY_FUNCTION__ << " | env_vars_.debug_inf_loop = "
|
||||
<< ((env_vars_.debug_inf_loop == 0) ? "<undefined>"
|
||||
: std::to_string(env_vars_.debug_inf_loop))
|
||||
<< std::endl;
|
||||
std::cout << __PRETTY_FUNCTION__ << " | env_vars_.logging_on = "
|
||||
void RocmSMI::debugRSMIEnvVarInfo(void) {
|
||||
std::cout << __PRETTY_FUNCTION__
|
||||
<< RocmSMI::getInstance().getRSMIEnvVarInfo();
|
||||
}
|
||||
|
||||
std::string RocmSMI::getRSMIEnvVarInfo(void) {
|
||||
std::ostringstream ss;
|
||||
ss << "\n\tRSMI_DEBUG_BITFIELD = "
|
||||
<< ((env_vars_.debug_output_bitfield == 0) ? "<undefined>"
|
||||
: std::to_string(env_vars_.debug_output_bitfield))
|
||||
<< std::endl;
|
||||
ss << "\tRSMI_DEBUG_DRM_ROOT_OVERRIDE = "
|
||||
<< ((env_vars_.path_DRM_root_override == nullptr)
|
||||
? "<undefined>" : env_vars_.path_DRM_root_override)
|
||||
<< std::endl;
|
||||
ss << "\tRSMI_DEBUG_HWMON_ROOT_OVERRIDE = "
|
||||
<< ((env_vars_.path_HWMon_root_override == nullptr)
|
||||
? "<undefined>" : env_vars_.path_HWMon_root_override)
|
||||
<< std::endl;
|
||||
ss << "\tRSMI_DEBUG_PP_ROOT_OVERRIDE = "
|
||||
<< ((env_vars_.path_power_root_override == nullptr)
|
||||
? "<undefined>" : env_vars_.path_power_root_override)
|
||||
<< std::endl;
|
||||
ss << "\tRSMI_DEBUG_INFINITE_LOOP = "
|
||||
<< ((env_vars_.debug_inf_loop == 0) ? "<undefined>"
|
||||
: std::to_string(env_vars_.debug_inf_loop))
|
||||
<< std::endl;
|
||||
ss << "\tRSMI_LOGGING = "
|
||||
<< getLogSetting() << std::endl;
|
||||
bool isLoggingOn = RocmSMI::isLoggingOn() ? true : false;
|
||||
std::cout << __PRETTY_FUNCTION__ << " | env_vars_.logging_on = "
|
||||
<< (isLoggingOn ? "true" : "false") << std::endl;
|
||||
std::cout << __PRETTY_FUNCTION__ << " | env_vars_.enum_overrides = {";
|
||||
ss << "\tRSMI_LOGGING (are logs on) = "
|
||||
<< (isLoggingOn ? "TRUE" : "FALSE") << std::endl;
|
||||
ss << "\tRSMI_DEBUG_ENUM_OVERRIDE = {";
|
||||
if (env_vars_.enum_overrides.empty()) {
|
||||
std::cout << "}" << std::endl;
|
||||
return;
|
||||
ss << "}" << std::endl;
|
||||
return ss.str();
|
||||
}
|
||||
for (auto it=env_vars_.enum_overrides.begin();
|
||||
it != env_vars_.enum_overrides.end(); ++it) {
|
||||
DevInfoTypes type = static_cast<DevInfoTypes>(*it);
|
||||
std::cout << (std::to_string(*it) + " (" + devInfoTypesStrings.at(type)
|
||||
+ ")");
|
||||
ss << (std::to_string(*it) + " (" + devInfoTypesStrings.at(type) + ")");
|
||||
auto temp_it = it;
|
||||
if(++temp_it != env_vars_.enum_overrides.end()) {
|
||||
std::cout << ", ";
|
||||
ss << ", ";
|
||||
}
|
||||
}
|
||||
std::cout << "}" << std::endl;
|
||||
ss << "}" << std::endl;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::shared_ptr<Monitor>
|
||||
@@ -637,7 +644,7 @@ RocmSMI::FindMonitor(std::string monitor_path) {
|
||||
fs.close();
|
||||
|
||||
if (amd_monitor_types_.find(mon_type) != amd_monitor_types_.end()) {
|
||||
m = std::shared_ptr<Monitor>(new Monitor(mon_name, &env_vars_));
|
||||
m = std::make_shared<Monitor>(mon_name, &env_vars_);
|
||||
m->setTempSensorLabelMap();
|
||||
m->setVoltSensorLabelMap();
|
||||
break;
|
||||
@@ -665,12 +672,12 @@ RocmSMI::AddToDeviceList(std::string dev_name) {
|
||||
dev_path += "/";
|
||||
dev_path += dev_name;
|
||||
|
||||
auto dev = std::shared_ptr<Device>(new Device(dev_path, &env_vars_));
|
||||
auto dev = std::make_shared<Device>(dev_path, &env_vars_);
|
||||
|
||||
std::shared_ptr<Monitor> m = FindMonitor(dev_path + "/device/hwmon");
|
||||
dev->set_monitor(m);
|
||||
|
||||
std::string d_name = dev_name;
|
||||
const std::string& d_name = dev_name;
|
||||
uint32_t card_indx = GetDeviceIndex(d_name);
|
||||
dev->set_drm_render_minor(GetDrmRenderMinor(dev_path));
|
||||
dev->set_card_index(card_indx);
|
||||
@@ -681,8 +688,6 @@ RocmSMI::AddToDeviceList(std::string dev_name) {
|
||||
<< dev_name << " | path = " << dev_path
|
||||
<< " | card index = " << std::to_string(card_indx) << " | ";
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static const uint32_t kAmdGpuId = 0x1002;
|
||||
@@ -693,8 +698,7 @@ static bool isAMDGPU(std::string dev_path) {
|
||||
std::string vend_path = dev_path + "/device/vendor";
|
||||
if (!FileExists(vend_path.c_str())) {
|
||||
ss << __PRETTY_FUNCTION__ << " | device_path = " << dev_path
|
||||
<< " is " << (isAmdGpu ? "is an amdgpu device - TRUE":
|
||||
"is an amdgpu device - FALSE");
|
||||
<< " is an amdgpu device - " << (isAmdGpu ? "TRUE": " FALSE");
|
||||
LOG_DEBUG(ss);
|
||||
return isAmdGpu;
|
||||
}
|
||||
@@ -704,8 +708,7 @@ static bool isAMDGPU(std::string dev_path) {
|
||||
|
||||
if (!fs.is_open()) {
|
||||
ss << __PRETTY_FUNCTION__ << " | device_path = " << dev_path
|
||||
<< " is " << (isAmdGpu ? "is an amdgpu device - TRUE":
|
||||
"is an amdgpu device - FALSE");
|
||||
<< " is an amdgpu device - " << (isAmdGpu ? "TRUE": " FALSE");
|
||||
LOG_DEBUG(ss);
|
||||
return isAmdGpu;
|
||||
}
|
||||
@@ -720,8 +723,7 @@ static bool isAMDGPU(std::string dev_path) {
|
||||
isAmdGpu = true;
|
||||
}
|
||||
ss << __PRETTY_FUNCTION__ << " | device_path = " << dev_path
|
||||
<< " is " << (isAmdGpu ? "is an amdgpu device - TRUE":
|
||||
"is an amdgpu device - FALSE");
|
||||
<< " is an amdgpu device - " << (isAmdGpu ? "TRUE": " FALSE");
|
||||
LOG_DEBUG(ss);
|
||||
return isAmdGpu;
|
||||
}
|
||||
@@ -729,6 +731,7 @@ static bool isAMDGPU(std::string dev_path) {
|
||||
uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
std::string err_msg;
|
||||
uint32_t count = 0;
|
||||
std::ostringstream ss;
|
||||
|
||||
// If this gets called more than once, clear previous findings.
|
||||
devices_.clear();
|
||||
@@ -755,17 +758,125 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
}
|
||||
dentry = readdir(drm_dir);
|
||||
}
|
||||
ss << __PRETTY_FUNCTION__ << " | Discovered a potential of "
|
||||
<< std::to_string(count) << " cards" << " | ";
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
struct systemNode {
|
||||
uint32_t s_node_id = 0;
|
||||
uint64_t s_gpu_id = 0;
|
||||
uint64_t s_unique_id = 0;
|
||||
};
|
||||
// allSystemNodes[key = unique_id] => {node_id, gpu_id, unique_id}
|
||||
std::multimap<uint64_t, systemNode> allSystemNodes;
|
||||
uint32_t node_id = 0;
|
||||
while (true) {
|
||||
uint64_t gpu_id = 0, unique_id = 0;
|
||||
int ret_gpu_id = get_gpu_id(node_id, &gpu_id);
|
||||
int ret_unique_id = read_node_properties(node_id, "unique_id", &unique_id);
|
||||
if (ret_gpu_id == 0 || ret_unique_id == 0) {
|
||||
systemNode myNode;
|
||||
myNode.s_node_id = node_id;
|
||||
myNode.s_gpu_id = gpu_id;
|
||||
myNode.s_unique_id = unique_id;
|
||||
if(gpu_id != 0) { // only add gpu nodes, 0 = CPU
|
||||
allSystemNodes.emplace(unique_id, myNode);
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
node_id++;
|
||||
}
|
||||
|
||||
ss << __PRETTY_FUNCTION__ << " | Ordered system nodes found = {";
|
||||
for(auto i: allSystemNodes) {
|
||||
ss << "\n[node_id = " << std::to_string(i.second.s_node_id)
|
||||
<< "; gpu_id = " << std::to_string(i.second.s_gpu_id)
|
||||
<< "; unique_id = " << std::to_string(i.second.s_unique_id)
|
||||
<< "], "
|
||||
;
|
||||
}
|
||||
ss << "}";
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
// Discover all root cards & gpu partitions associated with each
|
||||
for (uint32_t node_id = 0; node_id < count; node_id++) {
|
||||
std::string path = kPathDRMRoot;
|
||||
path += "/card";
|
||||
path += std::to_string(node_id);
|
||||
uint64_t primary_unique_id = 0;
|
||||
|
||||
// each identified gpu card node is a primary node for
|
||||
// potential matching unique ids
|
||||
if (isAMDGPU(path) ||
|
||||
(init_options_ & RSMI_INIT_FLAG_ALL_GPUS)) {
|
||||
std::string d_name = "card";
|
||||
d_name += std::to_string(node_id);
|
||||
AddToDeviceList(d_name);
|
||||
}
|
||||
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | Ordered system nodes seen in lookup = {";
|
||||
for (auto i : allSystemNodes) {
|
||||
ss << "\n[node_id = " << std::to_string(i.second.s_node_id)
|
||||
<< "; gpu_id = " << std::to_string(i.second.s_gpu_id)
|
||||
<< "; unique_id = " << std::to_string(i.second.s_unique_id)
|
||||
<< "], ";
|
||||
}
|
||||
ss << "}";
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
uint64_t temp_primary_unique_id = 0;
|
||||
if (allSystemNodes.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// get lowest key 1st to keep order of nodes matching card
|
||||
uint32_t lowest_NodeId = 0;
|
||||
uint32_t curr_NodeId = 0;
|
||||
|
||||
for (auto it = allSystemNodes.begin(), end = allSystemNodes.end();
|
||||
it != end; it = allSystemNodes.upper_bound(it->first)) {
|
||||
curr_NodeId = it->second.s_node_id;
|
||||
if (it == allSystemNodes.begin()) {
|
||||
lowest_NodeId = it->second.s_node_id;
|
||||
}
|
||||
if (curr_NodeId <= lowest_NodeId) {
|
||||
lowest_NodeId = curr_NodeId;
|
||||
temp_primary_unique_id = it->second.s_unique_id;
|
||||
}
|
||||
}
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | lowest_NodeId = " << std::to_string(lowest_NodeId)
|
||||
<< " | curr_NodeId = " << std::to_string(curr_NodeId)
|
||||
<< " | temp_primary_unique_id = "
|
||||
<< std::to_string(temp_primary_unique_id);
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
if (temp_primary_unique_id != 0) {
|
||||
primary_unique_id = temp_primary_unique_id;
|
||||
} else {
|
||||
allSystemNodes.erase(primary_unique_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto numb_nodes = allSystemNodes.count(primary_unique_id);
|
||||
ss << __PRETTY_FUNCTION__ << " | REFRESH - primary_unique_id = "
|
||||
<< std::to_string(primary_unique_id) << " has "
|
||||
<< std::to_string(numb_nodes) << " known gpu nodes";
|
||||
LOG_DEBUG(ss);
|
||||
while (numb_nodes > 1) {
|
||||
std::string secNode = "card";
|
||||
secNode += std::to_string(node_id); // add the primary node id
|
||||
AddToDeviceList(secNode);
|
||||
numb_nodes--;
|
||||
}
|
||||
// remove already added nodes associated with current card
|
||||
auto erasedNodes = allSystemNodes.erase(primary_unique_id);
|
||||
ss << __PRETTY_FUNCTION__ << " | After finding primary_unique_id = "
|
||||
<< std::to_string(primary_unique_id) << " erased "
|
||||
<< std::to_string(erasedNodes) << " nodes";
|
||||
LOG_DEBUG(ss);
|
||||
}
|
||||
}
|
||||
|
||||
if (closedir(drm_dir)) {
|
||||
@@ -789,7 +900,7 @@ int RocmSMI::DiscoverAMDPowerMonitors(bool force_update) {
|
||||
power_mons_.clear();
|
||||
}
|
||||
|
||||
if (power_mons_.size() != 0) {
|
||||
if (!power_mons_.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -817,7 +928,7 @@ int RocmSMI::DiscoverAMDPowerMonitors(bool force_update) {
|
||||
|
||||
if (FileExists(tmp.c_str())) {
|
||||
std::shared_ptr<PowerMon> mon =
|
||||
std::shared_ptr<PowerMon>(new PowerMon(mon_name, &env_vars_));
|
||||
std::make_shared<PowerMon>(mon_name, &env_vars_);
|
||||
power_mons_.push_back(mon);
|
||||
mon->set_dev_index(GetDeviceIndex(dentry->d_name));
|
||||
}
|
||||
@@ -830,8 +941,8 @@ int RocmSMI::DiscoverAMDPowerMonitors(bool force_update) {
|
||||
return errno;
|
||||
}
|
||||
|
||||
for (auto m : power_mons_) {
|
||||
for (auto d : devices_) {
|
||||
for (const auto& m : power_mons_) {
|
||||
for (const auto& d : devices_) {
|
||||
if (m->dev_index() == d->index()) {
|
||||
d->set_power_monitor(m);
|
||||
break;
|
||||
|
||||
+23
-25
@@ -41,19 +41,18 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <regex> // NOLINT
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
#include "rocm_smi/rocm_smi_monitor.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
@@ -286,8 +285,7 @@ static const std::map<const char *, monitor_depends_t> kMonFuncDependsMap = {
|
||||
env_ = nullptr;
|
||||
#endif
|
||||
}
|
||||
Monitor::~Monitor(void) {
|
||||
}
|
||||
Monitor::~Monitor(void) = default;
|
||||
|
||||
std::string
|
||||
Monitor::MakeMonitorPath(MonitorTypes type, uint32_t sensor_id) {
|
||||
@@ -339,7 +337,7 @@ Monitor::setTempSensorLabelMap(void) {
|
||||
std::string type_str;
|
||||
int ret;
|
||||
|
||||
if (temp_type_index_map_.size() > 0) {
|
||||
if (!temp_type_index_map_.empty()) {
|
||||
return 0; // We've already filled in the map
|
||||
}
|
||||
auto add_temp_sensor_entry = [&](uint32_t file_index) {
|
||||
@@ -377,7 +375,7 @@ Monitor::setVoltSensorLabelMap(void) {
|
||||
std::string type_str;
|
||||
int ret;
|
||||
|
||||
if (volt_type_index_map_.size() > 0) {
|
||||
if (!volt_type_index_map_.empty()) {
|
||||
return 0; // We've already filled in the map
|
||||
}
|
||||
auto add_volt_sensor_entry = [&](uint32_t file_index) {
|
||||
@@ -510,10 +508,10 @@ typedef enum {
|
||||
static monitor_types getFuncType(std::string f_name) {
|
||||
monitor_types ret = eDefaultMonitor;
|
||||
|
||||
if (f_name.compare("rsmi_dev_temp_metric_get") == 0) {
|
||||
if (f_name == "rsmi_dev_temp_metric_get") {
|
||||
ret = eTempMonitor;
|
||||
}
|
||||
if (f_name.compare("rsmi_dev_volt_metric_get") == 0) {
|
||||
if (f_name == "rsmi_dev_volt_metric_get") {
|
||||
ret = eVoltMonitor;
|
||||
}
|
||||
return ret;
|
||||
@@ -614,22 +612,22 @@ void Monitor::fillSupportedFuncs(SupportedFuncMap *supported_funcs) {
|
||||
} else {
|
||||
supported_monitors = intersect;
|
||||
}
|
||||
if (supported_monitors.size() > 0) {
|
||||
for (uint32_t i = 0; i < supported_monitors.size(); ++i) {
|
||||
if (!supported_monitors.empty()) {
|
||||
for (unsigned long & supported_monitor : supported_monitors) {
|
||||
if (m_type == eDefaultMonitor) {
|
||||
assert(supported_monitors[i] > 0);
|
||||
supported_monitors[i] |=
|
||||
(supported_monitors[i] - 1) << MONITOR_TYPE_BIT_POSITION;
|
||||
assert(supported_monitor > 0);
|
||||
supported_monitor |=
|
||||
(supported_monitor - 1) << MONITOR_TYPE_BIT_POSITION;
|
||||
} else if (m_type == eTempMonitor) {
|
||||
// Temp sensor file names are 1-based
|
||||
assert(supported_monitors[i] > 0);
|
||||
supported_monitors[i] |=
|
||||
static_cast<uint64_t>(getTempSensorEnum(supported_monitors[i]))
|
||||
assert(supported_monitor > 0);
|
||||
supported_monitor |=
|
||||
static_cast<uint64_t>(getTempSensorEnum(supported_monitor))
|
||||
<< MONITOR_TYPE_BIT_POSITION;
|
||||
} else if (m_type == eVoltMonitor) {
|
||||
// Voltage sensor file names are 0-based
|
||||
supported_monitors[i] |=
|
||||
static_cast<uint64_t>(getVoltSensorEnum(supported_monitors[i]))
|
||||
supported_monitor |=
|
||||
static_cast<uint64_t>(getVoltSensorEnum(supported_monitor))
|
||||
<< MONITOR_TYPE_BIT_POSITION;
|
||||
} else {
|
||||
assert(false); // Unexpected monitor type
|
||||
@@ -640,10 +638,10 @@ void Monitor::fillSupportedFuncs(SupportedFuncMap *supported_funcs) {
|
||||
}
|
||||
}
|
||||
|
||||
if (it->second.variants.size() == 0) {
|
||||
if (it->second.variants.empty()) {
|
||||
(*supported_funcs)[it->first] = nullptr;
|
||||
supported_variants = nullptr; // Invoke destructor
|
||||
} else if ((*supported_variants).size() > 0) {
|
||||
} else if (!(*supported_variants).empty()) {
|
||||
(*supported_funcs)[it->first] = supported_variants;
|
||||
}
|
||||
|
||||
|
||||
@@ -41,17 +41,14 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
#include "rocm_smi/rocm_smi_monitor.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_common.h"
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
@@ -70,8 +67,7 @@ static const std::map<PowerMonTypes, const char *> kMonitorNameMap = {
|
||||
PowerMon::PowerMon(std::string path, RocmSMI_env_vars const *e) :
|
||||
path_(path), env_(e) {
|
||||
}
|
||||
PowerMon::~PowerMon(void) {
|
||||
}
|
||||
PowerMon::~PowerMon(void) = default;
|
||||
|
||||
static int parse_power_str(std::string s, PowerMonTypes type, uint64_t *val) {
|
||||
std::stringstream ss(s);
|
||||
|
||||
@@ -90,7 +90,6 @@ AMDGpuPropertyId_t unmake_unique_property_id(AMDGpuPropertyId_t property_id) {
|
||||
static_cast<AMDGpuPropertyOffsetType>(AMDGpuPropertyTypesOffset_t::kClkTypes) |
|
||||
static_cast<AMDGpuPropertyOffsetType>(AMDGpuPropertyTypesOffset_t::kVoltMetricTypes);
|
||||
|
||||
auto property_type_offset = (static_cast<AMDGpuPropertyOffsetType>(property_type_offset_mask) & (property_id));
|
||||
auto property_type_id = (static_cast<AMDGpuPropertyOffsetType>(property_id) & ~(property_type_offset_mask));
|
||||
|
||||
return property_type_id;
|
||||
@@ -435,7 +434,7 @@ rsmi_status_t Device::check_amdgpu_property_reinforcement_query(uint32_t dev_idx
|
||||
id_filter_result = rsmi_dev_revision_get(dev_idx, &tmp_amdgpu_query.m_pci_rev_id);
|
||||
}
|
||||
}
|
||||
is_filter_good = (id_filter_result == rsmi_status_t::RSMI_STATUS_SUCCESS) ? true : false;
|
||||
is_filter_good = (id_filter_result == rsmi_status_t::RSMI_STATUS_SUCCESS);
|
||||
return tmp_amdgpu_query;
|
||||
};
|
||||
|
||||
@@ -475,13 +474,6 @@ rsmi_status_t Device::run_amdgpu_property_reinforcement_query(const AMDGpuProper
|
||||
return (amdgpu_property_reinforcement_list.find(asic_id) != amdgpu_property_reinforcement_list.end());
|
||||
};
|
||||
|
||||
auto ends_with = [](const std::string& value, const std::string& ending) {
|
||||
if (value.size() < ending.size()) {
|
||||
return false;
|
||||
}
|
||||
return std::equal(ending.rbegin(), ending.rend(), value.rbegin());
|
||||
};
|
||||
|
||||
// Traverse through all values for a given key
|
||||
osstream << __PRETTY_FUNCTION__ << "| ======= start =======" << "\n";
|
||||
LOG_TRACE(osstream);
|
||||
|
||||
+121
-44
@@ -40,26 +40,31 @@
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#define _GNU_SOURCE 1 // REQUIRED: to utilize some GNU features/functions, see
|
||||
// _GNU_SOURCE functions which check
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <dirent.h>
|
||||
#include <glob.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <dlfcn.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cerrno>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <regex>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
@@ -137,7 +142,7 @@ std::vector<std::string> globFilesExist(const std::string& filePattern) {
|
||||
glob_t result_glob;
|
||||
memset(&result_glob, 0, sizeof(result_glob));
|
||||
|
||||
if (glob(filePattern.c_str(), GLOB_TILDE, NULL, &result_glob) != 0) {
|
||||
if (glob(filePattern.c_str(), GLOB_TILDE, nullptr, &result_glob) != 0) {
|
||||
globfree(&result_glob);
|
||||
// Leaving below to help debug issues discovering future glob file searches
|
||||
// debugFilesDiscovered(fileNames);
|
||||
@@ -145,7 +150,7 @@ std::vector<std::string> globFilesExist(const std::string& filePattern) {
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < result_glob.gl_pathc; ++i) {
|
||||
fileNames.push_back(std::string(result_glob.gl_pathv[i]));
|
||||
fileNames.emplace_back(result_glob.gl_pathv[i]);
|
||||
}
|
||||
globfree(&result_glob);
|
||||
|
||||
@@ -367,7 +372,7 @@ std::string removeString(const std::string origStr,
|
||||
// defaults to trim stdOut
|
||||
std::pair<bool, std::string> executeCommand(std::string command, bool stdOut) {
|
||||
char buffer[128];
|
||||
std::string stdoutAndErr = "";
|
||||
std::string stdoutAndErr;
|
||||
bool successfulRun = true;
|
||||
command = "stdbuf -i0 -o0 -e0 " + command; // remove stdOut and err buffering
|
||||
|
||||
@@ -397,14 +402,10 @@ std::pair<bool, std::string> executeCommand(std::string command, bool stdOut) {
|
||||
return std::make_pair(successfulRun, stdoutAndErr);
|
||||
}
|
||||
|
||||
// originalstring - string to search for substring
|
||||
// originalString - string to search for substring
|
||||
// substring - string looking to find
|
||||
bool containsString(std::string originalString, std::string substring) {
|
||||
if (originalString.find(substring) != std::string::npos) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return (originalString.find(substring) != std::string::npos);
|
||||
}
|
||||
|
||||
// Creates and stores supplied data into a temporary file (within /tmp/).
|
||||
@@ -415,9 +416,9 @@ bool containsString(std::string originalString, std::string substring) {
|
||||
// https://man7.org/linux/man-pages/man3/mkstemp.3.html
|
||||
//
|
||||
// Temporary file name format:
|
||||
// <app prefix>_<state name>_<paramenter name>_<device id>
|
||||
// <app prefix>_<state name>_<parameter name>_<device id>
|
||||
// <app prefix> - prefix for our application's identifier (see kTmpFilePrefix)
|
||||
// <paramenter name> - name of parameter being stored
|
||||
// <parameter name> - name of parameter being stored
|
||||
// <state name> - state at which the stored value captures
|
||||
// <device index> - device identifier
|
||||
//
|
||||
@@ -452,9 +453,8 @@ rsmi_status_t storeTmpFile(uint32_t dv_ind, std::string parameterName,
|
||||
close(fd);
|
||||
if (rc_write == -1) {
|
||||
return RSMI_STATUS_FILE_ERROR;
|
||||
} else {
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
}
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
std::vector<std::string> getListOfAppTmpFiles() {
|
||||
@@ -463,16 +463,18 @@ std::vector<std::string> getListOfAppTmpFiles() {
|
||||
struct dirent *ent;
|
||||
std::vector<std::string> tmpFiles;
|
||||
|
||||
if ((dir = opendir(path.c_str())) != nullptr) {
|
||||
// captures all files & directories under specified path
|
||||
while ((ent = readdir(dir)) != nullptr) {
|
||||
std::string fileDirName = ent->d_name;
|
||||
// we only want our app specific files
|
||||
if (containsString(fileDirName, kTmpFilePrefix)) {
|
||||
tmpFiles.emplace_back(path + "/" + fileDirName);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
dir = opendir(path.c_str());
|
||||
if (dir == nullptr) {
|
||||
return tmpFiles;
|
||||
}
|
||||
// captures all files & directories under specified path
|
||||
while ((ent = readdir(dir)) != nullptr) {
|
||||
std::string fileDirName = ent->d_name;
|
||||
// we only want our app specific files
|
||||
if (containsString(fileDirName, kTmpFilePrefix)) {
|
||||
tmpFiles.emplace_back(path + "/" + fileDirName);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return tmpFiles;
|
||||
@@ -501,7 +503,7 @@ std::vector<std::string> readEntireFile(std::string path) {
|
||||
std::string line;
|
||||
while (std::getline(inFileStream, line)) {
|
||||
std::istringstream ss(line);
|
||||
if(line.size() > 0) {
|
||||
if (!line.empty()) {
|
||||
fileContent.push_back(line);
|
||||
}
|
||||
}
|
||||
@@ -513,7 +515,7 @@ std::vector<std::string> readEntireFile(std::string path) {
|
||||
// and their content
|
||||
void displayAppTmpFilesContent() {
|
||||
std::vector<std::string> tmpFiles = getListOfAppTmpFiles();
|
||||
if (tmpFiles.empty() == false) {
|
||||
if (!tmpFiles.empty()) {
|
||||
for (auto &x: tmpFiles) {
|
||||
std::string out = readFile(x);
|
||||
std::cout << __PRETTY_FUNCTION__ << " | Temporary file: " << x
|
||||
@@ -529,7 +531,7 @@ void displayAppTmpFilesContent() {
|
||||
std::string debugVectorContent(std::vector<std::string> v) {
|
||||
std::ostringstream ss;
|
||||
ss << "Vector = {";
|
||||
if (v.size() > 0) {
|
||||
if (!v.empty()) {
|
||||
for (auto it=v.begin(); it < v.end(); it++) {
|
||||
ss << *it;
|
||||
auto temp_it = it;
|
||||
@@ -547,7 +549,7 @@ std::string debugVectorContent(std::vector<std::string> v) {
|
||||
std::string displayAllDevicePaths(std::vector<std::shared_ptr<Device>> v) {
|
||||
std::ostringstream ss;
|
||||
ss << "Vector = {";
|
||||
if (v.size() > 0) {
|
||||
if (!v.empty()) {
|
||||
for (auto it=v.begin(); it < v.end(); it++) {
|
||||
ss << (*it)->path();
|
||||
auto temp_it = it;
|
||||
@@ -562,7 +564,7 @@ std::string displayAllDevicePaths(std::vector<std::shared_ptr<Device>> v) {
|
||||
}
|
||||
|
||||
// Attempts to read application specific temporary file
|
||||
// This method is to be used for reading (or determing if it exists),
|
||||
// This method is to be used for reading (or determining if it exists),
|
||||
// in order to keep file naming scheme consistent.
|
||||
//
|
||||
// dv_ind - device index
|
||||
@@ -580,7 +582,7 @@ std::tuple<bool, std::string> readTmpFile(uint32_t dv_ind,
|
||||
"_" + std::to_string(dv_ind);
|
||||
std::string fileContent;
|
||||
std::vector<std::string> tmpFiles = getListOfAppTmpFiles();
|
||||
if (tmpFiles.empty() == false) {
|
||||
if (!tmpFiles.empty()) {
|
||||
for (auto &x: tmpFiles) {
|
||||
if (containsString(x, tmpFileName)) {
|
||||
fileContent = readFile(x);
|
||||
@@ -615,15 +617,23 @@ std::string getRSMIStatusString(rsmi_status_t ret) {
|
||||
// Big Endian (BE), multi-bit symbols encoded as big endian (MSB first)
|
||||
// Little Endian (LE), multi-bit symbols encoded as little endian (LSB first)
|
||||
std::tuple<bool, std::string, std::string, std::string, std::string,
|
||||
std::string, std::string, std::string, std::string>
|
||||
std::string, std::string, std::string, std::string,
|
||||
std::string, std::string, std::string>
|
||||
getSystemDetails(void) {
|
||||
struct utsname buf;
|
||||
bool errorDetected = false;
|
||||
std::string temp_data;
|
||||
std::string sysname, nodename, release, version, machine;
|
||||
std::string sysname;
|
||||
std::string nodename;
|
||||
std::string release;
|
||||
std::string version;
|
||||
std::string machine;
|
||||
std::string domainName = "<undefined>";
|
||||
std::string os_distribution = "<undefined>";
|
||||
std::string endianness = "<undefined>";
|
||||
std::string rocm_lib_path = "<undefined>";
|
||||
std::string rocm_build_type = "<undefined>";
|
||||
std::string rocm_env_variables = "<undefined>";
|
||||
|
||||
if (uname(&buf) < 0) {
|
||||
errorDetected = true;
|
||||
@@ -640,7 +650,7 @@ std::tuple<bool, std::string, std::string, std::string, std::string,
|
||||
|
||||
std::string filePath = "/etc/os-release";
|
||||
bool fileExists = FileExists(filePath.c_str());
|
||||
if (fileExists == true) {
|
||||
if (fileExists) {
|
||||
std::vector<std::string> fileContent = readEntireFile(filePath);
|
||||
for (auto &line: fileContent) {
|
||||
if (line.find("PRETTY_NAME=") != std::string::npos) {
|
||||
@@ -658,9 +668,13 @@ std::tuple<bool, std::string, std::string, std::string, std::string,
|
||||
endianness = "Little Endian, multi-bit symbols encoded as"
|
||||
" little endian (LSB first)";
|
||||
}
|
||||
rocm_build_type = getBuildType();
|
||||
rocm_lib_path = getMyLibPath();
|
||||
rocm_env_variables = RocmSMI::getInstance().getRSMIEnvVarInfo();
|
||||
return std::make_tuple(errorDetected, sysname, nodename, release,
|
||||
version, machine, domainName, os_distribution,
|
||||
endianness);
|
||||
endianness, rocm_build_type, rocm_lib_path,
|
||||
rocm_env_variables);
|
||||
}
|
||||
|
||||
// If logging is enabled through RSMI_LOGGING environment variable.
|
||||
@@ -669,9 +683,10 @@ void logSystemDetails(void) {
|
||||
std::ostringstream ss;
|
||||
bool errorDetected;
|
||||
std::string sysname, node, release, version, machine, domain, distName,
|
||||
endianness;
|
||||
endianness, rocm_build_type, lib_path, rocm_env_vars;
|
||||
std::tie(errorDetected, sysname, node, release, version, machine, domain,
|
||||
distName, endianness) = getSystemDetails();
|
||||
distName, endianness, rocm_build_type, lib_path,
|
||||
rocm_env_vars) = getSystemDetails();
|
||||
if (errorDetected == false) {
|
||||
ss << "====== Gathered system details ============\n"
|
||||
<< "SYSTEM NAME: " << sysname << "\n"
|
||||
@@ -681,7 +696,10 @@ void logSystemDetails(void) {
|
||||
<< "VERSION: " << version << "\n"
|
||||
<< "MACHINE TYPE: " << machine << "\n"
|
||||
<< "DOMAIN: " << domain << "\n"
|
||||
<< "ENDIANNESS: " << endianness << "\n";
|
||||
<< "ENDIANNESS: " << endianness << "\n"
|
||||
<< "ROCM BUILD TYPE: " << rocm_build_type << "\n"
|
||||
<< "ROCM-SMI-LIB PATH: " << lib_path << "\n"
|
||||
<< "ROCM ENV VARIABLES: " << rocm_env_vars << "\n";
|
||||
LOG_INFO(ss);
|
||||
} else {
|
||||
ss << "====== Gathered system details ============\n"
|
||||
@@ -710,7 +728,7 @@ void logHexDump(
|
||||
|
||||
// Output description if given.
|
||||
// if (desc != NULL) printf("%s:\n", desc);
|
||||
if (desc != NULL) ss << "\n" << desc << "\n";
|
||||
if (desc != nullptr) ss << "\n" << desc << "\n";
|
||||
|
||||
// Length checks.
|
||||
if (len == 0) {
|
||||
@@ -779,6 +797,36 @@ bool isSystemBigEndian() {
|
||||
return isBigEndian;
|
||||
}
|
||||
|
||||
// Reports which build flavor this code was compiled as.
// The choice is made by the preprocessor: "debug" when the DEBUG macro is
// defined at compile time, "release" otherwise.
std::string getBuildType() {
#ifdef DEBUG
  const char *flavor = "debug";
#else
  const char *flavor = "release";
#endif
  return flavor;
}
|
||||
|
||||
// Returns the path of the binary (shared object or executable) that contains
// this function, as reported by dladdr().
//
// The returned pointer is never null. An empty string is returned when the
// lookup is compiled out (_GNU_SOURCE not defined), when dladdr() fails, or
// when dladdr() reports no file name. Otherwise the result is the pointer
// dladdr() stored in Dl_info::dli_fname.
const char *my_fname(void) {
#ifdef _GNU_SOURCE
  Dl_info dl_info{};  // zero-initialized so no field is ever read uninitialized
  // dladdr() returns 0 when the address cannot be matched to a loaded object;
  // the Dl_info fields are not meaningful in that case.
  if (dladdr(reinterpret_cast<void *>(&my_fname), &dl_info) != 0 &&
      dl_info.dli_fname != nullptr) {
    return dl_info.dli_fname;
  }
#endif
  // A string literal has static storage duration, so the pointer stays valid
  // after return (the c_str() of a local std::string would dangle).
  return "";
}
|
||||
|
||||
std::string getMyLibPath(void) {
|
||||
std::string libName = "rocm-smi-lib";
|
||||
std::string path = std::string(my_fname());
|
||||
if (path.empty()) {
|
||||
path = "Could not find library path for " + libName;
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
rsmi_status_t getBDFString(uint64_t bdf_id, std::string& bfd_str)
|
||||
{
|
||||
auto result = rsmi_status_t::RSMI_STATUS_SUCCESS;
|
||||
@@ -800,6 +848,35 @@ rsmi_status_t getBDFString(uint64_t bdf_id, std::string& bfd_str)
|
||||
return result;
|
||||
}
|
||||
|
||||
// Counts the immediate sub-directories of `path`.
//
// The entries "." and ".." are ignored. Every other entry is examined with
// fstatat() relative to the opened directory (flags = 0); an entry that
// cannot be stat'ed is reported through perror() and skipped.
//
// Returns the number of entries whose mode is a directory, or -1 (after
// perror("opendir")) when `path` cannot be opened.
int subDirectoryCountInPath(const std::string path) {
  DIR *dir_handle = opendir(path.c_str());
  if (!dir_handle) {
    perror("opendir");
    return -1;
  }

  int total = 0;
  for (struct dirent *entry = readdir(dir_handle); entry != nullptr;
       entry = readdir(dir_handle)) {
    const char *name = entry->d_name;
    const bool is_self_or_parent =
        (strcmp(name, ".") == 0) || (strcmp(name, "..") == 0);
    if (is_self_or_parent) {
      continue;
    }

    struct stat info;
    if (fstatat(dirfd(dir_handle), name, &info, 0) < 0) {
      perror(name);
      continue;
    }

    total += S_ISDIR(info.st_mode) ? 1 : 0;
  }

  closedir(dir_handle);
  return total;
}
|
||||
|
||||
} // namespace smi
|
||||
} // namespace amd
|
||||
|
||||
@@ -67,7 +67,8 @@ target_link_libraries(
|
||||
PUBLIC GTest::gtest_main
|
||||
PUBLIC c
|
||||
PUBLIC stdc++
|
||||
PUBLIC pthread)
|
||||
PUBLIC pthread
|
||||
PUBLIC dl)
|
||||
|
||||
install(TARGETS ${RSMITST} gtest gtest_main
|
||||
DESTINATION ${SHARE_INSTALL_PREFIX}/rsmitst_tests
|
||||
|
||||
@@ -108,14 +108,12 @@ static void RunCustomTestProlog(TestBase *test) {
|
||||
}
|
||||
test->SetUp();
|
||||
test->Run();
|
||||
return;
|
||||
}
|
||||
static void RunCustomTestEpilog(TestBase *tst) {
|
||||
if (sRSMIGlvalues->verbosity >= TestBase::VERBOSE_STANDARD) {
|
||||
tst->DisplayResults();
|
||||
}
|
||||
tst->Close();
|
||||
return;
|
||||
}
|
||||
|
||||
// If the test case one big test, you should use RunGenericTest()
|
||||
@@ -127,7 +125,6 @@ static void RunCustomTestEpilog(TestBase *tst) {
|
||||
static void RunGenericTest(TestBase *test) {
|
||||
RunCustomTestProlog(test);
|
||||
RunCustomTestEpilog(test);
|
||||
return;
|
||||
}
|
||||
|
||||
// TEST ENTRY TEMPLATE:
|
||||
|
||||
@@ -65,8 +65,6 @@ $BLACKLIST_ALL_ASICS\
|
||||
FILTER[90400]=\
|
||||
$BLACKLIST_ALL_ASICS\
|
||||
"rsmitstReadOnly.TestVoltCurvRead:"\
|
||||
"rsmitstReadOnly.TestFrequenciesRead:"\
|
||||
"rsmitstReadWrite.TestFrequenciesReadWrite:"\
|
||||
"rsmitstReadWrite.TestPowerReadWrite"
|
||||
FILTER[90401]=${FILTER[90400]}
|
||||
FILTER[90402]=${FILTER[90400]}
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <cassert>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi_test/test_base.h"
|
||||
@@ -61,10 +61,9 @@ static const char kResultsLabel[] = "TEST RESULTS";
|
||||
// This one is used outside this file
|
||||
const char kSetupLabel[] = "TEST SETUP";
|
||||
|
||||
TestBase::TestBase() : setup_failed_(false), description_("") {
|
||||
}
|
||||
TestBase::~TestBase() {
|
||||
TestBase::TestBase() : setup_failed_(false) {
|
||||
}
|
||||
TestBase::~TestBase() = default;
|
||||
|
||||
void TestBase::MakeHeaderStr(const char *inStr,
|
||||
std::string *outStr) const {
|
||||
@@ -116,8 +115,6 @@ void TestBase::SetUp(uint64_t init_flags) {
|
||||
std::cout << "No ROCm SMI tests can be run." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void TestBase::PrintDeviceHeader(uint32_t dv_ind) {
|
||||
@@ -213,7 +210,7 @@ void TestBase::set_description(std::string d) {
|
||||
size_t endlptr;
|
||||
|
||||
for (size_t i = le; i < description_.size(); i += le) {
|
||||
endlptr = description_.find_last_of(" ", i);
|
||||
endlptr = description_.find_last_of(' ', i);
|
||||
description_.replace(endlptr, 1, "\n");
|
||||
i = endlptr;
|
||||
}
|
||||
|
||||
@@ -45,6 +45,7 @@
|
||||
#ifndef TESTS_ROCM_SMI_TEST_TEST_BASE_H_
|
||||
#define TESTS_ROCM_SMI_TEST_TEST_BASE_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
class TestBase {
|
||||
@@ -142,9 +143,8 @@ class TestBase {
|
||||
"\t===> Abort is over-ridden due to dont_fail command line option." \
|
||||
<< std::endl; \
|
||||
return; \
|
||||
} else { \
|
||||
ASSERT_EQ(RSMI_STATUS_SUCCESS, (RET)); \
|
||||
} \
|
||||
ASSERT_EQ(RSMI_STATUS_SUCCESS, (RET)); \
|
||||
}
|
||||
|
||||
void MakeHeaderStr(const char *inStr, std::string *outStr);
|
||||
|
||||
@@ -43,13 +43,13 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <getopt.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#include "rocm_smi_test/test_base.h"
|
||||
#include "rocm_smi_test/test_common.h"
|
||||
|
||||
@@ -74,7 +74,7 @@ void DumpMonitorInfo(const TestBase *test);
|
||||
#endif
|
||||
|
||||
#define DISPLAY_RSMI_ERR(RET) { \
|
||||
if (RET != RSMI_STATUS_SUCCESS) { \
|
||||
if ((RET) != RSMI_STATUS_SUCCESS) { \
|
||||
const char *err_str; \
|
||||
std::cout << "\t===> ERROR: RSMI call returned " << (RET) << std::endl; \
|
||||
rsmi_status_string((RET), &err_str); \
|
||||
@@ -91,7 +91,7 @@ void DumpMonitorInfo(const TestBase *test);
|
||||
} \
|
||||
}
|
||||
#define CHK_RSMI_PERM_ERR(RET) { \
|
||||
if (RET == RSMI_STATUS_PERMISSION) { \
|
||||
if ((RET) == RSMI_STATUS_PERMISSION) { \
|
||||
std::cout << "This command requires root access." << std::endl; \
|
||||
} else { \
|
||||
DISPLAY_RSMI_ERR(RET) \
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user