[lib] Enhance Logger: gpu_metrics + enable console out

* Updates:
    - Env variable RSMI_LOGGING unset, 0, or any value other than 1-3
        -> all logging off
    - Env variable RSMI_LOGGING=1 -> logs only
    - Env variable RSMI_LOGGING=2 -> console only
    - Env variable RSMI_LOGGING=3 -> both logs + console
    - Metrics output includes hexdump of current file
      and decoded metrics (functions: logHexDump
      and log_gpu_metrics)
    - System info gathered now includes the system's
      perceived endianness (little or big endian), which is
      helpful for viewing the decoded hexdump or any
      binary translation
    - Added templates for printing an integer as hex
      (print_int_as_hex), unsigned integers
      (print_unsigned_int), and printing both unsigned
      hex and int with an optional header
      (print_unsigned_hex_and_int)
    - Fixed some build compile warnings/errors -
      ex. the strncpy calls for sku or board names
      (the truncation is expected and needed),
      and temp file writes, which now properly
      return RSMI_STATUS_FILE_ERROR if unsuccessful
    - Fixed logrotate not initializing properly on
      RHEL 8.8/9.x

Change-Id: Ifa0f0218c9cafd0a8cd6aa8e7f94d61e9107200f
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
Этот коммит содержится в:
Charis Poag
2023-08-01 21:46:19 -05:00
родитель 0522439ac2
Коммит 9c7eed7edc
12 изменённых файлов: 470 добавлений и 24 удалений
+3 -2
Просмотреть файл
@@ -62,12 +62,11 @@ EOF
# confirm logrotate file exists in daily
if [ -f /etc/cron.daily/logrotate ]; then
# move logrotate daily to hourly
if [ -f /etc/cron.hourly/logrotate ]; then
if [ -d /etc/cron.hourly ]; then
sudo mv /etc/cron.daily/logrotate /etc/cron.hourly/logrotate
else
echo "[WARNING] Could find and configure hourly cron for $packageName's"\
" logrotate. $packageName logs (when turned on) will not rotate properly."
return
fi
else
# confirm that it's already been moved to hourly
@@ -77,6 +76,7 @@ EOF
"$packageName logs (when turned on) may not rotate properly."
fi
fi
return #done configuring for non-systemd timers
else
# Configure systemd timers - the typical setup for modern Linux logrotation setups
if [ -f /lib/systemd/system/logrotate.timer ]; then
@@ -102,6 +102,7 @@ EOF
echo "[WARNING] Could not configure systemd timer for $packageName's logrotate."\
"$packageName logs (when turned on) will not rotate properly."
fi
return #done configuring for systemd timers
fi
}
+3 -2
Просмотреть файл
@@ -62,12 +62,11 @@ EOF
# confirm logrotate file exists in daily
if [ -f /etc/cron.daily/logrotate ]; then
# move logrotate daily to hourly
if [ -f /etc/cron.hourly/logrotate ]; then
if [ -d /etc/cron.hourly ]; then
sudo mv /etc/cron.daily/logrotate /etc/cron.hourly/logrotate
else
echo "[WARNING] Could find and configure hourly cron for $packageName's"\
" logrotate. $packageName logs (when turned on) will not rotate properly."
return
fi
else
# confirm that it's already been moved to hourly
@@ -77,6 +76,7 @@ EOF
"$packageName logs (when turned on) may not rotate properly."
fi
fi
return #done configuring for non-systemd timers
else
# Configure systemd timers - the typical setup for modern Linux logrotation setups
if [ -f /lib/systemd/system/logrotate.timer ]; then
@@ -102,6 +102,7 @@ EOF
echo "[WARNING] Could not configure systemd timer for $packageName's logrotate."\
"$packageName logs (when turned on) will not rotate properly."
fi
return #done configuring for systemd timers
fi
}
+1
Просмотреть файл
@@ -100,6 +100,7 @@ typedef enum LOG_TYPE {
NO_LOG = 1,
CONSOLE = 2,
FILE_LOG = 3,
BOTH_FILE_AND_CONSOLE = 4
} LogType;
class Logger {
+1
Просмотреть файл
@@ -115,6 +115,7 @@ class RocmSMI {
const RocmSMI_env_vars& getEnv(void);
void printEnvVarInfo(void);
bool isLoggingOn(void);
uint32_t getLogSetting(void);
static const std::map<amd::smi::DevInfoTypes, std::string> devInfoTypesStrings;
private:
+48 -1
Просмотреть файл
@@ -48,6 +48,9 @@
#include <string>
#include <cstdint>
#include <vector>
#include <sstream>
#include <iomanip>
#include <type_traits>
#include "rocm_smi/rocm_smi_device.h"
@@ -94,8 +97,52 @@ GetDevBinaryBlob(amd::smi::DevInfoTypes type,
rsmi_status_t ErrnoToRsmiStatus(int err);
std::string getRSMIStatusString(rsmi_status_t ret);
std::tuple<bool, std::string, std::string, std::string, std::string,
std::string, std::string, std::string> getSystemDetails(void);
std::string, std::string, std::string, std::string>
getSystemDetails(void);
void logSystemDetails(void);
void logHexDump(const char *desc, const void *addr, const size_t len,
size_t perLine);
bool isSystemBigEndian();
// Formats an integer as zero-padded hexadecimal text.
//
// The result always carries sizeof(T) * 2 hex digits, i.e. exactly the bytes
// that make up a value of type T, most significant digit first.
//
//   i               - value to format; must support `i | 0` (integer types).
//   showHexNotation - when true (default) the text is prefixed with "0x".
//
// Returns the formatted string, e.g. "0x001f" for a 16-bit value of 31.
template <typename T>
std::string print_int_as_hex(T i, bool showHexNotation=true) {
  using Wide = unsigned long long int;

  // Keep only the low sizeof(T) bytes of the widened value. Widening a
  // negative signed value sign-extends it, which would otherwise print more
  // digits than T actually holds (e.g. 16 digits for a 16-bit -1).
  const std::size_t dropped_bytes =
      (sizeof(T) < sizeof(Wide)) ? (sizeof(Wide) - sizeof(T)) : 0;
  const Wide mask = ~Wide{0} >> (dropped_bytes * 8);
  const Wide bits = static_cast<Wide>(i | 0) & mask;

  std::stringstream ss;
  if (showHexNotation) {
    ss << "0x";
  }
  // setw only applies to the next inserted item, which is the value itself.
  ss << std::setfill('0') << std::setw(sizeof(T) * 2) << std::hex << bits
     << std::dec;
  return ss.str();
}
// Renders an integer as decimal text after widening it to
// unsigned long long. The value must support `i | 0` (integer types).
template <typename T>
std::string print_unsigned_int(T i) {
  const auto widened = static_cast<unsigned long long int>(i | 0);
  std::ostringstream decimal_text;
  decimal_text << widened;
  return decimal_text.str();
}
// Builds a one-line summary of an integer: its hex form, its unsigned
// decimal form and the byte size of its type.
//
//   i       - value to describe.
//   heading - optional label; when non-empty the summary starts on a new
//             line as "<heading> = ".
//
// Returns the assembled text.
template <typename T>
std::string print_unsigned_hex_and_int(T i, std::string heading="") {
  std::stringstream line;
  if (!heading.empty()) {
    line << "\n" << heading << " = ";
  }
  line << "Hex (MSB): " << print_int_as_hex(i) << ", ";
  line << "Unsigned int: " << print_unsigned_int(i) << ", ";
  line << "Byte Size: " << sizeof(T);
  return line.str();
}
struct pthread_wrap {
public:
+3
Просмотреть файл
@@ -166,8 +166,11 @@ TRY
rsmi_dev_name_get(dev_inx, dev->device_name, DEVICE_NAME_LEN);
rsmi_dev_vbios_version_get(dev_inx, buf, buf_size);
if (std::strlen(buf) > 0) {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-truncation"
std::strncpy(dev->sku_name, &buf[4], 6);
std::strncpy(dev->board_name, buf, 12);
#pragma GCC diagnostic pop
}
rsmi_dev_serial_number_get(dev_inx, dev->board_serial_number,
BOARD_SERIAL_NUM_LEN);
+2 -2
Просмотреть файл
@@ -673,8 +673,8 @@ rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block,
default:
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
<< ", default case -> reporting RSMI_STATUS_NOT_SUPPORTED"
<< amd::smi::getRSMIStatusString(ret);
<< ", default case -> reporting "
<< amd::smi::getRSMIStatusString(RSMI_STATUS_NOT_SUPPORTED);
LOG_ERROR(ss);
return RSMI_STATUS_NOT_SUPPORTED;
}
+6 -1
Просмотреть файл
@@ -826,7 +826,12 @@ int Device::readDevInfoBinary(DevInfoTypes type, std::size_t b_size,
}
ss << "Successfully read DevInfoBinary for DevInfoType ("
<< RocmSMI::devInfoTypesStrings.at(type) << ") - SYSFS ("
<< sysfs_path << "), returning binaryData = " << p_binary_data;
<< sysfs_path << "), returning binaryData = " << p_binary_data
<< "; byte_size = " << std::dec << static_cast<int>(b_size);
std::string metricDescription = "AMD SMI GPU METRICS (16-byte width), "
+ sysfs_path;
logHexDump(metricDescription.c_str(), p_binary_data, b_size, 16);
LOG_INFO(ss);
return 0;
}
+218
Просмотреть файл
@@ -60,6 +60,10 @@
#include "rocm_smi/rocm_smi_monitor.h"
#include "rocm_smi/rocm_smi_utils.h"
#include "rocm_smi/rocm_smi_exception.h"
#include "rocm_smi/rocm_smi_logger.h"
using namespace ROCmLogging;
using namespace amd::smi;
#define TRY try {
#define CATCH } catch (...) {return amd::smi::handleException();}
@@ -139,6 +143,196 @@ typedef struct {
} rsmi_gpu_metrics_v_1_3;
// log current gpu_metrics file content read
// any metrics value can be a nullptr
void log_gpu_metrics(const metrics_table_header_t *gpu_metrics_table_header,
const rsmi_gpu_metrics_v_1_2 *rsmi_gpu_metrics_v_1_2,
const rsmi_gpu_metrics_v_1_3 *gpu_metrics_v_1_3,
const rsmi_gpu_metrics_t *rsmi_gpu_metrics) {
if (RocmSMI::getInstance().isLoggingOn() == false) {
return;
}
std::ostringstream ss;
if (gpu_metrics_table_header != nullptr) {
ss
/* Common Header */
<< print_unsigned_hex_and_int(
gpu_metrics_table_header->structure_size,
"gpu_metrics_table_header->structure_size")
<< print_unsigned_hex_and_int(
gpu_metrics_table_header->format_revision,
"gpu_metrics_table_header->format_revision")
<< print_unsigned_hex_and_int(
gpu_metrics_table_header->content_revision,
"gpu_metrics_table_header->content_revision");
LOG_DEBUG(ss);
}
if (rsmi_gpu_metrics == nullptr) {
return;
} else {
// do nothing - continue
}
ss
/* Common Header */
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->common_header.structure_size,
"rsmi_gpu_metrics->common_header.structure_size")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->common_header.format_revision,
"rsmi_gpu_metrics->common_header.format_revision")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->common_header.content_revision,
"rsmi_gpu_metrics->common_header.content_revision")
/* Temperature */
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->temperature_edge,
"rsmi_gpu_metrics->temperature_edge")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->temperature_hotspot,
"rsmi_gpu_metrics->temperature_hotspot")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->temperature_mem,
"rsmi_gpu_metrics->temperature_mem")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->temperature_vrgfx,
"rsmi_gpu_metrics->temperature_vrgfx")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->temperature_vrsoc,
"rsmi_gpu_metrics->temperature_vrsoc")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->temperature_vrmem,
"rsmi_gpu_metrics->temperature_vrmem")
/* Utilization */
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->average_gfx_activity,
"rsmi_gpu_metrics->average_gfx_activity")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->average_umc_activity,
"rsmi_gpu_metrics->average_umc_activity")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->average_mm_activity,
"rsmi_gpu_metrics->average_mm_activity")
/* Power/Energy */
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->average_socket_power,
"rsmi_gpu_metrics->average_socket_power")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->energy_accumulator,
"rsmi_gpu_metrics->energy_accumulator")
/* Driver attached timestamp (in ns) */
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->system_clock_counter,
"rsmi_gpu_metrics->system_clock_counter")
/* Average clocks */
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->average_gfxclk_frequency,
"rsmi_gpu_metrics->average_gfxclk_frequency")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->average_socclk_frequency,
"rsmi_gpu_metrics->average_socclk_frequency")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->average_uclk_frequency,
"rsmi_gpu_metrics->average_uclk_frequency")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->average_vclk0_frequency,
"rsmi_gpu_metrics->average_vclk0_frequency")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->average_dclk0_frequency,
"rsmi_gpu_metrics->average_dclk0_frequency")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->average_vclk1_frequency,
"rsmi_gpu_metrics->average_vclk1_frequency")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->average_dclk1_frequency,
"rsmi_gpu_metrics->average_dclk1_frequency")
/* Current clocks */
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->current_gfxclk,
"rsmi_gpu_metrics->current_gfxclk")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->current_socclk,
"rsmi_gpu_metrics->current_socclk")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->current_uclk,
"rsmi_gpu_metrics->current_uclk")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->current_vclk0,
"rsmi_gpu_metrics->current_vclk0")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->current_dclk0,
"rsmi_gpu_metrics->current_dclk0")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->current_vclk1,
"rsmi_gpu_metrics->current_vclk1")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->current_dclk1,
"rsmi_gpu_metrics->current_dclk1")
/* Throttle status */
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->throttle_status,
"rsmi_gpu_metrics->throttle_status")
/* Fans */
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->current_fan_speed,
"rsmi_gpu_metrics->current_fan_speed")
/* Link width/speed */
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->pcie_link_width,
"rsmi_gpu_metrics->pcie_link_width")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->pcie_link_speed,
"rsmi_gpu_metrics->pcie_link_speed")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->padding,
"rsmi_gpu_metrics->padding")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->gfx_activity_acc,
"rsmi_gpu_metrics->gfx_activity_acc")
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics->mem_actvity_acc,
"rsmi_gpu_metrics->mem_actvity_acc");
for (int i=0; i < RSMI_NUM_HBM_INSTANCES; i++) {
ss << print_unsigned_hex_and_int(
rsmi_gpu_metrics->temperature_hbm[i],
"rsmi_gpu_metrics->temperature_hbm[" + std::to_string(i) + "]");
}
if (rsmi_gpu_metrics_v_1_2 != nullptr) {
/* PMFW attached timestamp (10ns resolution) */
ss
<< print_unsigned_hex_and_int(
rsmi_gpu_metrics_v_1_2->firmware_timestamp,
"rsmi_gpu_metrics_v_1_2->firmware_timestamp");
}
if (gpu_metrics_v_1_3 != nullptr) {
/* PMFW attached timestamp (10ns resolution) */
ss
<< print_unsigned_hex_and_int(
gpu_metrics_v_1_3->firmware_timestamp,
"gpu_metrics_v_1_3->firmware_timestamp")
/* Voltage (mV) */
<< print_unsigned_hex_and_int(
gpu_metrics_v_1_3->voltage_soc,
"gpu_metrics_v_1_3->voltage_soc")
<< print_unsigned_hex_and_int(
gpu_metrics_v_1_3->voltage_gfx,
"gpu_metrics_v_1_3->voltage_gfx")
<< print_unsigned_hex_and_int(
gpu_metrics_v_1_3->voltage_mem,
"gpu_metrics_v_1_3->voltage_mem")
<< print_unsigned_hex_and_int(
gpu_metrics_v_1_3->padding1,
"gpu_metrics_v_1_3->padding1")
/* Throttle status (ASIC independent) */
<< print_unsigned_hex_and_int(
gpu_metrics_v_1_3->indep_throttle_status,
"gpu_metrics_v_1_3->indep_throttle_status");
}
LOG_DEBUG(ss);
}
static rsmi_status_t GetGPUMetricsFormat1(uint32_t dv_ind,
rsmi_gpu_metrics_t *data, uint8_t content_v) {
assert(content_v != RSMI_GPU_METRICS_API_CONTENT_VER_1 &&
@@ -268,16 +462,28 @@ rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t *smu) {
rsmi_gpu_metrics_v_1_3 smu_v_1_3;
rsmi_status_t ret;
std::ostringstream ss;
if (!dev->gpu_metrics_ver().structure_size) {
ret = GetDevBinaryBlob(amd::smi::kDevGpuMetrics, dv_ind,
sizeof(struct metrics_table_header_t), &dev->gpu_metrics_ver());
log_gpu_metrics(&dev->gpu_metrics_ver(), nullptr, nullptr, nullptr);
if (ret != RSMI_STATUS_SUCCESS) {
ss << "Returning = " << getRSMIStatusString(ret)
<< ",\ndev->gpu_metrics_ver().structure_size = "
<< print_unsigned_int(dev->gpu_metrics_ver().structure_size)
<< ", could not read common header";
LOG_ERROR(ss);
return ret;
}
}
// only supports gpu_metrics_v1_x version
if (dev->gpu_metrics_ver().format_revision != 1) {
ss << "Returning = " << getRSMIStatusString(RSMI_STATUS_NOT_SUPPORTED)
<< ",\ndev->gpu_metrics_ver().format_revision = "
<< print_unsigned_int(dev->gpu_metrics_ver().format_revision)
<< " was not equal to 1";
LOG_ERROR(ss);
return RSMI_STATUS_NOT_SUPPORTED;
}
@@ -289,19 +495,31 @@ rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t *smu) {
RSMI_GPU_METRICS_API_CONTENT_VER_1) {
ret = GetDevBinaryBlob(amd::smi::kDevGpuMetrics, dv_ind,
sizeof(rsmi_gpu_metrics_t), smu);
ss << __PRETTY_FUNCTION__ << " | RSMI_GPU_METRICS_API_CONTENT_VER_1";
LOG_DEBUG(ss);
log_gpu_metrics(nullptr, nullptr, nullptr, smu);
} else if (dev->gpu_metrics_ver().content_revision ==
RSMI_GPU_METRICS_API_CONTENT_VER_2) {
ret = GetDevBinaryBlob(amd::smi::kDevGpuMetrics, dv_ind,
sizeof(rsmi_gpu_metrics_v_1_2), &smu_v_1_2);
map_gpu_metrics_1_2_to_rsmi_gpu_metrics_t(&smu_v_1_2, smu);
ss << __PRETTY_FUNCTION__ << " | RSMI_GPU_METRICS_API_CONTENT_VER_2";
LOG_DEBUG(ss);
log_gpu_metrics(nullptr, &smu_v_1_2, nullptr, smu);
} else if (dev->gpu_metrics_ver().content_revision ==
RSMI_GPU_METRICS_API_CONTENT_VER_3) {
ret = GetDevBinaryBlob(amd::smi::kDevGpuMetrics, dv_ind,
sizeof(rsmi_gpu_metrics_v_1_3), &smu_v_1_3);
map_gpu_metrics_1_3_to_rsmi_gpu_metrics_t(&smu_v_1_3, smu);
ss << __PRETTY_FUNCTION__ << " | RSMI_GPU_METRICS_API_CONTENT_VER_3";
LOG_DEBUG(ss);
log_gpu_metrics(nullptr, nullptr, &smu_v_1_3, smu);
} else {
ret = GetGPUMetricsFormat1(dv_ind, smu,
dev->gpu_metrics_ver().content_revision);
ss << __PRETTY_FUNCTION__ << " | GetGPUMetricsFormat1";
LOG_DEBUG(ss);
log_gpu_metrics(nullptr, nullptr, nullptr, smu);
}
if (ret != RSMI_STATUS_SUCCESS) {
+44 -1
Просмотреть файл
@@ -177,6 +177,9 @@ void Logger::error(const char* text) throw() {
logIntoFile(data);
} else if (m_LogType == CONSOLE) {
logOnConsole(data);
} else if (m_LogType == BOTH_FILE_AND_CONSOLE) {
logOnConsole(data);
logIntoFile(data);
}
}
@@ -208,6 +211,9 @@ void Logger::alarm(const char* text) throw() {
logIntoFile(data);
} else if (m_LogType == CONSOLE) {
logOnConsole(data);
} else if (m_LogType == BOTH_FILE_AND_CONSOLE) {
logOnConsole(data);
logIntoFile(data);
}
}
@@ -239,6 +245,9 @@ void Logger::always(const char* text) throw() {
logIntoFile(data);
} else if (m_LogType == CONSOLE) {
logOnConsole(data);
} else if (m_LogType == BOTH_FILE_AND_CONSOLE) {
logOnConsole(data);
logIntoFile(data);
}
}
@@ -303,6 +312,10 @@ void Logger::info(const char* text) throw() {
logIntoFile(data);
} else if ((m_LogType == CONSOLE) && (m_LogLevel >= LOG_LEVEL_INFO)) {
logOnConsole(data);
} else if ((m_LogType == BOTH_FILE_AND_CONSOLE)
&& (m_LogLevel >= LOG_LEVEL_INFO)) {
logOnConsole(data);
logIntoFile(data);
}
}
@@ -333,6 +346,10 @@ void Logger::trace(const char* text) throw() {
logIntoFile(data);
} else if ((m_LogType == CONSOLE) && (m_LogLevel >= LOG_LEVEL_TRACE)) {
logOnConsole(data);
} else if ((m_LogType == BOTH_FILE_AND_CONSOLE)
&& (m_LogLevel >= LOG_LEVEL_TRACE)) {
logOnConsole(data);
logIntoFile(data);
}
}
@@ -363,6 +380,10 @@ void Logger::debug(const char* text) throw() {
logIntoFile(data);
} else if ((m_LogType == CONSOLE) && (m_LogLevel >= LOG_LEVEL_DEBUG)) {
logOnConsole(data);
} else if ((m_LogType == BOTH_FILE_AND_CONSOLE)
&& (m_LogLevel >= LOG_LEVEL_DEBUG)) {
logOnConsole(data);
logIntoFile(data);
}
}
@@ -424,6 +445,9 @@ std::string Logger::getLogSettings() {
case CONSOLE:
logSettings += "LogType = CONSOLE";
break;
case BOTH_FILE_AND_CONSOLE:
logSettings += "LogType = BOTH_FILE_AND_CONSOLE";
break;
default:
logSettings += "LogType = <undefined>";
}
@@ -471,7 +495,26 @@ void Logger::initialize_resources() {
}
m_File.open(logFileName.c_str(), std::ios::out | std::ios::app);
m_LogLevel = LOG_LEVEL_TRACE;
m_LogType = FILE_LOG;
// RSMI_LOGGING = 1, output to logs only
// RSMI_LOGGING = 2, output to console only
// RSMI_LOGGING = 3, output to logs and console
switch (amd::smi::RocmSMI::getInstance().getLogSetting()) {
case 0:
m_LogType = NO_LOG;
break;
case 1:
m_LogType = FILE_LOG;
break;
case 2:
m_LogType = CONSOLE;
break;
case 3:
m_LogType = BOTH_FILE_AND_CONSOLE;
break;
default:
m_LogType = NO_LOG;
break;
}
if (!m_File.is_open()) {
std::cout << "WARNING: Issue opening log file (" << logFileName
<< ") to write." << std::endl;
+22 -7
Просмотреть файл
@@ -458,17 +458,21 @@ static uint32_t GetEnvVarUInteger(const char *ev_str) {
// provides a way to get env variable detail in both debug & release
// helps enable full logging
static bool getRSMIEnvVar_LoggingEnabled(const char *ev_str) {
bool isLoggingEnabled = false;
// RSMI_LOGGING = 1, output to logs only
// RSMI_LOGGING = 2, output to console only
// RSMI_LOGGING = 3, output to logs and console
static uint32_t getRSMIEnvVar_LoggingEnabled(const char *ev_str) {
uint32_t ret = 0;
ev_str = getenv(ev_str);
if (ev_str != nullptr) {
isLoggingEnabled = true;
int ev_ret = atoi(ev_str);
ret = static_cast<uint32_t>(ev_ret);
}
return isLoggingEnabled;
return ret;
}
static std::unordered_set<uint32_t> GetEnvVarUIntegerSets(const char *ev_str) {
static inline std::unordered_set<uint32_t> GetEnvVarUIntegerSets(
const char *ev_str) {
std::unordered_set<uint32_t> returnSet;
#ifndef DEBUG
(void)ev_str;
@@ -519,7 +523,16 @@ const RocmSMI_env_vars& RocmSMI::getEnv(void) {
}
// Reports whether logging is enabled. The environment variables are
// re-read first; logging counts as on only when the stored logging_on
// value is 1, 2 or 3.
bool RocmSMI::isLoggingOn(void) {
  GetEnvVariables();
  const auto setting = this->env_vars_.logging_on;
  return (setting > 0) && (setting <= 3);
}
uint32_t RocmSMI::getLogSetting() {
return this->env_vars_.logging_on;
}
@@ -544,7 +557,9 @@ void RocmSMI::printEnvVarInfo(void) {
<< ((env_vars_.debug_inf_loop == 0) ? "<undefined>"
: std::to_string(env_vars_.debug_inf_loop))
<< std::endl;
bool isLoggingOn = (env_vars_.logging_on) ? true : false;
std::cout << __PRETTY_FUNCTION__ << " | env_vars_.logging_on = "
<< getLogSetting() << std::endl;
bool isLoggingOn = RocmSMI::isLoggingOn() ? true : false;
std::cout << __PRETTY_FUNCTION__ << " | env_vars_.logging_on = "
<< (isLoggingOn ? "true" : "false") << std::endl;
std::cout << __PRETTY_FUNCTION__ << " | env_vars_.enum_overrides = {";
+119 -8
Просмотреть файл
@@ -57,6 +57,8 @@
#include <algorithm>
#include <vector>
#include <regex>
#include <iomanip>
#include <type_traits>
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi/rocm_smi_utils.h"
@@ -103,7 +105,7 @@ bool FileExists(char const *filename) {
return (stat(filename, &buf) == 0);
}
static void debugFilesDiscovered(std::vector<std::string> files) {
static inline void debugFilesDiscovered(std::vector<std::string> files) {
std::ostringstream ss;
int numberOfFilesFound = static_cast<int>(files.size());
ss << "fileName.size() = " << numberOfFilesFound
@@ -435,9 +437,13 @@ rsmi_status_t storeTmpFile(uint32_t dv_ind, std::string parameterName,
}
chmod(fileName, S_IRUSR|S_IRGRP|S_IROTH);
write(fd, storageData.c_str(), storageData.size());
ssize_t rc_write = write(fd, storageData.c_str(), storageData.size());
close(fd);
return RSMI_STATUS_SUCCESS;
if (rc_write == -1) {
return RSMI_STATUS_FILE_ERROR;
} else {
return RSMI_STATUS_SUCCESS;
}
}
std::vector<std::string> getListOfAppTmpFiles() {
@@ -573,14 +579,20 @@ std::string getRSMIStatusString(rsmi_status_t ret) {
// string domainName = domain name of the the system's node on the network
// string os_distribution = pretty name of os distribution
// (typically found in /etc/*-release file)
// string endianness = system's endianness.
// Expressed as big endian or little endian.
// Big Endian (BE), multi-bit symbols encoded as big endian (MSB first)
// Little Endian (LE), multi-bit symbols encoded as little endian (LSB first)
std::tuple<bool, std::string, std::string, std::string, std::string,
std::string, std::string, std::string> getSystemDetails(void) {
std::string, std::string, std::string, std::string>
getSystemDetails(void) {
struct utsname buf;
bool errorDetected = false;
std::string temp_data;
std::string sysname, nodename, release, version, machine;
std::string domainName = "<undefined>";
std::string os_distribution = "<undefined>";
std::string endianness = "<undefined>";
if (uname(&buf) < 0) {
errorDetected = true;
@@ -608,8 +620,16 @@ std::tuple<bool, std::string, std::string, std::string, std::string,
}
}
}
if (isSystemBigEndian()) {
endianness = "Big Endian, multi-bit symbols encoded as"
" big endian (MSB first)";
} else {
endianness = "Little Endian, multi-bit symbols encoded as"
" little endian (LSB first)";
}
return std::make_tuple(errorDetected, sysname, nodename, release,
version, machine, domainName, os_distribution);
version, machine, domainName, os_distribution,
endianness);
}
// If logging is enabled through RSMI_LOGGING environment variable.
@@ -617,9 +637,10 @@ std::tuple<bool, std::string, std::string, std::string, std::string,
void logSystemDetails(void) {
std::ostringstream ss;
bool errorDetected;
std::string sysname, node, release, version, machine, domain, distName;
std::string sysname, node, release, version, machine, domain, distName,
endianness;
std::tie(errorDetected, sysname, node, release, version, machine, domain,
distName) = getSystemDetails();
distName, endianness) = getSystemDetails();
if (errorDetected == false) {
ss << "====== Gathered system details ============\n"
<< "SYSTEM NAME: " << sysname << "\n"
@@ -628,7 +649,8 @@ void logSystemDetails(void) {
<< "RELEASE: " << release << "\n"
<< "VERSION: " << version << "\n"
<< "MACHINE TYPE: " << machine << "\n"
<< "DOMAIN: " << domain << "\n";
<< "DOMAIN: " << domain << "\n"
<< "ENDIANNESS: " << endianness << "\n";
LOG_INFO(ss);
} else {
ss << "====== Gathered system details ============\n"
@@ -637,5 +659,94 @@ void logSystemDetails(void) {
}
}
// Usage:
//   logHexDump(desc, addr, len, bytesPerLine);
//
// Writes a hex dump of a memory region to the log: one row per
// bytesPerLine bytes, each row showing the offset, the bytes in hex and
// their printable-ASCII rendering ('.' for anything outside 0x20-0x7e).
// A note about the host's endianness precedes the rows.
//
//   desc:         if non-null, printed as a description before the hex dump.
//   addr:         the address to start dumping from.
//   len:          the number of bytes to dump.
//   bytesPerLine: number of bytes on each output line; values outside
//                 4..64 silently fall back to 16.
//
// A zero length or a null address is reported through LOG_ERROR; a
// successful dump is emitted through LOG_DEBUG.
void logHexDump(
    const char *desc, const void *addr, const size_t len, size_t bytesPerLine) {
  constexpr size_t kMinBytesPerLine = 4;
  constexpr size_t kMaxBytesPerLine = 64;
  constexpr size_t kDefaultBytesPerLine = 16;

  std::ostringstream ss;

  // Silently replace unusable per-line values.
  if (bytesPerLine < kMinBytesPerLine || bytesPerLine > kMaxBytesPerLine) {
    bytesPerLine = kDefaultBytesPerLine;
  }

  // ASCII column for the current row. Sized for the widest accepted row so
  // no variable-length array (a compiler extension in C++) is required.
  char ascii[kMaxBytesPerLine + 1] = {'\0'};
  const unsigned char *pc = static_cast<const unsigned char *>(addr);

  // Output description if given.
  if (desc != nullptr) ss << "\n" << desc << "\n";

  // Length check.
  if (len == 0) {
    ss << " ZERO LENGTH\n";
    LOG_ERROR(ss);
    return;
  }

  // Never dereference a missing buffer.
  if (pc == nullptr) {
    ss << " NULL ADDRESS\n";
    LOG_ERROR(ss);
    return;
  }

  std::string endianness = "<undefined>";
  if (isSystemBigEndian()) {
    endianness = "** System is Big Endian, multi-bit symbols encoded as"
                 " big endian (MSB first) **";
  } else {
    endianness = "** System is Little Endian, multi-bit symbols encoded as"
                 " little endian (LSB first) **";
  }
  ss << "\t" << endianness << "\n";

  // Process every byte in the data.
  size_t i = 0;
  for (; i < len; i++) {
    const size_t column = i % bytesPerLine;

    // Column 0 starts a new row: flush the previous row's ASCII text (if
    // any) and print the offset of the new row.
    if (column == 0) {
      if (i != 0) ss << " " << ascii << "\n";
      ss << " " << std::setw(8) << std::setfill('0') << std::hex << i << " ";
    }

    // Hex code for this byte (three characters: a space and two digits).
    ss << " " << std::setw(2) << std::setfill('0') << std::hex
       << static_cast<unsigned>(pc[i]);

    // Buffer the printable form for the ASCII column.
    const bool printable = (pc[i] >= 0x20) && (pc[i] <= 0x7e);
    ascii[column] = printable ? static_cast<char>(pc[i]) : '.';
    ascii[column + 1] = '\0';
  }

  // Pad out a short last row so its ASCII column lines up with the rows
  // above: every missing byte would have occupied three characters.
  while ((i % bytesPerLine) != 0) {
    ss << "   ";
    i++;
  }

  // And print the final ASCII buffer.
  ss << " " << ascii << "\n";
  LOG_DEBUG(ss);
}
// Detects the host byte order by inspecting the byte stored at the lowest
// address of an int holding 1. Returns true for big endian hosts (that
// byte is not 1) and false for little endian hosts.
bool isSystemBigEndian() {
  const int probe = 1;
  const char *first_byte = reinterpret_cast<const char *>(&probe);
  return *first_byte != 1;
}
} // namespace smi
} // namespace amd