Fix RAS change
The RAS sysfs output format changed, so handle both the old and new formats until the output is normalized.
Change-Id: I56f2a2495af8ff4d01011bc614283376afb9ad0a
이 커밋은 다음에 포함됨:
@@ -383,8 +383,9 @@ typedef enum {
|
||||
RSMI_RAS_ERR_STATE_MULT_UC, //!< Multiple uncorrectable errors
|
||||
RSMI_RAS_ERR_STATE_POISON, //!< Firmware detected error and isolated
|
||||
//!< page. Treat as uncorrectable.
|
||||
RSMI_RAS_ERR_STATE_ENABLED, //!< ECC is enabled
|
||||
|
||||
RSMI_RAS_ERR_STATE_LAST = RSMI_RAS_ERR_STATE_POISON,
|
||||
RSMI_RAS_ERR_STATE_LAST = RSMI_RAS_ERR_STATE_ENABLED,
|
||||
RSMI_RAS_ERR_STATE_INVALID = 0xFFFFFFFF
|
||||
} rsmi_ras_err_state_t;
|
||||
|
||||
|
||||
+7
-3
@@ -532,8 +532,10 @@ static const std::map<std::string, rsmi_ras_err_state_t> kRocmSMIStateMap = {
|
||||
{"single_correctable", RSMI_RAS_ERR_STATE_SING_C},
|
||||
{"multi_uncorrectable", RSMI_RAS_ERR_STATE_MULT_UC},
|
||||
{"poison", RSMI_RAS_ERR_STATE_POISON},
|
||||
{"off", RSMI_RAS_ERR_STATE_DISABLED},
|
||||
{"on", RSMI_RAS_ERR_STATE_ENABLED},
|
||||
};
|
||||
static_assert(RSMI_RAS_ERR_STATE_LAST == RSMI_RAS_ERR_STATE_POISON,
|
||||
static_assert(RSMI_RAS_ERR_STATE_LAST == RSMI_RAS_ERR_STATE_ENABLED,
|
||||
"rsmi_gpu_block_t and/or above name map need to be updated"
|
||||
" and then this assert");
|
||||
|
||||
@@ -562,6 +564,7 @@ rsmi_status_t rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block,
|
||||
|
||||
std::string blk_line;
|
||||
std::string search_str = kRocmSMIBlockMap.at(block);
|
||||
std::string sysfs_junk = " ras feature mask:";
|
||||
std::string state_str;
|
||||
|
||||
search_str += ":";
|
||||
@@ -570,8 +573,9 @@ rsmi_status_t rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block,
|
||||
std::istringstream fs1(val_vec[i]);
|
||||
|
||||
fs1 >> blk_line;
|
||||
|
||||
if (blk_line == search_str) {
|
||||
if (blk_line == search_str || blk_line == kRocmSMIBlockMap.at(block)) {
|
||||
if (blk_line.back() != ':')
|
||||
fs1.ignore(sysfs_junk.length(), ':');
|
||||
fs1 >> state_str;
|
||||
assert(kRocmSMIStateMap.count(state_str));
|
||||
*state = kRocmSMIStateMap.at(state_str);
|
||||
|
||||
@@ -70,6 +70,8 @@ static const char * kRasErrStateStrings[] = {
|
||||
"Single, Correctable", // RSMI_RAS_ERR_STATE_SING_C
|
||||
"Multiple, Uncorrectable", // RSMI_RAS_ERR_STATE_MULT_UC
|
||||
"Poison" // RSMI_RAS_ERR_STATE_POISON
|
||||
"off", // RSMI_RAS_ERR_STATE_DISABLED
|
||||
"on", // RSMI_RAS_ERR_STATE_ENABLED
|
||||
};
|
||||
static_assert(
|
||||
sizeof(kRasErrStateStrings)/sizeof(char *) == (RSMI_RAS_ERR_STATE_LAST + 1),
|
||||
@@ -89,8 +91,10 @@ static const std::map<rsmi_ras_err_state_t, const char *> kErrStateNameMap = {
|
||||
kRasErrStateStrings[RSMI_RAS_ERR_STATE_MULT_UC]},
|
||||
{RSMI_RAS_ERR_STATE_POISON,
|
||||
kRasErrStateStrings[RSMI_RAS_ERR_STATE_POISON]},
|
||||
{RSMI_RAS_ERR_STATE_ENABLED,
|
||||
kRasErrStateStrings[RSMI_RAS_ERR_STATE_ENABLED]},
|
||||
};
|
||||
static_assert(RSMI_RAS_ERR_STATE_LAST == RSMI_RAS_ERR_STATE_POISON,
|
||||
static_assert(RSMI_RAS_ERR_STATE_LAST == RSMI_RAS_ERR_STATE_ENABLED,
|
||||
"kErrStateNameMap needs to be updated");
|
||||
|
||||
static const struct option long_options[] = {
|
||||
|
||||
새 이슈에서 참조
사용자 차단