Fix RAS change

The RAS sysfs output format changed, so parse both the old and the new
formats until the output is normalized.
Change-Id: I56f2a2495af8ff4d01011bc614283376afb9ad0a
이 커밋은 다음에 포함됨:
Kent Russell
2019-08-08 09:00:22 -04:00
부모 0da1599c4f
커밋 a34832f11e
3개의 변경된 파일, 14개의 추가 그리고 5개의 삭제
+2 -1
파일 보기
@@ -383,8 +383,9 @@ typedef enum {
RSMI_RAS_ERR_STATE_MULT_UC, //!< Multiple uncorrectable errors
RSMI_RAS_ERR_STATE_POISON, //!< Firmware detected error and isolated
//!< page. Treat as uncorrectable.
RSMI_RAS_ERR_STATE_ENABLED, //!< ECC is enabled
RSMI_RAS_ERR_STATE_LAST = RSMI_RAS_ERR_STATE_POISON,
RSMI_RAS_ERR_STATE_LAST = RSMI_RAS_ERR_STATE_ENABLED,
RSMI_RAS_ERR_STATE_INVALID = 0xFFFFFFFF
} rsmi_ras_err_state_t;
+7 -3
파일 보기
@@ -532,8 +532,10 @@ static const std::map<std::string, rsmi_ras_err_state_t> kRocmSMIStateMap = {
{"single_correctable", RSMI_RAS_ERR_STATE_SING_C},
{"multi_uncorrectable", RSMI_RAS_ERR_STATE_MULT_UC},
{"poison", RSMI_RAS_ERR_STATE_POISON},
{"off", RSMI_RAS_ERR_STATE_DISABLED},
{"on", RSMI_RAS_ERR_STATE_ENABLED},
};
static_assert(RSMI_RAS_ERR_STATE_LAST == RSMI_RAS_ERR_STATE_POISON,
static_assert(RSMI_RAS_ERR_STATE_LAST == RSMI_RAS_ERR_STATE_ENABLED,
"rsmi_gpu_block_t and/or above name map need to be updated"
" and then this assert");
@@ -562,6 +564,7 @@ rsmi_status_t rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block,
std::string blk_line;
std::string search_str = kRocmSMIBlockMap.at(block);
std::string sysfs_junk = " ras feature mask:";
std::string state_str;
search_str += ":";
@@ -570,8 +573,9 @@ rsmi_status_t rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block,
std::istringstream fs1(val_vec[i]);
fs1 >> blk_line;
if (blk_line == search_str) {
if (blk_line == search_str || blk_line == kRocmSMIBlockMap.at(block)) {
if (blk_line.back() != ':')
fs1.ignore(sysfs_junk.length(), ':');
fs1 >> state_str;
assert(kRocmSMIStateMap.count(state_str));
*state = kRocmSMIStateMap.at(state_str);
+5 -1
파일 보기
@@ -70,6 +70,8 @@ static const char * kRasErrStateStrings[] = {
"Single, Correctable", // RSMI_RAS_ERR_STATE_SING_C
"Multiple, Uncorrectable", // RSMI_RAS_ERR_STATE_MULT_UC
"Poison" // RSMI_RAS_ERR_STATE_POISON
"off", // RSMI_RAS_ERR_STATE_DISABLED
"on", // RSMI_RAS_ERR_STATE_ENABLED
};
static_assert(
sizeof(kRasErrStateStrings)/sizeof(char *) == (RSMI_RAS_ERR_STATE_LAST + 1),
@@ -89,8 +91,10 @@ static const std::map<rsmi_ras_err_state_t, const char *> kErrStateNameMap = {
kRasErrStateStrings[RSMI_RAS_ERR_STATE_MULT_UC]},
{RSMI_RAS_ERR_STATE_POISON,
kRasErrStateStrings[RSMI_RAS_ERR_STATE_POISON]},
{RSMI_RAS_ERR_STATE_ENABLED,
kRasErrStateStrings[RSMI_RAS_ERR_STATE_ENABLED]},
};
static_assert(RSMI_RAS_ERR_STATE_LAST == RSMI_RAS_ERR_STATE_POISON,
static_assert(RSMI_RAS_ERR_STATE_LAST == RSMI_RAS_ERR_STATE_ENABLED,
"kErrStateNameMap needs to be updated");
static const struct option long_options[] = {