From 7a617e6ef2f1996e64c08b72d37d2f6a09a7dcfe Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Tue, 25 Jun 2024 10:03:33 -0500 Subject: [PATCH] Make the devInfoTypesStrings.at(type) exception safe Wrap it in a function to make it exception safe. Change-Id: I29835993ae4fe2b7aa1a7027fab88c05ba89e6e3 --- rocm_smi/include/rocm_smi/rocm_smi_device.h | 1 + rocm_smi/src/rocm_smi.cc | 53 ++++++++++----------- rocm_smi/src/rocm_smi_device.cc | 47 ++++++++++-------- rocm_smi/src/rocm_smi_main.cc | 2 +- 4 files changed, 56 insertions(+), 47 deletions(-) diff --git a/rocm_smi/include/rocm_smi/rocm_smi_device.h b/rocm_smi/include/rocm_smi/rocm_smi_device.h index 00b553d207..426a9ad017 100755 --- a/rocm_smi/include/rocm_smi/rocm_smi_device.h +++ b/rocm_smi/include/rocm_smi/rocm_smi_device.h @@ -261,6 +261,7 @@ class Device { AMGpuMetricsPublicLatestTupl_t dev_copy_internal_to_external_metrics(); static const std::map devInfoTypesStrings; + static const char* get_type_string(DevInfoTypes type); private: std::shared_ptr monitor_; diff --git a/rocm_smi/src/rocm_smi.cc b/rocm_smi/src/rocm_smi.cc index 2813124951..12e269590f 100755 --- a/rocm_smi/src/rocm_smi.cc +++ b/rocm_smi/src/rocm_smi.cc @@ -83,7 +83,6 @@ using amd::smi::monitorTypesToString; using amd::smi::getRSMIStatusString; using amd::smi::AMDGpuMetricsUnitType_t; using amd::smi::AMDGpuMetricTypeId_t; -auto &devInfoTypesStrings = amd::smi::Device::devInfoTypesStrings; static const uint32_t kMaxOverdriveLevel = 20; static const float kEnergyCounterResolution = 15.3F; @@ -3849,7 +3848,7 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | inside success fallback... 
" << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << devInfoTypesStrings.at(mem_type_file) + << " | Type = " << amd::smi::Device::get_type_string(mem_type_file) << " | Data: total = " << std::to_string(*total) << " | ret = " << getRSMIStatusString(RSMI_STATUS_SUCCESS); LOG_DEBUG(ss); @@ -3860,7 +3859,7 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | after fallback... " << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << devInfoTypesStrings.at(mem_type_file) + << " | Type = " << amd::smi::Device::get_type_string(mem_type_file) << " | Data: total = " << std::to_string(*total) << " | ret = " << getRSMIStatusString(ret); LOG_DEBUG(ss); @@ -3929,7 +3928,7 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " no fallback needed! - " << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << devInfoTypesStrings.at(mem_type_file) + << " | Type = " << amd::smi::Device::get_type_string(mem_type_file) << " | Data: Used = " << std::to_string(*used) << " | Data: total = " << std::to_string(total) << " | ret = " << getRSMIStatusString(ret); @@ -3940,7 +3939,7 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | in fallback == success ..." << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << devInfoTypesStrings.at(mem_type_file) + << " | Type = " << amd::smi::Device::get_type_string(mem_type_file) << " | Data: Used = " << std::to_string(*used) << " | Data: total = " << std::to_string(total) << " | ret = " << getRSMIStatusString(RSMI_STATUS_SUCCESS); @@ -3951,7 +3950,7 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | at end!!!! after fallback ..." 
<< " | Device #: " << std::to_string(dv_ind) - << " | Type = " << devInfoTypesStrings.at(mem_type_file) + << " | Type = " << amd::smi::Device::get_type_string(mem_type_file) << " | Data: Used = " << std::to_string(*used) << " | ret = " << getRSMIStatusString(ret); LOG_DEBUG(ss); @@ -5234,7 +5233,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Cause: len was 0 or compute_partition variable was null" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |"; @@ -5253,7 +5252,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Cause: could not retrieve current compute partition" << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5270,7 +5269,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Cause: requested size was insufficient" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |"; @@ -5282,7 +5281,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Success " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << compute_partition << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5342,7 +5341,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Fail " << " | 
Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << newComputePartitionStr << " | Cause: requested setting was invalid" << " | Returning = " @@ -5361,7 +5360,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << newComputePartitionStr << " | Cause: not an available compute partition setting" << " | Returning = " @@ -5381,7 +5380,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Cause: could retrieve current compute partition or retrieved" << " unexpected data" << " | Returning = " @@ -5397,7 +5396,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Success - compute partition was already set at requested value" << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << newComputePartitionStr << " | Returning = " << getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |"; @@ -5423,7 +5422,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Success " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << newComputePartitionStr << " | Returning = " << getRSMIStatusString(returnResponse) << " |"; @@ -5495,7 +5494,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << 
devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: device board name does not support this action" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_NOT_SUPPORTED) << " |"; @@ -5516,7 +5515,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: requested setting was invalid" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |"; @@ -5537,7 +5536,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: could retrieve current memory partition or retrieved" << " unexpected data" << " | Returning = " @@ -5554,7 +5553,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " setting" << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Data: " << newMemoryPartition << " | Returning = " << getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |"; @@ -5576,7 +5575,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: issue writing reqested setting of " + newMemoryPartition << " | Returning = " << getRSMIStatusString(err) << " |"; @@ -5590,7 +5589,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Success - if restart completed successfully" << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << 
amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Data: " << newMemoryPartition << " | Returning = " << getRSMIStatusString(restartRet) << " |"; @@ -5612,7 +5611,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: user sent invalid arguments, len = 0 or memory partition" << " was a null ptr" << " | Returning = " @@ -5632,7 +5631,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: could not successfully retrieve current memory partition " << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5650,7 +5649,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: could not successfully retrieve current memory partition " << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5662,7 +5661,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Success " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Data: " << memory_partition << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5701,7 +5700,7 @@ rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind) { << " | Success - if original boot state was not unknown or valid setting" << " | Device #: " << dv_ind << " | Type: " - << 
devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << bootState << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5740,7 +5739,7 @@ rsmi_status_t rsmi_dev_memory_partition_reset(uint32_t dv_ind) { << " | Success - if original boot state was not unknown or valid setting" << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Data: " << bootState << " | Returning = " << getRSMIStatusString(ret) << " |"; diff --git a/rocm_smi/src/rocm_smi_device.cc b/rocm_smi/src/rocm_smi_device.cc index 5eafc45555..e0ebe8a055 100755 --- a/rocm_smi/src/rocm_smi_device.cc +++ b/rocm_smi/src/rocm_smi_device.cc @@ -746,7 +746,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) { if (ret != 0) { ss << __PRETTY_FUNCTION__ << " | Issue: File did not exist - SYSFS file (" << sysfs_path - << ") for DevInfoInfoType (" << devInfoTypesStrings.at(type) + << ") for DevInfoInfoType (" << get_type_string(type) << "), returning " << std::to_string(ret); LOG_ERROR(ss); return ret; @@ -755,7 +755,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) { ss << __PRETTY_FUNCTION__ << " | Issue: File is not a regular file - SYSFS file (" << sysfs_path << ") for " - << "DevInfoInfoType (" << devInfoTypesStrings.at(type) << ")," + << "DevInfoInfoType (" << get_type_string(type) << ")," << " returning ENOENT (" << std::strerror(ENOENT) << ")"; LOG_ERROR(ss); return ENOENT; @@ -766,7 +766,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) { if (!fs->is_open()) { ss << __PRETTY_FUNCTION__ << " | Issue: Could not open - SYSFS file (" << sysfs_path << ") for " - << "DevInfoInfoType (" << devInfoTypesStrings.at(type) << "), " + << "DevInfoInfoType (" << get_type_string(type) << "), " << ", returning " << 
std::to_string(errno) << " (" << std::strerror(errno) << ")"; LOG_ERROR(ss); @@ -775,7 +775,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) { ss << __PRETTY_FUNCTION__ << " | Successfully opened SYSFS file (" << sysfs_path - << ") for DevInfoInfoType (" << devInfoTypesStrings.at(type) + << ") for DevInfoInfoType (" << get_type_string(type) << ")"; LOG_INFO(ss); return 0; @@ -792,7 +792,7 @@ int Device::readDebugInfoStr(DevInfoTypes type, std::string *retStr) { ret = openDebugFileStream(type, &fs); if (ret != 0) { ss << "Could not read debugInfoStr for DevInfoType (" - << devInfoTypesStrings.at(type)<< "), returning " + << get_type_string(type)<< "), returning " << std::to_string(ret); LOG_ERROR(ss); return ret; @@ -806,7 +806,7 @@ int Device::readDebugInfoStr(DevInfoTypes type, std::string *retStr) { fs.close(); ss << "Successfully read debugInfoStr for DevInfoType (" - << devInfoTypesStrings.at(type)<< "), retString= " << *retStr; + << get_type_string(type)<< "), retString= " << *retStr; LOG_INFO(ss); return 0; @@ -822,7 +822,7 @@ int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) { ret = openSysfsFileStream(type, &fs); if (ret != 0) { ss << "Could not read device info string for DevInfoType (" - << devInfoTypesStrings.at(type) << "), returning " + << get_type_string(type) << "), returning " << std::to_string(ret); LOG_ERROR(ss); return ret; @@ -832,7 +832,7 @@ int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) { fs.close(); ss << __PRETTY_FUNCTION__ << "Successfully read device info string for DevInfoType (" << - devInfoTypesStrings.at(type) << "): " + *retStr + get_type_string(type) << "): " + *retStr << " | " << (fs.is_open() ? " File stream is opened" : " File stream is closed") << " | " << (fs.bad() ? 
"[ERROR] Bad read operation" : @@ -867,7 +867,7 @@ int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr, fs.close(); ss << __PRETTY_FUNCTION__ << " | Issue: Could not open fileStream; " << "Could not write device info string (" << valStr - << ") for DevInfoType (" << devInfoTypesStrings.at(type) + << ") for DevInfoType (" << get_type_string(type) << "), returning " << std::to_string(ret); LOG_ERROR(ss); return ret; @@ -878,7 +878,7 @@ int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr, fs.flush(); fs.close(); ss << "Successfully wrote device info string (" << valStr - << ") for DevInfoType (" << devInfoTypesStrings.at(type) + << ") for DevInfoType (" << get_type_string(type) << "), returning RSMI_STATUS_SUCCESS"; LOG_INFO(ss); ret = RSMI_STATUS_SUCCESS; @@ -892,7 +892,7 @@ int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr, fs.close(); ss << __PRETTY_FUNCTION__ << " | Issue: Could not write to file; " << "Could not write device info string (" << valStr - << ") for DevInfoType (" << devInfoTypesStrings.at(type) + << ") for DevInfoType (" << get_type_string(type) << "), returning " << getRSMIStatusString(ErrnoToRsmiStatus(ret)); ss << " | " << (fs.is_open() ? 
"[ERROR] File stream open" : @@ -983,20 +983,29 @@ int Device::readDevInfoLine(DevInfoTypes type, std::string *line) { ret = openSysfsFileStream(type, &fs); if (ret != 0) { ss << "Could not read DevInfoLine for DevInfoType (" - << devInfoTypesStrings.at(type) << ")"; + << get_type_string(type) << ")"; LOG_ERROR(ss); return ret; } std::getline(fs, *line); ss << "Successfully read DevInfoLine for DevInfoType (" - << devInfoTypesStrings.at(type) << "), returning *line = " + << get_type_string(type) << "), returning *line = " << *line; LOG_INFO(ss); return 0; } +const char* Device::get_type_string(DevInfoTypes type) { + auto ite = devInfoTypesStrings.find(type); + if (ite != devInfoTypesStrings.end()) { + return ite->second; + } + + return "Unknown"; + +} int Device::readDevInfoBinary(DevInfoTypes type, std::size_t b_size, void *p_binary_data) { auto sysfs_path = path_; @@ -1009,7 +1018,7 @@ int Device::readDevInfoBinary(DevInfoTypes type, std::size_t b_size, ptr = fopen(sysfs_path.c_str(), "rb"); if (!ptr) { ss << "Could not read DevInfoBinary for DevInfoType (" - << devInfoTypesStrings.at(type) << ")" + << get_type_string(type) << ")" << " - SYSFS (" << sysfs_path << ")" << ", returning " << std::to_string(errno) << " (" << std::strerror(errno) << ")"; @@ -1021,7 +1030,7 @@ int Device::readDevInfoBinary(DevInfoTypes type, std::size_t b_size, fclose(ptr); if ((num*b_size) != b_size) { ss << "Could not read DevInfoBinary for DevInfoType (" - << devInfoTypesStrings.at(type) << ") - SYSFS (" + << get_type_string(type) << ") - SYSFS (" << sysfs_path << "), binary size error; " << "[buff: " << p_binary_data @@ -1035,7 +1044,7 @@ int Device::readDevInfoBinary(DevInfoTypes type, std::size_t b_size, return ENOENT; } ss << "Successfully read DevInfoBinary for DevInfoType (" - << devInfoTypesStrings.at(type) << ") - SYSFS (" + << get_type_string(type) << ") - SYSFS (" << sysfs_path << "), returning binaryData = " << p_binary_data << "; byte_size = " << std::dec << 
static_cast(b_size); @@ -1067,7 +1076,7 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type, if (retVec->empty()) { ss << "Read devInfoMultiLineStr for DevInfoType (" - << devInfoTypesStrings.at(type) << ")" + << get_type_string(type) << ")" << ", but contained no string lines"; LOG_ERROR(ss); return ENXIO; @@ -1085,12 +1094,12 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type, if (!allLines.empty()) { ss << "Successfully read devInfoMultiLineStr for DevInfoType (" - << devInfoTypesStrings.at(type) << ") " + << get_type_string(type) << ") " << ", returning lines read = " << allLines; LOG_INFO(ss); } else { ss << "Read devInfoMultiLineStr for DevInfoType (" - << devInfoTypesStrings.at(type) << ")" + << get_type_string(type) << ")" << ", but lines were empty"; LOG_INFO(ss); return ENXIO; diff --git a/rocm_smi/src/rocm_smi_main.cc b/rocm_smi/src/rocm_smi_main.cc index 3b27d6aee9..4c6b019009 100755 --- a/rocm_smi/src/rocm_smi_main.cc +++ b/rocm_smi/src/rocm_smi_main.cc @@ -560,7 +560,7 @@ std::string RocmSMI::getRSMIEnvVarInfo(void) { for (auto it=env_vars_.enum_overrides.begin(); it != env_vars_.enum_overrides.end(); ++it) { DevInfoTypes type = static_cast(*it); - ss << (std::to_string(*it) + " (" + Device::devInfoTypesStrings.at(type) + ")"); + ss << (std::to_string(*it) + " (" + Device::get_type_string(type) + ")"); auto temp_it = it; if(++temp_it != env_vars_.enum_overrides.end()) { ss << ", ";