From d54164d73388a006a9c9e126d24c70dd06bb02f4 Mon Sep 17 00:00:00 2001 From: Charis Poag Date: Wed, 4 Oct 2023 16:18:32 -0500 Subject: [PATCH] Add rsmi_dev_power_get * Updates: - [API] Added rsmi_dev_power_get(uint32_t dv_ind, uint64_t *power, RSMI_POWER_TYPE *type) provides generic get to average or current power & provides backwards compatibility - Added a utility function to get MonitorTypes (monitor_type_string(type)) & RSMI_POWER_TYPE (power_type_string(type)) strings - [Tests] Added rsmi_dev_power_get tests and provided better verification of return values for all power APIs - [Tests] Updated power outputs to show correct units - [example] Now uses avg, current, and generic power functions with type output response Change-Id: I5ca06ca37fd5f61e100f2835b664d6cdd1ca42e6 Signed-off-by: Charis Poag [ROCm/amdsmi commit: 31a1fcce7d3f6c6fae8f69671329f8e3fdb0d692] --- projects/amdsmi/include/rocm_smi/rocm_smi.h | 43 +++++++ .../amdsmi/include/rocm_smi/rocm_smi_utils.h | 2 + .../rocm_smi/example/rocm_smi_example.cc | 32 ++++- projects/amdsmi/src/rocm_smi.cc | 111 +++++++++++++----- projects/amdsmi/src/rocm_smi_utils.cc | 94 ++++++++++++++- .../rocm_smi_test/functional/power_read.cc | 54 +++++++-- .../rocm_smi_test/functional/power_read.h | 2 +- .../amdsmi/tests/rocm_smi_test/test_common.cc | 5 + .../amdsmi/tests/rocm_smi_test/test_common.h | 2 + 9 files changed, 294 insertions(+), 51 deletions(-) diff --git a/projects/amdsmi/include/rocm_smi/rocm_smi.h b/projects/amdsmi/include/rocm_smi/rocm_smi.h index 900dcee0f2..a3396ae4a2 100755 --- a/projects/amdsmi/include/rocm_smi/rocm_smi.h +++ b/projects/amdsmi/include/rocm_smi/rocm_smi.h @@ -697,6 +697,15 @@ typedef enum { RSMI_UTILIZATION_COUNTER_LAST = RSMI_COARSE_GRAIN_MEM_ACTIVITY } RSMI_UTILIZATION_COUNTER_TYPE; +/** + * @brief Power types + */ +typedef enum { + RSMI_AVERAGE_POWER = 0, //!< Average Power + RSMI_CURRENT_POWER, //!< Current / Instant Power + RSMI_INVALID_POWER = 0xFFFFFFFF //!< Invalid / Undetected Power 
+} RSMI_POWER_TYPE; + /** * @brief The utilization counter data */ @@ -1729,6 +1738,40 @@ rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power); rsmi_status_t rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power); +/** + * @brief A generic get which attempts to retrieve current socket power + * (also known as instant power) of the device index provided, if not + * supported tries to get average power consumed by device. Current + * socket power is typically supported by newer devices, whereas average + * power is generally reported on older devices. This function + * aims to provide backwards compatibility depending on device support. + * + * @details Given a device index @p dv_ind, a pointer to a uint64_t + * @p power, and @p type this function will write the current socket or + * average power (in microwatts) to the uint64_t pointed to by @p power and + * the power type read to the RSMI_POWER_TYPE pointed to by @p type. + * + * @param[in] dv_ind a device index + * + * @param[inout] power a pointer to uint64_t to which the current or average + * power will be written to. If this parameter is nullptr, + * this function will return ::RSMI_STATUS_INVALID_ARGS if the function is + * supported with the provided arguments, and ::RSMI_STATUS_NOT_SUPPORTED + * if it is not supported with the provided arguments. + * + * @param[inout] type a pointer to RSMI_POWER_TYPE object. Returns the type + * of power retrieved from the device. Current power is ::RSMI_CURRENT_POWER + * and average power is ::RSMI_AVERAGE_POWER. If an error occurs, + * returns an invalid power type ::RSMI_INVALID_POWER. 
+ * + * @retval ::RSMI_STATUS_SUCCESS call was successful + * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not + * support this function with the given arguments + * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid + */ +rsmi_status_t rsmi_dev_power_get(uint32_t dv_ind, uint64_t *power, + RSMI_POWER_TYPE *type); + /** * @brief Get the energy accumulator counter of the device with provided * device index. diff --git a/projects/amdsmi/include/rocm_smi/rocm_smi_utils.h b/projects/amdsmi/include/rocm_smi/rocm_smi_utils.h index 49a3521dc1..9c1ef0290d 100755 --- a/projects/amdsmi/include/rocm_smi/rocm_smi_utils.h +++ b/projects/amdsmi/include/rocm_smi/rocm_smi_utils.h @@ -110,6 +110,8 @@ bool isSystemBigEndian(); std::string getBuildType(); std::string getMyLibPath(); int subDirectoryCountInPath(const std::string path); +std::string monitor_type_string(amd::smi::MonitorTypes type); +std::string power_type_string(RSMI_POWER_TYPE type); template std::string print_int_as_hex(T i, bool showHexNotation=true) { std::stringstream ss; diff --git a/projects/amdsmi/rocm_smi/example/rocm_smi_example.cc b/projects/amdsmi/rocm_smi/example/rocm_smi_example.cc index 45e79b07fd..a2df8e66ee 100755 --- a/projects/amdsmi/rocm_smi/example/rocm_smi_example.cc +++ b/projects/amdsmi/rocm_smi/example/rocm_smi_example.cc @@ -53,14 +53,13 @@ #include #include "rocm_smi/rocm_smi.h" +#include "rocm_smi/rocm_smi_utils.h" #define PRINT_RSMI_ERR(RET) { \ if (RET != RSMI_STATUS_SUCCESS) { \ - const char *err_str; \ std::cout << "[ERROR] RSMI call returned " << (RET) \ - << " at line " << __LINE__ << "\n"; \ - rsmi_status_string((RET), &err_str); \ - std::cout << err_str << "\n"; \ + << " at line " << __LINE__ << std::endl; \ + std::cout << amd::smi::getRSMIStatusString(RET) << std::endl; \ } \ } @@ -696,6 +695,10 @@ static rsmi_status_t test_set_memory_partition(uint32_t dv_ind) { return RSMI_STATUS_SUCCESS; } +template constexpr float convert_mw_to_w(T 
mw) { + return static_cast(mw / 1000.0); +} + int main() { rsmi_status_t ret; @@ -711,9 +714,11 @@ int main() { rsmi_frequencies_t f; uint32_t num_monitor_devs = 0; rsmi_gpu_metrics_t p; + RSMI_POWER_TYPE power_type = RSMI_INVALID_POWER; rsmi_num_monitor_devices(&num_monitor_devs); for (uint32_t i = 0; i < num_monitor_devs; ++i) { + std::cout << "\t**Device #: " << std::dec << i << std::endl; ret = rsmi_dev_id_get(i, &val_ui16); CHK_RSMI_RET_I(ret) std::cout << "\t**Device ID: 0x" << std::hex << val_ui16 << "\n"; @@ -816,7 +821,7 @@ int main() { ret = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_JUNCTION, rsmi_temperature_metric_t::RSMI_TEMP_CURRENT, &val_i64); if (ret == RSMI_STATUS_SUCCESS) { - std::cout << val_i64/1000 << "C" << "\n"; + std::cout << (val_i64 / 1000) << "C" << std::endl; } CHK_RSMI_NOT_SUPPORTED_RET(ret) @@ -864,7 +869,22 @@ int main() { std::cout << "\t**Average Power Usage: "; ret = rsmi_dev_power_ave_get(i, 0, &val_ui64); if (ret == RSMI_STATUS_SUCCESS) { - std::cout << static_cast(val_ui64)/1000 << " W" << "\n"; + std::cout << convert_mw_to_w(val_ui64) << " W" << std::endl; + } + CHK_RSMI_NOT_SUPPORTED_RET(ret) + + std::cout << "\t**Current Socket Power Usage: "; + ret = rsmi_dev_current_socket_power_get(i, &val_ui64); + if (ret == RSMI_STATUS_SUCCESS) { + std::cout << convert_mw_to_w(val_ui64) << " W" << std::endl; + } + CHK_RSMI_NOT_SUPPORTED_RET(ret) + + std::cout << "\t**Generic Power Usage: "; + ret = rsmi_dev_power_get(i, &val_ui64, &power_type); + if (ret == RSMI_STATUS_SUCCESS) { + std::cout << "[" << amd::smi::power_type_string(power_type) << "] " + << convert_mw_to_w(val_ui64) << " W" << std::endl; } CHK_RSMI_NOT_SUPPORTED_RET(ret) std::cout << "\t=======" << "\n"; diff --git a/projects/amdsmi/src/rocm_smi.cc b/projects/amdsmi/src/rocm_smi.cc index 3f09b83683..2ee44473ad 100755 --- a/projects/amdsmi/src/rocm_smi.cc +++ b/projects/amdsmi/src/rocm_smi.cc @@ -61,7 +61,6 @@ #include #include #include -#include #include #include @@ -77,7 
+76,9 @@ #include "rocm_smi/rocm_smi64Config.h" #include "rocm_smi/rocm_smi_logger.h" -using namespace amd::smi; +using amd::smi::monitorTypesToString; +using amd::smi::getRSMIStatusString; +auto &devInfoTypesStrings = amd::smi::RocmSMI::devInfoTypesStrings; static const uint32_t kMaxOverdriveLevel = 20; static const float kEnergyCounterResolution = 15.3F; @@ -2838,7 +2839,7 @@ rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power) { rsmi_status_t rsmiReturn = RSMI_STATUS_NOT_SUPPORTED; std::string val_str; uint32_t sensor_ind = 1; // socket_power sysfs files have 1-based indices - MonitorTypes mon_type = amd::smi::kMonPowerInput; + amd::smi::MonitorTypes mon_type = amd::smi::kMonPowerInput; ss << __PRETTY_FUNCTION__ << " | ======= start =======, dv_ind=" << std::to_string(dv_ind); LOG_TRACE(ss); @@ -2905,6 +2906,58 @@ rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power) { CATCH } +rsmi_status_t rsmi_dev_power_get(uint32_t dv_ind, uint64_t *power, + RSMI_POWER_TYPE *type) { + TRY + std::ostringstream ss; + ss << __PRETTY_FUNCTION__ << " | ======= start =======, dv_ind=" + << std::to_string(dv_ind); + LOG_TRACE(ss); + rsmi_status_t ret = RSMI_STATUS_NOT_SUPPORTED; + RSMI_POWER_TYPE temp_power_type = RSMI_INVALID_POWER; + uint64_t temp_power = 0; + + if (type == nullptr || power == nullptr) { + ret = RSMI_STATUS_INVALID_ARGS; + ss << __PRETTY_FUNCTION__ + << " | ======= end ======= " + << " | Fail " + << " | Device #: " << dv_ind + << " | Type: " << amd::smi::power_type_string(temp_power_type) + << " | Cause: power or monitor type was a null ptr reference" + << " | Returning = " + << getRSMIStatusString(ret) << " |"; + LOG_ERROR(ss); + return ret; + } + + // only change return value on success, invalid otherwise + rsmi_status_t temp_ret = rsmi_dev_current_socket_power_get(dv_ind, &temp_power); + if (temp_ret == RSMI_STATUS_SUCCESS) { + temp_power_type = RSMI_CURRENT_POWER; + ret = temp_ret; + } else { + temp_ret = 
rsmi_dev_power_ave_get(dv_ind, 0, &temp_power); + if (temp_ret == RSMI_STATUS_SUCCESS) { + temp_power_type = RSMI_AVERAGE_POWER; + ret = temp_ret; + } + } + *power = temp_power; + *type = temp_power_type; + ss << __PRETTY_FUNCTION__ + << " | ======= end ======= " + << " | Success " + << " | Device #: " << dv_ind + << " | Type: " << amd::smi::power_type_string(temp_power_type) + << " | Data: " << *power + << " | Returning = " + << getRSMIStatusString(ret) << " |"; + LOG_TRACE(ss); + return ret; + CATCH +} + rsmi_status_t rsmi_dev_energy_count_get(uint32_t dv_ind, uint64_t *power, float *counter_resolution, uint64_t *timestamp) { @@ -3106,7 +3159,7 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | inside success fallback... " << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << RocmSMI::devInfoTypesStrings.at(mem_type_file) + << " | Type = " << devInfoTypesStrings.at(mem_type_file) << " | Data: total = " << std::to_string(*total) << " | ret = " << getRSMIStatusString(RSMI_STATUS_SUCCESS); LOG_DEBUG(ss); @@ -3117,7 +3170,7 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | after fallback... " << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << RocmSMI::devInfoTypesStrings.at(mem_type_file) + << " | Type = " << devInfoTypesStrings.at(mem_type_file) << " | Data: total = " << std::to_string(*total) << " | ret = " << getRSMIStatusString(ret); LOG_DEBUG(ss); @@ -3166,7 +3219,7 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " no fallback needed! 
- " << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << RocmSMI::devInfoTypesStrings.at(mem_type_file) + << " | Type = " << devInfoTypesStrings.at(mem_type_file) << " | Data: Used = " << std::to_string(*used) << " | Data: total = " << std::to_string(total) << " | ret = " << getRSMIStatusString(ret); @@ -3177,7 +3230,7 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | in fallback == success ..." << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << RocmSMI::devInfoTypesStrings.at(mem_type_file) + << " | Type = " << devInfoTypesStrings.at(mem_type_file) << " | Data: Used = " << std::to_string(*used) << " | Data: total = " << std::to_string(total) << " | ret = " << getRSMIStatusString(RSMI_STATUS_SUCCESS); @@ -3188,7 +3241,7 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | at end!!!! after fallback ..." << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << RocmSMI::devInfoTypesStrings.at(mem_type_file) + << " | Type = " << devInfoTypesStrings.at(mem_type_file) << " | Data: Used = " << std::to_string(*used) << " | ret = " << getRSMIStatusString(ret); LOG_DEBUG(ss); @@ -4444,7 +4497,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << devInfoTypesStrings.at(amd::smi::kDevComputePartition) << " | Cause: len was 0 or compute_partition variable was null" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |"; @@ -4463,7 +4516,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << devInfoTypesStrings.at(amd::smi::kDevComputePartition) << " | Cause: could not retrieve current compute partition" 
<< " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -4480,7 +4533,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << devInfoTypesStrings.at(amd::smi::kDevComputePartition) << " | Cause: requested size was insufficient" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |"; @@ -4492,7 +4545,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Success " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << devInfoTypesStrings.at(amd::smi::kDevComputePartition) << " | Data: " << compute_partition << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -4549,7 +4602,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << devInfoTypesStrings.at(amd::smi::kDevComputePartition) << " | Cause: requested setting was invalid" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |"; @@ -4567,7 +4620,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << devInfoTypesStrings.at(amd::smi::kDevComputePartition) << " | Cause: not an available compute partition setting" << " | Returning = " << getRSMIStatusString(available_ret) << " |"; @@ -4586,7 +4639,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << devInfoTypesStrings.at(amd::smi::kDevComputePartition) << " | Cause: could retrieve current compute partition or retrieved" << " unexpected data" << " | Returning = " @@ -4602,7 
+4655,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Success - compute partition was already set at requested value" << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << devInfoTypesStrings.at(amd::smi::kDevComputePartition) << " | Data: " << newComputePartitionStr << " | Returning = " << getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |"; @@ -4619,7 +4672,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Success " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << devInfoTypesStrings.at(amd::smi::kDevComputePartition) << " | Data: " << newComputePartitionStr << " | Returning = " << getRSMIStatusString(returnResponse) << " |"; @@ -4691,7 +4744,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) << " | Cause: device board name does not support this action" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |"; @@ -4716,7 +4769,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) << " | Cause: requested setting was invalid" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |"; @@ -4734,7 +4787,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) << " | Cause: could retrieve current memory partition or retrieved" << " unexpected data" << " | Returning = " @@ -4751,7 +4804,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " 
setting" << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) << " | Data: " << newMemoryPartition << " | Returning = " << getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |"; @@ -4770,7 +4823,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) << " | Cause: issue writing reqested setting of " + newMemoryPartition << " | Returning = " << getRSMIStatusString(err) << " |"; @@ -4784,7 +4837,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Success - if restart completed successfully" << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) << " | Data: " << newMemoryPartition << " | Returning = " << getRSMIStatusString(restartRet) << " |"; @@ -4806,7 +4859,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) << " | Cause: user sent invalid arguments, len = 0 or memory partition" << " was a null ptr" << " | Returning = " @@ -4826,7 +4879,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) << " | Cause: could not successfully retrieve current memory partition " << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -4844,7 +4897,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Fail " << " | Device #: " << 
dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) << " | Cause: could not successfully retrieve current memory partition " << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -4856,7 +4909,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Success " << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) << " | Data: " << memory_partition << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -4895,7 +4948,7 @@ rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind) { << " | Success - if original boot state was not unknown or valid setting" << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << devInfoTypesStrings.at(amd::smi::kDevComputePartition) << " | Data: " << bootState << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -4934,7 +4987,7 @@ rsmi_status_t rsmi_dev_memory_partition_reset(uint32_t dv_ind) { << " | Success - if original boot state was not unknown or valid setting" << " | Device #: " << dv_ind << " | Type: " - << RocmSMI::devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) << " | Data: " << bootState << " | Returning = " << getRSMIStatusString(ret) << " |"; diff --git a/projects/amdsmi/src/rocm_smi_utils.cc b/projects/amdsmi/src/rocm_smi_utils.cc index 66944ba524..db11f0645c 100755 --- a/projects/amdsmi/src/rocm_smi_utils.cc +++ b/projects/amdsmi/src/rocm_smi_utils.cc @@ -48,7 +48,6 @@ #include #include #include -#include #include #include @@ -63,7 +62,6 @@ #include #include #include -#include #include #include "rocm_smi/rocm_smi.h" @@ -884,5 +882,97 @@ int subDirectoryCountInPath(const std::string path) { return dir_count; } +std::string 
monitor_type_string(MonitorTypes type) { + const std::map monitorTypesToString{ + {kMonName, + "MonitorTypes::kMonName"}, + {kMonTemp, + "MonitorTypes::kMonTemp"}, + {kMonFanSpeed, + "MonitorTypes::kMonFanSpeed"}, + {kMonMaxFanSpeed, + "MonitorTypes::kMonMaxFanSpeed"}, + {kMonFanRPMs, + "MonitorTypes::kMonFanRPMs"}, + {kMonFanCntrlEnable, + "MonitorTypes::kMonFanCntrlEnable"}, + {kMonPowerCap, + "MonitorTypes::kMonPowerCap"}, + {kMonPowerCapDefault, + "MonitorTypes::kMonPowerCapDefault"}, + {kMonPowerCapMax, + "MonitorTypes::kMonPowerCapMax"}, + {kMonPowerCapMin, + "MonitorTypes::kMonPowerCapMin"}, + {kMonPowerAve, + "MonitorTypes::kMonPowerAve"}, + {kMonPowerInput, + "MonitorTypes::kMonPowerInput"}, + {kMonPowerLabel, + "MonitorTypes::kMonPowerLabel"}, + {kMonTempMax, + "MonitorTypes::kMonTempMax"}, + {kMonTempMin, + "MonitorTypes::kMonTempMin"}, + {kMonTempMaxHyst, + "MonitorTypes::kMonTempMaxHyst"}, + {kMonTempMinHyst, + "MonitorTypes::kMonTempMinHyst"}, + {kMonTempCritical, + "MonitorTypes::kMonTempCritical"}, + {kMonTempCriticalHyst, + "MonitorTypes::kMonTempCriticalHyst"}, + {kMonTempEmergency, + "MonitorTypes::kMonTempEmergency"}, + {kMonTempEmergencyHyst, + "MonitorTypes::kMonTempEmergencyHyst"}, + {kMonTempCritMin, + "MonitorTypes::kMonTempCritMin"}, + {kMonTempCritMinHyst, + "MonitorTypes::kMonTempCritMinHyst"}, + {kMonTempOffset, + "MonitorTypes::kMonTempOffset"}, + {kMonTempLowest, + "MonitorTypes::kMonTempLowest"}, + {kMonTempHighest, + "MonitorTypes::kMonTempHighest"}, + {kMonTempLabel, + "MonitorTypes::kMonTempLabel"}, + {kMonVolt, + "MonitorTypes::kMonVolt"}, + {kMonVoltMax, + "MonitorTypes::kMonVoltMax"}, + {kMonVoltMinCrit, + "MonitorTypes::kMonVoltMinCrit"}, + {kMonVoltMin, + "MonitorTypes::kMonVoltMin"}, + {kMonVoltMaxCrit, + "MonitorTypes::kMonVoltMaxCrit"}, + {kMonVoltAverage, + "MonitorTypes::kMonVoltAverage"}, + {kMonVoltLowest, + "MonitorTypes::kMonVoltLowest"}, + {kMonVoltHighest, + "MonitorTypes::kMonVoltHighest"}, + {kMonVoltLabel, + 
"MonitorTypes::kMonVoltLabel"}, + {kMonInvalid, + "MonitorTypes::kMonInvalid"}, + }; + return monitorTypesToString.at(type); +} + +std::string power_type_string(RSMI_POWER_TYPE type) { + const std::map powerTypesToString{ + {RSMI_AVERAGE_POWER, + "RSMI_POWER_TYPE::RSMI_AVERAGE_POWER"}, + {RSMI_CURRENT_POWER, + "RSMI_POWER_TYPE::RSMI_CURRENT_POWER"}, + {RSMI_INVALID_POWER, + "RSMI_POWER_TYPE::RSMI_INVALID_POWER"}, + }; + return powerTypesToString.at(type); +} + } // namespace smi } // namespace amd diff --git a/projects/amdsmi/tests/rocm_smi_test/functional/power_read.cc b/projects/amdsmi/tests/rocm_smi_test/functional/power_read.cc index 641ddde00e..f379fd48c8 100755 --- a/projects/amdsmi/tests/rocm_smi_test/functional/power_read.cc +++ b/projects/amdsmi/tests/rocm_smi_test/functional/power_read.cc @@ -5,7 +5,7 @@ * The University of Illinois/NCSA * Open Source License (NCSA) * - * Copyright (c) 2019, Advanced Micro Devices, Inc. + * Copyright (c) 2019-2023, Advanced Micro Devices, Inc. * All rights reserved. 
* * Developed by: @@ -89,6 +89,7 @@ void TestPowerRead::Close() { void TestPowerRead::Run(void) { rsmi_status_t err; uint64_t val_ui64, val2_ui64; + RSMI_POWER_TYPE type = RSMI_INVALID_POWER; TestBase::Run(); if (setup_failed_) { @@ -119,44 +120,71 @@ void TestPowerRead::Run(void) { /* Average Power */ err = rsmi_dev_power_ave_get(i, 0, &val_ui64); + + ASSERT_TRUE(err == RSMI_STATUS_SUCCESS + || err == RSMI_STATUS_NOT_SUPPORTED); if (err == RSMI_STATUS_NOT_SUPPORTED) { std::cout << "\t**Average Power Usage: not supported on this device" << std::endl; } else { + CHK_RSMI_PERM_ERR(err) IF_VERB(STANDARD) { std::cout << "\t**Average Power Usage: "; - CHK_RSMI_PERM_ERR(err) if (err == RSMI_STATUS_SUCCESS) { - std::cout << static_cast(val_ui64) / 1000 << " mW" + std::cout << static_cast(val_ui64) / 1000 << " W" << std::endl; } - // Verify api support checking functionality is working - err = rsmi_dev_power_ave_get(i, 0, nullptr); - ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS); } + // Verify api support checking functionality is working + err = rsmi_dev_power_ave_get(i, 0, nullptr); + ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS); } /* Current Socket Power */ err = rsmi_dev_current_socket_power_get(i, &val_ui64); - + ASSERT_TRUE(err == RSMI_STATUS_SUCCESS + || err == RSMI_STATUS_NOT_SUPPORTED); if (err == RSMI_STATUS_NOT_SUPPORTED) { std::cout << "\t**Current Socket Power: not supported" " on this device" << std::endl; } else { + CHK_RSMI_PERM_ERR(err) IF_VERB(STANDARD) { std::cout << "\t**Current Socket Power: "; - CHK_RSMI_PERM_ERR(err) if (err == RSMI_STATUS_SUCCESS) { - std::cout << static_cast(val_ui64) / 1000 << " mW" + std::cout << static_cast(val_ui64) / 1000 << " W" << std::endl; } - // Verify api support checking functionality is working - err = rsmi_dev_current_socket_power_get(i, nullptr); - // std::cout << "err = " << amd::smi::getRSMIStatusString(err); - ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS); } + // Verify api support checking functionality is working + err = 
rsmi_dev_current_socket_power_get(i, nullptr); + ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS); + } + + /* Generic Power */ + err = rsmi_dev_power_get(i, &val_ui64, &type); + ASSERT_TRUE(err == RSMI_STATUS_SUCCESS + || err == RSMI_STATUS_NOT_SUPPORTED); + + if (err == RSMI_STATUS_NOT_SUPPORTED) { + std::cout << + "\t**Generic Power: not supported" + " on this device" << std::endl; + } else { + CHK_RSMI_PERM_ERR(err) + IF_VERB(STANDARD) { + std::cout << "\t**Generic Power: "; + if (err == RSMI_STATUS_SUCCESS) { + std::cout << "[" << amd::smi::power_type_string(type) << "] " + << static_cast(val_ui64) / 1000 << " W" + << std::endl; + } + } + // Verify api support checking functionality is working + err = rsmi_dev_power_get(i, nullptr, nullptr); + ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS); } std::cout << "\n"; } diff --git a/projects/amdsmi/tests/rocm_smi_test/functional/power_read.h b/projects/amdsmi/tests/rocm_smi_test/functional/power_read.h index f9d5b83e33..2a5746c251 100755 --- a/projects/amdsmi/tests/rocm_smi_test/functional/power_read.h +++ b/projects/amdsmi/tests/rocm_smi_test/functional/power_read.h @@ -5,7 +5,7 @@ * The University of Illinois/NCSA * Open Source License (NCSA) * - * Copyright (c) 2019, Advanced Micro Devices, Inc. + * Copyright (c) 2019-2023, Advanced Micro Devices, Inc. * All rights reserved. 
* * Developed by: diff --git a/projects/amdsmi/tests/rocm_smi_test/test_common.cc b/projects/amdsmi/tests/rocm_smi_test/test_common.cc index db6fa24098..5e8e3f8e47 100755 --- a/projects/amdsmi/tests/rocm_smi_test/test_common.cc +++ b/projects/amdsmi/tests/rocm_smi_test/test_common.cc @@ -54,6 +54,7 @@ #include "rocm_smi_test/test_base.h" #include "rocm_smi_test/test_common.h" #include "rocm_smi/rocm_smi.h" +#include "rocm_smi/rocm_smi_utils.h" static const std::map kDevPerfLvlNameMap = { @@ -227,6 +228,10 @@ const char *FreqEnumToStr(rsmi_clk_type rsmi_clk) { } } +void printRSMIError(rsmi_status_t err) { + std::cout << "err = " << amd::smi::getRSMIStatusString(err); +} + #if ENABLE_SMI void DumpMonitorInfo(const TestBase *test) { int ret = 0; diff --git a/projects/amdsmi/tests/rocm_smi_test/test_common.h b/projects/amdsmi/tests/rocm_smi_test/test_common.h index ba425cc462..f976a7c0e7 100755 --- a/projects/amdsmi/tests/rocm_smi_test/test_common.h +++ b/projects/amdsmi/tests/rocm_smi_test/test_common.h @@ -98,4 +98,6 @@ void DumpMonitorInfo(const TestBase *test); } \ } +void printRSMIError(rsmi_status_t err); + #endif // TESTS_ROCM_SMI_TEST_TEST_COMMON_H_