diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index b57e7dfd1d..93e26c1cc0 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -2251,7 +2251,6 @@ rsmi_status_t rsmi_perf_determinism_mode_set(uint32_t dv_ind, uint64_t clkvalue) * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid * */ - rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od); /** @@ -2269,7 +2268,7 @@ rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od); * * @param[inout] f a pointer to a caller provided ::rsmi_frequencies_t structure * to which the frequency information will be written. Frequency values are in - * Hz. + * Hz. * If this parameter is nullptr, this function will return * ::RSMI_STATUS_INVALID_ARGS if the function is supported with the provided, * arguments and ::RSMI_STATUS_NOT_SUPPORTED if it is not supported with the @@ -2284,6 +2283,31 @@ rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od); * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid * */ +rsmi_status_t rsmi_dev_mem_overdrive_level_get(uint32_t dv_ind, uint32_t *od); + +/** + * @brief Get the memory clock overdrive percent associated with the device + * with provided device index. + * + * @details Given a device index @p dv_ind and a pointer to a uint32_t @p od, + * this function will write the memory overdrive percentage to the uint32_t + * pointed to by @p od + * + * @param[in] dv_ind a device index + * + * @param[inout] od a pointer to uint32_t to which the overdrive percentage + * will be written + * If this parameter is nullptr, this function will return + * ::RSMI_STATUS_INVALID_ARGS if the function is supported with the provided, + * arguments and ::RSMI_STATUS_NOT_SUPPORTED if it is not supported with the + * provided arguments. 
+ * + * @retval ::RSMI_STATUS_SUCCESS call was successful + * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not + * support this function with the given arguments + * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * + */ rsmi_status_t rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, rsmi_clk_type_t clk_type, rsmi_frequencies_t *f); diff --git a/include/rocm_smi/rocm_smi_device.h b/include/rocm_smi/rocm_smi_device.h index a21a64a9ce..aef25c2804 100755 --- a/include/rocm_smi/rocm_smi_device.h +++ b/include/rocm_smi/rocm_smi_device.h @@ -98,6 +98,7 @@ enum DevKFDNodePropTypes { enum DevInfoTypes { kDevPerfLevel, kDevOverDriveLevel, + kDevMemOverDriveLevel, kDevDevID, kDevDevProdName, kDevDevProdNum, diff --git a/python_smi_tools/rocm_smi.py b/python_smi_tools/rocm_smi.py index f86870aa96..91c455783d 100755 --- a/python_smi_tools/rocm_smi.py +++ b/python_smi_tools/rocm_smi.py @@ -1825,20 +1825,16 @@ def showOverDrive(deviceList, odtype): ret = rocmsmi.rsmi_dev_overdrive_level_get(device, byref(rsmi_od)) od = rsmi_od.value if not rsmi_ret_ok(ret, device): - printErrLog(device, 'Unable to retrieve sclk OverDrive level') + continue elif odtype == 'mclk': odStr = 'GPU Memory' - filePath = os.path.join('/sys/class/drm', 'card%d' % (device), 'device', 'pp_mclk_od') - if filePath: - try: - with open(filePath, 'r') as fileContents: - od = fileContents.read().rstrip('\n') - except: - printErrLog(device, 'Unable to retrieve mclk OverDrive level') - return None + ret = rocmsmi.rsmi_dev_mem_overdrive_level_get(device, byref(rsmi_od)) + od = rsmi_od.value + if not rsmi_ret_ok(ret, device): + continue else: printErrLog(device, 'Unable to retrieve OverDrive') - logging.error('Unsupported clock type %s', clktype) + logging.error('Unsupported clock type %s', odtype) RETCODE = 1 printLog(device, odStr + ' OverDrive value (%)', od) printLogSpacer() diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index f6d1638b0e..6c71343f8b 100755 --- 
a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -863,6 +863,33 @@ rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od) { CATCH } +rsmi_status_t +rsmi_dev_mem_overdrive_level_get(uint32_t dv_ind, uint32_t *od) { + TRY + std::string val_str; + CHK_SUPPORT_NAME_ONLY(od) + DEVICE_MUTEX + + rsmi_status_t ret = get_dev_value_str(amd::smi::kDevMemOverDriveLevel, dv_ind, + &val_str); + if (ret != RSMI_STATUS_SUCCESS) { + return ret; + } + + errno = 0; + uint64_t val_ul = strtoul(val_str.c_str(), nullptr, 10); + + if (val_ul > 0xFFFFFFFF) { + return RSMI_STATUS_UNEXPECTED_SIZE; + } + + *od = static_cast<uint32_t>(val_ul); + assert(errno == 0); + + return RSMI_STATUS_SUCCESS; + CATCH +} + rsmi_status_t rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od) { if (dv_ind < 0) { diff --git a/src/rocm_smi_device.cc b/src/rocm_smi_device.cc index cd6956cad0..0d602f4595 100755 --- a/src/rocm_smi_device.cc +++ b/src/rocm_smi_device.cc @@ -85,6 +85,7 @@ static const char *kDevVendorIDFName = "vendor"; static const char *kDevSubSysDevIDFName = "subsystem_device"; static const char *kDevSubSysVendorIDFName = "subsystem_vendor"; static const char *kDevOverDriveLevelFName = "pp_sclk_od"; +static const char *kDevMemOverDriveLevelFName = "pp_mclk_od"; static const char *kDevGPUSClkFName = "pp_dpm_sclk"; static const char *kDevGPUMClkFName = "pp_dpm_mclk"; static const char *kDevDCEFClkFName = "pp_dpm_dcefclk"; @@ -225,6 +226,7 @@ static const char *kDevPerfLevelUnknownStr = "unknown"; static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = { {kDevPerfLevel, kDevPerfLevelFName}, {kDevOverDriveLevel, kDevOverDriveLevelFName}, + {kDevMemOverDriveLevel, kDevMemOverDriveLevelFName}, {kDevDevProdName, kDevDevProdNameFName}, {kDevDevProdNum, kDevDevProdNumFName}, {kDevDevID, kDevDevIDFName}, @@ -388,6 +390,7 @@ static const std::map kDevFuncDependsMap = { {"rsmi_dev_busy_percent_get", {{kDevUsageFName}, {}}}, {"rsmi_dev_memory_reserved_pages_get", {{kDevMemPageBadFName}, {}}}, {"rsmi_dev_overdrive_level_get", 
{{kDevOverDriveLevelFName}, {}}}, + {"rsmi_dev_mem_overdrive_level_get", {{kDevMemOverDriveLevelFName}, {}}}, {"rsmi_dev_power_profile_presets_get", {{kDevPowerProfileModeFName}, {}}}, {"rsmi_dev_perf_level_set", {{kDevPerfLevelFName}, {}}}, {"rsmi_dev_perf_level_set_v1", {{kDevPerfLevelFName}, {}}}, @@ -794,6 +797,7 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) { case kDevUsage: case kDevOverDriveLevel: + case kDevMemOverDriveLevel: case kDevMemTotGTT: case kDevMemTotVisVRAM: case kDevMemTotVRAM: @@ -907,6 +911,7 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) { case kDevPerfLevel: case kDevUsage: case kDevOverDriveLevel: + case kDevMemOverDriveLevel: case kDevDevProdName: case kDevDevProdNum: case kDevDevID: