rocm_smi_lib: Fix rocm-smi --resetfans results in Permission Denied

For the following operations:
  --resetfans
  --setfan

rocm-smi now reports 'Not supported' instead of 'Permission denied' when the operation cannot be performed on the given system.

Code changes related to the following:
  * rocm_smi_properties
  * rocm_smi related APIs

Change-Id: I144646efc3804fabd45cc5a46351803950b4feb7
Signed-off-by: Oliveira, Daniel <daniel.oliveira@amd.com>


[ROCm/amdsmi commit: 12f395e592]
Этот коммит содержится в:
Oliveira, Daniel
2023-09-12 16:34:04 -05:00
коммит произвёл Daniel Oliveira
родитель 90929bce8e
Коммит c9f9221ebe
4 изменённых файлов: 43 добавлений и 16 удалений
+6 -2
Просмотреть файл
@@ -813,8 +813,10 @@ def resetFans(deviceList):
for device in deviceList:
sensor_ind = c_uint32(0)
ret = rocmsmi.rsmi_dev_fan_reset(device, sensor_ind)
if rsmi_ret_ok(ret, device, 'reset_fan'):
if rsmi_ret_ok(ret, device, silent=True):
printLog(device, 'Successfully reset fan speed to driver control', None)
else:
printLog(device, 'Not supported on the given system', None)
printLogSpacer()
@@ -1335,8 +1337,10 @@ def setFanSpeed(deviceList, fan):
else:
fanLevel = int(str(fan))
ret = rocmsmi.rsmi_dev_fan_speed_set(device, 0, int(fanLevel))
if rsmi_ret_ok(ret, device, 'set_fan_speed'):
if rsmi_ret_ok(ret, device, silent=True):
printLog(device, 'Successfully set fan speed to level %s' % (str(int(fanLevel))), None)
else:
printLog(device, 'Not supported on the given system', None)
printLogSpacer()
+5 -4
Просмотреть файл
@@ -407,6 +407,10 @@ static rsmi_status_t set_dev_mon_value(amd::smi::MonitorTypes type,
}
int ret = dev->monitor()->writeMonitor(type, sensor_ind,
std::to_string(val));
/// If the sysfs file doesn't exist, it is not supported.
if (ret == ENOENT) {
return rsmi_status_t::RSMI_STATUS_NOT_SUPPORTED;
}
return amd::smi::ErrnoToRsmiStatus(ret);
}
@@ -2631,9 +2635,8 @@ rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind) {
LOG_TRACE(ss);
++sensor_ind; // fan sysfs files have 1-based indices
REQUIRE_ROOT_ACCESS
DEVICE_MUTEX
ret = set_dev_mon_value<uint64_t>(amd::smi::kMonFanCntrlEnable,
dv_ind, sensor_ind, 2);
return ret;
@@ -2669,14 +2672,12 @@ rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed) {
// First need to set fan mode (pwm1_enable) to 1 (aka, "manual")
ret = set_dev_mon_value<uint64_t>(amd::smi::kMonFanCntrlEnable, dv_ind,
sensor_ind, 1);
if (ret != RSMI_STATUS_SUCCESS) {
return ret;
}
ret = set_dev_mon_value<uint64_t>(amd::smi::kMonFanSpeed, dv_ind,
sensor_ind, speed);
return ret;
CATCH
+25 -10
Просмотреть файл
@@ -166,6 +166,7 @@ const AMDGpuVerbList_t amdgpu_verb_check_list {
{ AMDGpuVerbTypes_t::kGetGpuOdVoltCurveRegions, "amdsmi_get_gpu_od_volt_curve_regions" }
};
const uint16_t kDevIDAll(0xFFFF);
const uint16_t kDevRevIDAll(0xFFFF);
const AMDGpuPropertyList_t amdgpu_property_reinforcement_list {
//
@@ -176,6 +177,14 @@ const AMDGpuPropertyList_t amdgpu_property_reinforcement_list {
// rsmi_dev_perf_level::RSMI_DEV_PERF_LEVEL_MANUAL = rsmi_dev_clk_range_set;
//
// AMD All Families
{kDevIDAll, {kDevRevIDAll,
make_unique_property_id(AMDGpuPropertyTypesOffset_t::kMonitorTypes,
MonitorTypes::kMonFanCntrlEnable),
AMDGpuVerbTypes_t::kResetGpuFan,
AMDGpuPropertyOpModeTypes_t::kBoth, false }
},
// AMD Instinct MI210
{0x740F, {0x02,
make_unique_property_id(AMDGpuPropertyTypesOffset_t::kDevInfoTypes,
@@ -239,12 +248,6 @@ const AMDGpuPropertyList_t amdgpu_property_reinforcement_list {
AMDGpuVerbTypes_t::kGetGpuPowerProfilePresets,
AMDGpuPropertyOpModeTypes_t::kBoth, false }
},
{0x74A1, {kDevRevIDAll,
make_unique_property_id(AMDGpuPropertyTypesOffset_t::kDevInfoTypes,
DevInfoTypes::kDevGpuReset),
AMDGpuVerbTypes_t::kResetGpu,
AMDGpuPropertyOpModeTypes_t::kSrIov, false }
},
{0x74A1, {kDevRevIDAll,
make_unique_property_id(AMDGpuPropertyTypesOffset_t::kPerfTypes,
rsmi_dev_perf_level::RSMI_DEV_PERF_LEVEL_DETERMINISM),
@@ -350,7 +353,7 @@ rsmi_status_t validate_property_reinforcement_query(uint32_t dv_ind, AMDGpuVerbT
// likely the reinforcement table does not contain any entries/rules for the
// dev_id in question.
//
auto amdgpu_property_query_result_hdlr = [](rsmi_status_t query_result) {
auto amdgpu_property_query_result_hdlr = [&](const rsmi_status_t query_result) {
switch (query_result) {
case (rsmi_status_t::RSMI_STATUS_UNKNOWN_ERROR):
case (rsmi_status_t::RSMI_STATUS_NO_DATA):
@@ -363,7 +366,7 @@ rsmi_status_t validate_property_reinforcement_query(uint32_t dv_ind, AMDGpuVerbT
break;
default:
return rsmi_status_t::RSMI_STATUS_NOT_FOUND;
return actual_error_code;
break;
}
};
@@ -415,7 +418,7 @@ rsmi_status_t Device::check_amdgpu_property_reinforcement_query(uint32_t dev_idx
std::ostringstream osstream;
auto rsmi_status(rsmi_status_t::RSMI_STATUS_UNKNOWN_ERROR);
AMDGpuPropertyQuery_t amdgpu_property_query = [&]() {
auto amdgpu_property_query = [&]() {
AMDGpuPropertyQuery_t amdgpu_property_query_init{};
amdgpu_property_query_init.m_asic_id = 0;
amdgpu_property_query_init.m_pci_rev_id = 0;
@@ -445,6 +448,18 @@ rsmi_status_t Device::check_amdgpu_property_reinforcement_query(uint32_t dev_idx
LOG_TRACE(osstream);
bool is_proper_query(false);
// Generic filter for checking properties for all asics and revisions.
auto amdgpu_property_query_all_asics = amdgpu_property_query;
amdgpu_property_query_all_asics.m_asic_id = kDevIDAll;
amdgpu_property_query_all_asics.m_pci_rev_id = kDevRevIDAll;
auto amdgpu_property_query_result = run_amdgpu_property_reinforcement_query(amdgpu_property_query_all_asics);
// We found a generic entry for all asics and revisions
if (amdgpu_property_query_result != rsmi_status_t::RSMI_STATUS_UNKNOWN_ERROR) {
return amdgpu_property_query_result;
}
// If no generic entry, then we query for specific asic and revision ids.
amdgpu_property_query = build_asic_id_filters(amdgpu_property_query, is_proper_query);
if (!is_proper_query) {
rsmi_status = rsmi_status_t::RSMI_STATUS_NO_DATA;
@@ -487,7 +502,7 @@ rsmi_status_t Device::run_amdgpu_property_reinforcement_query(const AMDGpuProper
osstream << __PRETTY_FUNCTION__ << " asic id found: " << itr_begin->first << "\n";
// Pci_rev_id matches the filter or ALL Revisions
if ((itr_begin->second.m_pci_rev_id == amdgpu_property_query.m_pci_rev_id) ||
(itr_begin->second.m_pci_rev_id == kDevRevIDAll)) {
(itr_begin->second.m_pci_rev_id == kDevRevIDAll)) {
osstream << __PRETTY_FUNCTION__ << " asic rev.id found: " << itr_begin->second.m_pci_rev_id << "\n";
// Do we have the property we are looking for?
if (((amdgpu_property_query.m_property != 0) &&
+7
Просмотреть файл
@@ -176,6 +176,13 @@ int isRegularFile(std::string fname, bool *is_reg) {
}
int WriteSysfsStr(std::string path, std::string val) {
// On success, zero is returned. On error, -1 is returned, and
// errno is set to indicate the error.
auto is_regular_file_result = isRegularFile(path, nullptr);
if (is_regular_file_result != 0) {
return ENOENT;
}
std::ofstream fs;
int ret = 0;
std::ostringstream ss;