rocm_smi_lib: Fix rocm-smi --resetfans results in Permission Denied
For operations related to:
--resetfans
--setfan
We report 'Not supported' for these cases instead of 'Permission denied'
Code changes related to the following:
* rocm_smi_properties
* rocm_smi related APIs
Change-Id: I144646efc3804fabd45cc5a46351803950b4feb7
Signed-off-by: Oliveira, Daniel <daniel.oliveira@amd.com>
[ROCm/amdsmi commit: 12f395e592]
Этот коммит содержится в:
коммит произвёл
Daniel Oliveira
родитель
90929bce8e
Коммит
c9f9221ebe
@@ -813,8 +813,10 @@ def resetFans(deviceList):
|
||||
for device in deviceList:
|
||||
sensor_ind = c_uint32(0)
|
||||
ret = rocmsmi.rsmi_dev_fan_reset(device, sensor_ind)
|
||||
if rsmi_ret_ok(ret, device, 'reset_fan'):
|
||||
if rsmi_ret_ok(ret, device, silent=True):
|
||||
printLog(device, 'Successfully reset fan speed to driver control', None)
|
||||
else:
|
||||
printLog(device, 'Not supported on the given system', None)
|
||||
printLogSpacer()
|
||||
|
||||
|
||||
@@ -1335,8 +1337,10 @@ def setFanSpeed(deviceList, fan):
|
||||
else:
|
||||
fanLevel = int(str(fan))
|
||||
ret = rocmsmi.rsmi_dev_fan_speed_set(device, 0, int(fanLevel))
|
||||
if rsmi_ret_ok(ret, device, 'set_fan_speed'):
|
||||
if rsmi_ret_ok(ret, device, silent=True):
|
||||
printLog(device, 'Successfully set fan speed to level %s' % (str(int(fanLevel))), None)
|
||||
else:
|
||||
printLog(device, 'Not supported on the given system', None)
|
||||
printLogSpacer()
|
||||
|
||||
|
||||
|
||||
@@ -407,6 +407,10 @@ static rsmi_status_t set_dev_mon_value(amd::smi::MonitorTypes type,
|
||||
}
|
||||
int ret = dev->monitor()->writeMonitor(type, sensor_ind,
|
||||
std::to_string(val));
|
||||
/// If the sysfs file doesn't exist, it is not supported.
|
||||
if (ret == ENOENT) {
|
||||
return rsmi_status_t::RSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
@@ -2631,9 +2635,8 @@ rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind) {
|
||||
LOG_TRACE(ss);
|
||||
|
||||
++sensor_ind; // fan sysfs files have 1-based indices
|
||||
|
||||
REQUIRE_ROOT_ACCESS
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = set_dev_mon_value<uint64_t>(amd::smi::kMonFanCntrlEnable,
|
||||
dv_ind, sensor_ind, 2);
|
||||
return ret;
|
||||
@@ -2669,14 +2672,12 @@ rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed) {
|
||||
// First need to set fan mode (pwm1_enable) to 1 (aka, "manual")
|
||||
ret = set_dev_mon_value<uint64_t>(amd::smi::kMonFanCntrlEnable, dv_ind,
|
||||
sensor_ind, 1);
|
||||
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = set_dev_mon_value<uint64_t>(amd::smi::kMonFanSpeed, dv_ind,
|
||||
sensor_ind, speed);
|
||||
|
||||
return ret;
|
||||
|
||||
CATCH
|
||||
|
||||
@@ -166,6 +166,7 @@ const AMDGpuVerbList_t amdgpu_verb_check_list {
|
||||
{ AMDGpuVerbTypes_t::kGetGpuOdVoltCurveRegions, "amdsmi_get_gpu_od_volt_curve_regions" }
|
||||
};
|
||||
|
||||
const uint16_t kDevIDAll(0xFFFF);
|
||||
const uint16_t kDevRevIDAll(0xFFFF);
|
||||
const AMDGpuPropertyList_t amdgpu_property_reinforcement_list {
|
||||
//
|
||||
@@ -176,6 +177,14 @@ const AMDGpuPropertyList_t amdgpu_property_reinforcement_list {
|
||||
// rsmi_dev_perf_level::RSMI_DEV_PERF_LEVEL_MANUAL = rsmi_dev_clk_range_set;
|
||||
//
|
||||
|
||||
// AMD All Families
|
||||
{kDevIDAll, {kDevRevIDAll,
|
||||
make_unique_property_id(AMDGpuPropertyTypesOffset_t::kMonitorTypes,
|
||||
MonitorTypes::kMonFanCntrlEnable),
|
||||
AMDGpuVerbTypes_t::kResetGpuFan,
|
||||
AMDGpuPropertyOpModeTypes_t::kBoth, false }
|
||||
},
|
||||
|
||||
// AMD Instinct MI210
|
||||
{0x740F, {0x02,
|
||||
make_unique_property_id(AMDGpuPropertyTypesOffset_t::kDevInfoTypes,
|
||||
@@ -239,12 +248,6 @@ const AMDGpuPropertyList_t amdgpu_property_reinforcement_list {
|
||||
AMDGpuVerbTypes_t::kGetGpuPowerProfilePresets,
|
||||
AMDGpuPropertyOpModeTypes_t::kBoth, false }
|
||||
},
|
||||
{0x74A1, {kDevRevIDAll,
|
||||
make_unique_property_id(AMDGpuPropertyTypesOffset_t::kDevInfoTypes,
|
||||
DevInfoTypes::kDevGpuReset),
|
||||
AMDGpuVerbTypes_t::kResetGpu,
|
||||
AMDGpuPropertyOpModeTypes_t::kSrIov, false }
|
||||
},
|
||||
{0x74A1, {kDevRevIDAll,
|
||||
make_unique_property_id(AMDGpuPropertyTypesOffset_t::kPerfTypes,
|
||||
rsmi_dev_perf_level::RSMI_DEV_PERF_LEVEL_DETERMINISM),
|
||||
@@ -350,7 +353,7 @@ rsmi_status_t validate_property_reinforcement_query(uint32_t dv_ind, AMDGpuVerbT
|
||||
// likely the reinforcement table does not contain any entries/rules for the
|
||||
// dev_id in question.
|
||||
//
|
||||
auto amdgpu_property_query_result_hdlr = [](rsmi_status_t query_result) {
|
||||
auto amdgpu_property_query_result_hdlr = [&](const rsmi_status_t query_result) {
|
||||
switch (query_result) {
|
||||
case (rsmi_status_t::RSMI_STATUS_UNKNOWN_ERROR):
|
||||
case (rsmi_status_t::RSMI_STATUS_NO_DATA):
|
||||
@@ -363,7 +366,7 @@ rsmi_status_t validate_property_reinforcement_query(uint32_t dv_ind, AMDGpuVerbT
|
||||
break;
|
||||
|
||||
default:
|
||||
return rsmi_status_t::RSMI_STATUS_NOT_FOUND;
|
||||
return actual_error_code;
|
||||
break;
|
||||
}
|
||||
};
|
||||
@@ -415,7 +418,7 @@ rsmi_status_t Device::check_amdgpu_property_reinforcement_query(uint32_t dev_idx
|
||||
std::ostringstream osstream;
|
||||
auto rsmi_status(rsmi_status_t::RSMI_STATUS_UNKNOWN_ERROR);
|
||||
|
||||
AMDGpuPropertyQuery_t amdgpu_property_query = [&]() {
|
||||
auto amdgpu_property_query = [&]() {
|
||||
AMDGpuPropertyQuery_t amdgpu_property_query_init{};
|
||||
amdgpu_property_query_init.m_asic_id = 0;
|
||||
amdgpu_property_query_init.m_pci_rev_id = 0;
|
||||
@@ -445,6 +448,18 @@ rsmi_status_t Device::check_amdgpu_property_reinforcement_query(uint32_t dev_idx
|
||||
LOG_TRACE(osstream);
|
||||
|
||||
bool is_proper_query(false);
|
||||
|
||||
// Generic filter for checking properties for all asics and revisions.
|
||||
auto amdgpu_property_query_all_asics = amdgpu_property_query;
|
||||
amdgpu_property_query_all_asics.m_asic_id = kDevIDAll;
|
||||
amdgpu_property_query_all_asics.m_pci_rev_id = kDevRevIDAll;
|
||||
auto amdgpu_property_query_result = run_amdgpu_property_reinforcement_query(amdgpu_property_query_all_asics);
|
||||
// We found a generic entry for all asics and revisions
|
||||
if (amdgpu_property_query_result != rsmi_status_t::RSMI_STATUS_UNKNOWN_ERROR) {
|
||||
return amdgpu_property_query_result;
|
||||
}
|
||||
|
||||
// If no generic entry, then we query for specific asic and revision ids.
|
||||
amdgpu_property_query = build_asic_id_filters(amdgpu_property_query, is_proper_query);
|
||||
if (!is_proper_query) {
|
||||
rsmi_status = rsmi_status_t::RSMI_STATUS_NO_DATA;
|
||||
@@ -487,7 +502,7 @@ rsmi_status_t Device::run_amdgpu_property_reinforcement_query(const AMDGpuProper
|
||||
osstream << __PRETTY_FUNCTION__ << " asic id found: " << itr_begin->first << "\n";
|
||||
// Pci_rev_id matches the filter or ALL Revisions
|
||||
if ((itr_begin->second.m_pci_rev_id == amdgpu_property_query.m_pci_rev_id) ||
|
||||
(itr_begin->second.m_pci_rev_id == kDevRevIDAll)) {
|
||||
(itr_begin->second.m_pci_rev_id == kDevRevIDAll)) {
|
||||
osstream << __PRETTY_FUNCTION__ << " asic rev.id found: " << itr_begin->second.m_pci_rev_id << "\n";
|
||||
// Do we have the property we are looking for?
|
||||
if (((amdgpu_property_query.m_property != 0) &&
|
||||
|
||||
@@ -176,6 +176,13 @@ int isRegularFile(std::string fname, bool *is_reg) {
|
||||
}
|
||||
|
||||
int WriteSysfsStr(std::string path, std::string val) {
|
||||
// On success, zero is returned. On error, -1 is returned, and
|
||||
// errno is set to indicate the error.
|
||||
auto is_regular_file_result = isRegularFile(path, nullptr);
|
||||
if (is_regular_file_result != 0) {
|
||||
return ENOENT;
|
||||
}
|
||||
|
||||
std::ofstream fs;
|
||||
int ret = 0;
|
||||
std::ostringstream ss;
|
||||
|
||||
Ссылка в новой задаче
Block a user