Add errors for device files that exist but are empty
Change-Id: Iad9febc50f9b8e6085f8b605249ee884d2f134d6
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
[ROCm/rocm_smi_lib commit: a4b470fe71]
此提交包含在:
@@ -1791,15 +1791,19 @@ def showClocks(deviceList):
|
||||
for clk_type in sorted(rsmi_clk_names_dict):
|
||||
if rocmsmi.rsmi_dev_gpu_clk_freq_get(device, rsmi_clk_names_dict[clk_type], None) == 1:
|
||||
ret = rocmsmi.rsmi_dev_gpu_clk_freq_get(device, rsmi_clk_names_dict[clk_type], byref(freq))
|
||||
if rsmi_ret_ok(ret, device, 'get_clk_freq_' + clk_type, True):
|
||||
printLog(device, 'Supported %s frequencies on GPU%s' % (clk_type, str(device)), None)
|
||||
for x in range(freq.num_supported):
|
||||
fr = '{:>.0f}Mhz'.format(freq.frequency[x] / 1000000)
|
||||
if x == freq.current:
|
||||
printLog(device, str(x), str(fr) + ' *')
|
||||
else:
|
||||
printLog(device, str(x), str(fr))
|
||||
printLog(device, '', None)
|
||||
if ret == rsmi_status_t.RSMI_STATUS_UNEXPECTED_DATA:
|
||||
printLog(device, 'Clock [%s] on device [%s] exists but EMPTY! Likely driver error!' % (clk_type, str(device)))
|
||||
continue
|
||||
if not rsmi_ret_ok(ret, device, 'get_clk_freq_' + clk_type, True):
|
||||
continue
|
||||
printLog(device, 'Supported %s frequencies on GPU%s' % (clk_type, str(device)), None)
|
||||
for x in range(freq.num_supported):
|
||||
fr = '{:>.0f}Mhz'.format(freq.frequency[x] / 1000000)
|
||||
if x == freq.current:
|
||||
printLog(device, str(x), str(fr) + ' *')
|
||||
else:
|
||||
printLog(device, str(x), str(fr))
|
||||
printLog(device, '', None)
|
||||
else:
|
||||
logging.debug('{} frequency is unsupported on device[{}]'.format(clk_type, device))
|
||||
printLog(device, '', None)
|
||||
|
||||
@@ -3740,6 +3740,10 @@ rsmi_dev_memory_reserved_pages_get(uint32_t dv_ind, uint32_t *num_pages,
|
||||
|
||||
ret = GetDevValueVec(amd::smi::kDevMemPageBad, dv_ind, &val_vec);
|
||||
|
||||
// file is empty, which is valid for no errors
|
||||
if (ret == RSMI_STATUS_UNEXPECTED_DATA) {
|
||||
ret = RSMI_STATUS_SUCCESS;
|
||||
}
|
||||
if (ret == RSMI_STATUS_FILE_ERROR) {
|
||||
return RSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
@@ -858,8 +858,8 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type,
|
||||
ss << "Read devInfoMultiLineStr for DevInfoType ("
|
||||
<< RocmSMI::devInfoTypesStrings.at(type) << ")"
|
||||
<< ", but contained no string lines";
|
||||
LOG_INFO(ss);
|
||||
return 0;
|
||||
LOG_ERROR(ss);
|
||||
return ENXIO;
|
||||
}
|
||||
// Remove any *trailing* empty (whitespace) lines
|
||||
while (!retVec->empty() &&
|
||||
@@ -882,6 +882,7 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type,
|
||||
<< RocmSMI::devInfoTypesStrings.at(type) << ")"
|
||||
<< ", but lines were empty";
|
||||
LOG_INFO(ss);
|
||||
return ENXIO;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -123,16 +123,22 @@ void TestFrequenciesRead::Run(void) {
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_gpu_clk_freq_get(i, t, nullptr);
|
||||
ASSERT_EQ(err, RSMI_STATUS_NOT_SUPPORTED);
|
||||
} else {
|
||||
CHK_ERR_ASRT(err)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Supported " << name << " clock frequencies: ";
|
||||
std::cout << f.num_supported << std::endl;
|
||||
print_frequencies(&f);
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_gpu_clk_freq_get(i, t, nullptr);
|
||||
ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS);
|
||||
}
|
||||
}
|
||||
|
||||
// special driver issue, shouldn't normally occur
|
||||
if (err == RSMI_STATUS_UNEXPECTED_DATA) {
|
||||
std::cerr << "WARN: Clock file [" << FreqEnumToStr(t) << "] exists on device [" << i << "] but empty!" << std::endl;
|
||||
std::cerr << " Likely a driver issue!" << std::endl;
|
||||
}
|
||||
|
||||
CHK_ERR_ASRT(err)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Supported " << name << " clock frequencies: ";
|
||||
std::cout << f.num_supported << std::endl;
|
||||
print_frequencies(&f);
|
||||
// Verify api support checking functionality is working
|
||||
err = rsmi_dev_gpu_clk_freq_get(i, t, nullptr);
|
||||
ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -114,14 +114,20 @@ void TestFrequenciesReadWrite::Run(void) {
|
||||
std::cout << "\t**Set " << FreqEnumToStr(rsmi_clk) <<
|
||||
": Not supported on this machine" << std::endl;
|
||||
return false;
|
||||
} else {
|
||||
// CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "Initial frequency for clock " <<
|
||||
FreqEnumToStr(rsmi_clk) << " is " << f.current << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// special driver issue, shouldn't normally occur
|
||||
if (ret == RSMI_STATUS_UNEXPECTED_DATA) {
|
||||
std::cerr << "WARN: Clock file [" << FreqEnumToStr(rsmi_clk) << "] exists on device [" << dv_ind << "] but empty!" << std::endl;
|
||||
std::cerr << " Likely a driver issue!" << std::endl;
|
||||
}
|
||||
|
||||
// CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "Initial frequency for clock " <<
|
||||
FreqEnumToStr(rsmi_clk) << " is " << f.current << std::endl;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
auto freq_write = [&]() {
|
||||
@@ -177,44 +183,6 @@ void TestFrequenciesReadWrite::Run(void) {
|
||||
}
|
||||
freq_write();
|
||||
CHK_ERR_ASRT(ret)
|
||||
#if 0
|
||||
ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f);
|
||||
CHK_ERR_ASRT(ret)
|
||||
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "Initial frequency for clock " << rsmi_clk << " is " <<
|
||||
f.current << std::endl;
|
||||
}
|
||||
// Set clocks to something other than the usual default of the lowest
|
||||
// frequency.
|
||||
freq_bitmask = 0b01100; // Try the 3rd and 4th clocks
|
||||
|
||||
std::string freq_bm_str =
|
||||
std::bitset<RSMI_MAX_NUM_FREQUENCIES>(freq_bitmask).to_string();
|
||||
|
||||
freq_bm_str.erase(0, std::min(freq_bm_str.find_first_not_of('0'),
|
||||
freq_bm_str.size()-1));
|
||||
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "Setting frequency mask for clock " << rsmi_clk <<
|
||||
" to 0b" << freq_bm_str << " ..." << std::endl;
|
||||
}
|
||||
ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, freq_bitmask);
|
||||
CHK_ERR_ASRT(ret)
|
||||
|
||||
ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f);
|
||||
CHK_ERR_ASRT(ret)
|
||||
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "Frequency is now index " << f.current << std::endl;
|
||||
std::cout << "Resetting mask to all frequencies." << std::endl;
|
||||
}
|
||||
ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, 0xFFFFFFFF);
|
||||
CHK_ERR_ASRT(ret)
|
||||
|
||||
ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO);
|
||||
CHK_ERR_ASRT(ret)
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
新增問題並參考
封鎖使用者