[SWDEV-335697] Add RSMI_STATUS_SETTING_UNAVAILABLE for dynamic partition
Updates:
* Added RSMI_STATUS_SETTING_UNAVAILABLE for
rsmi_dev_compute_partition_set - gives users
a clearer error when attempting to set the
compute partition to a value not listed in the
available_compute_partition sysfs file
* Updated the Python --setcomputepartition option to
report that the requested setting is unavailable when
it receives RSMI_STATUS_SETTING_UNAVAILABLE
* Updated all test & example files to check for
RSMI_STATUS_SETTING_UNAVAILABLE when calling
rsmi_dev_compute_partition_set
Change-Id: Ida5d54880d9b9b6e4a0468cdb962fdc0c18d6257
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
Этот коммит содержится в:
@@ -126,6 +126,8 @@ typedef enum {
|
||||
//!< being used
|
||||
RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter
|
||||
//!< exceeded INT32_MAX
|
||||
RSMI_STATUS_SETTING_UNAVAILABLE, //!< Requested setting is unavailable
|
||||
//!< for the current device
|
||||
RSMI_STATUS_AMDGPU_RESTART_ERR, //!< Could not successfully restart
|
||||
//!< the amdgpu driver
|
||||
|
||||
@@ -3572,6 +3574,8 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition,
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_PERMISSION function requires root access
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_SETTING_UNAVAILABLE the provided setting is
|
||||
* unavailable for current device
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function
|
||||
*
|
||||
|
||||
@@ -162,6 +162,7 @@ enum DevInfoTypes {
|
||||
kDevNumaNode,
|
||||
kDevGpuMetrics,
|
||||
kDevGpuReset,
|
||||
kDevAvailableComputePartition,
|
||||
kDevComputePartition,
|
||||
kDevMemoryPartition
|
||||
};
|
||||
|
||||
@@ -1421,6 +1421,9 @@ def setComputePartition(deviceList, computePartitionType):
|
||||
None)
|
||||
elif ret == rsmi_status_t.RSMI_STATUS_PERMISSION:
|
||||
printLog(device, 'Permission denied', None)
|
||||
elif ret == rsmi_status_t.RSMI_STATUS_SETTING_UNAVAILABLE:
|
||||
printLog(device, 'Requested setting (%s) is unavailable for current device'
|
||||
%computePartitionType, None)
|
||||
elif ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
|
||||
printLog(device, 'Not supported on the given system', None)
|
||||
else:
|
||||
|
||||
@@ -69,7 +69,8 @@ class rsmi_status_t(c_int):
|
||||
RSMI_STATUS_UNEXPECTED_DATA = 0xF
|
||||
RSMI_STATUS_BUSY = 0x10
|
||||
RSMI_STATUS_REFCOUNT_OVERFLOW = 0x11
|
||||
RSMI_STATUS_AMDGPU_RESTART_ERR = 0x12
|
||||
RSMI_STATUS_SETTING_UNAVAILABLE = 0x12
|
||||
RSMI_STATUS_AMDGPU_RESTART_ERR = 0x13
|
||||
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF
|
||||
|
||||
|
||||
@@ -93,6 +94,7 @@ rsmi_status_verbose_err_out = {
|
||||
rsmi_status_t.RSMI_STATUS_UNEXPECTED_DATA: 'Unexpected data received',
|
||||
rsmi_status_t.RSMI_STATUS_BUSY: 'Busy - resources are preventing call the ability to execute',
|
||||
rsmi_status_t.RSMI_STATUS_REFCOUNT_OVERFLOW: 'Data overflow - data exceeded INT32_MAX',
|
||||
rsmi_status_t.RSMI_STATUS_SETTING_UNAVAILABLE: 'Requested setting is unavailable for current device',
|
||||
rsmi_status_t.RSMI_STATUS_AMDGPU_RESTART_ERR: 'Could not successfully restart the amdgpu driver',
|
||||
rsmi_status_t.RSMI_STATUS_UNKNOWN_ERROR: 'Unknown error occured'
|
||||
}
|
||||
|
||||
Двоичные данные
Двоичный файл не отображается.
@@ -114,6 +114,18 @@
|
||||
} \
|
||||
}
|
||||
|
||||
// Checks an rsmi_status_t value (RET) returned by a call that may legitimately
// be unsupported or may reject the requested setting:
//   - RSMI_STATUS_NOT_SUPPORTED: prints a "not supported" notice to std::cout.
//   - RSMI_STATUS_SETTING_UNAVAILABLE: prints a "[WARN]" notice to std::cout.
//   - any other value: handed to CHK_RSMI_RET(RET).
// NOTE(review): CHK_RSMI_RET is defined outside this hunk; presumably it
// reports/returns on non-success values - confirm against its definition.
#define CHK_RSMI_NOT_SUPPORTED_OR_SETTING_UNAVAILABLE_RET(RET) {\
|
||||
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
|
||||
std::cout << "This function is not supported in the current environment."\
|
||||
<< std::endl; \
|
||||
} else if ((RET) == RSMI_STATUS_SETTING_UNAVAILABLE) { \
|
||||
std::cout << "[WARN] RSMI_STATUS_SETTING_UNAVAILABLE retrieved." \
|
||||
<< std::endl; \
|
||||
} else { \
|
||||
CHK_RSMI_RET(RET) \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHK_NOT_SUPPORTED_OR_UNEXPECTED_DATA_OR_INSUFFICIENT_SIZE_RET(RET) { \
|
||||
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
|
||||
std::cout << "This function is not supported in the current environment." \
|
||||
@@ -570,10 +582,9 @@ static rsmi_status_t test_set_compute_partitioning(uint32_t dv_ind) {
|
||||
<< compute_partition_string(newPartition) << "..."
|
||||
<< std::endl;
|
||||
ret = rsmi_dev_compute_partition_set(dv_ind, newPartition);
|
||||
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
||||
CHK_RSMI_NOT_SUPPORTED_OR_SETTING_UNAVAILABLE_RET(ret)
|
||||
std::cout << "Done setting compute partition to "
|
||||
<< compute_partition_string(newPartition)
|
||||
<< "." << std::endl;
|
||||
<< compute_partition_string(newPartition) << "." << std::endl;
|
||||
std::cout << std::endl << std::endl;
|
||||
}
|
||||
|
||||
@@ -589,7 +600,7 @@ static rsmi_status_t test_set_compute_partitioning(uint32_t dv_ind) {
|
||||
rsmi_compute_partition_type origComputePartitionType
|
||||
= mapStringToRSMIComputePartitionTypes[originalComputePartition];
|
||||
ret = rsmi_dev_compute_partition_set(dv_ind, origComputePartitionType);
|
||||
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
||||
CHK_RSMI_NOT_SUPPORTED_OR_SETTING_UNAVAILABLE_RET(ret)
|
||||
std::cout << "Done" << std::endl;
|
||||
}
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
|
||||
+38
-6
@@ -2865,6 +2865,11 @@ rsmi_status_string(rsmi_status_t status, const char **status_string) {
|
||||
"counter exceeded INT32_MAX";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_SETTING_UNAVAILABLE:
|
||||
*status_string = "RSMI_STATUS_SETTING_UNAVAILABLE: Requested setting is "
|
||||
"unavailable for the current device";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_AMDGPU_RESTART_ERR:
|
||||
*status_string = "RSMI_STATUS_AMDGPU_RESTART_ERR: Could not successfully "
|
||||
"restart the amdgpu driver";
|
||||
@@ -3751,17 +3756,16 @@ static rsmi_status_t
|
||||
get_compute_partition(uint32_t dv_ind, std::string &compute_partition) {
|
||||
TRY
|
||||
CHK_SUPPORT_NAME_ONLY(compute_partition.c_str())
|
||||
std::string val_str;
|
||||
std::string compute_partition_str;
|
||||
|
||||
DEVICE_MUTEX
|
||||
rsmi_status_t ret = get_dev_value_str(amd::smi::kDevComputePartition,
|
||||
dv_ind, &val_str);
|
||||
|
||||
dv_ind, &compute_partition_str);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
switch (mapStringToRSMIComputePartitionTypes[val_str]) {
|
||||
switch (mapStringToRSMIComputePartitionTypes[compute_partition_str]) {
|
||||
case RSMI_COMPUTE_PARTITION_INVALID:
|
||||
// Retrieved an unknown compute partition
|
||||
return RSMI_STATUS_UNEXPECTED_DATA;
|
||||
@@ -3779,7 +3783,7 @@ get_compute_partition(uint32_t dv_ind, std::string &compute_partition) {
|
||||
// Retrieved an unknown compute partition
|
||||
return RSMI_STATUS_UNEXPECTED_DATA;
|
||||
}
|
||||
compute_partition = val_str;
|
||||
compute_partition = compute_partition_str;
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
CATCH
|
||||
}
|
||||
@@ -3809,13 +3813,33 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition,
|
||||
CATCH
|
||||
}
|
||||
|
||||
// Reports whether new_compute_partition appears in the device's list of
// available compute partitions.
//
// dv_ind                 device index passed through to get_dev_value_line
// new_compute_partition  partition name to look for
//
// Reads the kDevAvailableComputePartition value for dv_ind via
// get_dev_value_line. If that read does not return RSMI_STATUS_SUCCESS, its
// status is returned unchanged. Otherwise returns RSMI_STATUS_SUCCESS when
// amd::smi::containsString() finds new_compute_partition in the value read,
// and RSMI_STATUS_SETTING_UNAVAILABLE when it does not.
//
// NOTE(review): containsString is defined elsewhere; if it is a plain
// substring search, a partition name that is a substring of another listed
// name would be reported as available - confirm.
// NOTE(review): DEVICE_MUTEX is used here and also by the caller
// rsmi_dev_compute_partition_set before it calls this helper - confirm the
// macro tolerates nested use on the same device.
static rsmi_status_t
|
||||
is_available_compute_partition(uint32_t dv_ind,
|
||||
std::string new_compute_partition) {
|
||||
TRY
|
||||
DEVICE_MUTEX
|
||||
std::string availableComputePartitions;
|
||||
rsmi_status_t ret =
|
||||
get_dev_value_line(amd::smi::kDevAvailableComputePartition,
|
||||
dv_ind, &availableComputePartitions);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool isComputePartitionAvailable =
|
||||
amd::smi::containsString(availableComputePartitions,
|
||||
new_compute_partition);
|
||||
return (isComputePartitionAvailable) ? RSMI_STATUS_SUCCESS :
|
||||
RSMI_STATUS_SETTING_UNAVAILABLE;
|
||||
CATCH
|
||||
}
|
||||
|
||||
rsmi_status_t
|
||||
rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
rsmi_compute_partition_type_t compute_partition) {
|
||||
TRY
|
||||
REQUIRE_ROOT_ACCESS
|
||||
DEVICE_MUTEX
|
||||
|
||||
std::string newComputePartitionStr
|
||||
= mapRSMIToStringComputePartitionTypes[compute_partition];
|
||||
std::string currentComputePartition;
|
||||
@@ -3838,6 +3862,14 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
// Confirm what we are trying to set is available, otherwise provide
|
||||
// RSMI_STATUS_SETTING_UNAVAILABLE
|
||||
rsmi_status_t available_ret =
|
||||
is_available_compute_partition(dv_ind, newComputePartitionStr);
|
||||
if (available_ret != RSMI_STATUS_SUCCESS) {
|
||||
return available_ret;
|
||||
}
|
||||
|
||||
// do nothing if compute_partition is the current compute partition
|
||||
rsmi_status_t ret_get = get_compute_partition(dv_ind, currentComputePartition);
|
||||
// we can try to set, even if we get unexpected data
|
||||
|
||||
@@ -122,6 +122,8 @@ static const char *kDevXGMIErrorFName = "xgmi_error";
|
||||
static const char *kDevSerialNumberFName = "serial_number";
|
||||
static const char *kDevNumaNodeFName = "numa_node";
|
||||
static const char *kDevGpuMetricsFName = "gpu_metrics";
|
||||
static const char *kDevAvailableComputePartitionFName =
|
||||
"available_compute_partition";
|
||||
static const char *kDevComputePartitionFName = "current_compute_partition";
|
||||
static const char *kDevMemoryPartitionFName = "current_memory_partition";
|
||||
|
||||
@@ -293,6 +295,7 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
|
||||
{kDevNumaNode, kDevNumaNodeFName},
|
||||
{kDevGpuMetrics, kDevGpuMetricsFName},
|
||||
{kDevGpuReset, kDevGpuResetFName},
|
||||
{kDevAvailableComputePartition, kDevAvailableComputePartitionFName},
|
||||
{kDevComputePartition, kDevComputePartitionFName},
|
||||
{kDevMemoryPartition, kDevMemoryPartitionFName},
|
||||
};
|
||||
@@ -930,6 +933,7 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) {
|
||||
case kDevVBiosVer:
|
||||
case kDevPCIEThruPut:
|
||||
case kDevSerialNumber:
|
||||
case kDevAvailableComputePartition:
|
||||
case kDevComputePartition:
|
||||
case kDevMemoryPartition:
|
||||
return readDevInfoStr(type, val);
|
||||
|
||||
@@ -75,7 +75,8 @@ static const char *kDeviceNamePrefix = "card";
|
||||
static const char *kAMDMonitorTypes[] = {"radeon", "amdgpu", ""};
|
||||
|
||||
static const std::string amdSMI = "amd::smi::";
|
||||
const std::map<amd::smi::DevInfoTypes, std::string> amd::smi::RocmSMI::devInfoTypesStrings = {
|
||||
const std::map<amd::smi::DevInfoTypes, std::string>
|
||||
amd::smi::RocmSMI::devInfoTypesStrings = {
|
||||
{amd::smi::kDevPerfLevel, amdSMI + "kDevPerfLevel"},
|
||||
{amd::smi::kDevOverDriveLevel, amdSMI + "kDevOverDriveLevel"},
|
||||
{amd::smi::kDevMemOverDriveLevel, amdSMI + "kDevMemOverDriveLevel"},
|
||||
@@ -142,6 +143,8 @@ const std::map<amd::smi::DevInfoTypes, std::string> amd::smi::RocmSMI::devInfoTy
|
||||
{amd::smi::kDevNumaNode, amdSMI + "kDevNumaNode"},
|
||||
{amd::smi::kDevGpuMetrics, amdSMI + "kDevGpuMetrics"},
|
||||
{amd::smi::kDevGpuReset, amdSMI + "kDevGpuReset"},
|
||||
{amd::smi::kDevAvailableComputePartition, amdSMI +
|
||||
"kDevAvailableComputePartition"},
|
||||
{amd::smi::kDevComputePartition, amdSMI + "kDevComputePartition"},
|
||||
{amd::smi::kDevMemoryPartition, amdSMI + "kDevMemoryPartition"}
|
||||
};
|
||||
|
||||
@@ -390,7 +390,7 @@ std::string readTemporaryFile(std::string path) {
|
||||
return fileContent;
|
||||
}
|
||||
|
||||
// Used to debug application temporary files (idenified by kTmpFilePrefix)
|
||||
// Used to debug application temporary files (identified by kTmpFilePrefix)
|
||||
// and their content
|
||||
void displayAppTmpFilesContent() {
|
||||
std::vector<std::string> tmpFiles = getListOfAppTmpFiles();
|
||||
|
||||
@@ -174,8 +174,8 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
IF_VERB(STANDARD) {
|
||||
if (err == RSMI_STATUS_INSUFFICIENT_SIZE) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_INSUFFICIENT_SIZE was returned "
|
||||
<< "and size matches length requested." << std::endl;
|
||||
<< "Confirmed RSMI_STATUS_INSUFFICIENT_SIZE was returned"
|
||||
<< "\n\t and size matches length requested." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -275,19 +275,48 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
<< computePartitionString(new_computePartition) << std::endl;
|
||||
}
|
||||
ret = rsmi_dev_compute_partition_set(dv_ind, new_computePartition);
|
||||
CHK_ERR_ASRT(ret)
|
||||
ret = rsmi_dev_compute_partition_get(dv_ind, current_char_computePartition,
|
||||
255);
|
||||
CHK_ERR_ASRT(ret)
|
||||
bool isSettingUnavailable = false;
|
||||
ASSERT_TRUE((ret == RSMI_STATUS_SUCCESS) ||
|
||||
(ret == RSMI_STATUS_SETTING_UNAVAILABLE));
|
||||
if (ret == RSMI_STATUS_SETTING_UNAVAILABLE) {
|
||||
isSettingUnavailable = true;
|
||||
}
|
||||
rsmi_status_t retGet =
|
||||
rsmi_dev_compute_partition_get(dv_ind, current_char_computePartition,
|
||||
255);
|
||||
CHK_ERR_ASRT(retGet)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Current compute partition: "
|
||||
<< current_char_computePartition
|
||||
<< std::endl;
|
||||
}
|
||||
EXPECT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
EXPECT_STREQ(computePartitionString(new_computePartition).c_str(),
|
||||
current_char_computePartition);
|
||||
if (isSettingUnavailable) {
|
||||
ASSERT_EQ(RSMI_STATUS_SETTING_UNAVAILABLE, ret);
|
||||
ASSERT_STRNE(computePartitionString(new_computePartition).c_str(),
|
||||
current_char_computePartition);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed after receiving "
|
||||
<< "RSMI_STATUS_SETTING_UNAVAILABLE,\n\t current compute "
|
||||
<< "partition (" << current_char_computePartition
|
||||
<< ") did not update to ("
|
||||
<< computePartitionString(new_computePartition) << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
} else {
|
||||
ASSERT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
ASSERT_STREQ(computePartitionString(new_computePartition).c_str(),
|
||||
current_char_computePartition);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed current compute partition ("
|
||||
<< current_char_computePartition << ") matches"
|
||||
<< "\n\t requested compute partition ("
|
||||
<< computePartitionString(new_computePartition) << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* TEST RETURN TO BOOT COMPUTE PARTITION SETTING */
|
||||
@@ -309,15 +338,15 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
255);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Current compute partition: " << current_char_computePartition << std::endl;
|
||||
std::cout << "\t**" << "Current compute partition: "
|
||||
<< current_char_computePartition << std::endl;
|
||||
}
|
||||
if (wasResetSuccess) {
|
||||
ASSERT_STRNE(oldPartition.c_str(), current_char_computePartition);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed prior partition (" << oldPartition << ") is not "
|
||||
<< "equal to current partition ("
|
||||
<< "equal to current\n\t partition ("
|
||||
<< current_char_computePartition << ")" << std::endl;
|
||||
}
|
||||
} else {
|
||||
@@ -325,16 +354,16 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed prior partition (" << oldPartition << ") is equal"
|
||||
<< " to current partition ("
|
||||
<< " to current\n\t partition ("
|
||||
<< current_char_computePartition << ")" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/* TEST RETURN TO ORIGINAL COMPUTE PARTITIONING SETTING */
|
||||
/* TEST RETURN TO ORIGINAL COMPUTE PARTITION SETTING */
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "\t**"
|
||||
<< "=========== TEST RETURN TO ORIGINAL COMPUTE PARTITIONING "
|
||||
<< "=========== TEST RETURN TO ORIGINAL COMPUTE PARTITION "
|
||||
<< "SETTING ========" << std::endl;
|
||||
}
|
||||
new_computePartition
|
||||
@@ -351,8 +380,8 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << "Attempted to set compute partition: "
|
||||
<< computePartitionString(new_computePartition) << std::endl
|
||||
<< "\t**"
|
||||
<< "Current compute partition: " << current_char_computePartition
|
||||
<< "\t**" << "Current compute partition: "
|
||||
<< current_char_computePartition
|
||||
<< std::endl;
|
||||
}
|
||||
EXPECT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
|
||||
Ссылка в новой задаче
Block a user