Merge amd-staging into amd-master 20240628
Signed-off-by: Zhang Ava <niandong.zhang@amd.com> Change-Id: I9493cdf35b64cfa0a99de017e2d6b521af71cf14
This commit is contained in:
@@ -4,6 +4,41 @@ Full documentation for rocm_smi_lib is available at [https://rocm.docs.amd.com/]
|
||||
|
||||
***All information listed below is for reference and subject to change.***
|
||||
|
||||
## rocm_smi_lib for ROCm 6.2
|
||||
|
||||
### Added
|
||||
|
||||
- **Added Partition ID API (`rsmi_dev_partition_id_get(..)`)**
|
||||
Previously the partition ID could only be retrieved by querying `rsmi_dev_pci_id_get()`
|
||||
and parsing optional bits in our python CLI/API. We are now making this available directly through API.
|
||||
We also added testing in our compute partitioning tests, verifying that partition IDs update accordingly.
|
||||
|
||||
### Changed
|
||||
|
||||
- N/A
|
||||
|
||||
### Optimized
|
||||
|
||||
- N/A
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Partition ID CLI output**
|
||||
Due to driver changes in KFD, some devices may report the partition ID in bits [31:28] or [2:0]. With the newly added `rsmi_dev_partition_id_get(..)`, we provide this fallback to properly retrieve the partition ID. We
|
||||
plan to eventually remove partition ID from the function portion of the BDF (Bus Device Function). See below for PCI ID description.
|
||||
|
||||
- bits [63:32] = domain
|
||||
- bits [31:28] or bits [2:0] = partition id
|
||||
- bits [27:16] = reserved
|
||||
- bits [15:8] = Bus
|
||||
- bits [7:3] = Device
|
||||
- bits [2:0] = Function (partition id may be in bits [2:0]) <-- Fallback for non-SPX modes
|
||||
|
||||
### Known Issues
|
||||
|
||||
- N/A
|
||||
|
||||
|
||||
## rocm_smi_lib for ROCm 6.1.2
|
||||
|
||||
### Added
|
||||
|
||||
@@ -367,8 +367,6 @@ typedef rsmi_clk_type_t rsmi_clk_type;
|
||||
*/
|
||||
typedef enum {
|
||||
RSMI_COMPUTE_PARTITION_INVALID = 0,
|
||||
RSMI_COMPUTE_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
|
||||
//!< shared memory
|
||||
RSMI_COMPUTE_PARTITION_SPX, //!< Single GPU mode (SPX)- All XCCs work
|
||||
//!< together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_DPX, //!< Dual GPU mode (DPX)- Half XCCs work
|
||||
@@ -377,6 +375,8 @@ typedef enum {
|
||||
//!< work together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
|
||||
//!< work together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_CPX //!< Core mode (CPX)- Per-chip XCC with
|
||||
//!< shared memory
|
||||
} rsmi_compute_partition_type_t;
|
||||
/// \cond Ignore in docs.
|
||||
typedef rsmi_compute_partition_type_t rsmi_compute_partition_type;
|
||||
@@ -4053,6 +4053,30 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the partition_id for a desired device
|
||||
*
|
||||
* @details
|
||||
* Given a device index @p dv_ind and a uint32_t pointer @p partition_id ,
|
||||
* this function will attempt to obtain the device's partition ID.
|
||||
* Upon successful retrieval, the obtained device's partition will be stored
|
||||
* in the passed @p partition_id uint32_t variable. If device does
|
||||
* not support partitions or is in SPX, a @p partition_id ID of 0 shall
|
||||
* be returned.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[inout] partition_id a uint32_t variable,
|
||||
* which the device's partition_id will be written to.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function
|
||||
*
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_partition_id_get(uint32_t dv_ind, uint32_t *partition_id);
|
||||
|
||||
/** @} */ // end of ComputePartition
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
@@ -196,9 +196,11 @@ def getBus(device, silent=False):
|
||||
# BDFID = ((DOMAIN & 0xFFFFFFFF) << 32) | ((PARTITION_ID & 0xF) << 28) | ((BUS & 0xFF) << 8) |
|
||||
# ((DEVICE & 0x1F) <<3 ) | (FUNCTION & 0x7)
|
||||
# bits [63:32] = domain
|
||||
# bits [31:28] = partition id
|
||||
# bits [31:28] or bits [2:0] = partition id
|
||||
# bits [27:16] = reserved
|
||||
# bits [15: 0] = pci bus/device/function
|
||||
# bits [15:8] = Bus
|
||||
# bits [7:3] = Device
|
||||
# bits [2:0] = Function (partition id maybe in bits [2:0]) <-- Fallback for non SPX modes
|
||||
domain = (bdfid.value >> 32) & 0xffffffff
|
||||
bus = (bdfid.value >> 8) & 0xff
|
||||
device = (bdfid.value >> 3) & 0x1f
|
||||
@@ -215,19 +217,19 @@ def getPartitionId(device, silent=False):
|
||||
:param silent: Turn on to silence error output
|
||||
(you plan to handle manually). Default is off.
|
||||
"""
|
||||
bdfid = c_uint64(0)
|
||||
ret = rocmsmi.rsmi_dev_pci_id_get(device, byref(bdfid))
|
||||
partition_id = c_uint32(0)
|
||||
ret = rocmsmi.rsmi_dev_partition_id_get(device, byref(partition_id))
|
||||
|
||||
# BDFID = ((DOMAIN & 0xFFFFFFFF) << 32) | ((PARTITION_ID & 0xF) << 28) | ((BUS & 0xFF) << 8) |
|
||||
# ((DEVICE & 0x1F) <<3 ) | (FUNCTION & 0x7)
|
||||
# bits [63:32] = domain
|
||||
# bits [31:28] = partition id
|
||||
# bits [27:16] = reserved
|
||||
# bits [15: 0] = pci bus/device/function
|
||||
partition_num = (bdfid.value >> 28) & 0xf
|
||||
pci_id = bdfid.value
|
||||
partition_id = '{:d}'.format(partition_num)
|
||||
if rsmi_ret_ok(ret, device, 'get_pci_id', silent):
|
||||
# bits [31:28] or bits [2:0] = partition id
|
||||
# bits [27:16] = reserved
|
||||
# bits [15:8] = Bus
|
||||
# bits [7:3] = Device
|
||||
# bits [2:0] = Function (partition id maybe in bits [2:0]) <-- Fallback for non SPX modes
|
||||
partition_id = '{:d}'.format(partition_id.value)
|
||||
if rsmi_ret_ok(ret, device, 'rsmi_dev_partition_id_get', silent):
|
||||
return partition_id
|
||||
|
||||
|
||||
|
||||
@@ -583,19 +583,19 @@ class rsmi_func_id_value_t(Union):
|
||||
|
||||
class rsmi_compute_partition_type_t(c_int):
|
||||
RSMI_COMPUTE_PARTITION_INVALID = 0
|
||||
RSMI_COMPUTE_PARTITION_CPX = 1
|
||||
RSMI_COMPUTE_PARTITION_SPX = 2
|
||||
RSMI_COMPUTE_PARTITION_DPX = 3
|
||||
RSMI_COMPUTE_PARTITION_TPX = 4
|
||||
RSMI_COMPUTE_PARTITION_QPX = 5
|
||||
RSMI_COMPUTE_PARTITION_SPX = 1
|
||||
RSMI_COMPUTE_PARTITION_DPX = 2
|
||||
RSMI_COMPUTE_PARTITION_TPX = 3
|
||||
RSMI_COMPUTE_PARTITION_QPX = 4
|
||||
RSMI_COMPUTE_PARTITION_CPX = 5
|
||||
|
||||
rsmi_compute_partition_type_dict = {
|
||||
#'RSMI_COMPUTE_PARTITION_INVALID': 0,
|
||||
'CPX': 1,
|
||||
'SPX': 2,
|
||||
'DPX': 3,
|
||||
'TPX': 4,
|
||||
'QPX': 5
|
||||
'SPX': 1,
|
||||
'DPX': 2,
|
||||
'TPX': 3,
|
||||
'QPX': 4,
|
||||
'CPX': 5,
|
||||
}
|
||||
|
||||
rsmi_compute_partition_type = rsmi_compute_partition_type_t
|
||||
@@ -604,7 +604,7 @@ rsmi_compute_partition_type = rsmi_compute_partition_type_t
|
||||
# Usage example to get corresponding names:
|
||||
# compute_partition_type_l[rsmi_compute_partition_type_t.RSMI_COMPUTE_PARTITION_CPX]
|
||||
# will return string 'CPX'
|
||||
compute_partition_type_l = ['CPX', 'SPX', 'DPX', 'TPX', 'QPX']
|
||||
compute_partition_type_l = ['SPX', 'DPX', 'TPX', 'QPX', 'CPX']
|
||||
|
||||
class rsmi_memory_partition_type_t(c_int):
|
||||
RSMI_MEMORY_PARTITION_UNKNOWN = 0
|
||||
|
||||
+133
-24
@@ -754,13 +754,18 @@ rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid) {
|
||||
|
||||
kfd_node->get_property_value("domain", &domain);
|
||||
|
||||
// Add domain to full pci_id:
|
||||
// BDFID = ((DOMAIN & 0xFFFFFFFF) << 32) | ((PARTITION_ID & 0xF) << 28) |
|
||||
// ((BUS & 0xFF) << 8) | ((DEVICE & 0x1F) <<3 ) | (FUNCTION & 0x7)
|
||||
// bits [63:32] = domain
|
||||
// bits [31:28] = partition id in multi partition system
|
||||
// bits [27:16] = reserved
|
||||
// bits [15: 0] = pci bus/device/function
|
||||
/**
|
||||
* Add domain to full pci_id:
|
||||
* BDFID = ((DOMAIN & 0xFFFFFFFF) << 32) | ((PARTITION_ID & 0xF) << 28) |
|
||||
* ((BUS & 0xFF) << 8) | ((DEVICE & 0x1F) <<3 ) | (FUNCTION & 0x7)
|
||||
*
|
||||
* bits [63:32] = domain
|
||||
* bits [31:28] or bits [2:0] = partition id
|
||||
* bits [27:16] = reserved
|
||||
* bits [15:8] = Bus
|
||||
* bits [7:3] = Device
|
||||
* bits [2:0] = Function (partition id maybe in bits [2:0]) <-- Fallback for non SPX modes
|
||||
*/
|
||||
assert((domain & 0xFFFFFFFF00000000) == 0);
|
||||
(*bdfid) &= 0xFFFFFFFF; // keep bottom 32 bits of pci_id
|
||||
*bdfid |= (domain & 0xFFFFFFFF) << 32; // Add domain to top of pci_id
|
||||
@@ -4575,9 +4580,12 @@ rsmi_is_P2P_accessible(uint32_t dv_ind_src, uint32_t dv_ind_dst,
|
||||
CATCH
|
||||
}
|
||||
|
||||
static rsmi_status_t
|
||||
get_compute_partition(uint32_t dv_ind, std::string &compute_partition) {
|
||||
static rsmi_status_t get_compute_partition(uint32_t dv_ind,
|
||||
std::string &compute_partition) {
|
||||
TRY
|
||||
std::ostringstream ss;
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
|
||||
LOG_TRACE(ss);
|
||||
CHK_SUPPORT_NAME_ONLY(compute_partition.c_str())
|
||||
std::string compute_partition_str;
|
||||
|
||||
@@ -4601,6 +4609,8 @@ get_compute_partition(uint32_t dv_ind, std::string &compute_partition) {
|
||||
return RSMI_STATUS_UNEXPECTED_DATA;
|
||||
}
|
||||
compute_partition = compute_partition_str;
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= END =======, " << dv_ind;
|
||||
LOG_TRACE(ss);
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
CATCH
|
||||
}
|
||||
@@ -4610,7 +4620,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition,
|
||||
uint32_t len) {
|
||||
TRY
|
||||
std::ostringstream ss;
|
||||
ss << __PRETTY_FUNCTION__ << "| ======= start =======, dv_ind = "
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= start =======, dv_ind = "
|
||||
<< dv_ind;
|
||||
LOG_TRACE(ss);
|
||||
if ((len == 0) || (compute_partition == nullptr)) {
|
||||
@@ -4646,7 +4656,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition,
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::size_t length = returning_compute_partition.copy(compute_partition, len);
|
||||
std::size_t length = returning_compute_partition.copy(compute_partition, len-1);
|
||||
compute_partition[length]='\0';
|
||||
|
||||
if (len < (returning_compute_partition.size() + 1)) {
|
||||
@@ -4680,20 +4690,47 @@ static rsmi_status_t
|
||||
is_available_compute_partition(uint32_t dv_ind,
|
||||
std::string new_compute_partition) {
|
||||
TRY
|
||||
std::ostringstream ss;
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
|
||||
LOG_TRACE(ss);
|
||||
DEVICE_MUTEX
|
||||
std::string availableComputePartitions;
|
||||
rsmi_status_t ret =
|
||||
get_dev_value_line(amd::smi::kDevAvailableComputePartition,
|
||||
dv_ind, &availableComputePartitions);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | ======= end ======= "
|
||||
<< " | FAIL "
|
||||
<< " | Device #: " << dv_ind
|
||||
<< " | Type: "
|
||||
<< devInfoTypesStrings.at(amd::smi::kDevAvailableComputePartition)
|
||||
<< " | Data: could not retrieve requested data"
|
||||
<< " | Returning = "
|
||||
<< getRSMIStatusString(ret) << " |";
|
||||
LOG_ERROR(ss);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool isComputePartitionAvailable =
|
||||
amd::smi::containsString(availableComputePartitions,
|
||||
new_compute_partition);
|
||||
return (isComputePartitionAvailable) ? RSMI_STATUS_SUCCESS :
|
||||
RSMI_STATUS_SETTING_UNAVAILABLE;
|
||||
|
||||
ret = ((isComputePartitionAvailable) ? RSMI_STATUS_SUCCESS :
|
||||
RSMI_STATUS_SETTING_UNAVAILABLE);
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | ======= end ======= "
|
||||
<< " | Success "
|
||||
<< " | Device #: " << dv_ind
|
||||
<< " | Type: "
|
||||
<< devInfoTypesStrings.at(amd::smi::kDevAvailableComputePartition)
|
||||
<< " | Data: available_partitions = " << availableComputePartitions
|
||||
<< " | Data: isComputePartitionAvailable = "
|
||||
<< (isComputePartitionAvailable ? "True" : "False")
|
||||
<< " | Returning = "
|
||||
<< getRSMIStatusString(ret) << " |";
|
||||
LOG_INFO(ss);
|
||||
return ret;
|
||||
CATCH
|
||||
}
|
||||
|
||||
@@ -4702,16 +4739,14 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
rsmi_compute_partition_type_t compute_partition) {
|
||||
TRY
|
||||
std::ostringstream ss;
|
||||
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
|
||||
LOG_TRACE(ss);
|
||||
REQUIRE_ROOT_ACCESS
|
||||
if (!amd::smi::is_sudo_user()) {
|
||||
return RSMI_STATUS_PERMISSION;
|
||||
}
|
||||
DEVICE_MUTEX
|
||||
std::string newComputePartitionStr
|
||||
= mapRSMIToStringComputePartitionTypes.at(compute_partition);
|
||||
std::string currentComputePartition;
|
||||
std::string currentComputePartition = "";
|
||||
std::string newComputePartitionStr = "";
|
||||
|
||||
switch (compute_partition) {
|
||||
case RSMI_COMPUTE_PARTITION_CPX:
|
||||
@@ -4719,9 +4754,13 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
case RSMI_COMPUTE_PARTITION_DPX:
|
||||
case RSMI_COMPUTE_PARTITION_TPX:
|
||||
case RSMI_COMPUTE_PARTITION_QPX:
|
||||
newComputePartitionStr =
|
||||
mapRSMIToStringComputePartitionTypes.at(compute_partition);
|
||||
break;
|
||||
case RSMI_COMPUTE_PARTITION_INVALID:
|
||||
default:
|
||||
newComputePartitionStr =
|
||||
mapRSMIToStringComputePartitionTypes.at(RSMI_COMPUTE_PARTITION_INVALID);
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | ======= end ======= "
|
||||
<< " | Fail "
|
||||
@@ -4798,8 +4837,8 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
<< "| sizeof string = " << std::dec
|
||||
<< sizeof(newComputePartitionStr);
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
GET_DEV_FROM_INDX
|
||||
DEVICE_MUTEX
|
||||
int ret = dev->writeDevInfo(amd::smi::kDevComputePartition,
|
||||
newComputePartitionStr);
|
||||
rsmi_status_t returnResponse = amd::smi::ErrnoToRsmiStatus(ret);
|
||||
@@ -4814,7 +4853,6 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
<< getRSMIStatusString(returnResponse) << " |";
|
||||
LOG_TRACE(ss);
|
||||
|
||||
// TODO(charpoag): investigate providing GPU busy state occured with
|
||||
return returnResponse;
|
||||
CATCH
|
||||
}
|
||||
@@ -4822,6 +4860,9 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
static rsmi_status_t get_memory_partition(uint32_t dv_ind,
|
||||
std::string &memory_partition) {
|
||||
TRY
|
||||
std::ostringstream ss;
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
|
||||
LOG_TRACE(ss);
|
||||
CHK_SUPPORT_NAME_ONLY(memory_partition.c_str())
|
||||
std::string val_str;
|
||||
|
||||
@@ -4845,6 +4886,8 @@ static rsmi_status_t get_memory_partition(uint32_t dv_ind,
|
||||
return RSMI_STATUS_UNEXPECTED_DATA;
|
||||
}
|
||||
memory_partition = val_str;
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= END =======, " << dv_ind;
|
||||
LOG_TRACE(ss);
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
CATCH
|
||||
}
|
||||
@@ -4854,7 +4897,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
|
||||
rsmi_memory_partition_type_t memory_partition) {
|
||||
TRY
|
||||
std::ostringstream ss;
|
||||
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
|
||||
LOG_TRACE(ss);
|
||||
REQUIRE_ROOT_ACCESS
|
||||
DEVICE_MUTEX
|
||||
@@ -4989,7 +5032,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition,
|
||||
uint32_t len) {
|
||||
TRY
|
||||
std::ostringstream ss;
|
||||
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
|
||||
LOG_TRACE(ss);
|
||||
if ((len == 0) || (memory_partition == nullptr)) {
|
||||
ss << __PRETTY_FUNCTION__
|
||||
@@ -5059,7 +5102,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition,
|
||||
rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind) {
|
||||
TRY
|
||||
std::ostringstream ss;
|
||||
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
|
||||
LOG_TRACE(ss);
|
||||
REQUIRE_ROOT_ACCESS
|
||||
DEVICE_MUTEX
|
||||
@@ -5098,7 +5141,7 @@ rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind) {
|
||||
rsmi_status_t rsmi_dev_memory_partition_reset(uint32_t dv_ind) {
|
||||
TRY
|
||||
std::ostringstream ss;
|
||||
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
|
||||
ss << __PRETTY_FUNCTION__ << "| ======= start =======, " << dv_ind;
|
||||
LOG_TRACE(ss);
|
||||
REQUIRE_ROOT_ACCESS
|
||||
DEVICE_MUTEX
|
||||
@@ -5134,6 +5177,72 @@ rsmi_status_t rsmi_dev_memory_partition_reset(uint32_t dv_ind) {
|
||||
CATCH
|
||||
}
|
||||
|
||||
/**
 * Retrieves the partition ID of the device at index dv_ind.
 *
 * The ID is read from bits [31:28] of the value reported by
 * rsmi_dev_pci_id_get(). If those bits are 0 and the device's current compute
 * partition is DPX, TPX, QPX or CPX, the function bits [2:0] of the same value
 * are used instead (fallback, see the comment in the body).
 *
 * @param[in]  dv_ind        a device index
 * @param[out] partition_id  receives the partition ID; set to UINT32_MAX when
 *                           rsmi_dev_pci_id_get() does not succeed
 *
 * @retval ::RSMI_STATUS_INVALID_ARGS  partition_id is nullptr
 * @retval otherwise, the status returned by rsmi_dev_pci_id_get()
 */
rsmi_status_t
rsmi_dev_partition_id_get(uint32_t dv_ind, uint32_t *partition_id) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << "| ======= start =======, " << dv_ind;
  LOG_TRACE(ss);
  if (partition_id == nullptr) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | FAIL"
       << " | Device #: " << dv_ind
       << " | Type: partition_id"
       << " | Data: nullptr"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }
  DEVICE_MUTEX

  // The current compute partition only decides whether the fallback below
  // applies. A failed lookup is tolerated: the name stays "UNKNOWN", which
  // never matches a partitioned mode.
  std::string strCompPartition = "UNKNOWN";
  const uint32_t PARTITION_LEN = 10;
  char compute_partition[PARTITION_LEN];
  rsmi_status_t ret = rsmi_dev_compute_partition_get(dv_ind, compute_partition,
                                                     PARTITION_LEN);
  if (ret == RSMI_STATUS_SUCCESS) {
    strCompPartition = compute_partition;
  }

  // From here on, ret tracks the PCI ID query; it is the status returned to
  // the caller.
  uint64_t pci_id = UINT64_MAX;
  *partition_id = UINT32_MAX;
  ret = rsmi_dev_pci_id_get(dv_ind, &pci_id);
  if (ret == RSMI_STATUS_SUCCESS) {
    *partition_id = static_cast<uint32_t>((pci_id >> 28) & 0xf);
  }

  /**
   * Fall back is required due to driver changes within KFD.
   * Some devices may report bits [31:28] or [2:0].
   * With the newly added rsmi_dev_partition_id_get(..),
   * we provided this fallback to properly retrieve the partition ID. We
   * plan to eventually remove partition ID from the function portion of the
   * BDF (Bus Device Function). See below for PCI ID description.
   *
   * bits [63:32] = domain
   * bits [31:28] or bits [2:0] = partition id
   * bits [27:16] = reserved
   * bits [15:8]  = Bus
   * bits [7:3] = Device
   * bits [2:0] = Function (partition id may be in bits [2:0]) <-- Fallback for non SPX modes
   */
  const bool isPartitionedMode =
      (strCompPartition == "DPX" || strCompPartition == "TPX" ||
       strCompPartition == "CPX" || strCompPartition == "QPX");
  // *partition_id can only be 0 here when the PCI ID query succeeded
  // (otherwise it is still UINT32_MAX), so no separate status check is needed.
  if (*partition_id == 0 && isPartitionedMode) {
    *partition_id = static_cast<uint32_t>(pci_id & 0x7);
  }

  // Report the status that is actually returned rather than an unconditional
  // "Success".
  const bool succeeded = (ret == RSMI_STATUS_SUCCESS);
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << (succeeded ? " | Success" : " | FAIL")
     << " | Device #: " << dv_ind
     << " | Type: partition_id"
     << " | Data: " << *partition_id
     << " | Returning = "
     << getRSMIStatusString(ret) << " |";
  if (succeeded) {
    LOG_INFO(ss);
  } else {
    LOG_ERROR(ss);
  }
  return ret;
  CATCH
}
|
||||
|
||||
rsmi_status_t rsmi_dev_target_graphics_version_get(uint32_t dv_ind,
|
||||
uint64_t *gfx_version) {
|
||||
TRY
|
||||
|
||||
@@ -265,7 +265,7 @@ void ROCmLogging::Logger::buffer(const char* text) throw() {
|
||||
// and timestamp in the buffer message. Just log the raw bytes.
|
||||
if ((m_LogType == FILE_LOG) && (m_LogLevel >= LOG_LEVEL_BUFFER)) {
|
||||
lock();
|
||||
if(!m_File.is_open()) {
|
||||
if (!m_File.is_open()) {
|
||||
initialize_resources();
|
||||
if (!m_File.is_open()) {
|
||||
std::cout << "WARNING: re-initializing resources was unsuccessful."
|
||||
|
||||
@@ -235,15 +235,7 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
int i_ret;
|
||||
std::ostringstream ss;
|
||||
|
||||
LOG_ALWAYS("=============== ROCM SMI initialize ================");
|
||||
ROCmLogging::Logger::getInstance()->enableAllLogLevels();
|
||||
// Leaving below to allow developers to check current log settings
|
||||
// std::string logSettings = Logger::getInstance()->getLogSettings();
|
||||
// std::cout << "Current log settings:\n" << logSettings << std::endl;
|
||||
|
||||
if (ROCmLogging::Logger::getInstance()->isLoggerEnabled()) {
|
||||
logSystemDetails();
|
||||
}
|
||||
|
||||
assert(ref_count_ == 1);
|
||||
if (ref_count_ != 1) {
|
||||
@@ -259,6 +251,15 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
// To help debug env variable issues
|
||||
// debugRSMIEnvVarInfo();
|
||||
|
||||
if (ROCmLogging::Logger::getInstance()->isLoggerEnabled()) {
|
||||
ROCmLogging::Logger::getInstance()->enableAllLogLevels();
|
||||
LOG_ALWAYS("=============== ROCM SMI initialize ================");
|
||||
logSystemDetails();
|
||||
}
|
||||
// Leaving below to allow developers to check current log settings
|
||||
// std::string logSettings = ROCmLogging::Logger::getInstance()->getLogSettings();
|
||||
// std::cout << "Current log settings:\n" << logSettings << std::endl;
|
||||
|
||||
while (!std::string(kAMDMonitorTypes[i]).empty()) {
|
||||
amd_monitor_types_.insert(kAMDMonitorTypes[i]);
|
||||
++i;
|
||||
@@ -863,6 +864,15 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
<< " BDF = " << std::to_string(primaryBdfId)
|
||||
<< " (" << print_int_as_hex(primaryBdfId) << ")";
|
||||
LOG_DEBUG(ss);
|
||||
if (doesDeviceSupportPartitions && strCompPartition != "SPX"
|
||||
&& i->second.s_partition_id == 0) {
|
||||
i->second.s_partition_id = i->second.s_function;
|
||||
ss << __PRETTY_FUNCTION__ << " | (secondary node add) fall back - "
|
||||
<< "detected !SPX && partition_id == 0"
|
||||
<< "; function = " << std::to_string(i->second.s_function)
|
||||
<< "; partition_id = " << std::to_string(i->second.s_partition_id);
|
||||
LOG_DEBUG(ss);
|
||||
}
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | (secondary node add) B4 AddToDeviceList() -->"
|
||||
<< "\n[node_id = " << std::to_string(i->second.s_node_id)
|
||||
@@ -881,6 +891,15 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
} else {
|
||||
ss << __PRETTY_FUNCTION__ << " | primary node add ; "
|
||||
<< " BDF = " << std::to_string(UINT64_MAX);
|
||||
if (doesDeviceSupportPartitions && strCompPartition != "SPX"
|
||||
&& i->second.s_partition_id == 0) {
|
||||
i->second.s_partition_id = i->second.s_function;
|
||||
ss << __PRETTY_FUNCTION__ << " | (primary node add) fall back - "
|
||||
<< "detected !SPX && partition_id == 0"
|
||||
<< "; function = " << std::to_string(i->second.s_function)
|
||||
<< "; partition_id = " << std::to_string(i->second.s_partition_id);
|
||||
LOG_DEBUG(ss);
|
||||
}
|
||||
LOG_DEBUG(ss);
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | (primary node add) After AddToDeviceList() -->"
|
||||
@@ -1010,6 +1029,15 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
<< " BDF = " << std::to_string(myBdfId)
|
||||
<< " (" << print_int_as_hex(myBdfId) << ")";
|
||||
LOG_DEBUG(ss);
|
||||
if (doesDeviceSupportPartitions && strCompPartition != "SPX"
|
||||
&& it->second.s_partition_id == 0) {
|
||||
it->second.s_partition_id = it->second.s_function;
|
||||
ss << __PRETTY_FUNCTION__ << " | (secondary node add #2) fall back - "
|
||||
<< "detected !SPX && partition_id == 0"
|
||||
<< "; function = " << std::to_string(it->second.s_function)
|
||||
<< "; partition_id = " << std::to_string(it->second.s_partition_id);
|
||||
LOG_DEBUG(ss);
|
||||
}
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | (secondary node add #2) B4 AddToDeviceList() -->"
|
||||
<< "\n[node_id = " << std::to_string(it->second.s_node_id)
|
||||
|
||||
@@ -88,6 +88,13 @@ void TestComputePartitionReadWrite::Close() {
|
||||
TestBase::Close();
|
||||
}
|
||||
|
||||
const uint32_t MAX_UNSUPPORTED_PARTITIONS = 0;
|
||||
const uint32_t MAX_SPX_PARTITIONS = 1;
|
||||
const uint32_t MAX_DPX_PARTITIONS = 2;
|
||||
const uint32_t MAX_TPX_PARTITIONS = 3;
|
||||
const uint32_t MAX_QPX_PARTITIONS = 4;
|
||||
const uint32_t MAX_CPX_PARTITIONS = 8;
|
||||
|
||||
static const std::string
|
||||
computePartitionString(rsmi_compute_partition_type computeParitionType) {
|
||||
/**
|
||||
@@ -139,50 +146,186 @@ static void system_wait(int seconds) {
|
||||
|
||||
static const std::map<std::string, rsmi_compute_partition_type_t>
|
||||
mapStringToRSMIComputePartitionTypes {
|
||||
{"CPX", RSMI_COMPUTE_PARTITION_CPX},
|
||||
{"SPX", RSMI_COMPUTE_PARTITION_SPX},
|
||||
{"DPX", RSMI_COMPUTE_PARTITION_DPX},
|
||||
{"TPX", RSMI_COMPUTE_PARTITION_TPX},
|
||||
{"QPX", RSMI_COMPUTE_PARTITION_QPX}
|
||||
{"QPX", RSMI_COMPUTE_PARTITION_QPX},
|
||||
{"CPX", RSMI_COMPUTE_PARTITION_CPX},
|
||||
{"SPX", RSMI_COMPUTE_PARTITION_SPX}
|
||||
};
|
||||
|
||||
void TestComputePartitionReadWrite::Run(void) {
|
||||
rsmi_status_t ret, err;
|
||||
char orig_char_computePartition[255];
|
||||
char current_char_computePartition[255];
|
||||
static void checkPartitionIdChanges(
|
||||
uint32_t dev, const std::string current_partition, bool isVerbose,
|
||||
bool reinitialize) {
|
||||
uint32_t max_loop = MAX_SPX_PARTITIONS;
|
||||
|
||||
TestBase::Run();
|
||||
if (setup_failed_) {
|
||||
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
|
||||
return;
|
||||
// re-initialize to ensure new device ordering is followed
|
||||
if (reinitialize) {
|
||||
if (isVerbose) {
|
||||
std::cout << "\t**Reinitializing device list due to parition changes.\n";
|
||||
}
|
||||
rsmi_shut_down();
|
||||
rsmi_init(0);
|
||||
}
|
||||
|
||||
// Confirm system supports compute partition, before executing wait
|
||||
ret = rsmi_dev_compute_partition_get(0, orig_char_computePartition, 255);
|
||||
if (ret == RSMI_STATUS_SUCCESS) {
|
||||
system_wait(25);
|
||||
}
|
||||
|
||||
for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {
|
||||
if (dv_ind != 0) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl;
|
||||
if (current_partition == "DPX") {
|
||||
max_loop = MAX_DPX_PARTITIONS;
|
||||
} else if (current_partition == "TPX") {
|
||||
max_loop = MAX_TPX_PARTITIONS;
|
||||
} else if (current_partition == "QPX") {
|
||||
max_loop = MAX_QPX_PARTITIONS;
|
||||
} else if (current_partition == "CPX") {
|
||||
max_loop = MAX_CPX_PARTITIONS;
|
||||
uint16_t num_xcd;
|
||||
rsmi_status_t ret = rsmi_dev_metrics_xcd_counter_get(dev, &num_xcd);
|
||||
if (ret == RSMI_STATUS_SUCCESS) {
|
||||
max_loop = num_xcd;
|
||||
if (isVerbose) {
|
||||
std::cout << "\t**Expecting num_xcd = " << num_xcd << " to equal "
|
||||
"total CPX nodes\n";
|
||||
}
|
||||
}
|
||||
PrintDeviceHeader(dv_ind);
|
||||
bool devicePartitionUpdated = false;
|
||||
}
|
||||
|
||||
// Standard checks to see if API is supported, before running full tests
|
||||
ret = rsmi_dev_compute_partition_get(dv_ind, orig_char_computePartition,
|
||||
255);
|
||||
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << ": "
|
||||
<< "Not supported on this device" << std::endl;
|
||||
for (uint32_t i = dev; i < dev + max_loop; i++) {
|
||||
uint32_t partition_id;
|
||||
rsmi_status_t ret = rsmi_dev_partition_id_get(i, &partition_id);
|
||||
std::cout << "\t** Checking Partition ID | Device: " << std::to_string(i)
|
||||
<< "; Current Partition: " << current_partition
|
||||
<< " ; Max partition IDs to check: " << max_loop << "\n";
|
||||
ASSERT_EQ(ret, RSMI_STATUS_SUCCESS);
|
||||
if (ret == RSMI_STATUS_SUCCESS && current_partition == "SPX") {
|
||||
ASSERT_LT(partition_id, max_loop);
|
||||
if (isVerbose) {
|
||||
std::cout << "\n\t**Confirmed partition_id < " << max_loop
|
||||
<< " for SPX"
|
||||
<< "\n\t**rsmi_dev_partition_id_get(" + std::to_string(i) +
|
||||
", &partition_id); partition_id = "
|
||||
<< static_cast<uint32_t>(partition_id) << std::endl;
|
||||
}
|
||||
} else if (ret == RSMI_STATUS_SUCCESS && current_partition == "DPX") {
|
||||
ASSERT_LT(partition_id, max_loop);
|
||||
if (isVerbose) {
|
||||
std::cout << "\n\t**Confirmed partition_id < " << max_loop
|
||||
<< " for DPX"
|
||||
<< "\n\t**rsmi_dev_partition_id_get(" + std::to_string(i) +
|
||||
", &partition_id); partition_id = "
|
||||
<< static_cast<uint32_t>(partition_id) << std::endl;
|
||||
}
|
||||
} else if (ret == RSMI_STATUS_SUCCESS && current_partition == "TPX") {
|
||||
ASSERT_LT(partition_id, max_loop);
|
||||
if (isVerbose) {
|
||||
std::cout << "\n\t**Confirmed partition_id < "
|
||||
<< max_loop << " for TPX"
|
||||
<< "\n\t**rsmi_dev_partition_id_get(" + std::to_string(i) +
|
||||
", &partition_id); partition_id = "
|
||||
<< static_cast<uint32_t>(partition_id) << std::endl;
|
||||
}
|
||||
} else if (ret == RSMI_STATUS_SUCCESS && current_partition == "QPX") {
|
||||
ASSERT_LT(partition_id, max_loop);
|
||||
if (isVerbose) {
|
||||
std::cout << "\n\t**Confirmed partition_id < "
|
||||
<< max_loop << " for QPX"
|
||||
<< "\n\t**rsmi_dev_partition_id_get(" + std::to_string(i) +
|
||||
", &partition_id); partition_id = "
|
||||
<< static_cast<uint32_t>(partition_id) << std::endl;
|
||||
}
|
||||
} else if (ret == RSMI_STATUS_SUCCESS && current_partition == "CPX") {
|
||||
ASSERT_LT(partition_id, max_loop);
|
||||
if (isVerbose) {
|
||||
std::cout << "\n\t**Confirmed partition_id < "
|
||||
<< max_loop << " for CPX"
|
||||
<< "\n\t**rsmi_dev_partition_id_get(" + std::to_string(i) +
|
||||
", &partition_id); partition_id = "
|
||||
<< static_cast<uint32_t>(partition_id) << std::endl;
|
||||
}
|
||||
} else if (ret == RSMI_STATUS_SUCCESS && current_partition == "UNKNOWN") {
|
||||
ASSERT_EQ(partition_id, max_loop - 1);
|
||||
if (isVerbose) {
|
||||
std::cout << "\n\t**Confirmed partition_id = "
|
||||
<< (max_loop - 1)
|
||||
<< " for current_partition = UNKNOWN"
|
||||
<< "\n\t**rsmi_dev_partition_id_get(" + std::to_string(i) +
|
||||
", &partition_id); partition_id = "
|
||||
<< static_cast<uint32_t>(partition_id) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestComputePartitionReadWrite::Run(void) {
|
||||
rsmi_status_t ret, err;
|
||||
char orig_char_computePartition[255];
|
||||
orig_char_computePartition[0] = '\0';
|
||||
char current_char_computePartition[255];
|
||||
current_char_computePartition[0] = '\0';
|
||||
|
||||
TestBase::Run();
|
||||
if (setup_failed_) {
|
||||
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
|
||||
return;
|
||||
}
|
||||
bool isVerbose = (this->verbosity() &&
|
||||
this->verbosity() >= (this->TestBase::VERBOSE_STANDARD)) ? true: false;
|
||||
|
||||
// Confirm system supports compute partition, before executing wait
|
||||
ret = rsmi_dev_compute_partition_get(0, orig_char_computePartition, 255);
|
||||
if (ret == RSMI_STATUS_SUCCESS) {
|
||||
system_wait(15);
|
||||
}
|
||||
|
||||
// initial_num_devices - keep this value static, due to partition changes
|
||||
// fluctuating # of devices. We should end up with same # of devices at
|
||||
// end of test.
|
||||
uint32_t initial_num_devices = num_monitor_devs();
|
||||
for (uint32_t dv_ind = 0; dv_ind < initial_num_devices; ++dv_ind) {
|
||||
if (dv_ind >= 0) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "\t**"
|
||||
<< "========= LOOP THROUGH DEVICES - DEVICE #"
|
||||
<< std::to_string(dv_ind) << " =============="
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
PrintDeviceHeader(dv_ind);
|
||||
bool devicePartitionUpdated = false;
|
||||
|
||||
ret = rsmi_dev_partition_id_get(dv_ind, nullptr);
|
||||
ASSERT_EQ(ret, RSMI_STATUS_INVALID_ARGS);
|
||||
IF_VERB(STANDARD) {
|
||||
if (ret == RSMI_STATUS_INVALID_ARGS) {
|
||||
std::cout << "\t**" << "Confirmed rsmi_dev_partition_id_get(..,nullptr): "
|
||||
<< "RSMI_STATUS_INVALID_ARGS" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::string partitionStr = "";
|
||||
ret = rsmi_dev_compute_partition_get(dv_ind, orig_char_computePartition, 255);
|
||||
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**rsmi_dev_compute_partition_get(): "
|
||||
<< "Not supported on this device"
|
||||
<< std::endl;
|
||||
}
|
||||
partitionStr = orig_char_computePartition;
|
||||
if (partitionStr.empty()) {
|
||||
partitionStr = computePartitionString(
|
||||
rsmi_compute_partition_type_t::RSMI_COMPUTE_PARTITION_INVALID);
|
||||
}
|
||||
// Regardless of partition support - no changes made, so no device
|
||||
// refresh needed (ie. rsmi_dev_compute_partition_set(..))
|
||||
checkPartitionIdChanges(dv_ind, partitionStr, isVerbose, false);
|
||||
continue;
|
||||
} else {
|
||||
CHK_ERR_ASRT(ret)
|
||||
std::string partitionStr = orig_char_computePartition;
|
||||
if (partitionStr.empty()) {
|
||||
partitionStr = computePartitionString(
|
||||
rsmi_compute_partition_type_t::RSMI_COMPUTE_PARTITION_INVALID);
|
||||
}
|
||||
// Regardless of partition support - no changes made, so no device
|
||||
// refresh needed (ie. rsmi_dev_compute_partition_set(..))
|
||||
checkPartitionIdChanges(dv_ind, partitionStr, isVerbose, false);
|
||||
}
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl << "\t**"
|
||||
@@ -236,32 +379,12 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
}
|
||||
}
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
rsmi_compute_partition_type_t breakMe;
|
||||
err = rsmi_dev_compute_partition_set(dv_ind, breakMe);
|
||||
std::cout << "\t**rsmi_dev_compute_partition_set(null ptr): "
|
||||
<< amd::smi::getRSMIStatusString(err, false) << "\n";
|
||||
ASSERT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
|
||||
(err == RSMI_STATUS_NOT_SUPPORTED) ||
|
||||
(err == RSMI_STATUS_PERMISSION));
|
||||
IF_VERB(STANDARD) {
|
||||
if (err == RSMI_STATUS_INVALID_ARGS) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed RSMI_STATUS_INVALID_ARGS was returned."
|
||||
<< std::endl;
|
||||
} else if (err == RSMI_STATUS_PERMISSION) {
|
||||
DISPLAY_RSMI_ERR(err)
|
||||
// tests should not continue if err is a permission issue
|
||||
ASSERT_FALSE(err == RSMI_STATUS_PERMISSION);
|
||||
} else {
|
||||
DISPLAY_RSMI_ERR(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Re-run original get, so we can reset to later
|
||||
ret = rsmi_dev_compute_partition_get(dv_ind, orig_char_computePartition,
|
||||
255);
|
||||
ASSERT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
std::cout << "\t**rsmi_dev_compute_partition_get(" << dv_ind
|
||||
<< ", " << orig_char_computePartition << ")\n";
|
||||
|
||||
/**
|
||||
* RSMI_COMPUTE_PARTITION_INVALID = 0,
|
||||
@@ -277,8 +400,27 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
* //!< work together with shared memory
|
||||
*/
|
||||
|
||||
for (int partition = static_cast<int>(RSMI_COMPUTE_PARTITION_CPX);
|
||||
partition <= static_cast<int>(RSMI_COMPUTE_PARTITION_QPX);
|
||||
/**
|
||||
* General Loop Logic:
|
||||
* [0:SPX, 1:SPX, 2:SPX, 3:SPX]
|
||||
* [0:DPX, 1:DPX, 2: SPX, 3:SPX, 4:SPX] <- set 0 to DPX
|
||||
* [0:TPX, 1:TPX, 2:TPX, 3:SPX, 4:SPX, 5:SPX] <- set 0 to TPX
|
||||
* [0:QPX, 1:QPX, 2:QPX, 3:QPX, 4:SPX, 5:SPX, 6:SPX] <- set 0 to QPX
|
||||
* [0:CPX, 1:CPX, 2:CPX, 3:CPX, 4:CPX, 5:SPX, 6:SPX, 7:SPX] <- set 0 to CPX
|
||||
* [0:SPX, 1:SPX, 2:SPX, 3:SPX] <- reset(0)
|
||||
* +1 index
|
||||
* [0:SPX, 1:SPX, 2:SPX, 3:SPX]
|
||||
* [0:SPX, 1:DPX, 2: DPX, 3:SPX, 4:SPX] <- set 1 to DPX
|
||||
* [0:SPX, 1:TPX, 2:TPX, 3:TPX, 4:SPX, 5:SPX] <- set 1 to TPX
|
||||
* [0:SPX, 1:QPX, 2:QPX, 3:QPX, 4:QPX, 5:SPX, 6:SPX] <- set 1 to QPX
|
||||
* [0:SPX, 1:CPX, 2:CPX, 3:CPX, 4:CPX, 5:CPX, 6:SPX, 7:SPX] <- set 1 to CPX
|
||||
* [0:SPX, 1:SPX, 2:SPX, 3:SPX] <- reset(1)
|
||||
* ...
|
||||
*
|
||||
*/
|
||||
std::string final_partition_state = "UNKNOWN";
|
||||
for (int partition = static_cast<int>(RSMI_COMPUTE_PARTITION_SPX);
|
||||
partition <= static_cast<int>(RSMI_COMPUTE_PARTITION_CPX);
|
||||
partition++) {
|
||||
rsmi_compute_partition_type_t updatePartition
|
||||
= static_cast<rsmi_compute_partition_type_t>(partition);
|
||||
@@ -292,7 +434,8 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
ret = rsmi_dev_compute_partition_set(dv_ind, updatePartition);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "rsmi_dev_compute_partition_set(dv_ind, updatePartition): "
|
||||
<< "rsmi_dev_compute_partition_set(" << dv_ind
|
||||
<< ", updatePartition): "
|
||||
<< amd::smi::getRSMIStatusString(ret, false) << "\n"
|
||||
<< "\t**New Partition (set): "
|
||||
<< computePartitionString(updatePartition) << "\n";
|
||||
@@ -341,6 +484,7 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
if (strcmp(orig_char_computePartition, current_char_computePartition) !=
|
||||
0) {
|
||||
devicePartitionUpdated = true;
|
||||
final_partition_state = current_char_computePartition;
|
||||
} else {
|
||||
devicePartitionUpdated = false;
|
||||
}
|
||||
@@ -356,8 +500,13 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
<< computePartitionString(updatePartition) << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
checkPartitionIdChanges(dv_ind, computePartitionString(updatePartition),
|
||||
isVerbose, true);
|
||||
}
|
||||
} // END looping through partition changes
|
||||
std::cout << "\t**=========== END PARTITION LOOP (dev = "
|
||||
<< std::to_string(dv_ind) << ") ===========\n";
|
||||
|
||||
/* TEST RETURN TO BOOT COMPUTE PARTITION SETTING */
|
||||
IF_VERB(STANDARD) {
|
||||
@@ -371,7 +520,7 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
ret = rsmi_dev_compute_partition_reset(dv_ind);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "rsmi_dev_compute_partition_reset(dv_ind): "
|
||||
<< "rsmi_dev_compute_partition_reset(" << dv_ind << "): "
|
||||
<< amd::smi::getRSMIStatusString(ret, false) << "\n";
|
||||
}
|
||||
ASSERT_TRUE((ret == RSMI_STATUS_SUCCESS) ||
|
||||
@@ -393,6 +542,14 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
<< "\t**" << "Partitions Updated: "
|
||||
<< (devicePartitionUpdated ? "TRUE" : "FALSE") << "\n";
|
||||
}
|
||||
|
||||
if (final_partition_state != std::string(current_char_computePartition)) {
|
||||
checkPartitionIdChanges(dv_ind, std::string(current_char_computePartition),
|
||||
isVerbose, true);
|
||||
} else {
|
||||
checkPartitionIdChanges(dv_ind, std::string(current_char_computePartition),
|
||||
isVerbose, false);
|
||||
}
|
||||
if (wasResetSuccess && devicePartitionUpdated) {
|
||||
ASSERT_STRNE(oldPartition.c_str(), current_char_computePartition);
|
||||
IF_VERB(STANDARD) {
|
||||
@@ -401,6 +558,7 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
<< "equal to current\n\t partition ("
|
||||
<< current_char_computePartition << ")" << std::endl;
|
||||
}
|
||||
final_partition_state = std::string(current_char_computePartition);
|
||||
} else {
|
||||
ASSERT_STREQ(oldPartition.c_str(), current_char_computePartition);
|
||||
IF_VERB(STANDARD) {
|
||||
@@ -424,7 +582,12 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
ret = rsmi_dev_compute_partition_set(dv_ind, newPartition);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << "Returning compute partition to: "
|
||||
std::cout << "\t**"
|
||||
<< "rsmi_dev_compute_partition_set("
|
||||
<< std::to_string(dv_ind) << ", "
|
||||
<< std::string(orig_char_computePartition) << ")" << std::endl;
|
||||
std::cout << "\t**"
|
||||
<< "Returning compute partition to: "
|
||||
<< computePartitionString(newPartition) << std::endl;
|
||||
}
|
||||
ret = rsmi_dev_compute_partition_get(dv_ind, current_char_computePartition,
|
||||
@@ -440,5 +603,20 @@ void TestComputePartitionReadWrite::Run(void) {
|
||||
ASSERT_EQ(RSMI_STATUS_SUCCESS, ret);
|
||||
ASSERT_STREQ(computePartitionString(newPartition).c_str(),
|
||||
current_char_computePartition);
|
||||
|
||||
// only refresh (rsmi_shut_down() -> rsmi_init(0)) device list
|
||||
// if there was a partition change
|
||||
if (final_partition_state != std::string(current_char_computePartition)) {
|
||||
checkPartitionIdChanges(dv_ind, computePartitionString(newPartition),
|
||||
isVerbose, true);
|
||||
} else {
|
||||
checkPartitionIdChanges(dv_ind, computePartitionString(newPartition),
|
||||
isVerbose, false);
|
||||
}
|
||||
std::cout << "\t**"
|
||||
<< "========= END LOOP THROUGH DEVICES - DEVICE #"
|
||||
<< std::to_string(dv_ind) << " =============="
|
||||
<< std::endl;
|
||||
} // END looping through devices
|
||||
std::cout << "\t**=========== END TEST ===========\n";
|
||||
}
|
||||
|
||||
@@ -229,7 +229,7 @@ const char *FreqEnumToStr(rsmi_clk_type rsmi_clk) {
|
||||
}
|
||||
|
||||
void printRSMIError(rsmi_status_t err) {
|
||||
std::cout << "err = " << amd::smi::getRSMIStatusString(err);
|
||||
std::cout << "err = " << amd::smi::getRSMIStatusString(err) << "\n";
|
||||
}
|
||||
|
||||
#if ENABLE_SMI
|
||||
|
||||
Reference in New Issue
Block a user