Merge amd-staging into amd-master 20231116
This merge skips Ibdaf031be9d916020b4049544dbd725858c7711d as that change introduces a bug in gpu-metrics Change-Id: Ied8447affd5ed3c847734d75517b04c073dc44b4 Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
Este cometimento está contido em:
+2
-2
@@ -35,7 +35,7 @@ find_program (GIT NAMES git)
|
||||
|
||||
## Setup the package version based on git tags.
|
||||
set(PKG_VERSION_GIT_TAG_PREFIX "rsmi_pkg_ver")
|
||||
get_package_version_number("6.0.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
|
||||
get_package_version_number("6.1.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
|
||||
message("Package version: ${PKG_VERSION_STR}")
|
||||
set(${ROCM_SMI_LIBS_TARGET}_VERSION_MAJOR "${VERSION_MAJOR}")
|
||||
set(${ROCM_SMI_LIBS_TARGET}_VERSION_MINOR "${VERSION_MINOR}")
|
||||
@@ -72,7 +72,7 @@ endif()
|
||||
|
||||
## Compiler flags
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti -std=c++17")
|
||||
"${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti")
|
||||
if (${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64")
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
|
||||
|
||||
@@ -363,16 +363,16 @@ typedef rsmi_clk_type_t rsmi_clk_type;
|
||||
*/
|
||||
typedef enum {
|
||||
RSMI_COMPUTE_PARTITION_INVALID = 0,
|
||||
RSMI_COMPUTE_PARTITION_CPX = 1, //!< Core mode (CPX)- Per-chip XCC with
|
||||
//!< shared memory
|
||||
RSMI_COMPUTE_PARTITION_SPX = 2, //!< Single GPU mode (SPX)- All XCCs work
|
||||
//!< together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_DPX = 3, //!< Dual GPU mode (DPX)- Half XCCs work
|
||||
//!< together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_TPX = 4, //!< Triple GPU mode (TPX)- One-third XCCs
|
||||
//!< work together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_QPX = 5, //!< Quad GPU mode (QPX)- Quarter XCCs
|
||||
//!< work together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
|
||||
//!< shared memory
|
||||
RSMI_COMPUTE_PARTITION_SPX, //!< Single GPU mode (SPX)- All XCCs work
|
||||
//!< together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_DPX, //!< Dual GPU mode (DPX)- Half XCCs work
|
||||
//!< together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_TPX, //!< Triple GPU mode (TPX)- One-third XCCs
|
||||
//!< work together with shared memory
|
||||
RSMI_COMPUTE_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
|
||||
//!< work together with shared memory
|
||||
} rsmi_compute_partition_type_t;
|
||||
/// \cond Ignore in docs.
|
||||
typedef rsmi_compute_partition_type_t rsmi_compute_partition_type;
|
||||
@@ -680,8 +680,8 @@ typedef enum {
|
||||
*/
|
||||
typedef enum _RSMI_IO_LINK_TYPE {
|
||||
RSMI_IOLINK_TYPE_UNDEFINED = 0, //!< unknown type.
|
||||
RSMI_IOLINK_TYPE_PCIEXPRESS = 1, //!< PCI Express
|
||||
RSMI_IOLINK_TYPE_XGMI = 2, //!< XGMI
|
||||
RSMI_IOLINK_TYPE_PCIEXPRESS, //!< PCI Express
|
||||
RSMI_IOLINK_TYPE_XGMI, //!< XGMI
|
||||
RSMI_IOLINK_TYPE_NUMIOLINKTYPES, //!< Number of IO Link types
|
||||
RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF //!< Max of IO Link types
|
||||
} RSMI_IO_LINK_TYPE;
|
||||
@@ -1503,6 +1503,23 @@ rsmi_status_t rsmi_dev_subsystem_vendor_id_get(uint32_t dv_ind, uint16_t *id);
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_unique_id_get(uint32_t dv_ind, uint64_t *id);
|
||||
|
||||
/**
|
||||
* @brief Get the XGMI physical id associated with the device
|
||||
*
|
||||
* @details Given a device index @p dv_ind and a pointer to a uint16_t @p id,
|
||||
* this function will write the XGMI physical id to the uint16_t pointed to by @p id.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[inout] id a pointer to uint16_t to which the XGMI physical id
|
||||
* will be written
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call.
|
||||
*
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_xgmi_physical_id_get(uint32_t dv_ind, uint16_t *id);
|
||||
|
||||
|
||||
/** @} */ // end of IDQuer
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
@@ -103,6 +103,7 @@ enum DevInfoTypes {
|
||||
kDevOverDriveLevel,
|
||||
kDevMemOverDriveLevel,
|
||||
kDevDevID,
|
||||
kDevXGMIPhysicalID,
|
||||
kDevDevRevID,
|
||||
kDevDevProdName,
|
||||
kDevDevProdNum,
|
||||
|
||||
+15
-3
@@ -836,6 +836,21 @@ rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Retrieves the XGMI physical id of device dv_ind and stores it through id.
// The value is obtained by delegating to get_id() with
// amd::smi::kDevXGMIPhysicalID; the status returned by get_id() is passed
// back to the caller unchanged. Trace messages are logged on entry and exit.
rsmi_status_t
|
||||
rsmi_dev_xgmi_physical_id_get(uint32_t dv_ind, uint16_t *id) {
|
||||
std::ostringstream ss;
|
||||
rsmi_status_t ret;
|
||||
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
|
||||
LOG_TRACE(ss);
|
||||
// NOTE(review): CHK_SUPPORT_NAME_ONLY is a project macro defined outside this
// view; presumably it validates id / reports API support and may return
// early -- confirm against its definition.
CHK_SUPPORT_NAME_ONLY(id)
|
||||
|
||||
ret = get_id(dv_ind, amd::smi::kDevXGMIPhysicalID, id);
|
||||
// The exit trace includes the textual form of the returned status.
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
|
||||
<< ", reporting " << amd::smi::getRSMIStatusString(ret);
|
||||
LOG_TRACE(ss);
|
||||
return ret;
|
||||
}
|
||||
|
||||
rsmi_status_t
|
||||
rsmi_dev_revision_get(uint32_t dv_ind, uint16_t *revision) {
|
||||
std::ostringstream outss;
|
||||
@@ -2776,9 +2791,6 @@ rsmi_dev_od_volt_info_get(uint32_t dv_ind, rsmi_od_volt_freq_data_t *odv) {
|
||||
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
|
||||
LOG_TRACE(ss);
|
||||
DEVICE_MUTEX
|
||||
if (odv == nullptr) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
CHK_SUPPORT_NAME_ONLY(odv)
|
||||
rsmi_status_t ret = get_od_clk_volt_info(dv_ind, odv);
|
||||
|
||||
|
||||
@@ -82,6 +82,7 @@ static const char *kDevPerfLevelFName = "power_dpm_force_performance_level";
|
||||
static const char *kDevDevProdNameFName = "product_name";
|
||||
static const char *kDevDevProdNumFName = "product_number";
|
||||
static const char *kDevDevIDFName = "device";
|
||||
static const char* kDevXGMIPhysicalIDFName = "xgmi_physical_id";
|
||||
static const char *kDevDevRevIDFName = "revision";
|
||||
static const char *kDevVendorIDFName = "vendor";
|
||||
static const char *kDevSubSysDevIDFName = "subsystem_device";
|
||||
@@ -238,6 +239,7 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
|
||||
{kDevDevProdName, kDevDevProdNameFName},
|
||||
{kDevDevProdNum, kDevDevProdNumFName},
|
||||
{kDevDevID, kDevDevIDFName},
|
||||
{kDevXGMIPhysicalID, kDevXGMIPhysicalIDFName},
|
||||
{kDevDevRevID, kDevDevRevIDFName},
|
||||
{kDevVendorID, kDevVendorIDFName},
|
||||
{kDevSubSysDevID, kDevSubSysDevIDFName},
|
||||
@@ -379,6 +381,7 @@ static const std::map<const char *, dev_depends_t> kDevFuncDependsMap = {
|
||||
// Functions with only mandatory dependencies
|
||||
{"rsmi_dev_vram_vendor_get", {{kDevVramVendorFName}, {}}},
|
||||
{"rsmi_dev_id_get", {{kDevDevIDFName}, {}}},
|
||||
{"rsmi_dev_xgmi_physical_id_get", {{kDevXGMIPhysicalIDFName}, {}}},
|
||||
{"rsmi_dev_revision_get", {{kDevDevRevIDFName}, {}}},
|
||||
{"rsmi_dev_vendor_id_get", {{kDevVendorIDFName}, {}}},
|
||||
{"rsmi_dev_name_get", {{kDevVendorIDFName,
|
||||
@@ -956,6 +959,7 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) {
|
||||
case kDevSubSysVendorID:
|
||||
case kDevVendorID:
|
||||
case kDevErrCntFeatures:
|
||||
case kDevXGMIPhysicalID:
|
||||
ret = readDevInfoStr(type, &tempStr);
|
||||
RET_IF_NONZERO(ret);
|
||||
|
||||
@@ -1102,6 +1106,7 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) {
|
||||
case kDevComputePartition:
|
||||
case kDevMemoryPartition:
|
||||
case kDevNumaNode:
|
||||
case kDevXGMIPhysicalID:
|
||||
return readDevInfoStr(type, val);
|
||||
break;
|
||||
|
||||
|
||||
+25
-4
@@ -53,6 +53,7 @@
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
@@ -85,6 +86,7 @@ amd::smi::RocmSMI::devInfoTypesStrings = {
|
||||
{amd::smi::kDevOverDriveLevel, amdSMI + "kDevOverDriveLevel"},
|
||||
{amd::smi::kDevMemOverDriveLevel, amdSMI + "kDevMemOverDriveLevel"},
|
||||
{amd::smi::kDevDevID, amdSMI + "kDevDevID"},
|
||||
{amd::smi::kDevXGMIPhysicalID, amdSMI + "kDevXGMIPhysicalID"},
|
||||
{amd::smi::kDevDevRevID, amdSMI + "kDevDevRevID"},
|
||||
{amd::smi::kDevDevProdName, amdSMI + "kDevDevProdName"},
|
||||
{amd::smi::kDevDevProdNum, amdSMI + "kDevDevProdNum"},
|
||||
@@ -383,9 +385,28 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
<< "\n | final update: device->bdfid() holds correct device bdf";
|
||||
LOG_TRACE(ss);
|
||||
}
|
||||
if (ret != 0) {
|
||||
throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR,
|
||||
"Failed to initialize rocm_smi library (amdgpu node discovery).");
|
||||
|
||||
std::shared_ptr<amd::smi::Device> dev;
|
||||
// Sort index based on the BDF, collect BDF id firstly.
|
||||
std::vector<std::pair<uint64_t, std::shared_ptr<amd::smi::Device>>> dv_to_id;
|
||||
dv_to_id.reserve(devices_.size());
|
||||
for (uint32_t dv_ind = 0; dv_ind < devices_.size(); ++dv_ind) {
|
||||
dev = devices_[dv_ind];
|
||||
uint64_t bdfid = dev->bdfid();
|
||||
dv_to_id.push_back({bdfid, dev});
|
||||
}
|
||||
ss << __PRETTY_FUNCTION__ << " Sort index based on BDF.";
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
// Stable sort to keep the order if bdf is equal.
|
||||
std::stable_sort(dv_to_id.begin(), dv_to_id.end(), []
|
||||
(const std::pair<uint64_t, std::shared_ptr<amd::smi::Device>>& p1,
|
||||
const std::pair<uint64_t, std::shared_ptr<amd::smi::Device>>& p2) {
|
||||
return p1.first < p2.first;
|
||||
});
|
||||
devices_.clear();
|
||||
for (uint32_t dv_ind = 0; dv_ind < dv_to_id.size(); ++dv_ind) {
|
||||
devices_.push_back(dv_to_id[dv_ind].second);
|
||||
}
|
||||
|
||||
std::map<uint64_t, std::shared_ptr<KFDNode>> tmp_map;
|
||||
@@ -406,7 +427,6 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
for (it = io_link_map_tmp.begin(); it != io_link_map_tmp.end(); it++)
|
||||
io_link_map_[it->first] = it->second;
|
||||
|
||||
std::shared_ptr<amd::smi::Device> dev;
|
||||
|
||||
// Remove any drm nodes that don't have a corresponding readable kfd node.
|
||||
// kfd nodes will not be added if their properties file is not readable.
|
||||
@@ -451,6 +471,7 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
if (ROCmLogging::Logger::getInstance()->isLoggerEnabled()) {
|
||||
logSystemDetails();
|
||||
}
|
||||
|
||||
// Leaving below to help debug temp file issues
|
||||
// displayAppTmpFilesContent();
|
||||
std::string amdGPUDeviceList = displayAllDevicePaths(devices_);
|
||||
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador