diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h index ed7836cdd2..d20c480a35 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h @@ -979,6 +979,33 @@ rsmi_status_t rsmi_num_monitor_devices(uint32_t *num_devices); */ rsmi_status_t rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id); + +/** + * @brief Get the SKU of the device with the provided device index. + * + * @details Given a device index @p dv_ind and a pointer to a char @p sku, + * this function will attempt to obtain the SKU from the Product Information + * FRU chip, present on server ASICs. It will write the sku value to the + * char array pointed to by @p sku. + * + * @note NOTE(review): the definition this patch adds in src/rocm_smi.cc takes + * `uint16_t *id`, not `char *sku` -- the two signatures must be reconciled. + * + * @param[in] dv_ind a device index + * + * @param[inout] sku a pointer to char to which the sku will be written. + * If this parameter is nullptr, this function will return + * ::RSMI_STATUS_INVALID_ARGS if the function is supported with the provided + * arguments, and ::RSMI_STATUS_NOT_SUPPORTED if it is not supported with the + * provided arguments. + * + * @retval ::RSMI_STATUS_SUCCESS call was successful + * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not + * support this function with the given arguments + * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid + */ +rsmi_status_t rsmi_dev_sku_get(uint32_t dv_ind, char *sku); + /** * @brief Get the device vendor id associated with the device with provided * device index. 
diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h index 105e0cb44a..f5100f926b 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h @@ -99,6 +99,8 @@ enum DevInfoTypes { kDevPerfLevel, kDevOverDriveLevel, kDevDevID, + kDevDevProdName, + kDevDevProdNum, kDevVendorID, kDevSubSysDevID, kDevSubSysVendorID, diff --git a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py index 26087fd83d..555f92f1c6 100755 --- a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py +++ b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py @@ -1621,13 +1621,17 @@ def showProductName(deviceList): device_model = model.value.decode() printLog(device, 'Card model', '\t\t' + device_model) printLog(device, 'Card vendor', '\t\t' + device_vendor) - # Retrieve the device SKU from VBIOS - ret = rocmsmi.rsmi_dev_vbios_version_get(device, vbios, 256) - if rsmi_ret_ok(ret, device) and vbios.value.decode(): - # Device SKU is just 6 characters after the first occurance of '-' in vbios_version - # TODO: Use Product Info sysfs where possible - device_sku = vbios.value.decode().split('-')[1][:6] - printLog(device, 'Card SKU', '\t\t' + device_sku) + # Retrieve the SKU + # NOTE(review): device_sku is used as an output buffer below, but no buffer is created in this hunk, + # and the rsmi_dev_sku_get prototype in rocm_smi.h takes no length argument -- confirm both. + ret = rocmsmi.rsmi_dev_sku_get(device, device_sku, 256) + if not rsmi_ret_ok(ret, device) or not device_sku.value.decode(): + # Retrieve the device SKU from VBIOS if product_number doesn't exist + ret = rocmsmi.rsmi_dev_vbios_version_get(device, vbios, 256) + if rsmi_ret_ok(ret, device) and vbios.value.decode(): + # Device SKU is just 6 characters after the first occurrence of '-' in vbios_version + device_sku = vbios.value.decode().split('-')[1][:6] + printLog(device, 'Card SKU', '\t\t' + device_sku) else: printLog(device, 'Incompatible device.\n' \ 'GPU[%s]\t\t: Expected vendor name: Advanced Micro Devices, Inc. 
[AMD/ATI]\n'\ diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc b/projects/rocm-smi-lib/src/rocm_smi.cc index e12c502d21..b7d5ddd8ce 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ -722,6 +722,18 @@ rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id) { return get_id(dv_ind, amd::smi::kDevDevID, id); } +// NOTE(review): rocm_smi.h in this same patch declares this function with a +// `char *sku` second parameter, while this definition takes `uint16_t *id`. +// The two signatures need to be reconciled -- confirm the intended type. +rsmi_status_t +rsmi_dev_sku_get(uint32_t dv_ind, uint16_t *id) { + TRY + CHK_SUPPORT_NAME_ONLY(id) + DEVICE_MUTEX + return get_id(dv_ind, amd::smi::kDevDevProdNum, id); + CATCH +} + rsmi_status_t rsmi_dev_subsystem_id_get(uint32_t dv_ind, uint16_t *id) { CHK_SUPPORT_NAME_ONLY(id) @@ -1530,6 +1542,29 @@ static rsmi_status_t get_backup_name(uint16_t id, char *name, size_t len) { return RSMI_STATUS_SUCCESS; } +// Copies the "product_name" device attribute for dv_ind into name, writing at +// most len bytes and always NUL-terminating (len must be non-zero). Returns the +// status from get_dev_value_line() if the read fails, and +// RSMI_STATUS_INSUFFICIENT_SIZE if the value plus terminator exceeds len. +static rsmi_status_t get_dev_name_from_file(uint32_t dv_ind, char *name, + size_t len) { + std::string val_str; + rsmi_status_t ret = get_dev_value_line(amd::smi::kDevDevProdName, dv_ind, &val_str); + + if (ret != RSMI_STATUS_SUCCESS) { + return ret;  // ret is already an rsmi_status_t, so it is returned as-is + } + size_t ct = val_str.copy(name, len); + + name[std::min(len - 1, ct)] = '\0'; + + if (len < (val_str.size() + 1)) { + return RSMI_STATUS_INSUFFICIENT_SIZE; + } + + return RSMI_STATUS_SUCCESS; +} + // Parse pci.ids files. Comment lines have # in first column. 
Otherwise, // Syntax: // vendor vendor_name @@ -1687,7 +1722,11 @@ rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len) { DEVICE_MUTEX - ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_DEVICE); + ret = get_dev_name_from_file(dv_ind, name, len); + + if (ret) { + ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_DEVICE); + } return ret; CATCH diff --git a/projects/rocm-smi-lib/src/rocm_smi_device.cc b/projects/rocm-smi-lib/src/rocm_smi_device.cc index faa4f277f0..3b00390ce3 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_device.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_device.cc @@ -72,6 +72,8 @@ namespace smi { // Device sysfs file names static const char *kDevPerfLevelFName = "power_dpm_force_performance_level"; +static const char *kDevDevProdNameFName = "product_name"; +static const char *kDevDevProdNumFName = "product_number"; static const char *kDevDevIDFName = "device"; static const char *kDevVendorIDFName = "vendor"; static const char *kDevSubSysDevIDFName = "subsystem_device"; @@ -212,6 +214,8 @@ static const char *kDevPerfLevelUnknownStr = "unknown"; static const std::map kDevAttribNameMap = { {kDevPerfLevel, kDevPerfLevelFName}, {kDevOverDriveLevel, kDevOverDriveLevelFName}, + {kDevDevProdName, kDevDevProdNameFName}, + {kDevDevProdNum, kDevDevProdNumFName}, {kDevDevID, kDevDevIDFName}, {kDevVendorID, kDevVendorIDFName}, {kDevSubSysDevID, kDevSubSysDevIDFName}, @@ -338,7 +342,8 @@ static const std::map kDevFuncDependsMap = { {"rsmi_dev_vendor_id_get", {{kDevVendorIDFName}, {}}}, {"rsmi_dev_name_get", {{kDevVendorIDFName, - kDevDevIDFName}, {}}}, + kDevDevIDFName}, {}}}, + {"rsmi_dev_sku_get", {{kDevDevProdNumFName}, {}}}, {"rsmi_dev_brand_get", {{kDevVendorIDFName}, {}}}, {"rsmi_dev_vendor_name_get", {{kDevVendorIDFName}, {}}}, {"rsmi_dev_serial_number_get", {{kDevSerialNumberFName}, {}}}, @@ -798,6 +803,8 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) { case kDevPerfLevel: case kDevUsage: case kDevOverDriveLevel: + case 
kDevDevProdName: + case kDevDevProdNum: case kDevDevID: case kDevSubSysDevID: case kDevSubSysVendorID: