[SWDEV-529889] Fixed incorrect vendor_id reporting in amdsmi_get_gpu_asic_info

Signed-off-by: gabrpham_amdeng <Gabriel.Pham@amd.com>


[ROCm/amdsmi commit: 1ab57ce7dd]
Этот коммит содержится в:
gabrpham_amdeng
2025-04-28 14:46:11 -05:00
коммит произвёл Maisam Arif
родитель 597edb6eb8
Коммит 640eba7764
2 изменённых файла: 63 добавления и 28 удалений
+27 -1
Просмотреть файл
@@ -1012,11 +1012,37 @@ rsmi_dev_subsystem_id_get(uint32_t dv_ind, uint16_t *id) {
// Reports the vendor ID of the device at index dv_ind through *id.
//
// The ID is first requested with get_id(..., amd::smi::kDevVendorID, ...).
// If that call does not succeed, the "vendor_id" entry of the device's KFD
// node properties (/sys/class/kfd/kfd/topology/nodes/*/properties) is used
// instead.
//
// Returns RSMI_STATUS_SUCCESS when either source yields a value. When both
// fail, *id is set to the uint16_t maximum and RSMI_STATUS_NOT_SUPPORTED is
// returned.
//
// NOTE(review): CHK_SUPPORT_NAME_ONLY and GET_DEV_AND_KFDNODE_FROM_INDX are
// macros defined outside this view; they presumably return early on invalid
// arguments / unknown devices -- confirm against their definitions.
rsmi_status_t
rsmi_dev_vendor_id_get(uint32_t dv_ind, uint16_t *id) {
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << "| ======= start =======";
  LOG_TRACE(ss);
  CHK_SUPPORT_NAME_ONLY(id)

  // Do not return the get_id() result directly: a failure here must fall
  // through to the KFD lookup below.
  rsmi_status_t status = get_id(dv_ind, amd::smi::kDevVendorID, id);
  if (status != RSMI_STATUS_SUCCESS) {
    // /sys/class/kfd/kfd/topology/nodes/*/properties
    GET_DEV_AND_KFDNODE_FROM_INDX
    // Initialized so that the debug log below never prints indeterminate
    // values when one of the lookups fails.
    uint32_t node_id = 0;
    uint64_t kfd_vendor_id = 0;

    // Only query the node properties once the node id is actually known;
    // otherwise keep the get_node_id() error as the reported ret_kfd.
    int ret_kfd = kfd_node->get_node_id(&node_id);
    if (ret_kfd == 0) {
      ret_kfd = amd::smi::read_node_properties(node_id, "vendor_id",
                                               &kfd_vendor_id);
    }

    if (ret_kfd == 0) {
      // The property is read as 64 bits while the API reports 16 bits; make
      // the narrowing explicit.
      *id = static_cast<uint16_t>(kfd_vendor_id);
      status = RSMI_STATUS_SUCCESS;
    } else {
      *id = std::numeric_limits<uint16_t>::max();
      status = RSMI_STATUS_NOT_SUPPORTED;
    }

    ss << __PRETTY_FUNCTION__
       << " | Issue: Could not read device from sysfs, falling back to KFD" << "\n"
       << " ; Device #: " << std::to_string(dv_ind) << "\n"
       << " ; ret_kfd: " << std::to_string(ret_kfd) << "\n"
       << " ; node: " << std::to_string(node_id) << "\n"
       << " ; Data: vendor_id (from KFD)= " << std::to_string(*id) << "\n"
       << " ; ret = " << getRSMIStatusString(status, false);
    LOG_DEBUG(ss);
  }
  return status;
}
rsmi_status_t
+36 -27
Просмотреть файл
@@ -1353,35 +1353,44 @@ amdsmi_get_gpu_asic_info(amdsmi_processor_handle processor_handle, amdsmi_asic_i
amdsmi_status_t status;
amd::smi::AMDSmiSystem::getInstance().init_drm();
if (gpu_device->check_if_drm_is_supported()) {
status = gpu_device->amdgpu_query_info(AMDGPU_INFO_DEV_INFO,
sizeof(struct drm_amdgpu_info_device), &dev_info);
ss << __PRETTY_FUNCTION__
<< " | amdgpu_query_info(): "
<< smi_amdgpu_get_status_string(status, true);
LOG_INFO(ss);
if (status != AMDSMI_STATUS_SUCCESS) {
amd::smi::AMDSmiSystem::getInstance().clean_up_drm();
return status;
}
SMIGPUDEVICE_MUTEX(gpu_device->get_mutex())
status = smi_amdgpu_get_market_name_from_dev_id(gpu_device, info->market_name);
if (status != AMDSMI_STATUS_SUCCESS) {
rsmi_wrapper(rsmi_dev_brand_get, processor_handle, 0,
info->market_name, AMDSMI_256_LENGTH);
}
// removing drm check for now due to drm issues
// if (gpu_device->check_if_drm_is_supported()) {
// status = gpu_device->amdgpu_query_info(AMDGPU_INFO_DEV_INFO,
// sizeof(struct drm_amdgpu_info_device), &dev_info);
// ss << __PRETTY_FUNCTION__
// << " | amdgpu_query_info(): "
// << smi_amdgpu_get_status_string(status, true);
// LOG_INFO(ss);
// if (status != AMDSMI_STATUS_SUCCESS) {
// amd::smi::AMDSmiSystem::getInstance().clean_up_drm();
// return status;
// }
// SMIGPUDEVICE_MUTEX(gpu_device->get_mutex())
// status = smi_amdgpu_get_market_name_from_dev_id(gpu_device, info->market_name);
// if (status != AMDSMI_STATUS_SUCCESS) {
// rsmi_wrapper(rsmi_dev_brand_get, processor_handle, 0,
// info->market_name, AMDSMI_256_LENGTH);
// }
info->device_id = dev_info.device_id;
info->rev_id = dev_info.pci_rev;
info->vendor_id = gpu_device->get_vendor_id();
} else {
status = rsmi_wrapper(rsmi_dev_brand_get, processor_handle, 0,
info->market_name, AMDSMI_256_LENGTH);
// info->device_id = dev_info.device_id;
// info->rev_id = dev_info.pci_rev;
// info->vendor_id = gpu_device->get_vendor_id();
// } else {
uint16_t device_id = std::numeric_limits<uint16_t>::max();
status = rsmi_wrapper(rsmi_dev_id_get, processor_handle, 0, &device_id);
info->device_id = static_cast<uint64_t>(device_id);
status = rsmi_wrapper(rsmi_dev_vendor_id_get, processor_handle, 0,
&vendor_id);
if (status == AMDSMI_STATUS_SUCCESS) info->vendor_id = vendor_id;
}
uint16_t rev_id = std::numeric_limits<uint16_t>::max();
status = rsmi_wrapper(rsmi_dev_revision_get, processor_handle, 0, &rev_id);
info->rev_id = static_cast<uint32_t>(rev_id);
status = rsmi_wrapper(rsmi_dev_brand_get, processor_handle, 0,
info->market_name, AMDSMI_256_LENGTH);
status = rsmi_wrapper(rsmi_dev_vendor_id_get, processor_handle, 0,
&vendor_id);
if (status == AMDSMI_STATUS_SUCCESS) info->vendor_id = vendor_id;
// }
// For other sysfs related information, get from rocm-smi
// Ensure asic_serial defaults to an unsupported value