[SWDEV-529889] Fixed incorrect vendor_id reporting in amdsmi_get_gpu_asic_info
Signed-off-by: gabrpham_amdeng <Gabriel.Pham@amd.com>
[ROCm/amdsmi commit: 1ab57ce7dd]
Этот коммит содержится в:
коммит произвёл
Maisam Arif
родитель
597edb6eb8
Коммит
640eba7764
@@ -1012,11 +1012,37 @@ rsmi_dev_subsystem_id_get(uint32_t dv_ind, uint16_t *id) {
|
||||
|
||||
/**
 * @brief Retrieve the vendor ID of the device at index @p dv_ind.
 *
 * The ID is first requested through get_id() with amd::smi::kDevVendorID.
 * If that does not succeed, the "vendor_id" entry of the device's KFD
 * topology node properties is used as a fallback.
 *
 * @param dv_ind  Device index.
 * @param id      Out-parameter receiving the vendor ID. On fallback failure
 *                it is set to std::numeric_limits<uint16_t>::max().
 *
 * @return RSMI_STATUS_SUCCESS when either source yields a vendor ID;
 *         RSMI_STATUS_NOT_SUPPORTED when both the primary read and the KFD
 *         fallback fail. The support/lookup macros used below may also
 *         return early with their own status codes (defined elsewhere --
 *         NOTE(review): confirm against the macro definitions).
 */
rsmi_status_t
rsmi_dev_vendor_id_get(uint32_t dv_ind, uint16_t *id) {
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << "| ======= start =======";
  LOG_TRACE(ss);
  CHK_SUPPORT_NAME_ONLY(id)

  rsmi_status_t status = get_id(dv_ind, amd::smi::kDevVendorID, id);
  if (status != RSMI_STATUS_SUCCESS) {
    // Fallback source: /sys/class/kfd/kfd/topology/nodes/*/properties
    GET_DEV_AND_KFDNODE_FROM_INDX

    // Initialise both values so the debug log below never prints
    // indeterminate data when a lookup fails.
    uint32_t node_id = std::numeric_limits<uint32_t>::max();
    uint64_t kfd_vendor_id = 0;

    // Only query the node properties when the node id lookup succeeded;
    // previously its result was overwritten and a failed lookup went
    // unnoticed.
    int ret_kfd = kfd_node->get_node_id(&node_id);
    if (ret_kfd == 0) {
      ret_kfd = amd::smi::read_node_properties(node_id, "vendor_id",
                                               &kfd_vendor_id);
    }

    // The out-parameter is 16 bits wide; reject values that would not
    // survive the narrowing instead of silently truncating them.
    const bool fits_in_id =
        kfd_vendor_id <= std::numeric_limits<uint16_t>::max();
    if (ret_kfd == 0 && fits_in_id) {
      *id = static_cast<uint16_t>(kfd_vendor_id);
      status = RSMI_STATUS_SUCCESS;
    } else {
      *id = std::numeric_limits<uint16_t>::max();
      status = RSMI_STATUS_NOT_SUPPORTED;
    }

    ss << __PRETTY_FUNCTION__
       << " | Issue: Could not read device from sysfs, falling back to KFD" << "\n"
       << " ; Device #: " << std::to_string(dv_ind) << "\n"
       << " ; ret_kfd: " << std::to_string(ret_kfd) << "\n"
       << " ; node: " << std::to_string(node_id) << "\n"
       << " ; Data: vendor_id (from KFD)= " << std::to_string(*id) << "\n"
       << " ; ret = " << getRSMIStatusString(status, false);
    LOG_DEBUG(ss);
  }
  return status;
}
|
||||
|
||||
rsmi_status_t
|
||||
|
||||
@@ -1353,35 +1353,44 @@ amdsmi_get_gpu_asic_info(amdsmi_processor_handle processor_handle, amdsmi_asic_i
|
||||
|
||||
amdsmi_status_t status;
|
||||
amd::smi::AMDSmiSystem::getInstance().init_drm();
|
||||
if (gpu_device->check_if_drm_is_supported()) {
|
||||
status = gpu_device->amdgpu_query_info(AMDGPU_INFO_DEV_INFO,
|
||||
sizeof(struct drm_amdgpu_info_device), &dev_info);
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | amdgpu_query_info(): "
|
||||
<< smi_amdgpu_get_status_string(status, true);
|
||||
LOG_INFO(ss);
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
amd::smi::AMDSmiSystem::getInstance().clean_up_drm();
|
||||
return status;
|
||||
}
|
||||
SMIGPUDEVICE_MUTEX(gpu_device->get_mutex())
|
||||
status = smi_amdgpu_get_market_name_from_dev_id(gpu_device, info->market_name);
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
rsmi_wrapper(rsmi_dev_brand_get, processor_handle, 0,
|
||||
info->market_name, AMDSMI_256_LENGTH);
|
||||
}
|
||||
// removing drm check for now due to drm issues
|
||||
// if (gpu_device->check_if_drm_is_supported()) {
|
||||
// status = gpu_device->amdgpu_query_info(AMDGPU_INFO_DEV_INFO,
|
||||
// sizeof(struct drm_amdgpu_info_device), &dev_info);
|
||||
// ss << __PRETTY_FUNCTION__
|
||||
// << " | amdgpu_query_info(): "
|
||||
// << smi_amdgpu_get_status_string(status, true);
|
||||
// LOG_INFO(ss);
|
||||
// if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
// amd::smi::AMDSmiSystem::getInstance().clean_up_drm();
|
||||
// return status;
|
||||
// }
|
||||
// SMIGPUDEVICE_MUTEX(gpu_device->get_mutex())
|
||||
// status = smi_amdgpu_get_market_name_from_dev_id(gpu_device, info->market_name);
|
||||
// if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
// rsmi_wrapper(rsmi_dev_brand_get, processor_handle, 0,
|
||||
// info->market_name, AMDSMI_256_LENGTH);
|
||||
// }
|
||||
|
||||
info->device_id = dev_info.device_id;
|
||||
info->rev_id = dev_info.pci_rev;
|
||||
info->vendor_id = gpu_device->get_vendor_id();
|
||||
} else {
|
||||
status = rsmi_wrapper(rsmi_dev_brand_get, processor_handle, 0,
|
||||
info->market_name, AMDSMI_256_LENGTH);
|
||||
// info->device_id = dev_info.device_id;
|
||||
// info->rev_id = dev_info.pci_rev;
|
||||
// info->vendor_id = gpu_device->get_vendor_id();
|
||||
// } else {
|
||||
uint16_t device_id = std::numeric_limits<uint16_t>::max();
|
||||
status = rsmi_wrapper(rsmi_dev_id_get, processor_handle, 0, &device_id);
|
||||
info->device_id = static_cast<uint64_t>(device_id);
|
||||
|
||||
status = rsmi_wrapper(rsmi_dev_vendor_id_get, processor_handle, 0,
|
||||
&vendor_id);
|
||||
if (status == AMDSMI_STATUS_SUCCESS) info->vendor_id = vendor_id;
|
||||
}
|
||||
uint16_t rev_id = std::numeric_limits<uint16_t>::max();
|
||||
status = rsmi_wrapper(rsmi_dev_revision_get, processor_handle, 0, &rev_id);
|
||||
info->rev_id = static_cast<uint32_t>(rev_id);
|
||||
|
||||
status = rsmi_wrapper(rsmi_dev_brand_get, processor_handle, 0,
|
||||
info->market_name, AMDSMI_256_LENGTH);
|
||||
|
||||
status = rsmi_wrapper(rsmi_dev_vendor_id_get, processor_handle, 0,
|
||||
&vendor_id);
|
||||
if (status == AMDSMI_STATUS_SUCCESS) info->vendor_id = vendor_id;
|
||||
// }
|
||||
// For other sysfs related information, get from rocm-smi
|
||||
|
||||
// Ensure asic_serial defaults to an unsupported value
|
||||
|
||||
Ссылка в новой задаче
Block a user