diff --git a/projects/amdsmi/include/amd_smi/impl/amd_smi_system.h b/projects/amdsmi/include/amd_smi/impl/amd_smi_system.h index 348782cf1b..596c909204 100644 --- a/projects/amdsmi/include/amd_smi/impl/amd_smi_system.h +++ b/projects/amdsmi/include/amd_smi/impl/amd_smi_system.h @@ -99,7 +99,12 @@ class AMDSmiSystem { #endif private: AMDSmiSystem() : init_flag_(AMDSMI_INIT_AMD_GPUS) {} - amdsmi_status_t get_gpu_bdf_by_index(uint32_t index, std::string& bdf); + + /* The GPU socket id is used to identify the socket, so that the XCDs + on the same physical device will be collected under the same socket. + The BD part of the BDF is used as GPU socket to represent a physical device. + */ + amdsmi_status_t get_gpu_socket_id(uint32_t index, std::string& socketid); amdsmi_status_t populate_amd_gpu_devices(); uint64_t init_flag_; AMDSmiDrm drm_; diff --git a/projects/amdsmi/src/amd_smi/amd_smi_system.cc b/projects/amdsmi/src/amd_smi/amd_smi_system.cc index 939dea6f5d..e9fa857bdf 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi_system.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi_system.cc @@ -231,7 +231,7 @@ amdsmi_status_t AMDSmiSystem::populate_amd_gpu_devices() { for (uint32_t i=0; i < device_count; i++) { // GPU device uses the bdf as the socket id std::string socket_id; - amd_smi_status = get_gpu_bdf_by_index(i, socket_id); + amd_smi_status = get_gpu_socket_id(i, socket_id); if (amd_smi_status != AMDSMI_STATUS_SUCCESS) { return amd_smi_status; } @@ -256,8 +256,8 @@ amdsmi_status_t AMDSmiSystem::populate_amd_gpu_devices() { return AMDSMI_STATUS_SUCCESS; } -amdsmi_status_t AMDSmiSystem::get_gpu_bdf_by_index(uint32_t index, - std::string& bdf) { +amdsmi_status_t AMDSmiSystem::get_gpu_socket_id(uint32_t index, + std::string& socket_id) { uint64_t bdfid = 0; rsmi_status_t ret = rsmi_dev_pci_id_get(index, &bdfid); if (ret != RSMI_STATUS_SUCCESS) { @@ -269,11 +269,13 @@ amdsmi_status_t AMDSmiSystem::get_gpu_bdf_by_index(uint32_t index, uint64_t device_id = (bdfid >> 3) & 0x1f; 
uint64_t function = bdfid & 0x7; + // The BD part of the BDF is used as the socket id as it + // represents a physical device. std::stringstream ss; ss << std::setfill('0') << std::uppercase << std::hex << std::setw(4) << domain << ":" << std::setw(2) << bus << ":" - << std::setw(2) << device_id << "." << std::setw(2) << function; - bdf = ss.str(); + << std::setw(2) << device_id; + socket_id = ss.str(); return AMDSMI_STATUS_SUCCESS; }