Collect compute partition devices under the same socket

A socket represents a physical device, and all compute partition
devices of that physical device should belong to the same socket.
The partition devices differ only in the function id of their BDF
(bus:device.function), so use the BD part of the BDF, i.e. the BDF
without the function id, to identify a socket.

Change-Id: I5d355a6f5db02faa7555b760a36c7351b8d8d835


[ROCm/amdsmi commit: de7e74f7db]
Este commit está contenido en:
Bill(Shuzhou) Liu
2023-11-29 08:23:23 -06:00
padre a8138bfd5e
commit 985ddbc5d5
Se han modificado 2 ficheros con 13 adiciones y 6 borrados
@@ -99,7 +99,12 @@ class AMDSmiSystem {
#endif
private:
AMDSmiSystem() : init_flag_(AMDSMI_INIT_AMD_GPUS) {}
amdsmi_status_t get_gpu_bdf_by_index(uint32_t index, std::string& bdf);
/* The GPU socket id is used to identify the socket, so that the XCDs
on the same physical device will be collected under the same socket.
The BD part of the BDF is used as the GPU socket id to represent a physical device.
*/
amdsmi_status_t get_gpu_socket_id(uint32_t index, std::string& socketid);
amdsmi_status_t populate_amd_gpu_devices();
uint64_t init_flag_;
AMDSmiDrm drm_;
+7 -5
Ver fichero
@@ -231,7 +231,7 @@ amdsmi_status_t AMDSmiSystem::populate_amd_gpu_devices() {
for (uint32_t i=0; i < device_count; i++) {
// GPU device uses the BD part of the BDF (function id excluded) as the socket id
std::string socket_id;
amd_smi_status = get_gpu_bdf_by_index(i, socket_id);
amd_smi_status = get_gpu_socket_id(i, socket_id);
if (amd_smi_status != AMDSMI_STATUS_SUCCESS) {
return amd_smi_status;
}
@@ -256,8 +256,8 @@ amdsmi_status_t AMDSmiSystem::populate_amd_gpu_devices() {
return AMDSMI_STATUS_SUCCESS;
}
amdsmi_status_t AMDSmiSystem::get_gpu_bdf_by_index(uint32_t index,
std::string& bdf) {
amdsmi_status_t AMDSmiSystem::get_gpu_socket_id(uint32_t index,
std::string& socket_id) {
uint64_t bdfid = 0;
rsmi_status_t ret = rsmi_dev_pci_id_get(index, &bdfid);
if (ret != RSMI_STATUS_SUCCESS) {
@@ -269,11 +269,13 @@ amdsmi_status_t AMDSmiSystem::get_gpu_bdf_by_index(uint32_t index,
uint64_t device_id = (bdfid >> 3) & 0x1f;
uint64_t function = bdfid & 0x7;
// The BD part of the BDF is used as the socket id as it
// represents a physical device.
std::stringstream ss;
ss << std::setfill('0') << std::uppercase << std::hex
<< std::setw(4) << domain << ":" << std::setw(2) << bus << ":"
<< std::setw(2) << device_id << "." << std::setw(2) << function;
bdf = ss.str();
<< std::setw(2) << device_id;
socket_id = ss.str();
return AMDSMI_STATUS_SUCCESS;
}