Collect compute partition devices under the same socket
A socket represents a physical device, and its partition devices
should be grouped under that socket. The partition devices differ
only in the function ID of their BDF, so use the BD part of the BDF
to identify a socket.
Change-Id: I5d355a6f5db02faa7555b760a36c7351b8d8d835
[ROCm/amdsmi commit: de7e74f7db]
Este commit está contenido en:
@@ -99,7 +99,12 @@ class AMDSmiSystem {
|
||||
#endif
|
||||
private:
|
||||
AMDSmiSystem() : init_flag_(AMDSMI_INIT_AMD_GPUS) {}
|
||||
amdsmi_status_t get_gpu_bdf_by_index(uint32_t index, std::string& bdf);
|
||||
|
||||
/* The GPU socket id is used to identify the socket, so that the XCDs
|
||||
on the same physical device will be collected under the same socket.
|
||||
The BD part of the BDF is used as the GPU socket id to represent a physical device.
|
||||
*/
|
||||
amdsmi_status_t get_gpu_socket_id(uint32_t index, std::string& socketid);
|
||||
amdsmi_status_t populate_amd_gpu_devices();
|
||||
uint64_t init_flag_;
|
||||
AMDSmiDrm drm_;
|
||||
|
||||
@@ -231,7 +231,7 @@ amdsmi_status_t AMDSmiSystem::populate_amd_gpu_devices() {
|
||||
for (uint32_t i=0; i < device_count; i++) {
|
||||
// GPU device uses the bdf as the socket id
|
||||
std::string socket_id;
|
||||
amd_smi_status = get_gpu_bdf_by_index(i, socket_id);
|
||||
amd_smi_status = get_gpu_socket_id(i, socket_id);
|
||||
if (amd_smi_status != AMDSMI_STATUS_SUCCESS) {
|
||||
return amd_smi_status;
|
||||
}
|
||||
@@ -256,8 +256,8 @@ amdsmi_status_t AMDSmiSystem::populate_amd_gpu_devices() {
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
amdsmi_status_t AMDSmiSystem::get_gpu_bdf_by_index(uint32_t index,
|
||||
std::string& bdf) {
|
||||
amdsmi_status_t AMDSmiSystem::get_gpu_socket_id(uint32_t index,
|
||||
std::string& socket_id) {
|
||||
uint64_t bdfid = 0;
|
||||
rsmi_status_t ret = rsmi_dev_pci_id_get(index, &bdfid);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
@@ -269,11 +269,13 @@ amdsmi_status_t AMDSmiSystem::get_gpu_bdf_by_index(uint32_t index,
|
||||
uint64_t device_id = (bdfid >> 3) & 0x1f;
|
||||
uint64_t function = bdfid & 0x7;
|
||||
|
||||
// The BD part of the BDF is used as the socket id as it
|
||||
// represents a physical device.
|
||||
std::stringstream ss;
|
||||
ss << std::setfill('0') << std::uppercase << std::hex
|
||||
<< std::setw(4) << domain << ":" << std::setw(2) << bus << ":"
|
||||
<< std::setw(2) << device_id << "." << std::setw(2) << function;
|
||||
bdf = ss.str();
|
||||
<< std::setw(2) << device_id;
|
||||
socket_id = ss.str();
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
Referencia en una nueva incidencia
Block a user