diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index 1e16e05d55..3f0bb65613 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -904,16 +904,24 @@ class AMDSMICommands(): logging.debug("Failed to get cpu affinity for gpu %s | %s", gpu_id, e.get_error_info()) try: - cpusockets = amdsmi_interface.amdsmi_get_cpu_affinity_with_scope(args.gpu, amdsmi_interface.AmdSmiAffinityScope.SOCKET_SCOPE) - cpusockets = {f'socket_{i}': socket for i, socket in enumerate(set(cpusockets))} + socket_set = amdsmi_interface.amdsmi_get_cpu_affinity_with_scope(args.gpu, amdsmi_interface.AmdSmiAffinityScope.SOCKET_SCOPE) + socket_set = [f"{cpus:016X}" for cpus in socket_set] + socket_set = {f'cpu_list_{i}': f"{cpus}" for i, cpus in enumerate(socket_set)} + socket_bitmask_ranges = self.helpers.get_bitmask_ranges(socket_set) + socket_affinity = {} + for key in socket_set: + socket_affinity[key] = { + "bitmask": socket_set[key], + "cpu_cores_affinity": socket_bitmask_ranges.get(key, "N/A") + } except amdsmi_exception.AmdSmiLibraryException as e: - cpusockets = {} + socket_affinity = "N/A" logging.debug("Failed to get socket affinity for gpu %s | %s", gpu_id, e.get_error_info()) static_dict['numa'] = { 'node' : numa_node_number, 'affinity' : numa_affinity, 'cpu_affinity' : cpu_affinity, - 'socket_affinity' : cpusockets if cpusockets else "N/A"} + 'socket_affinity' : socket_affinity} if args.vram: vram_info_dict = {"type" : "N/A", "vendor" : "N/A", diff --git a/projects/amdsmi/include/amd_smi/impl/amd_smi_gpu_device.h b/projects/amdsmi/include/amd_smi/impl/amd_smi_gpu_device.h index 29e290e0cc..370155f3cc 100644 --- a/projects/amdsmi/include/amd_smi/impl/amd_smi_gpu_device.h +++ b/projects/amdsmi/include/amd_smi/impl/amd_smi_gpu_device.h @@ -72,6 +72,7 @@ class AMDSmiGPUDevice: public AMDSmiProcessor { // New methods for -e feature std::string bdf_to_string() const; // -e feature std::vector get_bitmask_from_numa_node(int32_t node_id, uint32_t size) const; + std::vector get_bitmask_from_local_cpulist(uint32_t drm_card, uint32_t size) const; private: uint32_t gpu_id_; diff --git a/projects/amdsmi/src/amd_smi/amd_smi.cc b/projects/amdsmi/src/amd_smi/amd_smi.cc index a1e2ba305a..c6490dcdfb 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi.cc @@ -5199,27 +5199,12 @@ amdsmi_status_t amdsmi_get_cpu_affinity_with_scope(amdsmi_processor_handle proce case AMDSMI_AFFINITY_SCOPE_SOCKET: { - std::vector sockets = amd::smi::AMDSmiSystem::getInstance().get_cpu_sockets_from_numa_node(node_id); - - if(sockets[0] == std::numeric_limits::max()){ + uint32_t drm_card = gpu_device->get_card_id(); + std::vector bitmask = gpu_device->get_bitmask_from_local_cpulist(drm_card, cpu_set_size); + if(bitmask[0] == std::numeric_limits::max()){ return AMDSMI_STATUS_REFCOUNT_OVERFLOW; } else { - for (uint32_t idx : sockets) { - cpu_set[idx] = idx; - } - - std::sort(cpu_set, cpu_set + cpu_set_size); - - // Discard duplicates - uint32_t temp_size = 0; - for (uint32_t i = 0; i < cpu_set_size; ++i) { - if (i == 0 || cpu_set[i] != cpu_set[i - 1]) { - cpu_set[temp_size++] = cpu_set[i]; - } - } - - // Update the size to the temp size after discarding duplicates - cpu_set_size = temp_size; + std::memcpy(cpu_set, bitmask.data(), cpu_set_size * sizeof(uint64_t)); } break; } diff --git a/projects/amdsmi/src/amd_smi/amd_smi_gpu_device.cc b/projects/amdsmi/src/amd_smi/amd_smi_gpu_device.cc index 2a188144d1..107d215898 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi_gpu_device.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi_gpu_device.cc @@ -310,4 +310,38 @@ std::vector AMDSmiGPUDevice::get_bitmask_from_numa_node(int32_t node_i return bitmask; } +std::vector AMDSmiGPUDevice::get_bitmask_from_local_cpulist(uint32_t drm_card, uint32_t size) const { + std::vector bitmask(size, 0); + + if (drm_card < 0) { + bitmask[0] = std::numeric_limits::max(); + return bitmask; + } + + std::string path = "/sys/class/drm/card" + std::to_string(drm_card) + "/device/local_cpulist"; + std::ifstream file(path); + + if (file.is_open()) { + std::string info; + while (std::getline(file, info)) { + std::istringstream sstr(info); + std::string node_cpus; + while (std::getline(sstr, node_cpus, ',')) { + size_t hyphen = node_cpus.find('-'); + if (hyphen != std::string::npos) { + int start = std::stoi(node_cpus.substr(0, hyphen)); + int end = std::stoi(node_cpus.substr(hyphen + 1)); + for (int i = start; i <= end; ++i) { + bitmask[i / 64] |= (1ULL << (i % 64)); + } + } else { + int core = std::stoi(node_cpus); + bitmask[core / 64] |= (1ULL << (core % 64)); + } + } + } + } + return bitmask; +} + } // namespace amd::smi