From 6ada77a9ad388a8eaeabe50b75979cebb6d38f4a Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Sat, 18 Jan 2025 10:12:02 -0500 Subject: [PATCH] Optimize finding the compute partition mode (#493) --- src/rocdecode/vaapi/vaapi_videodecoder.cpp | 194 ++++++++++----------- src/rocdecode/vaapi/vaapi_videodecoder.h | 5 +- 2 files changed, 95 insertions(+), 104 deletions(-) diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index e7de24c2f3..f3a473377d 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -558,11 +558,8 @@ rocDecStatus VaContext::GetVaContext(int device_id, uint32_t *va_ctx_id) { GetVisibleDevices(visible_devices); int offset = 0; - std::vector current_compute_partitions; - GetCurrentComputePartition(current_compute_partitions); - if (!current_compute_partitions.empty()) { - GetDrmNodeOffset(va_contexts_[va_ctx_idx].hip_dev_prop.name, va_contexts_[va_ctx_idx].device_id, visible_devices, current_compute_partitions, offset); - } + ComputePartition current_compute_partition = (gpu_uuids_to_compute_partition_map_.find(gpu_uuid) != gpu_uuids_to_compute_partition_map_.end()) ? gpu_uuids_to_compute_partition_map_[gpu_uuid] : kSpx; + GetDrmNodeOffset(va_contexts_[va_ctx_idx].hip_dev_prop.name, va_contexts_[va_ctx_idx].device_id, visible_devices, current_compute_partition, offset); std::string drm_node = "/dev/dri/renderD"; int render_node_id = (gpu_uuids_to_render_nodes_map_.find(gpu_uuid) != gpu_uuids_to_render_nodes_map_.end()) ? 
gpu_uuids_to_render_nodes_map_[gpu_uuid] : 128; @@ -840,127 +837,122 @@ void VaContext::GetVisibleDevices(std::vector& visible_devices_vetor) { } } -void VaContext::GetCurrentComputePartition(std::vector &current_compute_partitions) { - std::string search_path = "/sys/devices/"; - std::string partition_file = "current_compute_partition"; - std::error_code ec; - if (fs::exists(search_path)) { - for (auto it = fs::recursive_directory_iterator(search_path, fs::directory_options::skip_permission_denied); it != fs::recursive_directory_iterator(); ) { - try { - if (it->path().filename() == partition_file) { - std::ifstream file(it->path()); - if (file.is_open()) { - std::string partition; - std::getline(file, partition); - if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) { - current_compute_partitions.push_back(kSpx); - } else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) { - current_compute_partitions.push_back(kDpx); - } else if (partition.compare("TPX") == 0 || partition.compare("tpx") == 0) { - current_compute_partitions.push_back(kTpx); - } else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) { - current_compute_partitions.push_back(kQpx); - } else if (partition.compare("CPX") == 0 || partition.compare("cpx") == 0) { - current_compute_partitions.push_back(kCpx); - } - file.close(); - } - } - ++it; - } catch (fs::filesystem_error& e) { - it.increment(ec); - } - } - } -} +void VaContext::GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, ComputePartition current_compute_partition, int &offset) { -void VaContext::GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, std::vector &current_compute_partitions, int &offset) { - if (!current_compute_partitions.empty()) { - switch (current_compute_partitions[0]) { - case kSpx: - offset = 0; - break; - case kDpx: + switch (current_compute_partition) { + case kSpx: + offset = 0; + break; + case kDpx: + 
if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] % 2); + } else { + offset = (device_id % 2); + } + break; + case kTpx: + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] % 3); + } else { + offset = (device_id % 3); + } + break; + case kQpx: + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] % 4); + } else { + offset = (device_id % 4); + } + break; + case kCpx: + // Note: The MI300 series share the same gfx_arch_name (gfx942). + // Therefore, we cannot use gfx942 to distinguish between MI300X, MI300A etc. + // Instead, use the device name to identify MI300A etc. + std::string mi300a = "MI300A"; + size_t found_mi300a = device_name.find(mi300a); + if (found_mi300a != std::string::npos) { if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] % 2); + offset = (visible_devices[device_id] % 6); } else { - offset = (device_id % 2); + offset = (device_id % 6); } - break; - case kTpx: + } else { if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] % 3); + offset = (visible_devices[device_id] % 8); } else { - offset = (device_id % 3); + offset = (device_id % 8); } - break; - case kQpx: - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] % 4); - } else { - offset = (device_id % 4); - } - break; - case kCpx: - // Note: The MI300 series share the same gfx_arch_name (gfx942). - // Therefore, we cannot use gfx942 to distinguish between MI300X, MI300A etc. - // Instead, use the device name to identify MI300A etc. 
- std::string mi300a = "MI300A"; - size_t found_mi300a = device_name.find(mi300a); - if (found_mi300a != std::string::npos) { - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] % 6); - } else { - offset = (device_id % 6); - } - } else { - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] % 8); - } else { - offset = (device_id % 8); - } - } - break; - } + } + break; } } /** - * @brief Retrieves GPU UUIDs and maps them to render node IDs. + * @brief Retrieves GPU UUIDs and maps them to render node IDs and compute partitions. * * This function iterates through all render nodes in the /dev/dri directory, * extracts the render node ID from the filename, and then reads the unique GPU * UUID from the corresponding sysfs path. It maps each unique GPU UUID to its * corresponding render node ID and stores this mapping in the gpu_uuids_to_render_nodes_map_. + * Additionally, it maps the unique GPU UUID to the current compute partition if available. 
*/ void VaContext::GetGpuUuids() { std::string dri_path = "/dev/dri"; // Iterate through all render nodes - for (const auto& entry : fs::directory_iterator(dri_path, fs::directory_options::skip_permission_denied)) { - try { - std::string filename = entry.path().filename().string(); - // Check if the file name starts with "renderD" - if (filename.find("renderD") == 0) { - // Extract the integer part from the render node name (e.g., 128 from renderD128) - int render_id = std::stoi(filename.substr(7)); - std::string sys_device_path = "/sys/class/drm/" + filename + "/device"; - if (fs::exists(sys_device_path)) { - std::string unique_id_path = sys_device_path + "/unique_id"; - if (fs::exists(unique_id_path)) { - std::ifstream unique_id_file(unique_id_path); + if (fs::exists(dri_path)) { + for (const auto& entry : fs::directory_iterator(dri_path, fs::directory_options::skip_permission_denied)) { + try { + std::string filename = entry.path().filename().string(); + // Check if the file name starts with "renderD" + if (filename.find("renderD") == 0) { + // Extract the integer part from the render node name (e.g., 128 from renderD128) + int render_id = std::stoi(filename.substr(7)); + std::string sys_device_path = "/sys/class/drm/" + filename + "/device"; + if (fs::exists(sys_device_path)) { + std::string unique_id_path = sys_device_path + "/unique_id"; std::string unique_id; - if (unique_id_file.is_open() && std::getline(unique_id_file, unique_id)) { - if (!unique_id.empty()) { - // Map the unique GPU UUID to the render node ID - gpu_uuids_to_render_nodes_map_[unique_id] = render_id; + if (fs::exists(unique_id_path)) { + std::ifstream unique_id_file(unique_id_path); + if (unique_id_file.is_open() && std::getline(unique_id_file, unique_id)) { + if (!unique_id.empty()) { + // Map the unique GPU UUID to the render node ID + gpu_uuids_to_render_nodes_map_[unique_id] = render_id; + } + } + unique_id_file.close(); + } + if (!unique_id.empty()) { + unique_id_path = sys_device_path 
+ "/current_compute_partition"; + if (fs::exists(unique_id_path)) { + std::ifstream unique_id_file(unique_id_path); + std::string partition; + ComputePartition current_compute_partition = kSpx; + if (unique_id_file.is_open() && std::getline(unique_id_file, partition)) { + if (!partition.empty()) { + if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) { + current_compute_partition = kSpx; + } else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) { + current_compute_partition = kDpx; + } else if (partition.compare("TPX") == 0 || partition.compare("tpx") == 0) { + current_compute_partition = kTpx; + } else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) { + current_compute_partition = kQpx; + } else if (partition.compare("CPX") == 0 || partition.compare("cpx") == 0) { + current_compute_partition = kCpx; + } + // Map the unique GPU UUID to the compute partition + gpu_uuids_to_compute_partition_map_[unique_id] = current_compute_partition; + } + unique_id_file.close(); + } } } } } + } catch (const std::exception& e) { + // If an exception occurs, continue with the next entry + continue; } - } catch (const std::exception& e) { - // If an exception occurs, continue with the next entry - continue; } } -} +} \ No newline at end of file diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.h b/src/rocdecode/vaapi/vaapi_videodecoder.h index 86f5e99f5a..792bed7484 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.h +++ b/src/rocdecode/vaapi/vaapi_videodecoder.h @@ -153,7 +153,7 @@ private: * retrieve the render node index for a given GPU UUID. 
*/ std::unordered_map gpu_uuids_to_render_nodes_map_; - + std::unordered_map gpu_uuids_to_compute_partition_map_; VaContext(); VaContext(const VaContext&) = delete; VaContext& operator = (const VaContext) = delete; @@ -162,7 +162,6 @@ private: rocDecStatus InitHIP(int device_id, hipDeviceProp_t& hip_dev_prop); rocDecStatus InitVAAPI(int va_ctx_idx, std::string drm_node); void GetVisibleDevices(std::vector& visible_devices_vetor); - void GetCurrentComputePartition(std::vector &current_compute_partitions); - void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, std::vector &current_compute_partitions, int &offset); + void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, ComputePartition current_compute_partition, int &offset); void GetGpuUuids(); }; \ No newline at end of file