diff --git a/projects/rocjpeg/src/rocjpeg_decoder.cpp b/projects/rocjpeg/src/rocjpeg_decoder.cpp index 66c8b5d6ae..b12be78eed 100644 --- a/projects/rocjpeg/src/rocjpeg_decoder.cpp +++ b/projects/rocjpeg/src/rocjpeg_decoder.cpp @@ -79,7 +79,8 @@ RocJpegStatus RocJpegDecoder::InitializeDecoder() { return rocjpeg_status; } if (backend_ == ROCJPEG_BACKEND_HARDWARE) { - rocjpeg_status = jpeg_vaapi_decoder_.InitializeDecoder(hip_dev_prop_.name, hip_dev_prop_.gcnArchName, device_id_); + std::string gpu_uuid(hip_dev_prop_.uuid.bytes, sizeof(hip_dev_prop_.uuid.bytes)); + rocjpeg_status = jpeg_vaapi_decoder_.InitializeDecoder(hip_dev_prop_.name, hip_dev_prop_.gcnArchName, device_id_, gpu_uuid); if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { ERR("ERROR: Failed to initialize the VA-API JPEG decoder!"); return rocjpeg_status; diff --git a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp index f86ab3ce1a..76dbe39c6f 100644 --- a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp +++ b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp @@ -359,9 +359,10 @@ RocJpegVappiDecoder::~RocJpegVappiDecoder() { * @param device_name The name of the device. * @param gcn_arch_name The name of the GCN architecture. * @param device_id The ID of the device. + * @param gpu_uuid The UUID of the GPU. * @return The status of the initialization process. */ -RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string device_name, std::string gcn_arch_name, int device_id) { +RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string device_name, std::string gcn_arch_name, int device_id, std::string& gpu_uuid) { device_id_ = device_id; std::size_t pos = gcn_arch_name.find_first_of(":"); std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; @@ -379,29 +380,21 @@ RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string device_name, st std::vector visible_devices; GetVisibleDevices(visible_devices); + GetGpuUuids(); int offset = 0; if (gcn_arch_name_base.compare("gfx942") == 0) { - std::vector current_compute_partitions; - GetCurrentComputePartition(current_compute_partitions); - if (current_compute_partitions.empty()) { - //if the current_compute_partitions is empty then the default SPX mode is assumed. - if (device_id_ < visible_devices.size()) { - offset = visible_devices[device_id_] * 7; - } else { - offset = device_id_ * 7; - } - } else { - GetDrmNodeOffset(device_name, device_id_, visible_devices, current_compute_partitions, offset); - } + std::vector current_compute_partitions; + GetCurrentComputePartition(current_compute_partitions); + if (!current_compute_partitions.empty()) { + GetDrmNodeOffset(device_name, device_id_, visible_devices, current_compute_partitions, offset); + } } std::string drm_node = "/dev/dri/renderD"; - if (device_id_ < visible_devices.size()) { - drm_node += std::to_string(128 + offset + visible_devices[device_id_]); - } else { - drm_node += std::to_string(128 + offset + device_id_); - } + int render_node_id = (gpu_uuids_to_render_nodes_map_.find(gpu_uuid) != gpu_uuids_to_render_nodes_map_.end()) ? gpu_uuids_to_render_nodes_map_[gpu_uuid] : 128; + drm_node += std::to_string(render_node_id + offset); + CHECK_ROCJPEG(InitVAAPI(drm_node)); CHECK_ROCJPEG(CreateDecoderConfig()); CHECK_ROCJPEG(CreateDecoderContext()); @@ -913,7 +906,12 @@ RocJpegStatus RocJpegVappiDecoder::GetHipInteropMem(VASurfaceID surface_id, HipI * @param visible_devices_vector The vector to store the visible devices. */ void RocJpegVappiDecoder::GetVisibleDevices(std::vector& visible_devices_vetor) { - char *visible_devices = std::getenv("HIP_VISIBLE_DEVICES"); + // First, check if the ROCR_VISIBLE_DEVICES environment variable is present + char *visible_devices = std::getenv("ROCR_VISIBLE_DEVICES"); + // If ROCR_VISIBLE_DEVICES is not present, check if HIP_VISIBLE_DEVICES is present + if (visible_devices == nullptr) { + visible_devices = std::getenv("HIP_VISIBLE_DEVICES"); + } if (visible_devices != nullptr) { char *token = std::strtok(visible_devices,","); while (token != nullptr) { @@ -983,49 +981,54 @@ void RocJpegVappiDecoder::GetDrmNodeOffset(std::string device_name, uint8_t devi if (!current_compute_partitions.empty()) { switch (current_compute_partitions[0]) { case kSpx: - if (device_id < visible_devices.size()) { - offset = visible_devices[device_id] * 7; - } else { - offset = device_id * 7; - } + offset = 0; break; case kDpx: if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 2) * 6; + offset = (visible_devices[device_id] % 2); } else { - offset = (device_id / 2) * 6; + offset = (device_id % 2); } break; case kTpx: - // Please note that although there are only 6 XCCs per socket on MI300A, - // there are two dummy render nodes added by the driver. - // This needs to be taken into account when creating drm_node on each socket in TPX mode. if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 3) * 5; + offset = (visible_devices[device_id] % 3); } else { - offset = (device_id / 3) * 5; + offset = (device_id % 3); } break; case kQpx: if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 4) * 4; + offset = (visible_devices[device_id] % 4); } else { - offset = (device_id / 4) * 4; + offset = (device_id % 4); } break; case kCpx: - // Please note that both MI300A and MI300X have the same gfx_arch_name which is - // gfx942. Therefore we cannot use the gfx942 to identify MI300A. - // instead use the device name and look for MI300A - // Also, as explained aboe in the TPX mode section, we need to be taken into account - // the extra two dummy nodes when creating drm_node on each socket in CPX mode as well. + // Note: The MI300 series share the same gfx_arch_name (gfx942). + // Therefore, we cannot use gfx942 to distinguish between MI300A, MI308, etc. + // Instead, use the device name to identify MI300A, MI308, etc. std::string mi300a = "MI300A"; size_t found_mi300a = device_name.find(mi300a); - if (found_mi300a != std::string::npos) { + std::string mi308 = "MI308"; + size_t found_mi308 = device_name.find(mi308); + if (found_mi308 != std::string::npos) { if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 6) * 2; + offset = (visible_devices[device_id] % 4); } else { - offset = (device_id / 6) * 2; + offset = (device_id % 4); + } + } else if (found_mi300a != std::string::npos) { + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] % 6); + } else { + offset = (device_id % 6); + } + } else { + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] % 8); + } else { + offset = (device_id % 8); } } break; @@ -1048,4 +1051,46 @@ RocJpegStatus RocJpegVappiDecoder::SetSurfaceAsIdle(VASurfaceID surface_id) { return ROCJPEG_STATUS_INVALID_PARAMETER; } return ROCJPEG_STATUS_SUCCESS; +} + +/** + * @brief Retrieves GPU UUIDs and maps them to render node IDs. + * + * This function iterates through all render nodes in the /dev/dri directory, + * extracts the render node ID from the filename, and then reads the unique GPU + * UUID from the corresponding sysfs path. It maps each unique GPU UUID to its + * corresponding render node ID and stores this mapping in the gpu_uuids_to_render_nodes_map_. + */ +void RocJpegVappiDecoder::GetGpuUuids() { + std::string dri_path = "/dev/dri"; + // Iterate through all render nodes + if (fs::exists(dri_path)) { + for (const auto& entry : fs::directory_iterator(dri_path, fs::directory_options::skip_permission_denied)) { + try { + std::string filename = entry.path().filename().string(); + // Check if the file name starts with "renderD" + if (filename.find("renderD") == 0) { + // Extract the integer part from the render node name (e.g., 128 from renderD128) + int render_id = std::stoi(filename.substr(7)); + std::string sys_device_path = "/sys/class/drm/" + filename + "/device"; + if (fs::exists(sys_device_path)) { + std::string unique_id_path = sys_device_path + "/unique_id"; + if (fs::exists(unique_id_path)) { + std::ifstream unique_id_file(unique_id_path); + std::string unique_id; + if (unique_id_file.is_open() && std::getline(unique_id_file, unique_id)) { + if (!unique_id.empty()) { + // Map the unique GPU UUID to the render node ID + gpu_uuids_to_render_nodes_map_[unique_id] = render_id; + } + } + } + } + } + } catch (const std::exception& e) { + // If an exception occurs, continue with the next entry + continue; + } + } + } } \ No newline at end of file diff --git a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.h b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.h index dd4ac2dd3d..1792caa8ba 100644 --- a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.h +++ b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.h @@ -282,9 +282,10 @@ public: * @param device_name The name of the device. * @param gcn_arch_name The name of the GCN architecture. * @param device_id The ID of the device. + * @param gpu_uuid The UUID of the GPU. * @return The status of the initialization. */ - RocJpegStatus InitializeDecoder(std::string device_name, std::string gcn_arch_name, int device_id); + RocJpegStatus InitializeDecoder(std::string device_name, std::string gcn_arch_name, int device_id, std::string& gpu_uuid); /** * @brief Submits a JPEG stream for decoding. @@ -355,6 +356,14 @@ private: VABufferID va_huffmantable_buf_id_; // The VAAPI Huffman table buffer ID VABufferID va_slice_param_buf_id_; // The VAAPI slice parameter buffer ID VABufferID va_slice_data_buf_id_; // The VAAPI slice data buffer ID + /** + * @brief A map that associates GPU UUIDs with their corresponding render node indices. + * + * This unordered map uses GPU UUIDs as keys (std::string) and maps them to their + * respective render node indices (int). It provides a fast lookup mechanism to + * retrieve the render node index for a given GPU UUID. + */ + std::unordered_map gpu_uuids_to_render_nodes_map_; /** * @brief Initializes the VAAPI with the specified DRM node. @@ -408,6 +417,10 @@ private: void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, std::vector ¤t_compute_partitions, int &offset); + /** + * @brief Retrieves GPU UUIDs and maps them to render node IDs. + */ + void GetGpuUuids(); }; #endif // ROC_JPEG_VAAPI_DECODER_H_ \ No newline at end of file