diff --git a/src/rocdecode/roc_decoder.cpp b/src/rocdecode/roc_decoder.cpp index e38effb18c..321105336f 100644 --- a/src/rocdecode/roc_decoder.cpp +++ b/src/rocdecode/roc_decoder.cpp @@ -59,7 +59,7 @@ RocDecoder::RocDecoder(RocDecoderCreateInfo& decoder_create_info): va_video_deco memset((void *)&hip_interop_[i], 0, sizeof(hip_interop_[i])); } - rocdec_status = va_video_decoder_.InitializeDecoder(hip_dev_prop_.gcnArchName); + rocdec_status = va_video_decoder_.InitializeDecoder(hip_dev_prop_.name, hip_dev_prop_.gcnArchName); if (rocdec_status != ROCDEC_SUCCESS) { ERR("Failed to initilize the VAAPI Video decoder."); return rocdec_status; diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index d88023ca79..b535126bb2 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -59,7 +59,7 @@ VaapiVideoDecoder::~VaapiVideoDecoder() { } } -rocDecStatus VaapiVideoDecoder::InitializeDecoder(std::string gcn_arch_name) { +rocDecStatus VaapiVideoDecoder::InitializeDecoder(std::string device_name, std::string gcn_arch_name) { /* The caller in roc_decoder.cpp passes hip_dev_prop_.name as device_name and hip_dev_prop_.gcnArchName as gcn_arch_name; device_name is only forwarded to GetNumRenderCardsPerDevice, gcn_arch_name selects the gfx940/gfx941/gfx942 path. */ rocDecStatus rocdec_status = ROCDEC_SUCCESS; //Before initializing the VAAPI, first check to see if the requested codec config is supported @@ -73,17 +73,29 @@ rocDecStatus VaapiVideoDecoder::InitializeDecoder(std::string gcn_arch_name) { std::size_t pos = gcn_arch_name.find_first_of(":"); std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; - // There are 8 renderDXXX per physical device on gfx940/gfx941/gfx942 - int num_render_cards_per_device = ((gcn_arch_name_base.compare("gfx940") == 0) || - (gcn_arch_name_base.compare("gfx941") == 0) || - (gcn_arch_name_base.compare("gfx942") == 0)) ? 
8 : 1; std::vector visible_devices; GetVisibleDevices(visible_devices); - std::string drm_node; + + int num_render_cards_per_socket = 1; + int offset = 0; + if (gcn_arch_name_base.compare("gfx940") == 0 || + gcn_arch_name_base.compare("gfx941") == 0 || + gcn_arch_name_base.compare("gfx942") == 0) { + std::vector current_compute_partitions; + GetCurrentComputePartition(current_compute_partitions); + if (current_compute_partitions.empty()) { + // If current_compute_partitions is empty, the default SPX mode (num_render_cards_per_socket = 8) is assumed. + num_render_cards_per_socket = 8; + } else { + GetNumRenderCardsPerDevice(device_name, decoder_create_info_.device_id, visible_devices, current_compute_partitions, num_render_cards_per_socket, offset); + } + } + + std::string drm_node = "/dev/dri/renderD"; if (decoder_create_info_.device_id < visible_devices.size()) { - drm_node = "/dev/dri/renderD" + std::to_string(128 + visible_devices[decoder_create_info_.device_id] * num_render_cards_per_device); + drm_node += std::to_string(128 + offset + visible_devices[decoder_create_info_.device_id] * num_render_cards_per_socket); } else { - drm_node = "/dev/dri/renderD" + std::to_string(128 + decoder_create_info_.device_id * num_render_cards_per_device); + drm_node += std::to_string(128 + offset + decoder_create_info_.device_id * num_render_cards_per_socket); } rocdec_status = InitVAAPI(drm_node); if (rocdec_status != ROCDEC_SUCCESS) { @@ -423,4 +435,73 @@ void VaapiVideoDecoder::GetVisibleDevices(std::vector& visible_devices_veto } std::sort(visible_devices_vetor.begin(), visible_devices_vetor.end()); } +} + +void VaapiVideoDecoder::GetCurrentComputePartition(std::vector ¤t_compute_partitions) { /* Walks /sys/devices/ recursively; for every file named current_compute_partition that can be opened, reads its first line and appends the matching ComputePartition value. Lines that match none of the SPX/DPX/TPX/QPX/CPX spellings below are skipped. */ + std::string search_path = "/sys/devices/"; + std::string partition_file = "current_compute_partition"; + for (const auto& entry : std::filesystem::recursive_directory_iterator(search_path)) { /* NOTE(review): this form of recursive_directory_iterator reports filesystem errors by throwing std::filesystem::filesystem_error and nothing in this function catches it - confirm callers tolerate that. */ + if (entry.path().filename() == partition_file) { + std::ifstream file(entry.path()); + if (file.is_open()) { + 
std::string partition; + std::getline(file, partition); + if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) { + current_compute_partitions.push_back(kSpx); + } else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) { + current_compute_partitions.push_back(kDpx); + } else if (partition.compare("TPX") == 0 || partition.compare("tpx") == 0) { + current_compute_partitions.push_back(kTpx); + } else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) { + current_compute_partitions.push_back(kQpx); + } else if (partition.compare("CPX") == 0 || partition.compare("cpx") == 0) { + current_compute_partitions.push_back(kCpx); + } + file.close(); + } + } + } +} + +void VaapiVideoDecoder::GetNumRenderCardsPerDevice(std::string device_name, uint8_t device_id, std::vector& visible_devices, + std::vector ¤t_compute_partitions, + int &num_render_cards_per_socket, int &offset) { /* Derives both outputs from the first entry of current_compute_partitions only. offset is always reset to 0 first; num_render_cards_per_socket is left untouched when the vector is empty. */ + offset = 0; + if (!current_compute_partitions.empty()) { + switch (current_compute_partitions[0]) { + case kSpx: + num_render_cards_per_socket = 8; + break; + case kDpx: + num_render_cards_per_socket = 4; + break; + case kTpx: + num_render_cards_per_socket = 2; + // Please note that although there are only 6 XCCs per socket on MI300A, + // there are two dummy render nodes added by the driver. + // This needs to be taken into account when creating drm_node on each socket in TPX mode. + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 3) * 2; + } else { + offset = (device_id / 3) * 2; + } + break; + case kQpx: + num_render_cards_per_socket = 2; + break; + case kCpx: + num_render_cards_per_socket = 1; + // Please note that both MI300A and MI300X have the same gfx_arch_name which is + // gfx942. Therefore we cannot use the gfx942 to identify MI300A. 
+ // Instead, use the device name and look for MI300A. + // Also, as explained above in the TPX mode section, we need to take into account + // the extra two dummy nodes when creating drm_node on each socket in CPX mode as well. NOTE(review): unlike the TPX case, this uses device_id directly rather than visible_devices[device_id] - confirm this is intended. + std::string mi300a = "MI300A"; + size_t found_mi300a = device_name.find(mi300a); + if (found_mi300a != std::string::npos) { + offset = (device_id / 6) * 2; + } + break; + } + } } \ No newline at end of file diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.h b/src/rocdecode/vaapi/vaapi_videodecoder.h index 78d6f08ccf..5c68677501 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.h +++ b/src/rocdecode/vaapi/vaapi_videodecoder.h @@ -23,11 +23,13 @@ THE SOFTWARE. #pragma once #include +#include #include #include #include #include #include +#include #include #include #include @@ -45,11 +47,19 @@ THE SOFTWARE. #define INIT_SLICE_PARAM_LIST_NUM 16 // initial slice parameter buffer list size +typedef enum { + kSpx = 0, // Single Partition Accelerator + kDpx = 1, // Dual Partition Accelerator + kTpx = 2, // Triple Partition Accelerator + kQpx = 3, // Quad Partition Accelerator + kCpx = 4, // Core Partition Accelerator +} ComputePartition; + class VaapiVideoDecoder { public: VaapiVideoDecoder(RocDecoderCreateInfo &decoder_create_info); ~VaapiVideoDecoder(); - rocDecStatus InitializeDecoder(std::string gcn_arch_name); + rocDecStatus InitializeDecoder(std::string device_name, std::string gcn_arch_name); rocDecStatus SubmitDecode(RocdecPicParams *pPicParams); rocDecStatus GetDecodeStatus(int pic_idx, RocdecDecodeStatus* decode_status); rocDecStatus ExportSurface(int pic_idx, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc); @@ -78,4 +88,8 @@ private: rocDecStatus CreateContext(); rocDecStatus DestroyDataBuffers(); void GetVisibleDevices(std::vector& visible_devices); + void GetCurrentComputePartition(std::vector &currnet_compute_partitions); /* NOTE(review): the parameter is spelled "currnet" here; the definition in vaapi_videodecoder.cpp names it current_compute_partitions. */ + void GetNumRenderCardsPerDevice(std::string device_name, uint8_t device_id, std::vector& 
visible_devices, + std::vector ¤t_compute_partitions, + int &num_render_cards_per_socket, int &offset); }; \ No newline at end of file