From 8f9b191453cce71be01171b02bb36baae83da96d Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Mon, 1 Apr 2024 14:51:56 -0400 Subject: [PATCH] Add support for various partition modes for MI300 (#7) [ROCm/rocjpeg commit: 1844e3349c9bdd7f7d103d414362b8daa23e7199] --- projects/rocjpeg/src/rocjpeg_decoder.cpp | 2 +- .../rocjpeg/src/rocjpeg_vaapi_decoder.cpp | 97 +++++++++++++++++-- projects/rocjpeg/src/rocjpeg_vaapi_decoder.h | 16 ++- 3 files changed, 105 insertions(+), 10 deletions(-) diff --git a/projects/rocjpeg/src/rocjpeg_decoder.cpp b/projects/rocjpeg/src/rocjpeg_decoder.cpp index 409720da68..6ba8c40803 100644 --- a/projects/rocjpeg/src/rocjpeg_decoder.cpp +++ b/projects/rocjpeg/src/rocjpeg_decoder.cpp @@ -56,7 +56,7 @@ RocJpegStatus ROCJpegDecoder::InitializeDecoder() { return rocjpeg_status; } if (backend_ == ROCJPEG_BACKEND_HARDWARE) { - rocjpeg_status = jpeg_vaapi_decoder_.InitializeDecoder(hip_dev_prop_.gcnArchName); + rocjpeg_status = jpeg_vaapi_decoder_.InitializeDecoder(hip_dev_prop_.name, hip_dev_prop_.gcnArchName); if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { ERR("ERROR: Failed to initialize the VA-API JPEG decoder!"); return rocjpeg_status; diff --git a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp index c62cd0c816..4d0ffa7a7e 100644 --- a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp +++ b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp @@ -63,21 +63,33 @@ RocJpegVappiDecoder::~RocJpegVappiDecoder() { } } -RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string gcn_arch_name) { +RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string device_name, std::string gcn_arch_name) { std::size_t pos = gcn_arch_name.find_first_of(":"); std::string gcn_arch_name_base = (pos != std::string::npos) ? 
gcn_arch_name.substr(0, pos) : gcn_arch_name; - // There are 8 renderDXXX per physical device on gfx940, gfx941, and gfx942 - int num_render_cards_per_device = ((gcn_arch_name_base.compare("gfx940") == 0) || - (gcn_arch_name_base.compare("gfx941") == 0) || - (gcn_arch_name_base.compare("gfx942") == 0)) ? 8 : 1; std::vector visible_devices; GetVisibleDevices(visible_devices); - std::string drm_node; + + int num_render_cards_per_socket = 1; + int offset = 0; + if (gcn_arch_name_base.compare("gfx940") == 0 || + gcn_arch_name_base.compare("gfx941") == 0 || + gcn_arch_name_base.compare("gfx942") == 0) { + std::vector current_compute_partitions; + GetCurrentComputePartition(current_compute_partitions); + if (current_compute_partitions.empty()) { + //if the current_compute_partitions is empty then the default SPX mode is assumed. + num_render_cards_per_socket = 8; + } else { + GetNumRenderCardsPerDevice(device_name, device_id_, visible_devices, current_compute_partitions, num_render_cards_per_socket, offset); + } + } + + std::string drm_node = "/dev/dri/renderD"; if (device_id_ < visible_devices.size()) { - drm_node = "/dev/dri/renderD" + std::to_string(128 + visible_devices[device_id_] * num_render_cards_per_device); + drm_node += std::to_string(128 + offset + visible_devices[device_id_] * num_render_cards_per_socket); } else { - drm_node = "/dev/dri/renderD" + std::to_string(128 + device_id_ * num_render_cards_per_device); + drm_node += std::to_string(128 + offset + device_id_ * num_render_cards_per_socket); } CHECK_ROCJPEG(InitVAAPI(drm_node)); CHECK_ROCJPEG(CreateDecoderConfig()); @@ -320,4 +332,73 @@ void RocJpegVappiDecoder::GetVisibleDevices(std::vector& visible_devices_ve } std::sort(visible_devices_vetor.begin(), visible_devices_vetor.end()); } +} + +void RocJpegVappiDecoder::GetCurrentComputePartition(std::vector &current_compute_partitions) { + std::string search_path = "/sys/devices/"; + std::string partition_file = "current_compute_partition"; + for (const auto& 
entry : std::filesystem::recursive_directory_iterator(search_path)) { + if (entry.path().filename() == partition_file) { + std::ifstream file(entry.path()); + if (file.is_open()) { + std::string partition; + std::getline(file, partition); + if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) { + current_compute_partitions.push_back(kSpx); + } else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) { + current_compute_partitions.push_back(kDpx); + } else if (partition.compare("TPX") == 0 || partition.compare("tpx") == 0) { + current_compute_partitions.push_back(kTpx); + } else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) { + current_compute_partitions.push_back(kQpx); + } else if (partition.compare("CPX") == 0 || partition.compare("cpx") == 0) { + current_compute_partitions.push_back(kCpx); + } + file.close(); + } + } + } +} + +void RocJpegVappiDecoder::GetNumRenderCardsPerDevice(std::string device_name, uint8_t device_id, std::vector& visible_devices, + std::vector &current_compute_partitions, + int &num_render_cards_per_socket, int &offset) { + offset = 0; + if (!current_compute_partitions.empty()) { + switch (current_compute_partitions[0]) { + case kSpx: + num_render_cards_per_socket = 8; + break; + case kDpx: + num_render_cards_per_socket = 4; + break; + case kTpx: + num_render_cards_per_socket = 2; + // Please note that although there are only 6 XCCs per socket on MI300A, + // there are two dummy render nodes added by the driver. + // This needs to be taken into account when creating drm_node on each socket in TPX mode. + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 3) * 2; + } else { + offset = (device_id / 3) * 2; + } + break; + case kQpx: + num_render_cards_per_socket = 2; + break; + case kCpx: + num_render_cards_per_socket = 1; + // Please note that both MI300A and MI300X have the same gfx_arch_name which is + // gfx942. 
Therefore we cannot use the gfx942 to identify MI300A. + // Instead, use the device name and look for MI300A. + // Also, as explained above in the TPX mode section, we need to take into account + // the extra two dummy nodes when creating drm_node on each socket in CPX mode as well. + std::string mi300a = "MI300A"; + size_t found_mi300a = device_name.find(mi300a); + if (found_mi300a != std::string::npos) { + offset = (device_id / 6) * 2; + } + break; + } + } } \ No newline at end of file diff --git a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.h b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.h index 13c81a10b6..c595308d74 100644 --- a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.h +++ b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.h @@ -26,10 +26,12 @@ THE SOFTWARE. #pragma once #include +#include #include #include #include #include +#include #include #include #include @@ -40,11 +42,19 @@ THE SOFTWARE. /*Note: va.h doesn't have VA_FOURCC_YUYV defined but vaExportSurfaceHandle returns 0x56595559 for packed YUYV for YUV 4:2:2*/ #define ROCJPEG_FOURCC_YUYV 0x56595559 +typedef enum { + kSpx = 0, // Single Partition Accelerator + kDpx = 1, // Dual Partition Accelerator + kTpx = 2, // Triple Partition Accelerator + kQpx = 3, // Quad Partition Accelerator + kCpx = 4, // Core Partition Accelerator +} ComputePartition; + class RocJpegVappiDecoder { public: RocJpegVappiDecoder(int device_id = 0); ~RocJpegVappiDecoder(); - RocJpegStatus InitializeDecoder(std::string gcn_arch_name); + RocJpegStatus InitializeDecoder(std::string device_name, std::string gcn_arch_name); RocJpegStatus SubmitDecode(const JpegStreamParameters *jpeg_stream_params, uint32_t &surface_id); RocJpegStatus ExportSurface(VASurfaceID surface_id, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc); RocJpegStatus SyncSurface(VASurfaceID surface_id); @@ -71,6 +81,10 @@ private: RocJpegStatus CreateDecoderConfig(); RocJpegStatus DestroyDataBuffers(); void GetVisibleDevices(std::vector& visible_devices); + void 
GetCurrentComputePartition(std::vector &currnet_compute_partitions); + void GetNumRenderCardsPerDevice(std::string device_name, uint8_t device_id, std::vector& visible_devices, + std::vector &current_compute_partitions, + int &num_render_cards_per_socket, int &offset); }; #endif // ROC_JPEG_VAAPI_DECODER_H_ \ No newline at end of file