Added the new GPU VA context class to handle VA connections to GPU. Removed the hard coded HW decoder capability info. (#415)

* * rocDecode: Removed the hard coded HW decoder capability info.
 - We now probe HW decoder capabilities through VA-API from the driver.

* * rocDecode: Added number of decoder prob and a few missing tear down calls.

* * rocDecode/HW cap change: Added a new singleton class, GpuVaContext, to handle HIP and VA initialization and VA attributes probe for both HW capability check and decoder initialization.

* * rocDecode/HW caps: Removed GpuVaContext class out of vaapi_videodecoder.h and into a new file. Removed debug logs. Removed roc_decoder_caps.h.

* * rocDecode/HW caps: Fixed a crash issue with multi-thread cases. Called to vaInitialize() and vaTerminte() should be paired.

* * rocDecode/HW caps: Added multi-GPU support.

* * rocDecode/HW cap: Moved GpuVaContext class implementation back to VAAPI layer.

* * rocDecode/HW cap: Added changed based on review comments.

* * rocDecode/HW cap: Added changes based on review comments.

* * rocDecode/HW cap: GPU VA context class name change based on review comment.

---------

Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com>
This commit is contained in:
jeffqjiangNew
2025-01-10 14:44:06 -05:00
committed by GitHub
vanhempi 6477cd1100
commit ef06f12dfa
7 muutettua tiedostoa jossa 578 lisäystä ja 405 poistoa
+70 -12
Näytä tiedosto
@@ -29,6 +29,9 @@ THE SOFTWARE.
#include <fcntl.h>
#include <unistd.h>
#include <cstring>
#include <mutex>
#include <algorithm>
#include <unordered_map>
#if __cplusplus >= 201703L && __has_include(<filesystem>)
#include <filesystem>
namespace fs = std::filesystem;
@@ -36,13 +39,22 @@ THE SOFTWARE.
#include <experimental/filesystem>
namespace fs = std::experimental::filesystem;
#endif
#include <libdrm/amdgpu_drm.h>
#include <libdrm/amdgpu.h>
#include <va/va.h>
#include <va/va_drm.h>
#include <va/va_drmcommon.h>
#include "../roc_decoder_caps.h"
#include "../../commons.h"
#include "../../../api/rocdecode.h"
#define CHECK_HIP(call) {\
hipError_t hip_status = call;\
if (hip_status != hipSuccess) {\
std::cout << "HIP failure: " << #call << " failed with 'status: " << hipGetErrorName(hip_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\
return ROCDEC_RUNTIME_ERROR;\
}\
}
#define CHECK_VAAPI(call) {\
VAStatus va_status = call;\
if (va_status != VA_STATUS_SUCCESS) {\
@@ -61,23 +73,44 @@ typedef enum {
kCpx = 4, // Core Partition Accelerator
} ComputePartition;
typedef struct {
int device_id;
std::string gpu_uuid;
int drm_fd;
VADisplay va_display;
hipDeviceProp_t hip_dev_prop;
uint32_t num_dec_engines;
int num_va_profiles;
std::vector<VAProfile> va_profile_list; // supported profiles by the current GPU
VAProfile va_profile; // current profile used
VAConfigID va_config_id;
bool config_attributes_probed;
uint32_t rt_format_attrib;
uint32_t output_format_mask;
uint32_t max_width;
uint32_t max_height;
uint32_t min_width;
uint32_t min_height;
} VaContextInfo;
class VaapiVideoDecoder {
public:
VaapiVideoDecoder(RocDecoderCreateInfo &decoder_create_info);
~VaapiVideoDecoder();
rocDecStatus InitializeDecoder(std::string device_name, std::string gcn_arch_name, std::string& gpu_uuid);
rocDecStatus InitializeDecoder();
rocDecStatus SubmitDecode(RocdecPicParams *pPicParams);
rocDecStatus GetDecodeStatus(int pic_idx, RocdecDecodeStatus* decode_status);
rocDecStatus ExportSurface(int pic_idx, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc);
rocDecStatus SyncSurface(int pic_idx);
rocDecStatus ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconfig_params);
private:
RocDecoderCreateInfo decoder_create_info_;
int drm_fd_;
VADisplay va_display_;
VAProfile va_profile_;
VAConfigAttrib va_config_attrib_;
VAConfigID va_config_id_;
VAProfile va_profile_;
VAContextID va_context_id_;
std::vector<VASurfaceID> va_surface_ids_;
bool supports_modifiers_;
@@ -88,6 +121,30 @@ private:
uint32_t num_slices_;
VABufferID slice_data_buf_id_;
uint32_t slice_data_buf_size_;
bool IsCodecConfigSupported(int device_id, rocDecVideoCodec codec_type, rocDecVideoChromaFormat chroma_format, uint32_t bit_depth_minus8, rocDecVideoSurfaceFormat output_format);
rocDecStatus CreateDecoderConfig();
rocDecStatus CreateSurfaces();
rocDecStatus CreateContext();
rocDecStatus DestroyDataBuffers();
};
// The VaContext singleton class providing access to the the GPU VA services
class VaContext {
public:
int num_devices_;
std::vector<VaContextInfo> va_contexts_;
static VaContext& GetInstance() {
static VaContext instance;
return instance;
}
rocDecStatus GetVaContext(int device_id, uint32_t *va_ctx_id);
rocDecStatus GetVaDisplay(uint32_t va_ctx_id, VADisplay *va_display);
rocDecStatus CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap);
private:
std::mutex mutex;
/**
* @brief A map that associates GPU UUIDs with their corresponding render node indices.
*
@@ -97,14 +154,15 @@ private:
*/
std::unordered_map<std::string, int> gpu_uuids_to_render_nodes_map_;
rocDecStatus InitVAAPI(std::string drm_node);
rocDecStatus CreateDecoderConfig();
rocDecStatus CreateSurfaces();
rocDecStatus CreateContext();
rocDecStatus DestroyDataBuffers();
VaContext();
VaContext(const VaContext&) = delete;
VaContext& operator = (const VaContext) = delete;
~VaContext();
rocDecStatus InitHIP(int device_id, hipDeviceProp_t& hip_dev_prop);
rocDecStatus InitVAAPI(int va_ctx_idx, std::string drm_node);
void GetVisibleDevices(std::vector<int>& visible_devices_vetor);
void GetCurrentComputePartition(std::vector<ComputePartition> &current_compute_partitions);
void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector<int>& visible_devices, std::vector<ComputePartition> &current_compute_partitions, int &offset);
void GetGpuUuids();
void GetVisibleDevices(std::vector<int>& visible_devices);
void GetCurrentComputePartition(std::vector<ComputePartition> &currnet_compute_partitions);
void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector<int>& visible_devices,
std::vector<ComputePartition> &current_compute_partitions, int &offset);
};