Added the new GPU VA context class to handle VA connections to GPU. Removed the hard coded HW decoder capability info. (#415)
* * rocDecode: Removed the hard coded HW decoder capability info. - We now probe HW decoder capabilities through VA-API from the driver. * * rocDecode: Added number of decoder prob and a few missing tear down calls. * * rocDecode/HW cap change: Added a new singleton class, GpuVaContext, to handle HIP and VA initialization and VA attributes probe for both HW capability check and decoder initialization. * * rocDecode/HW caps: Removed GpuVaContext class out of vaapi_videodecoder.h and into a new file. Removed debug logs. Removed roc_decoder_caps.h. * * rocDecode/HW caps: Fixed a crash issue with multi-thread cases. Called to vaInitialize() and vaTerminte() should be paired. * * rocDecode/HW caps: Added multi-GPU support. * * rocDecode/HW cap: Moved GpuVaContext class implementation back to VAAPI layer. * * rocDecode/HW cap: Added changed based on review comments. * * rocDecode/HW cap: Added changes based on review comments. * * rocDecode/HW cap: GPU VA context class name change based on review comment. --------- Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com>
This commit is contained in:
@@ -29,6 +29,9 @@ THE SOFTWARE.
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <algorithm>
|
||||
#include <unordered_map>
|
||||
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
||||
#include <filesystem>
|
||||
namespace fs = std::filesystem;
|
||||
@@ -36,13 +39,22 @@ THE SOFTWARE.
|
||||
#include <experimental/filesystem>
|
||||
namespace fs = std::experimental::filesystem;
|
||||
#endif
|
||||
#include <libdrm/amdgpu_drm.h>
|
||||
#include <libdrm/amdgpu.h>
|
||||
#include <va/va.h>
|
||||
#include <va/va_drm.h>
|
||||
#include <va/va_drmcommon.h>
|
||||
#include "../roc_decoder_caps.h"
|
||||
#include "../../commons.h"
|
||||
#include "../../../api/rocdecode.h"
|
||||
|
||||
#define CHECK_HIP(call) {\
|
||||
hipError_t hip_status = call;\
|
||||
if (hip_status != hipSuccess) {\
|
||||
std::cout << "HIP failure: " << #call << " failed with 'status: " << hipGetErrorName(hip_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\
|
||||
return ROCDEC_RUNTIME_ERROR;\
|
||||
}\
|
||||
}
|
||||
|
||||
#define CHECK_VAAPI(call) {\
|
||||
VAStatus va_status = call;\
|
||||
if (va_status != VA_STATUS_SUCCESS) {\
|
||||
@@ -61,23 +73,44 @@ typedef enum {
|
||||
kCpx = 4, // Core Partition Accelerator
|
||||
} ComputePartition;
|
||||
|
||||
typedef struct {
|
||||
int device_id;
|
||||
std::string gpu_uuid;
|
||||
int drm_fd;
|
||||
VADisplay va_display;
|
||||
hipDeviceProp_t hip_dev_prop;
|
||||
uint32_t num_dec_engines;
|
||||
int num_va_profiles;
|
||||
std::vector<VAProfile> va_profile_list; // supported profiles by the current GPU
|
||||
VAProfile va_profile; // current profile used
|
||||
VAConfigID va_config_id;
|
||||
bool config_attributes_probed;
|
||||
uint32_t rt_format_attrib;
|
||||
uint32_t output_format_mask;
|
||||
uint32_t max_width;
|
||||
uint32_t max_height;
|
||||
uint32_t min_width;
|
||||
uint32_t min_height;
|
||||
} VaContextInfo;
|
||||
|
||||
class VaapiVideoDecoder {
|
||||
public:
|
||||
VaapiVideoDecoder(RocDecoderCreateInfo &decoder_create_info);
|
||||
~VaapiVideoDecoder();
|
||||
rocDecStatus InitializeDecoder(std::string device_name, std::string gcn_arch_name, std::string& gpu_uuid);
|
||||
rocDecStatus InitializeDecoder();
|
||||
rocDecStatus SubmitDecode(RocdecPicParams *pPicParams);
|
||||
rocDecStatus GetDecodeStatus(int pic_idx, RocdecDecodeStatus* decode_status);
|
||||
rocDecStatus ExportSurface(int pic_idx, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc);
|
||||
rocDecStatus SyncSurface(int pic_idx);
|
||||
rocDecStatus ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconfig_params);
|
||||
|
||||
private:
|
||||
RocDecoderCreateInfo decoder_create_info_;
|
||||
int drm_fd_;
|
||||
VADisplay va_display_;
|
||||
VAProfile va_profile_;
|
||||
VAConfigAttrib va_config_attrib_;
|
||||
VAConfigID va_config_id_;
|
||||
VAProfile va_profile_;
|
||||
VAContextID va_context_id_;
|
||||
std::vector<VASurfaceID> va_surface_ids_;
|
||||
bool supports_modifiers_;
|
||||
@@ -88,6 +121,30 @@ private:
|
||||
uint32_t num_slices_;
|
||||
VABufferID slice_data_buf_id_;
|
||||
uint32_t slice_data_buf_size_;
|
||||
|
||||
bool IsCodecConfigSupported(int device_id, rocDecVideoCodec codec_type, rocDecVideoChromaFormat chroma_format, uint32_t bit_depth_minus8, rocDecVideoSurfaceFormat output_format);
|
||||
rocDecStatus CreateDecoderConfig();
|
||||
rocDecStatus CreateSurfaces();
|
||||
rocDecStatus CreateContext();
|
||||
rocDecStatus DestroyDataBuffers();
|
||||
};
|
||||
|
||||
// The VaContext singleton class providing access to the the GPU VA services
|
||||
class VaContext {
|
||||
public:
|
||||
int num_devices_;
|
||||
std::vector<VaContextInfo> va_contexts_;
|
||||
|
||||
static VaContext& GetInstance() {
|
||||
static VaContext instance;
|
||||
return instance;
|
||||
}
|
||||
rocDecStatus GetVaContext(int device_id, uint32_t *va_ctx_id);
|
||||
rocDecStatus GetVaDisplay(uint32_t va_ctx_id, VADisplay *va_display);
|
||||
rocDecStatus CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap);
|
||||
|
||||
private:
|
||||
std::mutex mutex;
|
||||
/**
|
||||
* @brief A map that associates GPU UUIDs with their corresponding render node indices.
|
||||
*
|
||||
@@ -97,14 +154,15 @@ private:
|
||||
*/
|
||||
std::unordered_map<std::string, int> gpu_uuids_to_render_nodes_map_;
|
||||
|
||||
rocDecStatus InitVAAPI(std::string drm_node);
|
||||
rocDecStatus CreateDecoderConfig();
|
||||
rocDecStatus CreateSurfaces();
|
||||
rocDecStatus CreateContext();
|
||||
rocDecStatus DestroyDataBuffers();
|
||||
VaContext();
|
||||
VaContext(const VaContext&) = delete;
|
||||
VaContext& operator = (const VaContext) = delete;
|
||||
~VaContext();
|
||||
|
||||
rocDecStatus InitHIP(int device_id, hipDeviceProp_t& hip_dev_prop);
|
||||
rocDecStatus InitVAAPI(int va_ctx_idx, std::string drm_node);
|
||||
void GetVisibleDevices(std::vector<int>& visible_devices_vetor);
|
||||
void GetCurrentComputePartition(std::vector<ComputePartition> ¤t_compute_partitions);
|
||||
void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector<int>& visible_devices, std::vector<ComputePartition> ¤t_compute_partitions, int &offset);
|
||||
void GetGpuUuids();
|
||||
void GetVisibleDevices(std::vector<int>& visible_devices);
|
||||
void GetCurrentComputePartition(std::vector<ComputePartition> &currnet_compute_partitions);
|
||||
void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector<int>& visible_devices,
|
||||
std::vector<ComputePartition> ¤t_compute_partitions, int &offset);
|
||||
};
|
||||
Viittaa uudesa ongelmassa
Block a user