Perform the VA-API/HIP interop once for each surface in the memory pool and store it for reuse (#157)
* Perform the VA-API/HIP interop once for each surface in the memory pool and store it for reuse
* rename the compiler option to ENABLE_INTEROP_BUFFER_REUSE
[ROCm/rocjpeg commit: 78de581773]
Este cometimento está contido em:
cometido por
GitHub
ascendente
6f9cb446e3
cometimento
4509fc4165
@@ -3,7 +3,7 @@
|
||||
Documentation for rocJPEG is available at
|
||||
[https://rocm.docs.amd.com/projects/rocJPEG/en/latest/](https://rocm.docs.amd.com/projects/rocJPEG/en/latest/)
|
||||
|
||||
## rocJPEG 0.12.2 (unreleased)
|
||||
## rocJPEG 0.13.0 (unreleased)
|
||||
|
||||
## Added
|
||||
* cmake config files
|
||||
|
||||
@@ -42,7 +42,7 @@ endif()
|
||||
|
||||
# rocJPEG Version
|
||||
# NOTE: package version and rocjpeg_version.h is generated with this version
|
||||
set(VERSION "0.12.2")
|
||||
set(VERSION "0.13.0")
|
||||
|
||||
# Set Project Version and Language
|
||||
project(rocjpeg VERSION ${VERSION} LANGUAGES CXX)
|
||||
@@ -199,6 +199,39 @@ if(HIP_FOUND AND Libva_FOUND AND Libdrm_amdgpu_FOUND)
|
||||
ROCJPEG_ROCP_REG_VERSION_PATCH=${VERSION_PATCH})
|
||||
endif()
|
||||
|
||||
# Decide whether the VA-API/HIP interop buffer is reused, then publish the
# decision to the sources as the ENABLE_INTEROP_BUFFER_REUSE macro.
#
# Reuse is enabled by default. It is disabled only when the ROCm version file
# is present, parses as <major>.<minor>.<patch>-<build>, and reports a release
# below 7.0.0. A missing or unparsable version file only produces a warning and
# leaves the default in place.
#
# The value is collected in a variable and the macro is defined exactly once at
# the end: calling target_compile_definitions() twice with different values for
# the same macro leaves two conflicting entries on the target.
set(rocjpeg_interop_buffer_reuse 1)

# rocm version
set(ROCM_VERSION_FILE "${ROCM_PATH}/.info/version-rocm")
if(EXISTS "${ROCM_VERSION_FILE}")
    file(READ "${ROCM_VERSION_FILE}" ROCM_VERSION_STRING)
    string(STRIP "${ROCM_VERSION_STRING}" ROCM_VERSION_STRING)
    message(STATUS "Full ROCm version string: ${ROCM_VERSION_STRING}")
    # Match version string with regex; the capture groups are
    # major, minor, patch and build number, in that order.
    string(REGEX MATCH "^([0-9]+)\\.([0-9]+)\\.([0-9]+)-([0-9]+)$" _ "${ROCM_VERSION_STRING}")
    if(CMAKE_MATCH_COUNT GREATER 0)
        set(rocm_major_version "${CMAKE_MATCH_1}")
        set(rocm_minor_version "${CMAKE_MATCH_2}")
        set(rocm_patch_version "${CMAKE_MATCH_3}")
        set(rocm_build_number "${CMAKE_MATCH_4}")
        message(STATUS "ROCm Major Version: ${rocm_major_version}")
        message(STATUS "ROCm Minor Version: ${rocm_minor_version}")
        message(STATUS "ROCm Patch Version: ${rocm_patch_version}")
        message(STATUS "ROCm Build Number: ${rocm_build_number}")
        # Fold major/minor/patch into one integer (7.0.0 -> 70000) so the
        # threshold check is a single numeric comparison.
        math(EXPR ROCM_VERSION_INT "${rocm_major_version} * 10000 + ${rocm_minor_version} * 100 + ${rocm_patch_version}")
        set(TARGET_VERSION_INT 70000)
        if(ROCM_VERSION_INT LESS TARGET_VERSION_INT)
            message(STATUS "ROCm version is below the required threshold (>= 7.0.0) for reusing the interop buffer, so the compile option: ENABLE_INTEROP_BUFFER_REUSE is disabled.")
            set(rocjpeg_interop_buffer_reuse 0)
        endif()
    else()
        message(WARNING "Failed to parse ROCm version string: ${ROCM_VERSION_STRING}")
    endif()
else()
    message(WARNING "ROCm version file not found at ${ROCM_VERSION_FILE}")
endif()

# Single definition of the macro with the value resolved above.
target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_INTEROP_BUFFER_REUSE=${rocjpeg_interop_buffer_reuse})
|
||||
|
||||
#Generate BUILD_INFO
|
||||
configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/api/rocjpeg_version.h.in ${CMAKE_CURRENT_BINARY_DIR}/rocjpeg_version.h @ONLY )
|
||||
|
||||
|
||||
@@ -223,47 +223,88 @@ RocJpegStatus RocJpegVaapiMemoryPool::GetHipInteropMem(VASurfaceID surface_id, H
|
||||
[surface_id](const RocJpegVaapiMemPoolEntry& entry){return std::find(entry.va_surface_ids.begin(), entry.va_surface_ids.end(), surface_id) != entry.va_surface_ids.end();});
|
||||
if (it != entries.end()) {
|
||||
auto idx = std::distance(it->va_surface_ids.begin(), std::find(it->va_surface_ids.begin(), it->va_surface_ids.end(), surface_id));
|
||||
if (it->hip_interops[idx].hip_mapped_device_mem != nullptr) {
|
||||
CHECK_HIP(hipFree(it->hip_interops[idx].hip_mapped_device_mem));
|
||||
if (it->hip_interops[idx].hip_ext_mem != nullptr) {
|
||||
CHECK_HIP(hipDestroyExternalMemory(it->hip_interops[idx].hip_ext_mem));
|
||||
#if defined(ENABLE_INTEROP_BUFFER_REUSE) && ENABLE_INTEROP_BUFFER_REUSE > 0
|
||||
// Perform the VA-API/HIP interop once for each surface in the memory pool and store it for reuse.
|
||||
if (it->hip_interops[idx].hip_mapped_device_mem == nullptr) {
|
||||
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
|
||||
CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
|
||||
VA_EXPORT_SURFACE_READ_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS,
|
||||
&va_drm_prime_surface_desc));
|
||||
|
||||
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
|
||||
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
|
||||
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
|
||||
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
|
||||
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
|
||||
CHECK_HIP(hipImportExternalMemory(&it->hip_interops[idx].hip_ext_mem, &external_mem_handle_desc));
|
||||
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&it->hip_interops[idx].hip_mapped_device_mem, it->hip_interops[idx].hip_ext_mem, &external_mem_buffer_desc));
|
||||
|
||||
uint32_t surface_format = va_drm_prime_surface_desc.fourcc;
|
||||
// Workaround Mesa <= 24.3 returning non-standard VA fourcc
|
||||
if (surface_format == VA_FOURCC('Y', 'U', 'Y', 'V'))
|
||||
surface_format = VA_FOURCC_YUY2;
|
||||
|
||||
it->hip_interops[idx].surface_format = surface_format;
|
||||
it->hip_interops[idx].width = va_drm_prime_surface_desc.width;
|
||||
it->hip_interops[idx].height = va_drm_prime_surface_desc.height;
|
||||
it->hip_interops[idx].size = va_drm_prime_surface_desc.objects[0].size;
|
||||
it->hip_interops[idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
|
||||
it->hip_interops[idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
|
||||
it->hip_interops[idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
|
||||
it->hip_interops[idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
|
||||
it->hip_interops[idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
|
||||
it->hip_interops[idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
|
||||
it->hip_interops[idx].num_layers = va_drm_prime_surface_desc.num_layers;
|
||||
|
||||
for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
|
||||
close(va_drm_prime_surface_desc.objects[i].fd);
|
||||
}
|
||||
}
|
||||
}
|
||||
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
|
||||
CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
|
||||
VA_EXPORT_SURFACE_READ_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS,
|
||||
&va_drm_prime_surface_desc));
|
||||
#else
|
||||
if (it->hip_interops[idx].hip_mapped_device_mem != nullptr) {
|
||||
CHECK_HIP(hipFree(it->hip_interops[idx].hip_mapped_device_mem));
|
||||
if (it->hip_interops[idx].hip_ext_mem != nullptr) {
|
||||
CHECK_HIP(hipDestroyExternalMemory(it->hip_interops[idx].hip_ext_mem));
|
||||
}
|
||||
}
|
||||
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
|
||||
CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
|
||||
VA_EXPORT_SURFACE_READ_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS,
|
||||
&va_drm_prime_surface_desc));
|
||||
|
||||
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
|
||||
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
|
||||
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
|
||||
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
|
||||
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
|
||||
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
|
||||
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
|
||||
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
|
||||
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
|
||||
CHECK_HIP(hipImportExternalMemory(&it->hip_interops[idx].hip_ext_mem, &external_mem_handle_desc));
|
||||
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&it->hip_interops[idx].hip_mapped_device_mem, it->hip_interops[idx].hip_ext_mem, &external_mem_buffer_desc));
|
||||
CHECK_HIP(hipImportExternalMemory(&it->hip_interops[idx].hip_ext_mem, &external_mem_handle_desc));
|
||||
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&it->hip_interops[idx].hip_mapped_device_mem, it->hip_interops[idx].hip_ext_mem, &external_mem_buffer_desc));
|
||||
|
||||
uint32_t surface_format = va_drm_prime_surface_desc.fourcc;
|
||||
// Workaround Mesa <= 24.3 returning non-standard VA fourcc
|
||||
if (surface_format == VA_FOURCC('Y', 'U', 'Y', 'V'))
|
||||
surface_format = VA_FOURCC_YUY2;
|
||||
uint32_t surface_format = va_drm_prime_surface_desc.fourcc;
|
||||
// Workaround Mesa <= 24.3 returning non-standard VA fourcc
|
||||
if (surface_format == VA_FOURCC('Y', 'U', 'Y', 'V'))
|
||||
surface_format = VA_FOURCC_YUY2;
|
||||
|
||||
it->hip_interops[idx].surface_format = surface_format;
|
||||
it->hip_interops[idx].width = va_drm_prime_surface_desc.width;
|
||||
it->hip_interops[idx].height = va_drm_prime_surface_desc.height;
|
||||
it->hip_interops[idx].size = va_drm_prime_surface_desc.objects[0].size;
|
||||
it->hip_interops[idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
|
||||
it->hip_interops[idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
|
||||
it->hip_interops[idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
|
||||
it->hip_interops[idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
|
||||
it->hip_interops[idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
|
||||
it->hip_interops[idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
|
||||
it->hip_interops[idx].num_layers = va_drm_prime_surface_desc.num_layers;
|
||||
it->hip_interops[idx].surface_format = surface_format;
|
||||
it->hip_interops[idx].width = va_drm_prime_surface_desc.width;
|
||||
it->hip_interops[idx].height = va_drm_prime_surface_desc.height;
|
||||
it->hip_interops[idx].size = va_drm_prime_surface_desc.objects[0].size;
|
||||
it->hip_interops[idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
|
||||
it->hip_interops[idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
|
||||
it->hip_interops[idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
|
||||
it->hip_interops[idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
|
||||
it->hip_interops[idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
|
||||
it->hip_interops[idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
|
||||
it->hip_interops[idx].num_layers = va_drm_prime_surface_desc.num_layers;
|
||||
|
||||
for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
|
||||
close(va_drm_prime_surface_desc.objects[i].fd);
|
||||
}
|
||||
for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
|
||||
close(va_drm_prime_surface_desc.objects[i].fd);
|
||||
}
|
||||
#endif
|
||||
hip_interop = it->hip_interops[idx];
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador