2
0

Perform the VA-API/HIP interop once for each surface in the memory pool and store it for reuse (#157)

* Perform the VA-API/HIP interop once for each surface in the memory pool and store it for reuse

* rename the compiler option to ENABLE_INTEROP_BUFFER_REUSE

[ROCm/rocjpeg commit: 78de581773]
Este cometimento está contido em:
Aryan Salmanpour
2025-06-11 15:49:40 -04:00
cometido por GitHub
ascendente 6f9cb446e3
cometimento 4509fc4165
3 ficheiros modificados com 111 adições e 37 eliminações
+1 -1
Ver ficheiro
@@ -3,7 +3,7 @@
Documentation for rocJPEG is available at
[https://rocm.docs.amd.com/projects/rocJPEG/en/latest/](https://rocm.docs.amd.com/projects/rocJPEG/en/latest/)
## rocJPEG 0.12.2 (unreleased)
## rocJPEG 0.13.0 (unreleased)
## Added
* cmake config files
+34 -1
Ver ficheiro
@@ -42,7 +42,7 @@ endif()
# rocJPEG Version
# NOTE: package version and rocjpeg_version.h is generated with this version
set(VERSION "0.12.2")
set(VERSION "0.13.0")
# Set Project Version and Language
project(rocjpeg VERSION ${VERSION} LANGUAGES CXX)
@@ -199,6 +199,39 @@ if(HIP_FOUND AND Libva_FOUND AND Libdrm_amdgpu_FOUND)
ROCJPEG_ROCP_REG_VERSION_PATCH=${VERSION_PATCH})
endif()
# Decide whether the VA-API/HIP interop buffer is reused (ENABLE_INTEROP_BUFFER_REUSE).
# Reuse is enabled by default and is disabled only when the installed ROCm
# version can be parsed and is below 7.0.0. If the version file is missing or
# cannot be parsed, a warning is printed and the default (enabled) is kept.
#
# The value is resolved into a variable first and the macro is defined exactly
# once at the end of this section: target_compile_definitions() appends to the
# target's definitions instead of replacing them, so defining the macro as 1 and
# later as 0 would pass both values to the compiler.
set(rocjpeg_interop_buffer_reuse 1)

# rocm version
set(ROCM_VERSION_FILE "${ROCM_PATH}/.info/version-rocm")
if(EXISTS "${ROCM_VERSION_FILE}")
    file(READ "${ROCM_VERSION_FILE}" ROCM_VERSION_STRING)
    string(STRIP "${ROCM_VERSION_STRING}" ROCM_VERSION_STRING)
    message(STATUS "Full ROCm version string: ${ROCM_VERSION_STRING}")
    # Match version string with regex; the accepted layout is <major>.<minor>.<patch>-<build>
    string(REGEX MATCH "^([0-9]+)\\.([0-9]+)\\.([0-9]+)-([0-9]+)$" _ "${ROCM_VERSION_STRING}")
    if(CMAKE_MATCH_COUNT GREATER 0)
        set(rocm_major_version "${CMAKE_MATCH_1}")
        set(rocm_minor_version "${CMAKE_MATCH_2}")
        set(rocm_patch_version "${CMAKE_MATCH_3}")
        set(rocm_build_number "${CMAKE_MATCH_4}")
        message(STATUS "ROCm Major Version: ${rocm_major_version}")
        message(STATUS "ROCm Minor Version: ${rocm_minor_version}")
        message(STATUS "ROCm Patch Version: ${rocm_patch_version}")
        message(STATUS "ROCm Build Number: ${rocm_build_number}")
        # Fold major/minor/patch into one integer (major * 10000 + minor * 100 + patch)
        # so it can be compared against the 7.0.0 threshold (70000).
        math(EXPR ROCM_VERSION_INT "${rocm_major_version} * 10000 + ${rocm_minor_version} * 100 + ${rocm_patch_version}")
        set(TARGET_VERSION_INT 70000)
        if(ROCM_VERSION_INT LESS TARGET_VERSION_INT)
            message(STATUS "ROCm version is below the required threshold (>= 7.0.0) for reusing the interop buffer, so the compile option: ENABLE_INTEROP_BUFFER_REUSE is disabled.")
            set(rocjpeg_interop_buffer_reuse 0)
        endif()
    else()
        message(WARNING "Failed to parse ROCm version string: ${ROCM_VERSION_STRING}")
    endif()
else()
    message(WARNING "ROCm version file not found at ${ROCM_VERSION_FILE}")
endif()

# Single definition of the macro with the value resolved above.
target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_INTEROP_BUFFER_REUSE=${rocjpeg_interop_buffer_reuse})
#Generate BUILD_INFO
configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/api/rocjpeg_version.h.in ${CMAKE_CURRENT_BINARY_DIR}/rocjpeg_version.h @ONLY )
+76 -35
Ver ficheiro
@@ -223,47 +223,88 @@ RocJpegStatus RocJpegVaapiMemoryPool::GetHipInteropMem(VASurfaceID surface_id, H
[surface_id](const RocJpegVaapiMemPoolEntry& entry){return std::find(entry.va_surface_ids.begin(), entry.va_surface_ids.end(), surface_id) != entry.va_surface_ids.end();});
if (it != entries.end()) {
auto idx = std::distance(it->va_surface_ids.begin(), std::find(it->va_surface_ids.begin(), it->va_surface_ids.end(), surface_id));
if (it->hip_interops[idx].hip_mapped_device_mem != nullptr) {
CHECK_HIP(hipFree(it->hip_interops[idx].hip_mapped_device_mem));
if (it->hip_interops[idx].hip_ext_mem != nullptr) {
CHECK_HIP(hipDestroyExternalMemory(it->hip_interops[idx].hip_ext_mem));
#if defined(ENABLE_INTEROP_BUFFER_REUSE) && ENABLE_INTEROP_BUFFER_REUSE > 0
// Perform the VA-API/HIP interop once for each surface in the memory pool and store it for reuse.
if (it->hip_interops[idx].hip_mapped_device_mem == nullptr) {
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
VA_EXPORT_SURFACE_READ_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS,
&va_drm_prime_surface_desc));
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipImportExternalMemory(&it->hip_interops[idx].hip_ext_mem, &external_mem_handle_desc));
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&it->hip_interops[idx].hip_mapped_device_mem, it->hip_interops[idx].hip_ext_mem, &external_mem_buffer_desc));
uint32_t surface_format = va_drm_prime_surface_desc.fourcc;
// Workaround Mesa <= 24.3 returning non-standard VA fourcc
if (surface_format == VA_FOURCC('Y', 'U', 'Y', 'V'))
surface_format = VA_FOURCC_YUY2;
it->hip_interops[idx].surface_format = surface_format;
it->hip_interops[idx].width = va_drm_prime_surface_desc.width;
it->hip_interops[idx].height = va_drm_prime_surface_desc.height;
it->hip_interops[idx].size = va_drm_prime_surface_desc.objects[0].size;
it->hip_interops[idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
it->hip_interops[idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
it->hip_interops[idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
it->hip_interops[idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
it->hip_interops[idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
it->hip_interops[idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
it->hip_interops[idx].num_layers = va_drm_prime_surface_desc.num_layers;
for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
close(va_drm_prime_surface_desc.objects[i].fd);
}
}
}
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
VA_EXPORT_SURFACE_READ_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS,
&va_drm_prime_surface_desc));
#else
if (it->hip_interops[idx].hip_mapped_device_mem != nullptr) {
CHECK_HIP(hipFree(it->hip_interops[idx].hip_mapped_device_mem));
if (it->hip_interops[idx].hip_ext_mem != nullptr) {
CHECK_HIP(hipDestroyExternalMemory(it->hip_interops[idx].hip_ext_mem));
}
}
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
VA_EXPORT_SURFACE_READ_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS,
&va_drm_prime_surface_desc));
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipImportExternalMemory(&it->hip_interops[idx].hip_ext_mem, &external_mem_handle_desc));
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&it->hip_interops[idx].hip_mapped_device_mem, it->hip_interops[idx].hip_ext_mem, &external_mem_buffer_desc));
CHECK_HIP(hipImportExternalMemory(&it->hip_interops[idx].hip_ext_mem, &external_mem_handle_desc));
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&it->hip_interops[idx].hip_mapped_device_mem, it->hip_interops[idx].hip_ext_mem, &external_mem_buffer_desc));
uint32_t surface_format = va_drm_prime_surface_desc.fourcc;
// Workaround Mesa <= 24.3 returning non-standard VA fourcc
if (surface_format == VA_FOURCC('Y', 'U', 'Y', 'V'))
surface_format = VA_FOURCC_YUY2;
uint32_t surface_format = va_drm_prime_surface_desc.fourcc;
// Workaround Mesa <= 24.3 returning non-standard VA fourcc
if (surface_format == VA_FOURCC('Y', 'U', 'Y', 'V'))
surface_format = VA_FOURCC_YUY2;
it->hip_interops[idx].surface_format = surface_format;
it->hip_interops[idx].width = va_drm_prime_surface_desc.width;
it->hip_interops[idx].height = va_drm_prime_surface_desc.height;
it->hip_interops[idx].size = va_drm_prime_surface_desc.objects[0].size;
it->hip_interops[idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
it->hip_interops[idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
it->hip_interops[idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
it->hip_interops[idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
it->hip_interops[idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
it->hip_interops[idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
it->hip_interops[idx].num_layers = va_drm_prime_surface_desc.num_layers;
it->hip_interops[idx].surface_format = surface_format;
it->hip_interops[idx].width = va_drm_prime_surface_desc.width;
it->hip_interops[idx].height = va_drm_prime_surface_desc.height;
it->hip_interops[idx].size = va_drm_prime_surface_desc.objects[0].size;
it->hip_interops[idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
it->hip_interops[idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
it->hip_interops[idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
it->hip_interops[idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
it->hip_interops[idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
it->hip_interops[idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
it->hip_interops[idx].num_layers = va_drm_prime_surface_desc.num_layers;
for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
close(va_drm_prime_surface_desc.objects[i].fd);
}
for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
close(va_drm_prime_surface_desc.objects[i].fd);
}
#endif
hip_interop = it->hip_interops[idx];
return ROCJPEG_STATUS_SUCCESS;
}