diff --git a/projects/rocjpeg/CHANGELOG.md b/projects/rocjpeg/CHANGELOG.md index 578b9a022b..74dd4e9319 100644 --- a/projects/rocjpeg/CHANGELOG.md +++ b/projects/rocjpeg/CHANGELOG.md @@ -3,7 +3,7 @@ Documentation for rocJPEG is available at [https://rocm.docs.amd.com/projects/rocJPEG/en/latest/](https://rocm.docs.amd.com/projects/rocJPEG/en/latest/) -## rocJPEG 0.12.2 (unreleased) +## rocJPEG 0.13.0 (unreleased) ## Added * cmake config files diff --git a/projects/rocjpeg/CMakeLists.txt b/projects/rocjpeg/CMakeLists.txt index b725778714..8f242f2a3d 100644 --- a/projects/rocjpeg/CMakeLists.txt +++ b/projects/rocjpeg/CMakeLists.txt @@ -42,7 +42,7 @@ endif() # rocJPEG Version # NOTE: package version and rocjpeg_version.h is generated with this version -set(VERSION "0.12.2") +set(VERSION "0.13.0") # Set Project Version and Language project(rocjpeg VERSION ${VERSION} LANGUAGES CXX) @@ -199,6 +199,39 @@ if(HIP_FOUND AND Libva_FOUND AND Libdrm_amdgpu_FOUND) ROCJPEG_ROCP_REG_VERSION_PATCH=${VERSION_PATCH}) endif() + # Set a default compile option to reuse the interop buffer. Disable this option if the ROCm version is below 7.0.0. + target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_INTEROP_BUFFER_REUSE=1) + # rocm version + set(ROCM_VERSION_FILE ${ROCM_PATH}/.info/version-rocm) + if(EXISTS "${ROCM_VERSION_FILE}") + file(READ "${ROCM_VERSION_FILE}" ROCM_VERSION_STRING) + string(STRIP "${ROCM_VERSION_STRING}" ROCM_VERSION_STRING) + message(STATUS "Full ROCm version string: ${ROCM_VERSION_STRING}") + # Match version string with regex + string(REGEX MATCH "^([0-9]+)\\.([0-9]+)\\.([0-9]+)-([0-9]+)$" _ "${ROCM_VERSION_STRING}") + if(CMAKE_MATCH_COUNT GREATER 0) + set(rocm_major_version "${CMAKE_MATCH_1}") + set(rocm_minor_version "${CMAKE_MATCH_2}") + set(rocm_patch_version "${CMAKE_MATCH_3}") + set(rocm_build_number "${CMAKE_MATCH_4}") + message(STATUS "ROCm Major Version: ${rocm_major_version}") + message(STATUS "ROCm Minor Version: ${rocm_minor_version}") + message(STATUS "ROCm Patch Version: ${rocm_patch_version}") + message(STATUS "ROCm Build Number: ${rocm_build_number}") + # Convert strings to integers + math(EXPR ROCM_VERSION_INT "${rocm_major_version} * 10000 + ${rocm_minor_version} * 100 + ${rocm_patch_version}") + set(TARGET_VERSION_INT 70000) + if(ROCM_VERSION_INT LESS TARGET_VERSION_INT) + message(STATUS "ROCm version is below the required threshold (>= 7.0.0) for reusing the interop buffer, so the compile option: ENABLE_INTEROP_BUFFER_REUSE is disabled.") + target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_INTEROP_BUFFER_REUSE=0) + endif() + else() + message(WARNING "Failed to parse ROCm version string: ${ROCM_VERSION_STRING}") + endif() + else() + message(WARNING "ROCm version file not found at ${ROCM_VERSION_FILE}") + endif() + #Generate BUILD_INFO configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/api/rocjpeg_version.h.in ${CMAKE_CURRENT_BINARY_DIR}/rocjpeg_version.h @ONLY ) diff --git a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp index c2999eb8c1..e914e028e4 100644 --- a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp +++ b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp @@ -223,47 +223,88 @@ RocJpegStatus RocJpegVaapiMemoryPool::GetHipInteropMem(VASurfaceID surface_id, H [surface_id](const RocJpegVaapiMemPoolEntry& entry){return std::find(entry.va_surface_ids.begin(), entry.va_surface_ids.end(), surface_id) != entry.va_surface_ids.end();}); if (it != entries.end()) { auto idx = std::distance(it->va_surface_ids.begin(), std::find(it->va_surface_ids.begin(), it->va_surface_ids.end(), surface_id)); - if (it->hip_interops[idx].hip_mapped_device_mem != nullptr) { - CHECK_HIP(hipFree(it->hip_interops[idx].hip_mapped_device_mem)); - if (it->hip_interops[idx].hip_ext_mem != nullptr) { - CHECK_HIP(hipDestroyExternalMemory(it->hip_interops[idx].hip_ext_mem)); + #if defined(ENABLE_INTEROP_BUFFER_REUSE) && ENABLE_INTEROP_BUFFER_REUSE > 0 + // Perform the VA-API/HIP interop once for each surface in the memory pool and store it for reuse. + if (it->hip_interops[idx].hip_mapped_device_mem == nullptr) { + VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {}; + CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, + VA_EXPORT_SURFACE_READ_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS, + &va_drm_prime_surface_desc)); + + hipExternalMemoryHandleDesc external_mem_handle_desc = {}; + hipExternalMemoryBufferDesc external_mem_buffer_desc = {}; + external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd; + external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd; + external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size; + + CHECK_HIP(hipImportExternalMemory(&it->hip_interops[idx].hip_ext_mem, &external_mem_handle_desc)); + external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size; + CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&it->hip_interops[idx].hip_mapped_device_mem, it->hip_interops[idx].hip_ext_mem, &external_mem_buffer_desc)); + + uint32_t surface_format = va_drm_prime_surface_desc.fourcc; + // Workaround Mesa <= 24.3 returning non-standard VA fourcc + if (surface_format == VA_FOURCC('Y', 'U', 'Y', 'V')) + surface_format = VA_FOURCC_YUY2; + + it->hip_interops[idx].surface_format = surface_format; + it->hip_interops[idx].width = va_drm_prime_surface_desc.width; + it->hip_interops[idx].height = va_drm_prime_surface_desc.height; + it->hip_interops[idx].size = va_drm_prime_surface_desc.objects[0].size; + it->hip_interops[idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0]; + it->hip_interops[idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0]; + it->hip_interops[idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0]; + it->hip_interops[idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0]; + it->hip_interops[idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0]; + it->hip_interops[idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0]; + it->hip_interops[idx].num_layers = va_drm_prime_surface_desc.num_layers; + + for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) { + close(va_drm_prime_surface_desc.objects[i].fd); + } } - } - VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {}; - CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, - VA_EXPORT_SURFACE_READ_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS, - &va_drm_prime_surface_desc)); + #else + if (it->hip_interops[idx].hip_mapped_device_mem != nullptr) { + CHECK_HIP(hipFree(it->hip_interops[idx].hip_mapped_device_mem)); + if (it->hip_interops[idx].hip_ext_mem != nullptr) { + CHECK_HIP(hipDestroyExternalMemory(it->hip_interops[idx].hip_ext_mem)); + } + } + VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {}; + CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, + VA_EXPORT_SURFACE_READ_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS, + &va_drm_prime_surface_desc)); - hipExternalMemoryHandleDesc external_mem_handle_desc = {}; - hipExternalMemoryBufferDesc external_mem_buffer_desc = {}; - external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd; - external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd; - external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size; + hipExternalMemoryHandleDesc external_mem_handle_desc = {}; + hipExternalMemoryBufferDesc external_mem_buffer_desc = {}; + external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd; + external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd; + external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size; - CHECK_HIP(hipImportExternalMemory(&it->hip_interops[idx].hip_ext_mem, &external_mem_handle_desc)); - external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size; - CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&it->hip_interops[idx].hip_mapped_device_mem, it->hip_interops[idx].hip_ext_mem, &external_mem_buffer_desc)); + CHECK_HIP(hipImportExternalMemory(&it->hip_interops[idx].hip_ext_mem, &external_mem_handle_desc)); + external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size; + CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&it->hip_interops[idx].hip_mapped_device_mem, it->hip_interops[idx].hip_ext_mem, &external_mem_buffer_desc)); - uint32_t surface_format = va_drm_prime_surface_desc.fourcc; - // Workaround Mesa <= 24.3 returning non-standard VA fourcc - if (surface_format == VA_FOURCC('Y', 'U', 'Y', 'V')) - surface_format = VA_FOURCC_YUY2; + uint32_t surface_format = va_drm_prime_surface_desc.fourcc; + // Workaround Mesa <= 24.3 returning non-standard VA fourcc + if (surface_format == VA_FOURCC('Y', 'U', 'Y', 'V')) + surface_format = VA_FOURCC_YUY2; - it->hip_interops[idx].surface_format = surface_format; - it->hip_interops[idx].width = va_drm_prime_surface_desc.width; - it->hip_interops[idx].height = va_drm_prime_surface_desc.height; - it->hip_interops[idx].size = va_drm_prime_surface_desc.objects[0].size; - it->hip_interops[idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0]; - it->hip_interops[idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0]; - it->hip_interops[idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0]; - it->hip_interops[idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0]; - it->hip_interops[idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0]; - it->hip_interops[idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0]; - it->hip_interops[idx].num_layers = va_drm_prime_surface_desc.num_layers; + it->hip_interops[idx].surface_format = surface_format; + it->hip_interops[idx].width = va_drm_prime_surface_desc.width; + it->hip_interops[idx].height = va_drm_prime_surface_desc.height; + it->hip_interops[idx].size = va_drm_prime_surface_desc.objects[0].size; + it->hip_interops[idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0]; + it->hip_interops[idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0]; + it->hip_interops[idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0]; + it->hip_interops[idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0]; + it->hip_interops[idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0]; + it->hip_interops[idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0]; + it->hip_interops[idx].num_layers = va_drm_prime_surface_desc.num_layers; - for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) { - close(va_drm_prime_surface_desc.objects[i].fd); - } + for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) { + close(va_drm_prime_surface_desc.objects[i].fd); + } + #endif hip_interop = it->hip_interops[idx]; return ROCJPEG_STATUS_SUCCESS; }