diff --git a/projects/rocjpeg/.gitignore b/projects/rocjpeg/.gitignore new file mode 100644 index 0000000000..d04c127eba --- /dev/null +++ b/projects/rocjpeg/.gitignore @@ -0,0 +1,5 @@ +build/ +samples/*/build +.vscode/ +doxygen_output* +_doxygen/ \ No newline at end of file diff --git a/projects/rocjpeg/.jenkins/common.groovy b/projects/rocjpeg/.jenkins/common.groovy new file mode 100644 index 0000000000..8f8a7f5971 --- /dev/null +++ b/projects/rocjpeg/.jenkins/common.groovy @@ -0,0 +1,121 @@ +// This file is for internal AMD use. +// If you are interested in running your own Jenkins, please raise a github issue for assistance. + +def runCompileCommand(platform, project, jobName, boolean debug=false, boolean staticLibrary=false) { + project.paths.construct_build_prefix() + + String buildTypeArg = debug ? '-DCMAKE_BUILD_TYPE=Debug' : '-DCMAKE_BUILD_TYPE=Release' + String buildTypeDir = debug ? 'debug' : 'release' + + def command = """#!/usr/bin/env bash + set -ex + echo Build rocJPEG - ${buildTypeDir} + cd ${project.paths.project_build_prefix} + mkdir -p build/${buildTypeDir} && cd build/${buildTypeDir} + cmake ${buildTypeArg} ../.. + make -j\$(nproc) + sudo make install + sudo make package + ldd -v /opt/rocm/lib/librocjpeg.so.so + """ + + platform.runCommand(this, command) +} + +def runTestCommand (platform, project) { + + String libLocation = '' + + if (platform.jenkinsLabel.contains('rhel')) { + libLocation = ':/usr/local/lib' + } + else if (platform.jenkinsLabel.contains('sles')) { + libLocation = ':/usr/local/lib' + } + + def command = """#!/usr/bin/env bash + set -ex + export HOME=/home/jenkins + echo make test + cd ${project.paths.project_build_prefix}/build/release + LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib${libLocation} make test ARGS="-VV --rerun-failed --output-on-failure" + echo rocjpeg-sample - jpegDecode + mkdir -p rocjpeg-sample && cd rocjpeg-sample + cmake /opt/rocm/share/rocjpeg/samples/jpegDecode/ + make -j8 + LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib${libLocation} ./jpegdecode -i /opt/rocm/share/rocjpeg/images/ + echo rocjpeg-test package verification + cd ../ && mkdir -p rocjpeg-test && cd rocjpeg-test + cmake /opt/rocm/share/rocjpeg/test/ + LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib${libLocation} ctest -VV --rerun-failed --output-on-failure + echo rocjpeg conformance tests + """ + + platform.runCommand(this, command) +// Unit tests - TBD +} + +def runPackageCommand(platform, project) { + + def packageHelper = platform.makePackage(platform.jenkinsLabel, "${project.paths.project_build_prefix}/build/release") + + String packageType = '' + String packageInfo = '' + String packageDetail = '' + String osType = '' + String packageRunTime = '' + + if (platform.jenkinsLabel.contains('centos') || platform.jenkinsLabel.contains('rhel') || platform.jenkinsLabel.contains('sles')) { + packageType = 'rpm' + packageInfo = 'rpm -qlp' + packageDetail = 'rpm -qi' + packageRunTime = 'rocjpeg-*' + + if (platform.jenkinsLabel.contains('sles')) { + osType = 'sles' + } + else if (platform.jenkinsLabel.contains('rhel8')) { + osType = 'rhel8' + } + else if (platform.jenkinsLabel.contains('rhel9')) { + osType = 'rhel9' + } + } + else + { + packageType = 'deb' + packageInfo = 'dpkg -c' + packageDetail = 'dpkg -I' + packageRunTime = 'rocjpeg_*' + + if (platform.jenkinsLabel.contains('ubuntu20')) { + osType = 'ubuntu20' + } + else if (platform.jenkinsLabel.contains('ubuntu22')) { + osType = 'ubuntu22' + } + } + + def command = """#!/usr/bin/env bash + set -ex + export HOME=/home/jenkins + echo Make rocJPEG Package + cd ${project.paths.project_build_prefix}/build/release + sudo make package + mkdir -p package + mv rocjpeg-dev*.${packageType} package/${osType}-rocjpeg-dev.${packageType} + mv rocjpeg-test*.${packageType} package/${osType}-rocjpeg-test.${packageType} + mv ${packageRunTime}.${packageType} package/${osType}-rocjpeg.${packageType} + ${packageDetail} package/${osType}-rocjpeg-dev.${packageType} + ${packageDetail} package/${osType}-rocjpeg-test.${packageType} + ${packageDetail} package/${osType}-rocjpeg.${packageType} + ${packageInfo} package/${osType}-rocjpeg-dev.${packageType} + ${packageInfo} package/${osType}-rocjpeg-test.${packageType} + ${packageInfo} package/${osType}-rocjpeg.${packageType} + """ + + platform.runCommand(this, command) + platform.archiveArtifacts(this, packageHelper[1]) +} + +return this diff --git a/projects/rocjpeg/.jenkins/precheckin.groovy b/projects/rocjpeg/.jenkins/precheckin.groovy new file mode 100644 index 0000000000..be7dc0a2a1 --- /dev/null +++ b/projects/rocjpeg/.jenkins/precheckin.groovy @@ -0,0 +1,78 @@ +#!/usr/bin/env groovy +@Library('rocJenkins@pong') _ +import com.amd.project.* +import com.amd.docker.* + +def runCI = +{ + nodeDetails, jobName-> + + def prj = new rocProject('rocJPEG', 'PreCheckin') + + def nodes = new dockerNodes(nodeDetails, jobName, prj) + + def commonGroovy + + boolean formatCheck = false + + def compileCommand = + { + platform, project-> + + commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" + commonGroovy.runCompileCommand(platform, project, jobName) + } + + + def testCommand = + { + platform, project-> + + commonGroovy.runTestCommand(platform, project) + } + + def packageCommand = + { + platform, project-> + + commonGroovy.runPackageCommand(platform, project) + } + + buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) +} + +ci: { + String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) + + def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])]] + propertyList = auxiliary.appendPropertyList(propertyList) + + def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu20:['gfx90a'], ubuntu22:['gfx1101'], sles15sp1:['gfx908'], rhel8:['gfx1030'], rhel9:['gfx942']])] + jobNameList = auxiliary.appendJobNameList(jobNameList) + + propertyList.each + { + jobName, property-> + if (urlJobName == jobName) { + properties(auxiliary.addCommonProperties(property)) + } + } + + jobNameList.each + { + jobName, nodeDetails-> + if (urlJobName == jobName) { + stage(jobName) { + runCI(nodeDetails, jobName) + } + } + } + + // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 + if(!jobNameList.keySet().contains(urlJobName)) { + properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) + stage(urlJobName) { + runCI([ubuntu22:['gfx942']], urlJobName) + } + } +} diff --git a/projects/rocjpeg/CMakeLists.txt b/projects/rocjpeg/CMakeLists.txt new file mode 100644 index 0000000000..47ae154c1b --- /dev/null +++ b/projects/rocjpeg/CMakeLists.txt @@ -0,0 +1,319 @@ +# ############################################################################## +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# ############################################################################## + +cmake_minimum_required (VERSION 3.5) + +set(VERSION "0.1.0") +set(CMAKE_CXX_STANDARD 17) + +# Set Project Version and Language +project(rocjpeg VERSION ${VERSION} LANGUAGES CXX) + +set(CMAKE_INSTALL_LIBDIR "lib" CACHE STRING "Library install directory") +set(CMAKE_INSTALL_INCLUDEDIR "include" CACHE STRING "Include install directory") +include(GNUInstallDirs) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}) + +find_program(MAKE_NSIS_EXE makensis) +find_program(RPMBUILD_EXE rpmbuild) +find_program(DPKG_EXE dpkg) + +# ROCM Path +if(DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path") +elseif(ROCM_PATH) + message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}") +else() + set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path") +endif() +# avoid setting the default installation path to /usr/local +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX ${ROCM_PATH} CACHE PATH "rocJPEG default installation path" FORCE) +endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + +set(DEFAULT_BUILD_TYPE "Release") +set(ENHANCED_MESSAGE ON) + +if(ENHANCED_MESSAGE) + string(ASCII 27 Esc) + set(ColourReset "${Esc}[m") + set(Red "${Esc}[31m") + set(Green "${Esc}[32m") + set(Yellow "${Esc}[33m") + set(Blue "${Esc}[34m") + set(BoldBlue "${Esc}[1;34m") + set(Magenta "${Esc}[35m") + set(Cyan "${Esc}[36m") + set(White "${Esc}[37m") +endif() +message("-- ${BoldBlue}rocJPEG Version -- ${VERSION}${ColourReset}") +message("-- ${BoldBlue}rocJPEG Install Path -- ${CMAKE_INSTALL_PREFIX}${ColourReset}") + +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) +list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip) +set(CMAKE_CXX_COMPILER ${ROCM_PATH}/llvm/bin/clang++) + +# rocJPEG Default Options +option(BUILD_WITH_AMD_ADVANCE "Build rocJPEG for advanced AMD GPU Architecture" OFF) + +# rocJPEG Build Type +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING "rocJPEG Default Build Type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release") +endif() +if(CMAKE_BUILD_TYPE MATCHES Debug) + # -O0 -- Don't Optimize output file + # -g -- generate debugging information, generate debugging information, dwarf-4 for making valgrind work + # -Og -- Optimize for debugging experience rather than speed or size + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -gdwarf-4 -Og") +else() + # -O3 -- Optimize output file + # -DNDEBUG -- turn off asserts + # -fPIC -- Generate position-independent code if possible + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -fPIC") +endif() +message("-- ${BoldBlue}rocJPEG Build Type -- ${CMAKE_BUILD_TYPE}${ColourReset}") +set(DEFAULT_AMDGPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102") +set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") + +find_package(HIP QUIET) +find_package(Libva QUIET) + +if(HIP_FOUND AND Libva_FOUND) + + # HIP + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::device) + # LibVA + include_directories(${LIBVA_INCLUDE_DIR}) + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${LIBVA_LIBRARY}) + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${LIBVA_DRM_LIBRARY}) + + # local include files + include_directories(api src) + # source files + file(GLOB_RECURSE SOURCES "./src/*.cpp") + # rocJPEG.so + add_library(${PROJECT_NAME} SHARED ${SOURCES}) + + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17") + target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST}) + + set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) + set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE CXX) + set_target_properties(${PROJECT_NAME} PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}) + + # install rocJPEG libs -- {ROCM_PATH}/lib + install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime NAMELINK_SKIP) + install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT dev NAMELINK_ONLY) + install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan) + # install rocJPEG include files -- {ROCM_PATH}/include/rocJPEG + install(FILES api/rocjpeg.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} COMPONENT dev) + # install rocJPEG samples -- {ROCM_PATH}/share/rocJPEG + install(DIRECTORY cmake DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME} COMPONENT dev) + + install(DIRECTORY samples/jpegDecode DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples COMPONENT dev) + install(DIRECTORY data/images DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/ COMPONENT dev) + # install license information - {ROCM_PATH}/share/doc/rocJPEG + set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") + install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR} COMPONENT runtime) + install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-asan COMPONENT asan) + install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-dev COMPONENT dev) + install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-test COMPONENT test) + # install test cmake + install(FILES test/CMakeLists.txt DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/test COMPONENT test) + + message("-- ${White}AMD ROCm rocJPEG -- CMAKE_CXX_FLAGS:${CMAKE_CXX_FLAGS}${ColourReset}") + message("-- ${White}AMD ROCm rocJPEG -- Link Libraries: ${LINK_LIBRARY_LIST}${ColourReset}") + + # make test with CTest + enable_testing() + include(CTest) + add_subdirectory(samples) + + # set package information + set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR}) + set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR}) + set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH}) + set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") + set(CPACK_PACKAGE_LICENSE "MIT") + set(CPACK_PACKAGE_CONTACT "rocJPEG Support ") + set(CPACK_PACKAGE_VENDOR "AMD ROCm") + set(CPACK_PACKAGE_GROUP "Development/Tools") + set(CPACK_PACKAGE_HOMEPAGE "https://github.com/ROCm/rocJPEG") + + if(ENABLE_ASAN_PACKAGING) + set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "AMD rocJPEG address sanitizer libraries") + set(CPACK_RPM_PACKAGE_SUMMARY "AMD rocJPEG address sanitizer libraries") + else() + set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs") + set(CPACK_RPM_PACKAGE_SUMMARY "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs") + endif() + + if(DEFINED ENV{ROCM_LIBPATCH_VERSION}) + set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}") + endif() + + # Find Ubuntu 22.04 - Add libstdc++-12-dev package deps for Dev Package + file(READ "/etc/os-release" OS_RELEASE) + string(REGEX MATCH "22.04" UBUNTU_22_FOUND ${OS_RELEASE}) + + # Set the dependent packages + set(rocJPEG_DEBIAN_PACKAGE_LIST "rocm-hip-runtime, libdrm-amdgpu1, libva-amdgpu-drm2, mesa-amdgpu-va-drivers") + set(rocJPEG_RPM_PACKAGE_LIST "rocm-hip-runtime, libdrm-amdgpu, libva-amdgpu, mesa-amdgpu-dri-drivers") + set(rocJPEG_DEBIAN_DEV_PACKAGE_LIST "rocm-hip-runtime-dev, libva-amdgpu-dev, pkg-config") + if(UBUNTU_22_FOUND) + set(rocJPEG_DEBIAN_DEV_PACKAGE_LIST "${rocJPEG_DEBIAN_DEV_PACKAGE_LIST}, libstdc++-12-dev") + endif() + set(rocJPEG_RPM_DEV_PACKAGE_LIST "rocm-hip-runtime-devel, libva-amdgpu-devel, pkg-config") + + # '%{?dist}' breaks manual builds on debian systems due to empty Provides + execute_process( + COMMAND rpm --eval %{?dist} + RESULT_VARIABLE PROC_RESULT + OUTPUT_VARIABLE EVAL_RESULT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "") + string(APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}") + endif() + + # set package + set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) + set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${CPACK_PACKAGE_HOMEPAGE}) + set(CPACK_RPM_PACKAGE_URL ${CPACK_PACKAGE_HOMEPAGE}) + set(CPACK_RPM_PACKAGE_AUTOREQPROV "no") + # Debian package + set(CPACK_DEB_COMPONENT_INSTALL ON) + set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}") + set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "rocm-core, ${rocJPEG_DEBIAN_PACKAGE_LIST}") + set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${PROJECT_NAME}-dev") + set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS + "rocm-core, ${PROJECT_NAME}, ${rocJPEG_DEBIAN_DEV_PACKAGE_LIST}") + # Debian package - specific variable for ASAN + set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" ) + set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "rocm-core-asan, ${rocJPEG_DEBIAN_PACKAGE_LIST}" ) + # Debian package - Test + set(CPACK_DEBIAN_TEST_PACKAGE_NAME "${PROJECT_NAME}-test" ) + set(CPACK_DEBIAN_TEST_PACKAGE_DEPENDS "rocm-core, ${CPACK_DEBIAN_DEV_PACKAGE_NAME}" ) + # RPM package + set(CPACK_RPM_COMPONENT_INSTALL ON) + set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}") + set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core, ${rocJPEG_RPM_PACKAGE_LIST}") + set(CPACK_RPM_RUNTIME_PACKAGE_PROVIDES "${PROJECT_NAME}") + set(CPACK_RPM_RUNTIME_PACKAGE_OBSOLETES "${PROJECT_NAME}") + set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel") + set(CPACK_RPM_DEV_PACKAGE_REQUIRES "rocm-core, ${PROJECT_NAME}, ${rocJPEG_RPM_DEV_PACKAGE_LIST}") + set(CPACK_RPM_DEV_PACKAGE_PROVIDES "${PROJECT_NAME}-devel") + set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "${PROJECT_NAME}-devel") + set(CPACK_RPM_PACKAGE_LICENSE "MIT" ) + # RPM package specific variable for ASAN + set(CPACK_RPM_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" ) + set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan, ${rocJPEG_RPM_PACKAGE_LIST}" ) + set(CPACK_RPM_ASAN_PACKAGE_PROVIDES "${PROJECT_NAME}-asan") + set(CPACK_RPM_ASAN_PACKAGE_OBSOLETES "${PROJECT_NAME}-asan") + # RPM package specific variable for Test + set(CPACK_RPM_TEST_PACKAGE_NAME "${PROJECT_NAME}-test" ) + set(CPACK_RPM_TEST_PACKAGE_REQUIRES "rocm-core, ${CPACK_RPM_DEV_PACKAGE_NAME}" ) + set(CPACK_RPM_TEST_PACKAGE_PROVIDES "${PROJECT_NAME}-test") + set(CPACK_RPM_TEST_PACKAGE_OBSOLETES "${PROJECT_NAME}-test") + + if(NOT ROCM_DEP_ROCMCORE) + string(REGEX REPLACE ",? ?rocm-core," "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? ?rocm-core-asan," "" CPACK_RPM_ASAN_PACKAGE_REQUIRES ${CPACK_RPM_ASAN_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? ?rocm-core," "" CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_DEV_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? ?rocm-core," "" CPACK_RPM_TEST_PACKAGE_REQUIRES ${CPACK_RPM_TEST_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? ?rocm-core," "" CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS}) + string(REGEX REPLACE ",? ?rocm-core-asan," "" CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS}) + string(REGEX REPLACE ",? ?rocm-core," "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS}) + string(REGEX REPLACE ",? ?rocm-core," "" CPACK_DEBIAN_TEST_PACKAGE_DEPENDS ${CPACK_DEBIAN_TEST_PACKAGE_DEPENDS}) + endif() + + if(ENABLE_ASAN_PACKAGING) + # ASAN Package requires asan component with only libraries and license file + set(CPACK_COMPONENTS_ALL asan) + else() + set(CPACK_COMPONENTS_ALL runtime dev test) + endif() + + set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT") + set(CPACK_RPM_FILE_NAME "RPM-DEFAULT") + set(CPACK_DEBIAN_PACKAGE_RELEASE "local") + set(CPACK_RPM_PACKAGE_RELEASE "local") + + if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) + set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) + endif() + if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE}) + set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE}) + endif() + + set(CPACK_GENERATOR "TGZ;ZIP") + if(EXISTS ${MAKE_NSIS_EXE}) + list(APPEND CPACK_GENERATOR "NSIS") + endif() + if(EXISTS ${RPMBUILD_EXE}) + list(APPEND CPACK_GENERATOR "RPM") + message("-- ${White}AMD ROCm rocJPEG RunTime Package -- ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES}${ColourReset}") + message("-- ${White}AMD ROCm rocJPEG Dev Package -- ${CPACK_RPM_DEV_PACKAGE_REQUIRES}${ColourReset}") + endif() + if(EXISTS ${DPKG_EXE}) + list(APPEND CPACK_GENERATOR "DEB") + message("-- ${White}AMD ROCm rocJPEG RunTime Package -- ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS}${ColourReset}") + message("-- ${White}AMD ROCm rocJPEG Dev Package -- ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS}${ColourReset}") + endif() + + include(CPack) + + cpack_add_component(runtime + DISPLAY_NAME "rocJPEG Runtime Package" + DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \ +rocJPEG runtime package provides rocJPEG library and license.txt") + + cpack_add_component(dev + DISPLAY_NAME "rocJPEG Develop Package" + DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \ +rocJPEG develop package provides rocJPEG library, header files, samples, and license.txt") + + cpack_add_component(asan + DISPLAY_NAME "rocJPEG ASAN Package" + DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \ +rocJPEG ASAN package provides rocJPEG ASAN libraries") + + cpack_add_component(test + DISPLAY_NAME "rocJPEG Test Package" + DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \ +rocJPEG Test package provides rocJPEG Test Components") + +else() + message("-- ${Red}AMD ROCm rocJPEG -- unmet dependencies${ColourReset}") + if(NOT HIP_FOUND) + message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install rocm-hip-runtime-dev!") + endif() + if(NOT Libva_FOUND) + message(FATAL_ERROR "-- ERROR!: libva Not Found - please install libva-amdgpu-dev(DBEAIN)/libva-amdgpu-devel(RPM) package!") + endif() +endif() \ No newline at end of file diff --git a/projects/rocjpeg/LICENSE b/projects/rocjpeg/LICENSE index dfe2c51bc1..0826de90bd 100644 --- a/projects/rocjpeg/LICENSE +++ b/projects/rocjpeg/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 ROCm Software Platform +Copyright (c) 2024 ROCm Software Platform Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/projects/rocjpeg/README.md b/projects/rocjpeg/README.md index 33e2ec3c40..af26df2728 100644 --- a/projects/rocjpeg/README.md +++ b/projects/rocjpeg/README.md @@ -1,2 +1,202 @@ -# rocJPEG -rocJPEG is a high-performance jpeg decode SDK for decoding images using a hardware-accelerated jpeg decoder on AMD’s GPUs. +[![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) + + +

+ +rocJPEG is a high performance JPEG decode SDK for AMD GPUs. Using the rocJPEG API, you can access the JPEG decoding features available on your GPU. + +## Supported JPEG chroma subsampling + +* YUV 4:4:4 +* YUV 4:2:2 +* YUV 4:2:0 +* YUV 4:0:0 + +## Prerequisites + +* Linux distribution + * Ubuntu - `20.04` / `22.04` + * RHEL - `8` / `9` + * SLES - `15-SP5` + +* [ROCm supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) +> [!IMPORTANT] +> `gfx908` or higher GPU required + +* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html): Required usecase - rocm +> [!IMPORTANT] +> `sudo amdgpu-install --usecase=rocm` + +* AMD Multimedia packages + ```shell + sudo apt install libva-amdgpu-dev libdrm-amdgpu1 mesa-amdgpu-va-drivers + ``` +> [!NOTE] +> RPM Packages for `RHEL`/`SLES` - `libva-amdgpu-devel libdrm-amdgpu mesa-amdgpu-dri-drivers` + +* CMake `3.5` or later + + ```shell + sudo apt install cmake + ``` + +* [pkg-config](https://en.wikipedia.org/wiki/Pkg-config) + + ```shell + sudo apt install pkg-config + ``` + +> [!IMPORTANT] +> * If using Ubuntu 22.04, you must install `libstdc++-12-dev` +> +> ```shell +> sudo apt install libstdc++-12-dev +> ``` + +>[!NOTE] +> * All package installs are shown with the `apt` package manager. Use the appropriate package manager for your operating system. +> * To install rocDecode with minimum requirements, follow the [quick-start](./docs/install/quick-start.rst) instructions + + +#### Prerequisites setup script for Linux +For your convenience, we provide the setup script, +[rocJPEG-setup.py](rocJPEG-setup.py) which installs all required dependencies. Run this script only once. + +**Usage:** + +```shell + python rocJPEG-setup.py --rocm_path [ ROCm Installation Path - optional (default:/opt/rocm)] + --developer [ Setup Developer Options - optional (default:ON) [options:ON/OFF]] +``` + +**NOTE:** This script only needs to be executed once. + +## Installation instructions + +The installation process uses the following steps: + +* [ROCm-supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) install verification + +* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=rocm` + +* Use either [Package install](#package-install) or [Source install](#source-install) as described below. + +### Package install + +Install rocJPEG runtime, development, and test packages. + +* Runtime package - `rocjpeg` only provides the rocjpeg library `librocjpeg.so` +* Development package - `rocjpeg-dev`/`rocjpeg-devel` provides the library, header files, and samples +* Test package - `rocjpeg-test` provides CTest to verify installation + +#### Ubuntu + +```shell +sudo apt install rocjpeg rocjpeg-dev rocjpeg-test +``` + +#### RHEL + +```shell +sudo yum install rocjpeg rocjpeg-devel rocjpeg-test +``` + +#### SLES + +```shell +sudo zypper install rocjpeg rocjpeg-devel rocjpeg-test +``` +>[!NOTE] +> Package install auto installs all dependencies. + +### Source install + +```shell +git clone https://github.com/ROCm/rocJPEG.git +cd rocJPEG +mkdir build && cd build +cmake ../ +make -j8 +sudo make install +``` + +#### Run tests + + ```shell + make test + ``` + + **NOTE:** run tests with verbose option `make test ARGS="-VV"` + +#### Make package + + ```shell + sudo make package + ``` + +## Verify installation + +The installer will copy + +* Libraries into `/opt/rocm/lib` +* Header files into `/opt/rocm/include/rocjpeg` +* Samples folder into `/opt/rocm/share/rocjpeg` +* Documents folder into `/opt/rocm/share/doc/rocjpeg` + +### Using sample application + +To verify your installation using a sample application, run: + +```shell +mkdir rocjpeg-sample && cd rocjpeg-sample +cmake /opt/rocm/share/rocjpeg/samples/jpegDecode/ +make -j8 +./videodecode -i /opt/rocm/share/rocjpeg/image/mug_420.jpg +``` + +### Using test package + +To verify your installation using the `rocjpeg-test` package, run: + +```shell +mkdir rocjpeg-test && cd rocjpeg-test +cmake /opt/rocm/share/rocjpeg/test/ +ctest -VV +``` + +## Samples + +The tool provides a few samples to decode JPEG images [here](samples/). Please refer to the individual folders to build and run the samples. +You can access samples to decode your images in our +[GitHub repository](https://github.com/ROCm/rocJPEG/tree/develop/samples). Refer to the +individual folders to build and run the samples. + +## Docker + +You can find rocDecode Docker containers in our +[GitHub repository](https://github.com/ROCm/rocJPEG/tree/develop/docker). + +## Documentation + +Run the following code to build our documentation locally. + +```shell +cd docs +pip3 install -r sphinx/requirements.txt +python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html +``` + +For more information on documentation builds, refer to the +[Building documentation](https://rocm.docs.amd.com/en/latest/contribute/building.html) +page. + +## Tested configurations + +* Linux + * Ubuntu - `20.04` / `22.04` + * RHEL - `8` / `9` + * SLES - `15-SP5` +* ROCm: + * rocm-core - `6.1.0.60100-62` + * amdgpu-core - `1:6.1.60100-1741643` +* rocJPEG Setup Script - `V1.0` \ No newline at end of file diff --git a/projects/rocjpeg/api/rocjpeg.h b/projects/rocjpeg/api/rocjpeg.h new file mode 100644 index 0000000000..a7de25845e --- /dev/null +++ b/projects/rocjpeg/api/rocjpeg.h @@ -0,0 +1,233 @@ +/* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef ROC_JPEG_H +#define ROC_JPEG_H + +#define ROCJPEGAPI + +#pragma once +#include "hip/hip_runtime.h" + +/*****************************************************************************************************************/ +//! \file rocjpeg.h +//! \brief The AMD rocJPEG Library. +//! \defgroup group_amd_rocjepg rocJPEG: AMD ROCm JPEG Decode API +//! \brief rocJPEG API is a toolkit to decode JPEG images using a hardware-accelerated JPEG decoder on AMD’s GPUs. +/******************************************************************************************************************/ + +#if defined(__cplusplus) +extern "C" { +#endif // __cplusplus + +//! \def +//! \ingroup group_amd_rocjpeg +//! Maximum number of channels rocJPEG supports +#define ROCJPEG_MAX_COMPONENT 4 + +/*****************************************************/ +//! \enum RocJpegStatus +//! \ingroup group_amd_rocjpeg +//! rocJPEG return status enums +//! These enums are used in all API calls to rocJPEG +/*****************************************************/ +typedef enum { + ROCJPEG_STATUS_SUCCESS = 0, + ROCJPEG_STATUS_NOT_INITIALIZED = -1, + ROCJPEG_STATUS_INVALID_PARAMETER = -2, + ROCJPEG_STATUS_BAD_JPEG = -3, + ROCJPEG_STATUS_JPEG_NOT_SUPPORTED = -4, + ROCJPEG_STATUS_OUTOF_MEMORY = -5, + ROCJPEG_STATUS_EXECUTION_FAILED = -6, + ROCJPEG_STATUS_ARCH_MISMATCH = -7, + ROCJPEG_STATUS_INTERNAL_ERROR = -8, + ROCJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED = -9, + ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED = -10, + ROCJPEG_STATUS_RUNTIME_ERROR = -11, + ROCJPEG_STATUS_NOT_IMPLEMENTED = -12, +} RocJpegStatus; + +/*****************************************************/ +//! \enum RocJpegChromaSubsampling +//! \ingroup group_amd_rocjpeg +//! RocJpegChromaSubsampling enum identifies image chroma subsampling values stored inside JPEG input stream +/*****************************************************/ +typedef enum { + ROCJPEG_CSS_444 = 0, + ROCJPEG_CSS_440 = 1, + ROCJPEG_CSS_422 = 2, + ROCJPEG_CSS_420 = 3, + ROCJPEG_CSS_411 = 4, + ROCJPEG_CSS_400 = 5, + ROCJPEG_CSS_UNKNOWN = -1 +} RocJpegChromaSubsampling; + +/*****************************************************/ +//! \struct RocJpegImage +//! \ingroup group_amd_rocjpeg +//! this structure is jpeg image descriptor used to return the decoded output image. User must allocate device +//! memories for each channel for this structure and pass it to the decoder API. +//! the decoder APIs then copies the decode image to this struct based on the requested output format (see RocJpegOutputFormat). +/*****************************************************/ +typedef struct { + uint8_t* channel[ROCJPEG_MAX_COMPONENT]; + uint32_t pitch[ROCJPEG_MAX_COMPONENT]; // pitch of each channel +} RocJpegImage; + +/*****************************************************/ +//! \enum RocJpegOutputFormat +//! \ingroup group_amd_rocjpeg +//! RocJpegOutputFormat enum specifies what type of output user wants for image decoding +/*****************************************************/ +typedef enum { + // return native unchanged decoded YUV image from the VCN JPEG deocder. + // For ROCJPEG_CSS_444 write Y, U, and V to first, second, and third channels of RocJpegImage + // For ROCJPEG_CSS_422 write YUYV (packed) to first channel of RocJpegImage + // For ROCJPEG_CSS_420 write Y to first channel and UV (interleaved) to second channel of RocJpegImage + // For ROCJPEG_CSS_400 write Y to first channel of RocJpegImage + ROCJPEG_OUTPUT_NATIVE = 0, + // extract Y, U, and V channels from the decoded YUV image from the VCN JPEG deocder and write into first, second, and thrid channel of RocJpegImage. + // For ROCJPEG_CSS_400 write Y to first channel of RocJpegImage + ROCJPEG_OUTPUT_YUV_PLANAR = 1, + // return luma component (Y) and write to first channel of RocJpegImage + ROCJPEG_OUTPUT_Y = 2, + // convert to interleaved RGB using HIP kernels and write to first channel of RocJpegImage + ROCJPEG_OUTPUT_RGB = 3, + // maximum allowed value + ROCJPEG_OUTPUT_FORMAT_MAX = 4 +} RocJpegOutputFormat; + +/*****************************************************/ +//! \enum RocJpegBackend +//! \ingroup group_amd_rocjpeg +//! RocJpegBackend enum specifies what type of backend to use for JPEG decoding +//! ROCJPEG_BACKEND_HARDWARE : supports baseline JPEG bitstream using VCN hardware-accelarted JPEG decoder +//! ROCJPEG_BACKEND_HYBRID : uses CPU for Huffman decode and GPU for IDCT using HIP kernesl. This mode doesn't use VCN JPEG hardware decoder +/*****************************************************/ +typedef enum { + ROCJPEG_BACKEND_HARDWARE = 0, + ROCJPEG_BACKEND_HYBRID = 1 +} RocJpegBackend; + +/*****************************************************/ +// Opaque library handle identifier. +//struct RocJpegDecoderHandle; +//typedef struct RocJpegDecoderHandle* RocJpegHandle; +//! Used in subsequent API calls after rocJpegCreate +/*****************************************************/ +typedef void *RocJpegHandle; + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle); +//! \ingroup group_amd_rocjpeg +//! Create the decoder object based on backend and device_id. A handle to the created decoder is returned +//! Initalization of rocjpeg handle. This handle is used for all consecutive calls +//! IN backend : Backend to use. +//! IN device_id : the GPU device id for which a decoder should be created. For example, use 0 for the first GPU device, +//! and 1 for the second GPU device on the system, etc. +//! IN/OUT handle : rocjpeg handle, jpeg decoder instance to use for +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle); + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle); +//! \ingroup group_amd_rocjpeg +//! Release the decoder object and resources. +//! IN/OUT handle: instance handle to release +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle); + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights); +//! \ingroup group_amd_rocjpeg +//! Retrieve the image info, including channel, width and height of each component, and chroma subsampling. +//! If less than ROCJPEG_MAX_COMPONENT channels are encoded, then zeros would be set to absent channels information +//! If the image is 3-channel, all three groups are valid. +//! IN handle : rocJpeg handle +//! IN data : Pointer to the buffer containing the jpeg stream data to be decoded. +//! IN length : Length of the jpeg image buffer. +//! OUT num_component : Number of channels in the decoded output image +//! OUT subsampling : Chroma subsampling used in this JPEG, see RocJpegChromaSubsampling. +//! OUT widths : pointer to ROCJPEG_MAX_COMPONENT of uint32_t, returns width of each channel. +//! OUT heights : pointer to ROCJPEG_MAX_COMPONENT of uint32_t, returns height of each channel. +//! \return ROCJPEG_STATUS_SUCCESS if successful +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights); + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination, hipStream_t stream); +//! \ingroup group_amd_rocjpeg +//! Decodes single image based on the backend used to create the rocJpeg handle in rocJpegCreate API. +//! Destination buffers should be large enough to be able to store output of specified format. These buffers should be pre-allocted by the user in the device memories. +//! For each color plane (channel) sizes could be retrieved for image using rocJpegGetImageInfo API +//! and minimum required memory buffer for each plane is plane_height * plane_pitch where plane_pitch >= plane_width for +//! planar output formats and plane_pitch >= plane_width * num_components for interleaved output format. +//! IN handle : rocJpeg handle +//! IN data : Pointer to the buffer containing the jpeg stream to be decoded. +//! IN length : Length of the jpeg image buffer. +//! IN output_format : Output data format. See RocJpegOutputFormat for description +//! IN/OUT destination : Pointer to structure with information about output buffers. See RocJpegImage description. +//! \return ROCJPEG_STATUS_SUCCESS if successful +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination); + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format); +//! \ingroup group_amd_rocjpeg +//! Resets and initializes batch decoder for working on the batches of specified size +//! Should be called once for decoding batches of this specific size, also use to reset failed batches +//! IN/OUT handle : Library handle +//! IN batch_size : Size of the batch +//! IN max_cpu_threads : Maximum number of CPU threads that will be processing this batch +//! IN output_format : Output data format. Will be the same for every image in batch +//! \return ROCJPEG_STATUS_SUCCESS if successful +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format); + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations, hipStream_t stream); +//! \ingroup group_amd_rocjpeg +//! Decodes batch of images. Output buffers should be large enough to be able to store +//! outputs of specified format, see single image decoding description for details. Call to +//! rocjpegDecodeBatchedInitialize() is required prior to this call, batch size is expected to be the same as +//! parameter to this batch initialization function. +//! +//! IN/OUT handle : Library handle +//! INT/OUT jpeg_handle : Decoded jpeg image state handle +//! IN data : Array of size batch_size of pointers to the input buffers containing the jpeg images to be decoded. +//! IN lengths : Array of size batch_size with lengths of the jpeg images' buffers in the batch. +//! IN/OUT destinations : Array of size batch_size with pointers to structure with information about output buffers, +//! \return ROCJPEG_STATUS_SUCCESS if successful +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations); + +/*****************************************************************************************************/ +//! \fn extern const char* ROCDECAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status); +//! \ingroup group_amd_rocjpeg +//! Return name of the specified error code in text form. +/*****************************************************************************************************/ +extern const char* ROCJPEGAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status); + +#if defined(__cplusplus) + } +#endif + +#endif // ROC_JPEG_H diff --git a/projects/rocjpeg/cmake/FindLibva.cmake b/projects/rocjpeg/cmake/FindLibva.cmake new file mode 100644 index 0000000000..08a2214b24 --- /dev/null +++ b/projects/rocjpeg/cmake/FindLibva.cmake @@ -0,0 +1,49 @@ +################################################################################ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +################################################################################ + +find_library(LIBVA_LIBRARY NAMES va HINTS /opt/amdgpu/lib/x86_64-linux-gnu /opt/amdgpu/lib64 NO_DEFAULT_PATH) +find_library(LIBVA_DRM_LIBRARY NAMES va-drm HINTS /opt/amdgpu/lib/x86_64-linux-gnu /opt/amdgpu/lib64 NO_DEFAULT_PATH) +find_path(LIBVA_INCLUDE_DIR NAMES va/va.h PATHS /opt/amdgpu/include NO_DEFAULT_PATH) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Libva DEFAULT_MSG LIBVA_INCLUDE_DIR LIBVA_LIBRARY) +mark_as_advanced(LIBVA_INCLUDE_DIR LIBVA_LIBRARY LIBVA_DRM_LIBRARY) + +if(Libva_FOUND) + if(NOT TARGET Libva::va) + add_library(Libva::va UNKNOWN IMPORTED) + set_target_properties(Libva::va PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LIBVA_INCLUDE_DIR}" + IMPORTED_LOCATION "${LIBVA_LIBRARY}") + endif() + if(NOT TARGET Libva::va_drm) + add_library(Libva::va_drm UNKNOWN IMPORTED) + set_target_properties(Libva::va_drm PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LIBVA_INCLUDE_DIR}" + IMPORTED_LOCATION "${LIBVA_DRM_LIBRARY}") + endif() + message("-- ${White}Using Libva -- \n\tLibraries:${LIBVA_LIBRARY} \n\tIncludes:${LIBVA_INCLUDE_DIR}${ColourReset}") + message("-- ${White}Using Libva-drm -- \n\tLibraries:${LIBVA_DRM_LIBRARY}${ColourReset}") +else() + if(Libva_FIND_REQUIRED) + message(FATAL_ERROR "{Red}FindLibva -- Libva NOT FOUND${ColourReset}") + endif() +endif() \ No newline at end of file diff --git a/projects/rocjpeg/data/images/mug_400.jpg b/projects/rocjpeg/data/images/mug_400.jpg new file mode 100644 index 0000000000..5c71e7b5b5 Binary files /dev/null and b/projects/rocjpeg/data/images/mug_400.jpg differ diff --git a/projects/rocjpeg/data/images/mug_420.jpg b/projects/rocjpeg/data/images/mug_420.jpg new file mode 100644 index 0000000000..bb9fddc945 Binary files /dev/null and b/projects/rocjpeg/data/images/mug_420.jpg differ diff --git a/projects/rocjpeg/data/images/mug_422.jpg b/projects/rocjpeg/data/images/mug_422.jpg new file mode 100644 index 0000000000..bf899d2e1a Binary files /dev/null and b/projects/rocjpeg/data/images/mug_422.jpg differ diff --git a/projects/rocjpeg/docker/README.md b/projects/rocjpeg/docker/README.md new file mode 100644 index 0000000000..0d5cbe54a1 --- /dev/null +++ b/projects/rocjpeg/docker/README.md @@ -0,0 +1,13 @@ +# rocJEPG Docker + +## Build - dockerfiles + +``` +sudo docker build -f {DOCKER_FILE_NAME}.dockerfile -t {DOCKER_IMAGE_NAME} . +``` + +## Run - docker + +``` +sudo docker run -it --device=/dev/kfd --device=/dev/dri --cap-add=SYS_RAWIO --device=/dev/mem --group-add video --network host --privileged {DOCKER_IMAGE_NAME} +``` \ No newline at end of file diff --git a/projects/rocjpeg/docker/rocJPEG-on-ubuntu20.dockerfile b/projects/rocjpeg/docker/rocJPEG-on-ubuntu20.dockerfile new file mode 100644 index 0000000000..5c1aed2c24 --- /dev/null +++ b/projects/rocjpeg/docker/rocJPEG-on-ubuntu20.dockerfile @@ -0,0 +1,17 @@ +FROM ubuntu:20.04 + +# install base dependencies +RUN apt-get update -y +#RUN apt-get dist-upgrade -y +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git apt-utils sudo vainfo dialog + +# install ROCm +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget keyboard-configuration && \ + wget https://repo.radeon.com/amdgpu-install/6.1/ubuntu/focal/amdgpu-install_6.1.60100-1_all.deb && \ + sudo apt-get install ./amdgpu-install_6.1.60100-1_all.deb && \ + sudo amdgpu-install -y --usecase=rocm + +WORKDIR /workspace + +# install rocJPEG package +RUN DEBIAN_FRONTEND=noninteractive sudo apt install -y rocjpeg rocjpeg-dev rocjpeg-test \ No newline at end of file diff --git a/projects/rocjpeg/docker/rocJPEG-on-ubuntu22.dockerfile b/projects/rocjpeg/docker/rocJPEG-on-ubuntu22.dockerfile new file mode 100644 index 0000000000..fd2f4124ae --- /dev/null +++ b/projects/rocjpeg/docker/rocJPEG-on-ubuntu22.dockerfile @@ -0,0 +1,17 @@ +FROM ubuntu:22.04 + +# install base dependencies +RUN apt-get update -y +#RUN apt-get dist-upgrade -y +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git apt-utils sudo vainfo dialog libstdc++-12-dev + +# install ROCm +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget keyboard-configuration && \ + wget https://repo.radeon.com/amdgpu-install/6.1/ubuntu/jammy/amdgpu-install_6.1.60100-1_all.deb && \ + sudo apt-get install ./amdgpu-install_6.1.60100-1_all.deb && \ + sudo amdgpu-install -y --usecase=rocm + +WORKDIR /workspace + +# install rocJPEG package +RUN DEBIAN_FRONTEND=noninteractive sudo apt install -y rocjpeg rocjpeg-dev rocjpeg-test \ No newline at end of file diff --git a/projects/rocjpeg/docs/data/AMD-Logo-Doxygen.png b/projects/rocjpeg/docs/data/AMD-Logo-Doxygen.png new file mode 100644 index 0000000000..16a2e61cee Binary files /dev/null and b/projects/rocjpeg/docs/data/AMD-Logo-Doxygen.png differ diff --git a/projects/rocjpeg/docs/data/AMD_rocJPEG_Logo.png b/projects/rocjpeg/docs/data/AMD_rocJPEG_Logo.png new file mode 100644 index 0000000000..a5d7b480f0 Binary files /dev/null and b/projects/rocjpeg/docs/data/AMD_rocJPEG_Logo.png differ diff --git a/projects/rocjpeg/rocJPEG-setup.py b/projects/rocjpeg/rocJPEG-setup.py new file mode 100644 index 0000000000..bfdf7b3618 --- /dev/null +++ b/projects/rocjpeg/rocJPEG-setup.py @@ -0,0 +1,136 @@ +# Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import os +import sys +import argparse +import platform +if sys.version_info[0] < 3: + import commands +else: + import subprocess + +__copyright__ = "Copyright (c) 2024, AMD ROCm rocJPEG" +__version__ = "1.0" +__email__ = "mivisionx.support@amd.com" +__status__ = "Shipping" + +# Arguments +parser = argparse.ArgumentParser() +parser.add_argument('--rocm_path', type=str, default='/opt/rocm', + help='ROCm Installation Path - optional (default:/opt/rocm) - ROCm Installation Required') +parser.add_argument('--developer', type=str, default='ON', + help='Setup Developer Options - optional (default:ON) [options:ON/OFF]') + +args = parser.parse_args() +developerInstall = args.developer.upper() + +ROCM_PATH = args.rocm_path + +if "ROCM_PATH" in os.environ: + ROCM_PATH = os.environ.get('ROCM_PATH') +print("\nROCm PATH set to -- "+ROCM_PATH+"\n") + +# check ROCm installation +if os.path.exists(ROCM_PATH): + print("\nROCm Installation Found -- "+ROCM_PATH+"\n") + os.system('echo ROCm Info -- && '+ROCM_PATH+'/bin/rocminfo') +else: + print( + "WARNING: If ROCm installed, set ROCm Path with \"--rocm_path\" option for full installation [Default:/opt/rocm]\n") + print("ERROR: rocJPEG Setup requires ROCm install\n") + exit(-1) + +if developerInstall not in ('OFF', 'ON'): + print( + "ERROR: Developer Option Not Supported - [Supported Options: OFF or ON]\n") + exit() + +# get platfrom info +platfromInfo = platform.platform() + +# sudo requirement check +sudoLocation = '' +userName = '' +if sys.version_info[0] < 3: + status, sudoLocation = commands.getstatusoutput("which sudo") + if sudoLocation != '/usr/bin/sudo': + status, userName = commands.getstatusoutput("whoami") +else: + status, sudoLocation = subprocess.getstatusoutput("which sudo") + if sudoLocation != '/usr/bin/sudo': + status, userName = subprocess.getstatusoutput("whoami") + +# setup for Linux +linuxSystemInstall = '' +linuxCMake = 'cmake' +linuxSystemInstall_check = '' +linuxFlag = '' +if "centos" in platfromInfo or "redhat" in platfromInfo or os.path.exists('/usr/bin/yum'): + linuxSystemInstall = 'yum -y' + linuxSystemInstall_check = '--nogpgcheck' + if "centos-7" in platfromInfo or "redhat-7" in platfromInfo: + linuxCMake = 'cmake3' + os.system(linuxSystemInstall+' install cmake3') + if not "centos" in platfromInfo or not "redhat" in platfromInfo: + platfromInfo = platfromInfo+'-redhat' +elif "Ubuntu" in platfromInfo or os.path.exists('/usr/bin/apt-get'): + linuxSystemInstall = 'apt-get -y' + linuxSystemInstall_check = '--allow-unauthenticated' + linuxFlag = '-S' + if not "Ubuntu" in platfromInfo: + platfromInfo = platfromInfo+'-Ubuntu' +elif os.path.exists('/usr/bin/zypper'): + linuxSystemInstall = 'zypper -n' + linuxSystemInstall_check = '--no-gpg-checks' + platfromInfo = platfromInfo+'-SLES' +else: + print("\nrocJPEG Setup on "+platfromInfo+" is unsupported\n") + print("\nrocJPEG Setup Supported on: Ubuntu 20/22; CentOS 7/8; RedHat 8/9; & SLES 15 SP4\n") + exit(-1) + +# rocJPEG Setup +print("\nrocJPEG Setup on: "+platfromInfo+"\n") +print("\nrocJPEG Dependencies Installation with rocJPEG-setup.py V-"+__version__+"\n") + +if userName == 'root': + os.system(linuxSystemInstall+' update') + os.system(linuxSystemInstall+' install sudo') + +# install pre-reqs +os.system('sudo -v') +os.system(linuxSystemInstall+' update') +os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + + linuxSystemInstall_check+' install gcc cmake pkg-config') + +# rocJPEG Core - VA/DRM Requirements +if "Ubuntu" in platfromInfo: + os.system('sudo -v') + os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install vainfo libdrm-amdgpu1 libva-amdgpu-dev mesa-amdgpu-va-drivers') + if "22.04" in platform.version(): + os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install libstdc++-12-dev') +else: + os.system('sudo -v') + os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install libdrm-amdgpu libva-amdgpu-devel mesa-amdgpu-dri-drivers') + +print("\nrocJPEG Dependencies Installed with rocJPEG-setup.py V-"+__version__+"\n") diff --git a/projects/rocjpeg/samples/CMakeLists.txt b/projects/rocjpeg/samples/CMakeLists.txt new file mode 100644 index 0000000000..fd95ff1aa0 --- /dev/null +++ b/projects/rocjpeg/samples/CMakeLists.txt @@ -0,0 +1,71 @@ +# ############################################################################## +# Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# ############################################################################## +cmake_minimum_required(VERSION 3.5) + +add_test( + NAME + jpeg-decode-fmt-unchanged + COMMAND + "${CMAKE_CTEST_COMMAND}" + --build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode" + "${CMAKE_CURRENT_BINARY_DIR}/jpegDecode" + --build-generator "${CMAKE_GENERATOR}" + --test-command "jpegdecode" + -i ${CMAKE_SOURCE_DIR}/data/images/ +) + +add_test( + NAME + jpeg-decode-fmt-yuv + COMMAND + "${CMAKE_CTEST_COMMAND}" + --build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode" + "${CMAKE_CURRENT_BINARY_DIR}/jpegDecode" + --build-generator "${CMAKE_GENERATOR}" + --test-command "jpegdecode" + -i ${CMAKE_SOURCE_DIR}/data/images/ -fmt yuv +) + +add_test( + NAME + jpeg-decode-fmt-y + COMMAND + "${CMAKE_CTEST_COMMAND}" + --build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode" + "${CMAKE_CURRENT_BINARY_DIR}/jpegDecode" + --build-generator "${CMAKE_GENERATOR}" + --test-command "jpegdecode" + -i ${CMAKE_SOURCE_DIR}/data/images/ -fmt y +) + +add_test( + NAME + jpeg-decode-fmt-rgbi + COMMAND + "${CMAKE_CTEST_COMMAND}" + --build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode" + "${CMAKE_CURRENT_BINARY_DIR}/jpegDecode" + --build-generator "${CMAKE_GENERATOR}" + --test-command "jpegdecode" + -i ${CMAKE_SOURCE_DIR}/data/images/ -fmt rgbi +) \ No newline at end of file diff --git a/projects/rocjpeg/samples/jpegDecode/CMakeLists.txt b/projects/rocjpeg/samples/jpegDecode/CMakeLists.txt new file mode 100644 index 0000000000..f3200a2bd1 --- /dev/null +++ b/projects/rocjpeg/samples/jpegDecode/CMakeLists.txt @@ -0,0 +1,73 @@ +################################################################################ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +################################################################################ + +cmake_minimum_required (VERSION 3.0) +project(jpegdecode) +set(CMAKE_CXX_STANDARD 17) + +# ROCM Path +if(DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path") +elseif(ROCM_PATH) + message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}") +else() + set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path") +endif() + +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake) +list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH}) +set(CMAKE_CXX_COMPILER ${ROCM_PATH}/llvm/bin/clang++) + +set(DEFAULT_AMDGPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102") +set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") + +find_package(HIP QUIET) + +# find rocJPEG +find_library(ROCJPEG_LIBRARY NAMES rocjpeg HINTS {ROCM_PATH}/lib) +find_path(ROCJPEG_INCLUDE_DIR NAMES rocjpeg.h PATHS /opt/rocm/include/rocjpeg {ROCM_PATH}/include/rocjpeg) + +if(ROCJPEG_LIBRARY AND ROCJPEG_INCLUDE_DIR) + set(ROCJPEG_FOUND TRUE) + message("-- ${White}Using rocJPEG -- \n\tLibraries:${ROCJPEG_LIBRARY} \n\tIncludes:${ROCJPEG_INCLUDE_DIR}${ColourReset}") +endif() + +if(HIP_FOUND AND ROCJPEG_FOUND) + # HIP + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::device) + # rocJPEG + include_directories (${ROCJPEG_INCLUDE_DIR}) + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCJPEG_LIBRARY}) + list(APPEND SOURCES ${PROJECT_SOURCE_DIR} jpegdecode.cpp) + add_executable(${PROJECT_NAME} ${SOURCES}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17") + target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST}) +else() + message("-- ERROR!: ${PROJECT_NAME} excluded! please install all the dependencies and try again!") + if (NOT HIP_FOUND) + message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install ROCm and HIP!") + endif() + if (NOT ROCJPEG_FOUND) + message(FATAL_ERROR "-- ERROR!: rocDecode Not Found! - please install rocDecode!") + endif() +endif() \ No newline at end of file diff --git a/projects/rocjpeg/samples/jpegDecode/README.md b/projects/rocjpeg/samples/jpegDecode/README.md new file mode 100644 index 0000000000..3c2931ed56 --- /dev/null +++ b/projects/rocjpeg/samples/jpegDecode/README.md @@ -0,0 +1,25 @@ +# JPEG decode sample + +The jpeg decode sample illustrates decoding a JPEG images using rocJPEG library to get the individual decoded images in one of the supported output format (i.e., unchanged, yuv, y, rgbi). This sample can be configured with a device ID and optionally able to dump the output to a file. + +## Prerequisites: + +* Install [rocJPEG](../../README.md#build-and-install-instructions) + +## Build + +```shell +mkdir jpeg_decode_sample && cd jpeg_decode_sample +cmake ../ +make -j +``` + +## Run + +```shell + ./jpegdecode -i + -be + -o + -d +``` \ No newline at end of file diff --git a/projects/rocjpeg/samples/jpegDecode/jpegdecode.cpp b/projects/rocjpeg/samples/jpegDecode/jpegdecode.cpp new file mode 100644 index 0000000000..64836b1d91 --- /dev/null +++ b/projects/rocjpeg/samples/jpegDecode/jpegdecode.cpp @@ -0,0 +1,531 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "rocjpeg.h" + +#define CHECK_ROCJPEG(call) { \ + RocJpegStatus rocjpeg_status = (call); \ + if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { \ + std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) << " at " << __FILE__ << ":" << __LINE__ << std::endl;\ + exit(1); \ + } \ +} + +#define CHECK_HIP(call) { \ + hipError_t hip_status = (call); \ + if (hip_status != hipSuccess) { \ + std::cout << "rocJPEG failure: '#" << hip_status << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ + exit(1); \ + } \ +} + +void ShowHelpAndExit(const char *option = NULL) { + std::cout << "Options:" << std::endl + << "-i Path to single image or directory of images - required" << std::endl + << "-be Select rocJPEG backend (0 for ROCJPEG_BACKEND_HARDWARE, using VCN hardware-accelarated JPEG decoder, 1 ROCJPEG_BACKEND_HYBRID, using CPU and GPU HIP kernles for JPEG decoding); optional; default: 0" << std::endl + << "-fmt Select rocJPEG output format for decoding, one of the [native, yuv, y, rgb]; optional; default: native" << std::endl + << "-o Output file path or directory - Write decoded images based on the selected outfut format to this file or directory; optional;" << std::endl + << "-d GPU device id (0 for the first GPU device, 1 for the second GPU device, etc.); optional; default: 0" << std::endl; + exit(0); +} + +void ParseCommandLine(std::string &input_path, std::string &output_file_path, int &dump_output_frames, int &device_id, RocJpegBackend &rocjpeg_backend, RocJpegOutputFormat &output_format, int argc, char *argv[]) { + if(argc <= 1) { + ShowHelpAndExit(); + } + for (int i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-h")) { + ShowHelpAndExit(); + } + if (!strcmp(argv[i], "-i")) { + if (++i == argc) { + ShowHelpAndExit("-i"); + } + input_path = argv[i]; + continue; + } + if (!strcmp(argv[i], "-o")) { + if (++i == argc) { + ShowHelpAndExit("-o"); + } + output_file_path = argv[i]; + dump_output_frames = 1; + continue; + } + if (!strcmp(argv[i], "-d")) { + if (++i == argc) { + ShowHelpAndExit("-d"); + } + device_id = atoi(argv[i]); + continue; + } + if (!strcmp(argv[i], "-be")) { + if (++i == argc) { + ShowHelpAndExit("-be"); + } + rocjpeg_backend = static_cast(atoi(argv[i])); + continue; + } + if (!strcmp(argv[i], "-fmt")) { + if (++i == argc) { + ShowHelpAndExit("-fmt"); + } + std::string selected_output_format = argv[i]; + if (selected_output_format == "native") { + output_format = ROCJPEG_OUTPUT_NATIVE; + } else if (selected_output_format == "yuv") { + output_format = ROCJPEG_OUTPUT_YUV_PLANAR; + } else if (selected_output_format == "y") { + output_format = ROCJPEG_OUTPUT_Y; + } else if (selected_output_format == "rgb") { + output_format = ROCJPEG_OUTPUT_RGB; + } else { + ShowHelpAndExit(argv[i]); + } + continue; + } + ShowHelpAndExit(argv[i]); + } +} + +void SaveImage(std::string output_file_name, RocJpegImage *output_image, uint32_t img_width, uint32_t img_height, RocJpegChromaSubsampling subsampling, RocJpegOutputFormat output_format) { + + uint8_t *hst_ptr = nullptr; + FILE *fp; + hipError_t hip_status = hipSuccess; + + if (output_image == nullptr || output_image->channel[0] == nullptr || output_image->pitch[0] == 0) { + return; + } + + uint32_t widths[ROCJPEG_MAX_COMPONENT] = {}; + uint32_t heights[ROCJPEG_MAX_COMPONENT] = {}; + + switch (output_format) { + case ROCJPEG_OUTPUT_NATIVE: + switch (subsampling) { + case ROCJPEG_CSS_444: + widths[2] = widths[1] = widths[0] = img_width; + heights[2] = heights[1] = heights[0] = img_height; + break; + case ROCJPEG_CSS_422: + widths[0] = img_width * 2; + heights[0] = img_height; + break; + case ROCJPEG_CSS_420: + widths[1] = widths[0] = img_width; + heights[0] = img_height; + heights[1] = img_height >> 1; + break; + case ROCJPEG_CSS_400: + widths[0] = img_width; + heights[0] = img_height; + break; + default: + std::cout << "Unknown chroma subsampling!" << std::endl; + return; + } + break; + case ROCJPEG_OUTPUT_YUV_PLANAR: + switch (subsampling) { + case ROCJPEG_CSS_444: + widths[2] = widths[1] = widths[0] = img_width; + heights[2] = heights[1] = heights[0] = img_height; + break; + case ROCJPEG_CSS_422: + widths[0] = img_width; + widths[2] = widths[1] = widths[0] >> 1; + heights[2] = heights[1] = heights[0] = img_height; + break; + case ROCJPEG_CSS_420: + widths[0] = img_width; + widths[2] = widths[1] = widths[0] >> 1; + heights[0] = img_height; + heights[2] = heights[1] = img_height >> 1; + break; + case ROCJPEG_CSS_400: + widths[0] = img_width; + heights[0] = img_height; + break; + default: + std::cout << "Unknown chroma subsampling!" << std::endl; + return; + } + break; + case ROCJPEG_OUTPUT_Y: + widths[0] = img_width; + heights[0] = img_height; + break; + case ROCJPEG_OUTPUT_RGB: + widths[0] = img_width * 3; + heights[0] = img_height; + break; + default: + std::cout << "Unknown output format!" << std::endl; + return; + } + + uint32_t channel0_size = output_image->pitch[0] * heights[0]; + uint32_t channel1_size = output_image->pitch[1] * heights[1]; + uint32_t channel2_size = output_image->pitch[2] * heights[2]; + + uint32_t output_image_size = channel0_size + channel1_size + channel2_size; + + if (hst_ptr == nullptr) { + hst_ptr = new uint8_t [output_image_size]; + } + + CHECK_HIP(hipMemcpyDtoH((void *)hst_ptr, output_image->channel[0], channel0_size)); + + uint8_t *tmp_hst_ptr = hst_ptr; + fp = fopen(output_file_name.c_str(), "wb"); + if (fp) { + // write channel0 + if (widths[0] == output_image->pitch[0]) { + fwrite(hst_ptr, 1, channel0_size, fp); + } else { + for (int i = 0; i < heights[0]; i++) { + fwrite(tmp_hst_ptr, 1, widths[0], fp); + tmp_hst_ptr += output_image->pitch[0]; + } + } + // write channel1 + if (channel1_size != 0 && output_image->channel[1] != nullptr) { + uint8_t *channel1_hst_ptr = hst_ptr + channel0_size; + CHECK_HIP(hipMemcpyDtoH((void *)channel1_hst_ptr, output_image->channel[1], channel1_size)); + if (widths[1] == output_image->pitch[1]) { + fwrite(channel1_hst_ptr, 1, channel1_size, fp); + } else { + for (int i = 0; i < heights[1]; i++) { + fwrite(channel1_hst_ptr, 1, widths[1], fp); + channel1_hst_ptr += output_image->pitch[1]; + } + } + } + // write channel2 + if (channel2_size != 0 && output_image->channel[2] != nullptr) { + uint8_t *channel2_hst_ptr = hst_ptr + channel0_size + channel1_size; + CHECK_HIP(hipMemcpyDtoH((void *)channel2_hst_ptr, output_image->channel[2], channel2_size)); + if (widths[2] == output_image->pitch[2]) { + fwrite(channel2_hst_ptr, 1, channel2_size, fp); + } else { + for (int i = 0; i < heights[2]; i++) { + fwrite(channel2_hst_ptr, 1, widths[2], fp); + channel2_hst_ptr += output_image->pitch[2]; + } + } + } + fclose(fp); + } + + if (hst_ptr != nullptr) { + delete [] hst_ptr; + hst_ptr = nullptr; + tmp_hst_ptr = nullptr; + } +} + +bool GetFilePaths(std::string &input_path, std::vector &file_paths, bool &is_dir, bool &is_file) { + is_dir = std::filesystem::is_directory(input_path); + is_file = std::filesystem::is_regular_file(input_path); + if (is_dir) { + for (const auto &entry : std::filesystem::directory_iterator(input_path)) + file_paths.push_back(entry.path()); + } else if (is_file) { + file_paths.push_back(input_path); + } else { + std::cerr << "ERROR: the input path is not valid!" << std::endl; + return false; + } + return true; +} + +bool InitHipDevice(int device_id) { + int num_devices; + hipDeviceProp_t hip_dev_prop; + CHECK_HIP(hipGetDeviceCount(&num_devices)); + if (num_devices < 1) { + std::cerr << "ERROR: didn't find any GPU!" << std::endl; + return false; + } + if (device_id >= num_devices) { + std::cerr << "ERROR: the requested device_id is not found!" << std::endl; + return false; + } + CHECK_HIP(hipSetDevice(device_id)); + CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop, device_id)); + + std::cout << "info: Using GPU device " << device_id << ": " << hip_dev_prop.name << "[" << hip_dev_prop.gcnArchName << "] on PCI bus " << + std::setfill('0') << std::setw(2) << std::right << std::hex << hip_dev_prop.pciBusID << ":" << std::setfill('0') << std::setw(2) << + std::right << std::hex << hip_dev_prop.pciDomainID << "." << hip_dev_prop.pciDeviceID << std::dec << std::endl; + + return true; +} +int main(int argc, char **argv) { + int device_id = 0; + int dump_output_frames = 0; + uint8_t num_components; + uint32_t widths[ROCJPEG_MAX_COMPONENT] = {}; + uint32_t heights[ROCJPEG_MAX_COMPONENT] = {}; + uint32_t channel_sizes[ROCJPEG_MAX_COMPONENT] = {}; + uint32_t num_channels = 0; + int total_images_all = 0; + double time_per_image_all = 0; + double m_pixels_all = 0; + double image_per_sec_all = 0; + std::string chroma_sub_sampling = ""; + std::string input_path, output_file_path; + std::vector file_paths = {}; + bool is_dir = false; + bool is_file = false; + RocJpegChromaSubsampling subsampling; + RocJpegBackend rocjpeg_backend = ROCJPEG_BACKEND_HARDWARE; + RocJpegHandle rocjpeg_handle = nullptr; + RocJpegImage output_image = {}; + RocJpegOutputFormat output_format = ROCJPEG_OUTPUT_NATIVE; + + ParseCommandLine(input_path, output_file_path, dump_output_frames, device_id, rocjpeg_backend, output_format, argc, argv); + if (!GetFilePaths(input_path, file_paths, is_dir, is_file)) { + std::cerr << "Failed to get input file paths!" << std::endl; + return -1; + } + if (!InitHipDevice(device_id)) { + std::cerr << "Failed to initialize HIP!" << std::endl; + return -1; + } + + CHECK_ROCJPEG(rocJpegCreate(rocjpeg_backend, device_id, &rocjpeg_handle)); + + int counter = 0; + std::vector> file_data(file_paths.size()); + std::vector file_sizes(file_paths.size()); + + for (auto file_path : file_paths) { + std::string base_file_name = file_path.substr(file_path.find_last_of("/\\") + 1); + int image_count = 0; + + // Read an image from disk. + std::ifstream input(file_path.c_str(), std::ios::in | std::ios::binary | std::ios::ate); + if (!(input.is_open())) { + std::cerr << "ERROR: Cannot open image: " << file_path << std::endl; + return 0; + } + // Get the size + std::streamsize file_size = input.tellg(); + input.seekg(0, std::ios::beg); + // resize if buffer is too small + if (file_data[counter].size() < file_size) { + file_data[counter].resize(file_size); + } + if (!input.read(file_data[counter].data(), file_size)) { + std::cerr << "Cannot read from file: " << file_path << std::endl; + return 0; + } + file_sizes[counter] = file_size; + + CHECK_ROCJPEG(rocJpegGetImageInfo(rocjpeg_handle, reinterpret_cast(file_data[counter].data()), file_size, &num_components, &subsampling, widths, heights)); + + std::cout << "info: input file name: " << base_file_name << std::endl; + std::cout << "info: input image resolution: " << widths[0] << "x" << heights[0] << std::endl; + + switch (subsampling) { + case ROCJPEG_CSS_444: + chroma_sub_sampling = "YUV 4:4:4"; + break; + case ROCJPEG_CSS_440: + chroma_sub_sampling = "YUV 4:4:0"; + break; + case ROCJPEG_CSS_422: + chroma_sub_sampling = "YUV 4:2:2"; + break; + case ROCJPEG_CSS_420: + chroma_sub_sampling = "YUV 4:2:0"; + break; + case ROCJPEG_CSS_411: + chroma_sub_sampling = "YUV 4:1:1"; + break; + case ROCJPEG_CSS_400: + chroma_sub_sampling = "YUV 4:0:0"; + break; + case ROCJPEG_CSS_UNKNOWN: + std::cout << "info: Unknown chroma subsampling" << std::endl; + return EXIT_FAILURE; + } + std::cout << "info: chroma subsampling: " + chroma_sub_sampling << std::endl; + + if (subsampling == ROCJPEG_CSS_440 || subsampling == ROCJPEG_CSS_411) { + std::cout << "The chroma sub-sampling is not supported by VCN Hardware" << std::endl; + if (is_dir) { + std::cout << std::endl; + continue; + } else + return EXIT_FAILURE; + } + + switch (output_format) { + case ROCJPEG_OUTPUT_NATIVE: + switch (subsampling) { + case ROCJPEG_CSS_444: + num_channels = 3; + output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = widths[0]; + channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = output_image.pitch[0] * heights[0]; + break; + case ROCJPEG_CSS_422: + num_channels = 1; + output_image.pitch[0] = widths[0] * 2; + channel_sizes[0] = output_image.pitch[0] * heights[0]; + break; + case ROCJPEG_CSS_420: + num_channels = 2; + output_image.pitch[1] = output_image.pitch[0] = widths[0]; + channel_sizes[0] = output_image.pitch[0] * heights[0]; + channel_sizes[1] = output_image.pitch[1] * (heights[0] >> 1); + break; + case ROCJPEG_CSS_400: + num_channels = 1; + output_image.pitch[0] = widths[0]; + channel_sizes[0] = output_image.pitch[0] * heights[0]; + break; + default: + std::cout << "Unknown chroma subsampling!" << std::endl; + return EXIT_FAILURE; + } + break; + case ROCJPEG_OUTPUT_YUV_PLANAR: + if (subsampling == ROCJPEG_CSS_400) { + num_channels = 1; + output_image.pitch[0] = widths[0]; + channel_sizes[0] = output_image.pitch[0] * heights[0]; + } else { + num_channels = 3; + output_image.pitch[0] = widths[0]; + output_image.pitch[1] = widths[1]; + output_image.pitch[2] = widths[2]; + channel_sizes[0] = output_image.pitch[0] * heights[0]; + channel_sizes[1] = output_image.pitch[1] * heights[1]; + channel_sizes[2] = output_image.pitch[2] * heights[2]; + } + break; + case ROCJPEG_OUTPUT_Y: + num_channels = 1; + output_image.pitch[0] = widths[0]; + channel_sizes[0] = output_image.pitch[0] * heights[0]; + break; + case ROCJPEG_OUTPUT_RGB: + num_channels = 1; + output_image.pitch[0] = widths[0] * 3; + channel_sizes[0] = output_image.pitch[0] * heights[0]; + break; + default: + std::cout << "Unknown output format!" << std::endl; + return EXIT_FAILURE; + } + // allocate memory for each channel + for (int i = 0; i < num_channels; i++) { + CHECK_HIP(hipMalloc(&output_image.channel[i], channel_sizes[i])); + } + + std::cout << "info: decoding started, please wait! ... " << std::endl; + auto start_time = std::chrono::high_resolution_clock::now(); + CHECK_ROCJPEG(rocJpegDecode(rocjpeg_handle, reinterpret_cast(file_data[counter].data()), file_size, output_format, &output_image)); + auto end_time = std::chrono::high_resolution_clock::now(); + std::chrono::duration decoder_time = end_time - start_time; + double time_per_image = decoder_time.count() * 1000; + double ips = (1 / time_per_image) * 1000; + double mpixels = ((double)widths[0] * (double)heights[0] / 1000000) * ips; + image_count++; + + if (dump_output_frames) { + std::string::size_type const p(base_file_name.find_last_of('.')); + std::string file_name_no_ext = base_file_name.substr(0, p); + std::string file_extension; + switch (output_format) { + case ROCJPEG_OUTPUT_NATIVE: + file_extension = "native"; + break; + case ROCJPEG_OUTPUT_YUV_PLANAR: + file_extension = "yuv"; + break; + case ROCJPEG_OUTPUT_Y: + file_extension = "y"; + break; + case ROCJPEG_OUTPUT_RGB: + file_extension = "rgb"; + break; + default: + file_extension = ""; + break; + } + + std::string file_name_for_saving = output_file_path + "//" + file_name_no_ext + "_" + std::to_string(widths[0]) + "x" + + std::to_string(heights[0]) + "." + file_extension; + std::string image_save_path = is_dir ? file_name_for_saving : output_file_path; + SaveImage(image_save_path, &output_image, widths[0], heights[0], subsampling, output_format); + } + + for (int i = 0; i < num_channels; i++) { + if (output_image.channel[i] != nullptr) { + CHECK_HIP(hipFree((void*)output_image.channel[i])); + output_image.channel[i] = nullptr; + output_image.pitch[i] = 0; + } + } + + std::cout << "info: total decoded images: " << image_count << std::endl; + std::cout << "info: average processing time per image (ms): " << time_per_image << std::endl; + std::cout << "info: average images per sec: " << (1 / time_per_image) * 1000 << std::endl; + std::cout << "info: total elapsed time (s): " << decoder_time.count() << std::endl; + + if (is_dir) { + std::cout << std::endl; + total_images_all += image_count; + time_per_image_all += time_per_image; + image_per_sec_all += ips; + m_pixels_all += mpixels; + } + counter++; + } + + if (is_dir) { + std::cout << "info: total decoded images: " << total_images_all << std::endl; + if (total_images_all) { + std::cout << "info: average processing time per image (ms): " << time_per_image_all / total_images_all << std::endl; + std::cout << "info: average decoded images per sec: " << image_per_sec_all / total_images_all << std::endl; + std::cout << "info: average decoded mpixels per sec: " << m_pixels_all / total_images_all << std::endl; + } + std::cout << std::endl; + } + + CHECK_ROCJPEG(rocJpegDestroy(rocjpeg_handle)); + std::cout << "info: decoding completed!" << std::endl; + + return 0; +} \ No newline at end of file diff --git a/projects/rocjpeg/src/rocjpeg_api.cpp b/projects/rocjpeg/src/rocjpeg_api.cpp new file mode 100644 index 0000000000..267ba67e51 --- /dev/null +++ b/projects/rocjpeg/src/rocjpeg_api.cpp @@ -0,0 +1,171 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "rocjpeg_api_handle.h" +#include "rocjpeg_commons.h" + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle) +//! Create the decoder object based on backend and device_id. A handle to the created decoder is returned +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle) { + if (handle == nullptr) { + return ROCJPEG_STATUS_INVALID_PARAMETER; + } + RocJpegHandle rocjpeg_handle = nullptr; + try { + rocjpeg_handle = new RocJpegDecoderHandle(backend, device_id); + } catch(const std::exception& e) { + ERR(STR("Failed to init the rocJPEG handle, ") + STR(e.what())); + return ROCJPEG_STATUS_NOT_INITIALIZED; + } + *handle = rocjpeg_handle; + return static_cast(rocjpeg_handle)->rocjpeg_decoder->InitializeDecoder(); +} + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle) +//! Release the decoder object and resources. +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle) { + if (handle == nullptr) { + return ROCJPEG_STATUS_INVALID_PARAMETER; + } + auto rocjpeg_handle = static_cast(handle); + delete rocjpeg_handle; + return ROCJPEG_STATUS_SUCCESS; +} + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length, +//! int *num_components, RocJpegChromaSubsampling *subsampling, int *widths, int *heights) +//! Retrieve the image info, including channel, width and height of each component, and chroma subsampling. +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length, uint8_t *num_components, + RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights) { + if (handle == nullptr || data == nullptr || num_components == nullptr || + subsampling == nullptr || widths == nullptr || heights == nullptr) { + return ROCJPEG_STATUS_INVALID_PARAMETER; + } + RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS; + auto rocjpeg_handle = static_cast(handle); + try { + rocjpeg_status = rocjpeg_handle->rocjpeg_decoder->GetImageInfo(data, length, num_components, subsampling, widths, heights); + } catch (const std::exception& e) { + rocjpeg_handle->CaptureError(e.what()); + ERR(e.what()); + return ROCJPEG_STATUS_RUNTIME_ERROR; + } + + return rocjpeg_status; +} + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination, hipStream_t stream); +//! \ingroup group_amd_rocjpeg +//! Decodes single image based on the backend used to create the rocJpeg handle in rocJpegCreate API. +//! Destination buffers should be large enough to be able to store output of specified format. These buffers should be pre-allocted by the user in the device memories. +//! For each color plane (channel) sizes could be retrieved for image using rocJpegGetImageInfo API +//! and minimum required memory buffer for each plane is plane_height * plane_pitch where plane_pitch >= plane_width for +//! planar output formats and plane_pitch >= plane_width * num_components for interleaved output format. +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format, + RocJpegImage *destination) { + + if (handle == nullptr || data == nullptr) { + return ROCJPEG_STATUS_INVALID_PARAMETER; + } + RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS; + auto rocjpeg_handle = static_cast(handle); + try { + rocjpeg_status = rocjpeg_handle->rocjpeg_decoder->Decode(data, length, output_format, destination); + } catch (const std::exception& e) { + rocjpeg_handle->CaptureError(e.what()); + ERR(e.what()); + return ROCJPEG_STATUS_RUNTIME_ERROR; + } + + return rocjpeg_status; +} + +/*****************************************************************************************************/ +//! \fn extern const char* ROCDECAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status); +//! \ingroup group_amd_rocjpeg +//! Return name of the specified error code in text form. +/*****************************************************************************************************/ +extern const char* ROCJPEGAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status) { + switch (rocjpeg_status) { + case ROCJPEG_STATUS_SUCCESS: + return "ROCJPEG_STATUS_SUCCESS"; + case ROCJPEG_STATUS_NOT_INITIALIZED: + return "ROCJPEG_STATUS_NOT_INITIALIZED"; + case ROCJPEG_STATUS_INVALID_PARAMETER: + return "ROCJPEG_STATUS_INVALID_PARAMETER"; + case ROCJPEG_STATUS_BAD_JPEG: + return "ROCJPEG_STATUS_BAD_JPEG"; + case ROCJPEG_STATUS_JPEG_NOT_SUPPORTED: + return "ROCJPEG_STATUS_JPEG_NOT_SUPPORTED"; + case ROCJPEG_STATUS_EXECUTION_FAILED: + return "ROCJPEG_STATUS_EXECUTION_FAILED"; + case ROCJPEG_STATUS_ARCH_MISMATCH: + return "ROCJPEG_STATUS_ARCH_MISMATCH"; + case ROCJPEG_STATUS_INTERNAL_ERROR: + return "ROCJPEG_STATUS_INTERNAL_ERROR"; + case ROCJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED: + return "ROCJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED"; + case ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED: + return "ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED"; + case ROCJPEG_STATUS_RUNTIME_ERROR: + return "ROCJPEG_STATUS_RUNTIME_ERROR"; + case ROCJPEG_STATUS_OUTOF_MEMORY: + return "ROCJPEG_STATUS_OUTOF_MEMORY"; + case ROCJPEG_STATUS_NOT_IMPLEMENTED: + return "ROCJPEG_STATUS_NOT_IMPLEMENTED"; + default: + return "UNKNOWN_ERROR"; + } +} + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format); +//! \ingroup group_amd_rocjpeg +//! Resets and initializes batch decoder for working on the batches of specified size +//! Should be called once for decoding batches of this specific size, also use to reset failed batches +//! \return ROCJPEG_STATUS_SUCCESS if successful +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format) { + return ROCJPEG_STATUS_NOT_IMPLEMENTED; +} + + +/*****************************************************************************************************/ +//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations, hipStream_t stream); +//! \ingroup group_amd_rocjpeg +//! Decodes batch of images. Output buffers should be large enough to be able to store +//! outputs of specified format, see single image decoding description for details. Call to +//! rocjpegDecodeBatchedInitialize() is required prior to this call, batch size is expected to be the same as +//! parameter to this batch initialization function. +//! \return ROCJPEG_STATUS_SUCCESS if successful +/*****************************************************************************************************/ +RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations) { + return ROCJPEG_STATUS_NOT_IMPLEMENTED; +} diff --git a/projects/rocjpeg/src/rocjpeg_api_handle.h b/projects/rocjpeg/src/rocjpeg_api_handle.h new file mode 100644 index 0000000000..436a80e446 --- /dev/null +++ b/projects/rocjpeg/src/rocjpeg_api_handle.h @@ -0,0 +1,50 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef ROC_JPEG_HANDLE_H +#define ROC_JPEG_HANDLE_H + +#pragma once + +#include +#include + +#include "rocjpeg_decoder.h" + +/** + * @brief RocJpegHandle class + * + */ +class RocJpegDecoderHandle { + public: + explicit RocJpegDecoderHandle(RocJpegBackend backend, int device_id) : rocjpeg_decoder(std::make_shared(backend, device_id)) {}; + ~RocJpegDecoderHandle() { ClearErrors(); } + std::shared_ptr rocjpeg_decoder; + bool NoError() { return error_.empty(); } + const char* ErrorMsg() { return error_.c_str(); } + void CaptureError(const std::string& err_msg) { error_ = err_msg; } + private: + void ClearErrors() { error_ = "";} + std::string error_; +}; + +#endif //ROC_JPEG_HANDLE_H \ No newline at end of file diff --git a/projects/rocjpeg/src/rocjpeg_commons.h b/projects/rocjpeg/src/rocjpeg_commons.h new file mode 100644 index 0000000000..c0caa3bd60 --- /dev/null +++ b/projects/rocjpeg/src/rocjpeg_commons.h @@ -0,0 +1,92 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef ROC_JPEG_COMMON_H_ +#define ROC_JPEG_COMMON_H_ + +#pragma once +#include +#include +#include +#include +#include + +#define TOSTR(X) std::to_string(static_cast(X)) +#define STR(X) std::string(X) + +#if DBGINFO +#define INFO(X) std::clog << "[INF] " << " {" << __func__ <<"} " << " " << X << std::endl; +#else +#define INFO(X) ; +#endif +#define ERR(X) std::cerr << "[ERR] " << " {" << __func__ <<"} " << " " << X << std::endl; + +#define CHECK_VAAPI(call) { \ + VAStatus va_status = (call); \ + if (va_status != VA_STATUS_SUCCESS) { \ + std::cerr << "VAAPI failure: " << #call << " failed with status: " << std::hex << "0x" << va_status << std::dec << " = '" << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ + return ROCJPEG_STATUS_EXECUTION_FAILED; \ + } \ +} + +#define CHECK_HIP(call) { \ + hipError_t hip_status = (call); \ + if (hip_status != hipSuccess) { \ + std::cerr << "HIP failure: 'status: " << hipGetErrorName(hip_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ + return ROCJPEG_STATUS_EXECUTION_FAILED; \ + } \ +} + +#define CHECK_ROCJPEG(call) { \ + RocJpegStatus rocjpeg_status = (call); \ + if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { \ + std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) << " at " << __FILE__ << ":" << __LINE__ << std::endl;\ + return rocjpeg_status; \ + } \ +} + +static bool GetEnv(const char *name, char *value, size_t valueSize) { + const char *v = getenv(name); + if (v) { + strncpy(value, v, valueSize); + value[valueSize - 1] = 0; + } + return v ? true : false; +} + +static inline int align(int value, int alignment) { + return (value + alignment - 1) & ~(alignment - 1); +} + +class RocJpegException : public std::exception { + public: + explicit RocJpegException(const std::string& message):message_(message){} + virtual const char* what() const throw() override { + return message_.c_str(); + } + private: + std::string message_; +}; + +#define THROW(X) throw RocJpegException(" { "+std::string(__func__)+" } " + X); + +#endif //ROC_JPEG_COMMON_H_ \ No newline at end of file diff --git a/projects/rocjpeg/src/rocjpeg_decoder.cpp b/projects/rocjpeg/src/rocjpeg_decoder.cpp new file mode 100644 index 0000000000..409720da68 --- /dev/null +++ b/projects/rocjpeg/src/rocjpeg_decoder.cpp @@ -0,0 +1,331 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "rocjpeg_decoder.h" + +ROCJpegDecoder::ROCJpegDecoder(RocJpegBackend backend, int device_id) : + num_devices_{0}, device_id_ {device_id}, hip_stream_ {0}, backend_{backend}, hip_interop_{} {} + +ROCJpegDecoder::~ROCJpegDecoder() { + if (hip_stream_) { + hipError_t hip_status = hipStreamDestroy(hip_stream_); + } +} + +RocJpegStatus ROCJpegDecoder::InitHIP(int device_id) { + hipError_t hip_status = hipSuccess; + CHECK_HIP(hipGetDeviceCount(&num_devices_)); + if (num_devices_ < 1) { + ERR("ERROR: Failed to find any GPU!"); + return ROCJPEG_STATUS_NOT_INITIALIZED; + } + if (device_id >= num_devices_) { + ERR("ERROR: the requested device_id is not found!"); + return ROCJPEG_STATUS_INVALID_PARAMETER; + } + CHECK_HIP(hipSetDevice(device_id)); + CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop_, device_id)); + CHECK_HIP(hipStreamCreate(&hip_stream_)); + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus ROCJpegDecoder::InitializeDecoder() { + RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS; + rocjpeg_status = InitHIP(device_id_); + if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { + ERR("ERROR: Failed to initilize the HIP!"); + return rocjpeg_status; + } + if (backend_ == ROCJPEG_BACKEND_HARDWARE) { + rocjpeg_status = jpeg_vaapi_decoder_.InitializeDecoder(hip_dev_prop_.gcnArchName); + if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { + ERR("ERROR: Failed to initialize the VA-API JPEG decoder!"); + return rocjpeg_status; + } + } else if (backend_ == ROCJPEG_BACKEND_HYBRID) { + return ROCJPEG_STATUS_NOT_IMPLEMENTED; + } + return rocjpeg_status; +} + +RocJpegStatus ROCJpegDecoder::Decode(const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination) { + std::lock_guard lock(mutex_); + RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS; + + if (!jpeg_parser_.ParseJpegStream(data, length)) { + ERR("ERROR: Failed to parse the jpeg stream!"); + return ROCJPEG_STATUS_BAD_JPEG; + } + + const JpegStreamParameters *jpeg_stream_params = jpeg_parser_.GetJpegStreamParameters(); + VASurfaceID current_surface_id; + CHECK_ROCJPEG(jpeg_vaapi_decoder_.SubmitDecode(jpeg_stream_params, current_surface_id)); + + if (destination != nullptr) { + VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {}; + CHECK_ROCJPEG(jpeg_vaapi_decoder_.SyncSurface(current_surface_id)); + CHECK_ROCJPEG(jpeg_vaapi_decoder_.ExportSurface(current_surface_id, va_drm_prime_surface_desc)); + CHECK_ROCJPEG(GetHipInteropMem(va_drm_prime_surface_desc)); + + uint16_t chroma_height = 0; + CHECK_ROCJPEG(GetChromaHeight(jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height)); + + switch (output_format) { + case ROCJPEG_OUTPUT_NATIVE: + // copy the native decoded output buffers from interop memory directly to the destination buffers + CHECK_ROCJPEG(CopyLuma(destination, jpeg_stream_params->picture_parameter_buffer.picture_height)); + CHECK_ROCJPEG(CopyChroma(destination, chroma_height)); + break; + case ROCJPEG_OUTPUT_YUV_PLANAR: + CHECK_ROCJPEG(GetPlanarYUVOutputFormat(jpeg_stream_params->picture_parameter_buffer.picture_width, + jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height, destination)); + break; + case ROCJPEG_OUTPUT_Y: + CHECK_ROCJPEG(GetYOutputFormat(jpeg_stream_params->picture_parameter_buffer.picture_width, + jpeg_stream_params->picture_parameter_buffer.picture_height, destination)); + break; + case ROCJPEG_OUTPUT_RGB: + CHECK_ROCJPEG(ColorConvertToRGB(jpeg_stream_params->picture_parameter_buffer.picture_width, + jpeg_stream_params->picture_parameter_buffer.picture_height, destination)); + break; + default: + break; + } + + CHECK_HIP(hipStreamSynchronize(hip_stream_)); + + CHECK_ROCJPEG(ReleaseHipInteropMem(current_surface_id)); + } + + return ROCJPEG_STATUS_SUCCESS; + +} + +RocJpegStatus ROCJpegDecoder::GetImageInfo(const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights){ + std::lock_guard lock(mutex_); + if (widths == nullptr || heights == nullptr || num_components == nullptr) { + return ROCJPEG_STATUS_INVALID_PARAMETER; + } + if (!jpeg_parser_.ParseJpegStream(data, length)) { + ERR("ERROR: jpeg parser failed!"); + return ROCJPEG_STATUS_BAD_JPEG; + } + const JpegStreamParameters *jpeg_stream_params = jpeg_parser_.GetJpegStreamParameters(); + *num_components = jpeg_stream_params->picture_parameter_buffer.num_components; + widths[0] = jpeg_stream_params->picture_parameter_buffer.picture_width; + heights[0] = jpeg_stream_params->picture_parameter_buffer.picture_height; + widths[3] = 0; + heights[3] = 0; + + switch (jpeg_stream_params->chroma_subsampling) { + case CSS_444: + *subsampling = ROCJPEG_CSS_444; + widths[2] = widths[1] = widths[0]; + heights[2] = heights[1] = heights[0]; + break; + case CSS_422: + *subsampling = ROCJPEG_CSS_422; + widths[2] = widths[1] = widths[0] >> 1; + heights[2] = heights[1] = heights[0]; + break; + case CSS_420: + *subsampling = ROCJPEG_CSS_420; + widths[2] = widths[1] = widths[0] >> 1; + heights[2] = heights[1] = heights[0] >> 1; + break; + case CSS_400: + *subsampling = ROCJPEG_CSS_400; + widths[3] = widths[2] = widths[1] = 0; + heights[3] = heights[2] = heights[1] = 0; + break; + case CSS_411: + *subsampling = ROCJPEG_CSS_411; + widths[2] = widths[1] = widths[0] >> 2; + heights[2] = heights[1] = heights[0]; + break; + case CSS_440: + *subsampling = ROCJPEG_CSS_440; + widths[2] = widths[1] = widths[0] >> 1; + heights[2] = heights[1] = heights[0] >> 1; + break; + default: + *subsampling = ROCJPEG_CSS_UNKNOWN; + break; + } + + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus ROCJpegDecoder::GetHipInteropMem(VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc) { + hipExternalMemoryHandleDesc external_mem_handle_desc = {}; + hipExternalMemoryBufferDesc external_mem_buffer_desc = {}; + external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd; + external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd; + external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size; + + CHECK_HIP(hipImportExternalMemory(&hip_interop_.hip_ext_mem, &external_mem_handle_desc)); + external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size; + CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&hip_interop_.hip_mapped_device_mem, hip_interop_.hip_ext_mem, &external_mem_buffer_desc)); + + hip_interop_.surface_format = va_drm_prime_surface_desc.fourcc; + hip_interop_.width = va_drm_prime_surface_desc.width; + hip_interop_.height = va_drm_prime_surface_desc.height; + hip_interop_.offset[0] = va_drm_prime_surface_desc.layers[0].offset[0]; + hip_interop_.offset[1] = va_drm_prime_surface_desc.layers[1].offset[0]; + hip_interop_.offset[2] = va_drm_prime_surface_desc.layers[2].offset[0]; + hip_interop_.pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0]; + hip_interop_.pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0]; + hip_interop_.pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0]; + hip_interop_.num_layers = va_drm_prime_surface_desc.num_layers; + + for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) { + close(va_drm_prime_surface_desc.objects[i].fd); + } + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus ROCJpegDecoder::ReleaseHipInteropMem(VASurfaceID current_surface_id) { + if (hip_interop_.hip_mapped_device_mem != nullptr) { + CHECK_HIP(hipFree(hip_interop_.hip_mapped_device_mem)); + } + if (hip_interop_.hip_ext_mem != nullptr) { + CHECK_HIP(hipDestroyExternalMemory(hip_interop_.hip_ext_mem)); + } + memset((void*)&hip_interop_, 0, sizeof(hip_interop_)); + + CHECK_ROCJPEG(jpeg_vaapi_decoder_.ReleaseSurface(current_surface_id)); + + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus ROCJpegDecoder::CopyLuma(RocJpegImage *destination, uint16_t picture_height) { + if (hip_interop_.pitch[0] != 0 && destination->pitch[0] != 0 && destination->channel[0] != nullptr) { + if (destination->pitch[0] == hip_interop_.pitch[0]) { + uint32_t luma_size = destination->pitch[0] * picture_height; + CHECK_HIP(hipMemcpyDtoDAsync(destination->channel[0], hip_interop_.hip_mapped_device_mem, luma_size, hip_stream_)); + } else { + CHECK_HIP(hipMemcpy2DAsync(destination->channel[0], destination->pitch[0], hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0], + destination->pitch[0], picture_height, hipMemcpyDeviceToDevice, hip_stream_)); + } + } + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus ROCJpegDecoder::CopyChroma(RocJpegImage *destination, uint16_t chroma_height) { + // copy channel1 + if (hip_interop_.pitch[1] != 0 && destination->pitch[1] != 0 && destination->channel[1] != nullptr) { + uint32_t chroma_size = destination->pitch[1] * chroma_height; + uint8_t *layer1_mem = hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1]; + if (destination->pitch[1] == hip_interop_.pitch[1]) { + CHECK_HIP(hipMemcpyDtoDAsync(destination->channel[1], layer1_mem, chroma_size, hip_stream_)); + } else { + CHECK_HIP(hipMemcpy2DAsync(destination->channel[1], destination->pitch[1], layer1_mem, hip_interop_.pitch[1], + destination->pitch[1], chroma_height, hipMemcpyDeviceToDevice, hip_stream_)); + } + } + // copy channel2 + if (hip_interop_.pitch[2] != 0 && destination->pitch[2] != 0 && destination->channel[2] != nullptr) { + uint32_t chroma_size = destination->pitch[2] * chroma_height; + uint8_t *layer2_mem = hip_interop_.hip_mapped_device_mem + hip_interop_.offset[2]; + if (destination->pitch[2] == hip_interop_.pitch[2]) { + CHECK_HIP(hipMemcpyDtoDAsync(destination->channel[2], layer2_mem, chroma_size, hip_stream_)); + } else { + CHECK_HIP(hipMemcpy2DAsync(destination->channel[2], destination->pitch[2], layer2_mem, hip_interop_.pitch[2], + destination->pitch[2], chroma_height, hipMemcpyDeviceToDevice, hip_stream_)); + } + } + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus ROCJpegDecoder::GetChromaHeight(uint16_t picture_height, uint16_t &chroma_height) { + switch (hip_interop_.surface_format) { + case VA_FOURCC_NV12: /*NV12: two-plane 8-bit YUV 4:2:0*/ + chroma_height = picture_height >> 1; + break; + case VA_FOURCC_444P: /*444P: three-plane 8-bit YUV 4:4:4*/ + chroma_height = picture_height; + break; + case VA_FOURCC_Y800: /*Y800: one-plane 8-bit greyscale YUV 4:0:0*/ + chroma_height = 0; + break; + case ROCJPEG_FOURCC_YUYV: /*YUYV: one-plane packed 8-bit YUV 4:2:2. Four bytes per pair of pixels: Y, U, Y, V*/ + chroma_height = picture_height; + break; + default: + return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED; + } + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus ROCJpegDecoder::ColorConvertToRGB(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) { + switch (hip_interop_.surface_format) { + case VA_FOURCC_444P: + ColorConvertYUV444ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0], + hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0], hip_interop_.offset[1]); + break; + case ROCJPEG_FOURCC_YUYV: + ColorConvertYUYVToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0], + hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]); + break; + case VA_FOURCC_NV12: + ColorConvertNV12ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0], + hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0], + hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1], hip_interop_.pitch[1]); + break; + case VA_FOURCC_Y800: + ColorConvertYUV400ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0], + hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]); + break; + default: + ERR("ERROR! surface format is not supported!"); + return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED; + } + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus ROCJpegDecoder::GetPlanarYUVOutputFormat(uint32_t picture_width, uint32_t picture_height, uint16_t chroma_height, RocJpegImage *destination) { + if (hip_interop_.surface_format == ROCJPEG_FOURCC_YUYV) { + // Extract the packed YUYV and copy them into the first, second, and thrid channels of the destination. + ConvertPackedYUYVToPlanarYUV(hip_stream_, picture_width, picture_height, destination->channel[0], destination->channel[1], destination->channel[2], + destination->pitch[0], destination->pitch[1], hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]); + } else { + CHECK_ROCJPEG(CopyLuma(destination, picture_height)); + if (hip_interop_.surface_format == VA_FOURCC_NV12) { + // Extract the interleaved UV channels and copy them into the second and thrid channels of the destination. + ConvertInterleavedUVToPlanarUV(hip_stream_, picture_width >> 1, picture_height >> 1, destination->channel[1], destination->channel[2], + destination->pitch[1], hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1] , hip_interop_.pitch[1]); + } else { + CHECK_ROCJPEG(CopyChroma(destination, chroma_height)); + } + } + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus ROCJpegDecoder::GetYOutputFormat(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) { + if (hip_interop_.surface_format == ROCJPEG_FOURCC_YUYV) { + ExtractYFromPackedYUYV(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0], + hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]); + } else { + CHECK_ROCJPEG(CopyLuma(destination, picture_height)); + } + return ROCJPEG_STATUS_SUCCESS; +} \ No newline at end of file diff --git a/projects/rocjpeg/src/rocjpeg_decoder.h b/projects/rocjpeg/src/rocjpeg_decoder.h new file mode 100644 index 0000000000..5fdeddb108 --- /dev/null +++ b/projects/rocjpeg/src/rocjpeg_decoder.h @@ -0,0 +1,76 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef ROC_JPEG_DECODER_H_ +#define ROC_JPEG_DECODER_H_ + +#include +#include +#include +#include +#include +#include "../api/rocjpeg.h" +#include "rocjpeg_parser.h" +#include "rocjpeg_commons.h" +#include "rocjpeg_vaapi_decoder.h" +#include "rocjpeg_hip_kernels.h" + +struct HipInteropDeviceMem { + hipExternalMemory_t hip_ext_mem; // Interface to the vaapi-hip interop + uint8_t* hip_mapped_device_mem; // Mapped device memory for the YUV plane + uint32_t surface_format; // Pixel format fourcc of the whole surface + uint32_t width; // Width of the surface in pixels. + uint32_t height; // Height of the surface in pixels. + uint32_t offset[3]; // Offset of each plane + uint32_t pitch[3]; // Pitch of each plane + uint32_t num_layers; // Number of layers making up the surface +}; + +class ROCJpegDecoder { + public: + ROCJpegDecoder(RocJpegBackend backend = ROCJPEG_BACKEND_HARDWARE, int device_id = 0); + ~ROCJpegDecoder(); + RocJpegStatus InitializeDecoder(); + RocJpegStatus GetImageInfo(const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights); + RocJpegStatus Decode(const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination); + private: + RocJpegStatus InitHIP(int device_id); + RocJpegStatus GetHipInteropMem(VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc); + RocJpegStatus ReleaseHipInteropMem(VASurfaceID current_surface_id); + RocJpegStatus GetChromaHeight(uint16_t picture_height, uint16_t &chroma_height); + RocJpegStatus CopyLuma(RocJpegImage *destination, uint16_t picture_height); + RocJpegStatus CopyChroma(RocJpegImage *destination, uint16_t chroma_height); + RocJpegStatus ColorConvertToRGB(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination); + RocJpegStatus GetPlanarYUVOutputFormat(uint32_t picture_width, uint32_t picture_height, uint16_t chroma_height, RocJpegImage *destination); + RocJpegStatus GetYOutputFormat(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination); + int num_devices_; + int device_id_; + hipDeviceProp_t hip_dev_prop_; + hipStream_t hip_stream_; + std::mutex mutex_; + JpegParser jpeg_parser_; + RocJpegBackend backend_; + RocJpegVappiDecoder jpeg_vaapi_decoder_; + HipInteropDeviceMem hip_interop_; +}; + +#endif //ROC_JPEG_DECODER_H_ \ No newline at end of file diff --git a/projects/rocjpeg/src/rocjpeg_hip_kernels.cpp b/projects/rocjpeg/src/rocjpeg_hip_kernels.cpp new file mode 100644 index 0000000000..9d75648ad0 --- /dev/null +++ b/projects/rocjpeg/src/rocjpeg_hip_kernels.cpp @@ -0,0 +1,961 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "rocjpeg_hip_kernels.h" + +__device__ __forceinline__ uint32_t hipPack(float4 src) { + return __builtin_amdgcn_cvt_pk_u8_f32(src.w, 3, + __builtin_amdgcn_cvt_pk_u8_f32(src.z, 2, + __builtin_amdgcn_cvt_pk_u8_f32(src.y, 1, + __builtin_amdgcn_cvt_pk_u8_f32(src.x, 0, 0)))); +} + +__device__ __forceinline__ float hipUnpack0(uint32_t src) { + return (float)(src & 0xFF); +} + +__device__ __forceinline__ float hipUnpack1(uint32_t src) { + return (float)((src >> 8) & 0xFF); +} + +__device__ __forceinline__ float hipUnpack2(uint32_t src) { + return (float)((src >> 16) & 0xFF); +} + +__device__ __forceinline__ float hipUnpack3(uint32_t src) { + return (float)((src >> 24) & 0xFF); +} + +__device__ __forceinline__ float4 hipUnpack(uint32_t src) { + return make_float4(hipUnpack0(src), hipUnpack1(src), hipUnpack2(src), hipUnpack3(src)); +} + +__global__ void ColorConvertYUV444ToRGBKernel(uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, + uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_y_image, const uint8_t *src_u_image, const uint8_t *src_v_image, + uint32_t src_yuv_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_yuv_image_stride_in_bytes_comp) { + + int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + if ((x < dst_width_comp) && (y < dst_height_comp)) { + uint32_t src_y0_idx = y * src_yuv_image_stride_in_bytes_comp + (x << 3); + uint32_t src_y1_idx = src_y0_idx + src_yuv_image_stride_in_bytes; + + + uint2 y0 = *((uint2 *)(&src_y_image[src_y0_idx])); + uint2 y1 = *((uint2 *)(&src_y_image[src_y1_idx])); + + uint2 u0 = *((uint2 *)(&src_u_image[src_y0_idx])); + uint2 u1 = *((uint2 *)(&src_u_image[src_y1_idx])); + + uint2 v0 = *((uint2 *)(&src_v_image[src_y0_idx])); + uint2 v1 = *((uint2 *)(&src_v_image[src_y1_idx])); + + uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24); + uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes; + + float2 cr = make_float2( 0.0000f, 1.5748f); + float2 cg = make_float2(-0.1873f, -0.4681f); + float2 cb = make_float2( 1.8556f, 0.0000f); + float3 yuv; + DUINT6 rgb0, rgb1; + float4 f; + + yuv.x = hipUnpack0(y0.x); + yuv.y = hipUnpack0(u0.x); + yuv.z = hipUnpack0(v0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(y0.x); + yuv.y = hipUnpack1(u0.x); + yuv.z = hipUnpack1(v0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb0.data[0] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(y0.x); + yuv.y = hipUnpack2(u0.x); + yuv.z = hipUnpack2(v0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb0.data[1] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(y0.x); + yuv.y = hipUnpack3(u0.x); + yuv.z = hipUnpack3(v0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb0.data[2] = hipPack(f); + + yuv.x = hipUnpack0(y0.y); + yuv.y = hipUnpack0(u0.y); + yuv.z = hipUnpack0(v0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(y0.y); + yuv.y = hipUnpack1(u0.y); + yuv.z = hipUnpack1(v0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb0.data[3] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(y0.y); + yuv.y = hipUnpack2(u0.y); + yuv.z = hipUnpack2(v0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb0.data[4] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(y0.y); + yuv.y = hipUnpack3(u0.y); + yuv.z = hipUnpack3(v0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb0.data[5] = hipPack(f); + + yuv.x = hipUnpack0(y1.x); + yuv.y = hipUnpack0(u1.x); + yuv.z = hipUnpack0(v1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(y1.x); + yuv.y = hipUnpack1(u1.x); + yuv.z = hipUnpack1(v1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb1.data[0] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(y1.x); + yuv.y = hipUnpack2(u1.x); + yuv.z = hipUnpack2(v1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb1.data[1] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(y1.x); + yuv.y = hipUnpack3(u1.x); + yuv.z = hipUnpack3(v1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb1.data[2] = hipPack(f); + + yuv.x = hipUnpack0(y1.y); + yuv.y = hipUnpack0(u1.y); + yuv.z = hipUnpack0(v1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(y1.y); + yuv.y = hipUnpack1(u1.y); + yuv.z = hipUnpack1(v1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb1.data[3] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(y1.y); + yuv.y = hipUnpack2(u1.y); + yuv.z = hipUnpack2(v1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb1.data[4] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(y1.y); + yuv.y = hipUnpack3(u1.y); + yuv.z = hipUnpack3(v1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb1.data[5] = hipPack(f); + + *((DUINT6 *)(&dst_image[rgb0_idx])) = rgb0; + *((DUINT6 *)(&dst_image[rgb1_idx])) = rgb1; + } +} + +void ColorConvertYUV444ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image, + uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset) { + + int32_t local_threads_x = 16; + int32_t local_threads_y = 4; + int32_t global_threads_x = (dst_width + 7) >> 3; + int32_t global_threads_y = (dst_height + 1) >> 1; + + uint32_t dst_width_comp = (dst_width + 7) / 8; + uint32_t dst_height_comp = (dst_height + 1) / 2; + uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2; + uint32_t src_yuv_image_stride_in_bytes_comp = src_yuv_image_stride_in_bytes * 2; + + ColorConvertYUV444ToRGBKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, (uint8_t *)dst_image, + dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_yuv_image, src_yuv_image + src_u_image_offset, + src_yuv_image + (src_u_image_offset * 2), src_yuv_image_stride_in_bytes, + dst_width_comp, dst_height_comp, src_yuv_image_stride_in_bytes_comp); +} + +__global__ void ColorConvertYUYVToRGBKernel(uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, + const uint8_t *src_image, uint32_t src_image_stride_in_bytes, uint32_t src_image_stride_in_bytes_comp, + uint32_t dst_width_comp, uint32_t dst_height_comp) { + + int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + if ((x < dst_width_comp) && (y < dst_height_comp)) { + uint32_t l0_idx = y * src_image_stride_in_bytes_comp + (x << 4); + uint32_t l1_idx = l0_idx + src_image_stride_in_bytes; + uint4 l0 = *((uint4 *)(&src_image[l0_idx])); + uint4 l1 = *((uint4 *)(&src_image[l1_idx])); + + uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24); + uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes; + + float4 f; + + uint2 py0, py1; + uint2 pu0, pu1; + uint2 pv0, pv1; + + py0.x = hipPack(make_float4(hipUnpack0(l0.x), hipUnpack2(l0.x), hipUnpack0(l0.y), hipUnpack2(l0.y))); + py0.y = hipPack(make_float4(hipUnpack0(l0.z), hipUnpack2(l0.z), hipUnpack0(l0.w), hipUnpack2(l0.w))); + py1.x = hipPack(make_float4(hipUnpack0(l1.x), hipUnpack2(l1.x), hipUnpack0(l1.y), hipUnpack2(l1.y))); + py1.y = hipPack(make_float4(hipUnpack0(l1.z), hipUnpack2(l1.z), hipUnpack0(l1.w), hipUnpack2(l1.w))); + pu0.x = hipPack(make_float4(hipUnpack1(l0.x), hipUnpack1(l0.x), hipUnpack1(l0.y), hipUnpack1(l0.y))); + pu0.y = hipPack(make_float4(hipUnpack1(l0.z), hipUnpack1(l0.z), hipUnpack1(l0.w), hipUnpack1(l0.w))); + pu1.x = hipPack(make_float4(hipUnpack1(l1.x), hipUnpack1(l1.x), hipUnpack1(l1.y), hipUnpack1(l1.y))); + pu1.y = hipPack(make_float4(hipUnpack1(l1.z), hipUnpack1(l1.z), hipUnpack1(l1.w), hipUnpack1(l1.w))); + pv0.x = hipPack(make_float4(hipUnpack3(l0.x), hipUnpack3(l0.x), hipUnpack3(l0.y), hipUnpack3(l0.y))); + pv0.y = hipPack(make_float4(hipUnpack3(l0.z), hipUnpack3(l0.z), hipUnpack3(l0.w), hipUnpack3(l0.w))); + pv1.x = hipPack(make_float4(hipUnpack3(l1.x), hipUnpack3(l1.x), hipUnpack3(l1.y), hipUnpack3(l1.y))); + pv1.y = hipPack(make_float4(hipUnpack3(l1.z), hipUnpack3(l1.z), hipUnpack3(l1.w), hipUnpack3(l1.w))); + + float2 cr = make_float2( 0.0000f, 1.5748f); + float2 cg = make_float2(-0.1873f, -0.4681f); + float2 cb = make_float2( 1.8556f, 0.0000f); + float3 yuv; + DUINT6 prgb0, prgb1; + + yuv.x = hipUnpack0(py0.x); + yuv.y = hipUnpack0(pu0.x); + yuv.z = hipUnpack0(pv0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(py0.x); + yuv.y = hipUnpack1(pu0.x); + yuv.z = hipUnpack1(pv0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + prgb0.data[0] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(py0.x); + yuv.y = hipUnpack2(pu0.x); + yuv.z = hipUnpack2(pv0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + prgb0.data[1] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(py0.x); + yuv.y = hipUnpack3(pu0.x); + yuv.z = hipUnpack3(pv0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + prgb0.data[2] = hipPack(f); + + yuv.x = hipUnpack0(py0.y); + yuv.y = hipUnpack0(pu0.y); + yuv.z = hipUnpack0(pv0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(py0.y); + yuv.y = hipUnpack1(pu0.y); + yuv.z = hipUnpack1(pv0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + prgb0.data[3] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(py0.y); + yuv.y = hipUnpack2(pu0.y); + yuv.z = hipUnpack2(pv0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + prgb0.data[4] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(py0.y); + yuv.y = hipUnpack3(pu0.y); + yuv.z = hipUnpack3(pv0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + prgb0.data[5] = hipPack(f); + + yuv.x = hipUnpack0(py1.x); + yuv.y = hipUnpack0(pu1.x); + yuv.z = hipUnpack0(pv1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(py1.x); + yuv.y = hipUnpack1(pu1.x); + yuv.z = hipUnpack1(pv1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + prgb1.data[0] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(py1.x); + yuv.y = hipUnpack2(pu1.x); + yuv.z = hipUnpack2(pv1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + prgb1.data[1] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(py1.x); + yuv.y = hipUnpack3(pu1.x); + yuv.z = hipUnpack3(pv1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + prgb1.data[2] = hipPack(f); + + yuv.x = hipUnpack0(py1.y); + yuv.y = hipUnpack0(pu1.y); + yuv.z = hipUnpack0(pv1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(py1.y); + yuv.y = hipUnpack1(pu1.y); + yuv.z = hipUnpack1(pv1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + prgb1.data[3] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(py1.y); + yuv.y = hipUnpack2(pu1.y); + yuv.z = hipUnpack2(pv1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + prgb1.data[4] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(py1.y); + yuv.y = hipUnpack3(pu1.y); + yuv.z = hipUnpack3(pv1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + prgb1.data[5] = hipPack(f); + + *((DUINT6 *)(&dst_image[rgb0_idx])) = prgb0; + *((DUINT6 *)(&dst_image[rgb1_idx])) = prgb1; + } +} + +void ColorConvertYUYVToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, + const uint8_t *src_image, uint32_t src_image_stride_in_bytes) { + int32_t local_threads_x = 16; + int32_t local_threads_y = 4; + int32_t global_threads_x = (dst_width + 7) >> 3; + int32_t global_threads_y = (dst_height + 1) >> 1; + + uint32_t dst_width_comp = (dst_width + 7) / 8; + uint32_t dst_height_comp = (dst_height + 1) / 2; + uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2; + uint32_t src_image_stride_in_bytes_comp = src_image_stride_in_bytes * 2; + + ColorConvertYUYVToRGBKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, (uint8_t *)dst_image, + dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_image, src_image_stride_in_bytes, + src_image_stride_in_bytes_comp, dst_width_comp, dst_height_comp); +} + +__global__ void ColorConvertNV12ToRGBKernel(uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, + const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes, + const uint8_t *src_chroma_image, uint32_t src_chroma_image_stride_in_bytes, + uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_luma_image_stride_in_bytes_comp) { + + int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + if ((x < dst_width_comp) && (y < dst_height_comp)) { + uint32_t src_y0_idx = y * src_luma_image_stride_in_bytes_comp + (x << 3); + uint32_t src_y1_idx = src_y0_idx + src_luma_image_stride_in_bytes; + uint32_t src_uv_idx = y * src_chroma_image_stride_in_bytes + (x << 3); + uint2 y0 = *((uint2 *)(&src_luma_image[src_y0_idx])); + uint2 y1 = *((uint2 *)(&src_luma_image[src_y1_idx])); + uint2 uv = *((uint2 *)(&src_chroma_image[src_uv_idx])); + + uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24); + uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes; + + float4 f; + uint2 u0, u1; + uint2 v0, v1; + + f.x = hipUnpack0(uv.x); + f.y = f.x; + f.z = hipUnpack2(uv.x); + f.w = f.z; + u0.x = hipPack(f); + + f.x = hipUnpack0(uv.y); + f.y = f.x; + f.z = hipUnpack2(uv.y); + f.w = f.z; + u0.y = hipPack(f); + + u1.x = u0.x; + u1.y = u0.y; + + f.x = hipUnpack1(uv.x); + f.y = f.x; + f.z = hipUnpack3(uv.x); + f.w = f.z; + v0.x = hipPack(f); + + f.x = hipUnpack1(uv.y); + f.y = f.x; + f.z = hipUnpack3(uv.y); + f.w = f.z; + v0.y = hipPack(f); + + v1.x = v0.x; + v1.y = v0.y; + + float2 cr = make_float2( 0.0000f, 1.5748f); + float2 cg = make_float2(-0.1873f, -0.4681f); + float2 cb = make_float2( 1.8556f, 0.0000f); + float3 yuv; + DUINT6 rgb0, rgb1; + + yuv.x = hipUnpack0(y0.x); + yuv.y = hipUnpack0(u0.x); + yuv.z = hipUnpack0(v0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(y0.x); + yuv.y = hipUnpack1(u0.x); + yuv.z = hipUnpack1(v0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb0.data[0] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(y0.x); + yuv.y = hipUnpack2(u0.x); + yuv.z = hipUnpack2(v0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb0.data[1] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(y0.x); + yuv.y = hipUnpack3(u0.x); + yuv.z = hipUnpack3(v0.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb0.data[2] = hipPack(f); + + yuv.x = hipUnpack0(y0.y); + yuv.y = hipUnpack0(u0.y); + yuv.z = hipUnpack0(v0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(y0.y); + yuv.y = hipUnpack1(u0.y); + yuv.z = hipUnpack1(v0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb0.data[3] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(y0.y); + yuv.y = hipUnpack2(u0.y); + yuv.z = hipUnpack2(v0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb0.data[4] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(y0.y); + yuv.y = hipUnpack3(u0.y); + yuv.z = hipUnpack3(v0.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb0.data[5] = hipPack(f); + + yuv.x = hipUnpack0(y1.x); + yuv.y = hipUnpack0(u1.x); + yuv.z = hipUnpack0(v1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(y1.x); + yuv.y = hipUnpack1(u1.x); + yuv.z = hipUnpack1(v1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb1.data[0] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(y1.x); + yuv.y = hipUnpack2(u1.x); + yuv.z = hipUnpack2(v1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb1.data[1] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(y1.x); + yuv.y = hipUnpack3(u1.x); + yuv.z = hipUnpack3(v1.x); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb1.data[2] = hipPack(f); + + yuv.x = hipUnpack0(y1.y); + yuv.y = hipUnpack0(u1.y); + yuv.z = hipUnpack0(v1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack1(y1.y); + yuv.y = hipUnpack1(u1.y); + yuv.z = hipUnpack1(v1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb1.data[3] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack2(y1.y); + yuv.y = hipUnpack2(u1.y); + yuv.z = hipUnpack2(v1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb1.data[4] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv.x = hipUnpack3(y1.y); + yuv.y = hipUnpack3(u1.y); + yuv.z = hipUnpack3(v1.y); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb1.data[5] = hipPack(f); + + *((DUINT6 *)(&dst_image[rgb0_idx])) = rgb0; + *((DUINT6 *)(&dst_image[rgb1_idx])) = rgb1; + } +} + +void ColorConvertNV12ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, + const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes, + const uint8_t *src_chroma_image, uint32_t src_chroma_image_stride_in_bytes) { + int32_t local_threads_x = 16; + int32_t local_threads_y = 4; + int32_t global_threads_x = (dst_width + 7) >> 3; + int32_t global_threads_y = (dst_height + 1) >> 1; + + uint32_t dst_width_comp = (dst_width + 7) / 8; + uint32_t dst_height_comp = (dst_height + 1) / 2; + uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2; + uint32_t src_luma_image_stride_in_bytes_comp = src_luma_image_stride_in_bytes * 2; + + ColorConvertNV12ToRGBKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, dst_image_stride_in_bytes, + dst_image_stride_in_bytes_comp, src_luma_image, src_luma_image_stride_in_bytes, src_chroma_image, + src_chroma_image_stride_in_bytes, dst_width_comp, dst_height_comp, src_luma_image_stride_in_bytes_comp); +} + +__global__ void ColorConvertYUV400ToRGBKernel(uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, + const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes, + uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_luma_image_stride_in_bytes_comp) { + + int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + if ((x < dst_width_comp) && (y < dst_height_comp)) { + uint32_t src_y0_idx = y * src_luma_image_stride_in_bytes_comp + (x << 3); + uint32_t src_y1_idx = src_y0_idx + src_luma_image_stride_in_bytes; + + uint2 y0 = *((uint2 *)(&src_luma_image[src_y0_idx])); + uint2 y1 = *((uint2 *)(&src_luma_image[src_y1_idx])); + + uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24); + uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes; + + DUINT6 rgb0, rgb1; + + uint8_t y0_b0, y0_b1, y0_b2, y0_b3, y0_b4, y0_b5, y0_b6, y0_b7; + uint8_t y1_b0, y1_b1, y1_b2, y1_b3, y1_b4, y1_b5, y1_b6, y1_b7; + + y0_b0 = hipUnpack0(y0.x); + y0_b1 = hipUnpack1(y0.x); + y0_b2 = hipUnpack2(y0.x); + y0_b3 = hipUnpack3(y0.x); + y0_b4 = hipUnpack0(y0.y); + y0_b5 = hipUnpack1(y0.y); + y0_b6 = hipUnpack2(y0.y); + y0_b7 = hipUnpack3(y0.y); + + y1_b0 = hipUnpack0(y1.x); + y1_b1 = hipUnpack1(y1.x); + y1_b2 = hipUnpack2(y1.x); + y1_b3 = hipUnpack3(y1.x); + y1_b4 = hipUnpack0(y1.y); + y1_b5 = hipUnpack1(y1.y); + y1_b6 = hipUnpack2(y1.y); + y1_b7 = hipUnpack3(y1.y); + + rgb0.data[0] = hipPack(make_float4(y0_b0, y0_b0, y0_b0, y0_b1)); + rgb0.data[1] = hipPack(make_float4(y0_b1, y0_b1, y0_b2, y0_b2)); + rgb0.data[2] = hipPack(make_float4(y0_b2, y0_b3, y0_b3, y0_b3)); + rgb0.data[3] = hipPack(make_float4(y0_b4, y0_b4, y0_b4, y0_b5)); + rgb0.data[4] = hipPack(make_float4(y0_b5, y0_b5, y0_b6, y0_b6)); + rgb0.data[5] = hipPack(make_float4(y0_b6, y0_b7, y0_b7, y0_b7)); + + rgb1.data[0] = hipPack(make_float4(y1_b0, y1_b0, y1_b0, y1_b1)); + rgb1.data[1] = hipPack(make_float4(y1_b1, y1_b1, y1_b2, y1_b2)); + rgb1.data[2] = hipPack(make_float4(y1_b2, y1_b3, y1_b3, y1_b3)); + rgb1.data[3] = hipPack(make_float4(y1_b4, y1_b4, y1_b4, y1_b5)); + rgb1.data[4] = hipPack(make_float4(y1_b5, y1_b5, y1_b6, y1_b6)); + rgb1.data[5] = hipPack(make_float4(y1_b6, y1_b7, y1_b7, y1_b7)); + + *((DUINT6 *)(&dst_image[rgb0_idx])) = rgb0; + *((DUINT6 *)(&dst_image[rgb1_idx])) = rgb1; + } +} + +void ColorConvertYUV400ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, + const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes){ + + int32_t local_threads_x = 16; + int32_t local_threads_y = 4; + int32_t global_threads_x = (dst_width + 7) >> 3; + int32_t global_threads_y = (dst_height + 1) >> 1; + + uint32_t dst_width_comp = (dst_width + 7) / 8; + uint32_t dst_height_comp = (dst_height + 1) / 2; + uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2; + uint32_t src_luma_image_stride_in_bytes_comp = src_luma_image_stride_in_bytes * 2; + + ColorConvertYUV400ToRGBKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, dst_image_stride_in_bytes, + dst_image_stride_in_bytes_comp, src_luma_image, src_luma_image_stride_in_bytes, dst_width_comp, dst_height_comp, + src_luma_image_stride_in_bytes_comp); + +} + + +__global__ void ConvertInterleavedUVToPlanarUVKernel(uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image1, uint8_t *dst_image2, uint32_t dst_image_stride_in_bytes, + const uint8_t *src_image, uint32_t src_image_stride_in_bytes) { + + int32_t x = (hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x) * 8; + int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + if (x >= dst_width || y >= dst_height) { + return; + } + + uint32_t src_idx = y * src_image_stride_in_bytes + x + x; + uint32_t dst_idx = y * dst_image_stride_in_bytes + x; + + uint4 src = *((uint4 *)(&src_image[src_idx])); + uint2 dst1, dst2; + + dst1.x = hipPack(make_float4(hipUnpack0(src.x), hipUnpack2(src.x), hipUnpack0(src.y), hipUnpack2(src.y))); + dst1.y = hipPack(make_float4(hipUnpack0(src.z), hipUnpack2(src.z), hipUnpack0(src.w), hipUnpack2(src.w))); + dst2.x = hipPack(make_float4(hipUnpack1(src.x), hipUnpack3(src.x), hipUnpack1(src.y), hipUnpack3(src.y))); + dst2.y = hipPack(make_float4(hipUnpack1(src.z), hipUnpack3(src.z), hipUnpack1(src.w), hipUnpack3(src.w))); + + *((uint2 *)(&dst_image1[dst_idx])) = dst1; + *((uint2 *)(&dst_image2[dst_idx])) = dst2; + +} +void ConvertInterleavedUVToPlanarUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image1, uint8_t *dst_image2, uint32_t dst_image_stride_in_bytes, + const uint8_t *src_image1, uint32_t src_image1_stride_in_bytes) { + int32_t local_threads_x = 16, local_threads_y = 16; + int32_t global_threads_x = (dst_width + 7) >> 3; + int32_t global_threads_y = dst_height; + + ConvertInterleavedUVToPlanarUVKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image1, dst_image2, + dst_image_stride_in_bytes, src_image1, src_image1_stride_in_bytes); + +} + +__global__ void ExtractYFromPackedYUYVKernel(uint32_t dst_width, uint32_t dst_height, + uint8_t *destination_y, uint32_t dst_luma_stride_in_bytes, + const uint8_t *src_image, uint32_t src_image_stride_in_bytes, + uint32_t dst_width_comp) { + + int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + if (x < dst_width_comp && y < dst_height) { + uint32_t src_idx = y * src_image_stride_in_bytes + (x << 4); + uint32_t dst_idx = y * dst_luma_stride_in_bytes + (x << 3); + + uint4 src = *((uint4 *)(&src_image[src_idx])); + uint2 dst_y; + dst_y.x = hipPack(make_float4(hipUnpack0(src.x), hipUnpack2(src.x), hipUnpack0(src.y), hipUnpack2(src.y))); + dst_y.y = hipPack(make_float4(hipUnpack0(src.z), hipUnpack2(src.z), hipUnpack0(src.w), hipUnpack2(src.w))); + + *((uint2 *)(&destination_y[dst_idx])) = dst_y; + } +} + +void ExtractYFromPackedYUYV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *destination_y, uint32_t dst_luma_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes) { + int32_t local_threads_x = 16; + int32_t local_threads_y = 4; + int32_t global_threads_x = (dst_width + 7) >> 3; + int32_t global_threads_y = dst_height; + + uint32_t dst_width_comp = (dst_width + 7) / 8; + + ExtractYFromPackedYUYVKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, destination_y, + dst_luma_stride_in_bytes, src_image, src_image_stride_in_bytes, dst_width_comp); +} + +__global__ void ConvertPackedYUYVToPlanarYUVKernel(uint32_t dst_width, uint32_t dst_height, + uint8_t *destination_y, uint8_t *destination_u, uint8_t *destination_v, uint32_t dst_luma_stride_in_bytes, uint32_t dst_chroma_stride_in_bytes, + const uint8_t *src_image, uint32_t src_image_stride_in_bytes, + uint32_t dst_width_comp) { + + int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + if ((x < dst_width_comp && y < dst_height)) { + uint32_t src_idx = y * src_image_stride_in_bytes + (x << 4); + uint32_t dst_y_idx = y * dst_luma_stride_in_bytes + (x << 3); + uint32_t dst_uv_idx = y * dst_chroma_stride_in_bytes + (x << 2); + + uint4 src = *((uint4 *)(&src_image[src_idx])); + uint2 dst_y; + uint32_t dst_u, dst_v; + + dst_y.x = hipPack(make_float4(hipUnpack0(src.x), hipUnpack2(src.x), hipUnpack0(src.y), hipUnpack2(src.y))); + dst_y.y = hipPack(make_float4(hipUnpack0(src.z), hipUnpack2(src.z), hipUnpack0(src.w), hipUnpack2(src.w))); + dst_u = hipPack(make_float4(hipUnpack1(src.x), hipUnpack1(src.y), hipUnpack1(src.z), hipUnpack1(src.w))); + dst_v = hipPack(make_float4(hipUnpack3(src.x), hipUnpack3(src.y), hipUnpack3(src.z), hipUnpack3(src.w))); + + *((uint2 *)(&destination_y[dst_y_idx])) = dst_y; + *((uint32_t *)(&destination_u[dst_uv_idx])) = dst_u; + *((uint32_t *)(&destination_v[dst_uv_idx])) = dst_v; + } +} + +void ConvertPackedYUYVToPlanarYUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *destination_y, uint8_t *destination_u, uint8_t *destination_v, uint32_t dst_luma_stride_in_bytes, uint32_t dst_chroma_stride_in_bytes, + const uint8_t *src_image, uint32_t src_image_stride_in_bytes) { + + int32_t local_threads_x = 16; + int32_t local_threads_y = 4; + int32_t global_threads_x = (dst_width + 7) >> 3; + int32_t global_threads_y = dst_height; + uint32_t dst_width_comp = (dst_width + 7) / 8; + + ConvertPackedYUYVToPlanarYUVKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, destination_y, destination_u, + destination_v, dst_luma_stride_in_bytes, dst_chroma_stride_in_bytes, src_image, src_image_stride_in_bytes, dst_width_comp); +} \ No newline at end of file diff --git a/projects/rocjpeg/src/rocjpeg_hip_kernels.h b/projects/rocjpeg/src/rocjpeg_hip_kernels.h new file mode 100644 index 0000000000..a279d318f4 --- /dev/null +++ b/projects/rocjpeg/src/rocjpeg_hip_kernels.h @@ -0,0 +1,62 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef ROC_JPEG_HIP_KERNELS_H_ +#define ROC_JPEG_HIP_KERNELS_H_ + +#pragma once + +#include + +void ColorConvertYUV444ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image, + uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset); + +void ColorConvertYUYVToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, + const uint8_t *src_image, uint32_t src_image_stride_in_bytes); + +void ColorConvertNV12ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, + const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes, + const uint8_t *src_chroma_image, uint32_t src_chroma_image_stride_in_bytes); + +void ColorConvertYUV400ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, + const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes); + +void ConvertInterleavedUVToPlanarUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image1, uint8_t *dst_image2, uint32_t dst_image_stride_in_bytes, + const uint8_t *src_image1, uint32_t src_image1_stride_in_bytes); + +void ExtractYFromPackedYUYV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *destination_y, uint32_t dst_luma_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes); + +void ConvertPackedYUYVToPlanarYUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *destination_y, uint8_t *destination_u, uint8_t *destination_v, uint32_t dst_luma_stride_in_bytes, + uint32_t dst_chroma_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes); + +typedef struct UINT6TYPE { + uint data[6]; +} DUINT6; + +#endif //ROC_JPEG_HIP_KERNELS_H_ \ No newline at end of file diff --git a/projects/rocjpeg/src/rocjpeg_parser.cpp b/projects/rocjpeg/src/rocjpeg_parser.cpp new file mode 100644 index 0000000000..a4edbffa30 --- /dev/null +++ b/projects/rocjpeg/src/rocjpeg_parser.cpp @@ -0,0 +1,387 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "rocjpeg_parser.h" + +JpegParser::JpegParser() : stream_{nullptr}, stream_end_{nullptr}, stream_length_{0}, + jpeg_stream_parameters_{{}} { +} + +JpegParser::~JpegParser() { + stream_ = nullptr; + stream_end_ = nullptr; + stream_length_ = 0; +} + +bool JpegParser::ParseJpegStream(const uint8_t *jpeg_stream, uint32_t jpeg_stream_size) { + if (jpeg_stream == nullptr) { + ERR("invalid argument!"); + return false; + } + + stream_ = jpeg_stream; + stream_length_ = jpeg_stream_size; + stream_end_ = stream_ + stream_length_; + + jpeg_stream_parameters_ = {}; + bool soi_marker_found = false; + bool sos_marker_found = false; + bool dht_marker_found = false; + bool dqt_marker_found = false; + uint8_t marker; + const uint8_t *next_chunck; + int32_t chuck_len; + + // The first two bytes of a JPEG must be 0XFFD8 + if (*stream_ != 0xFF || *(stream_ + 1) != SOI) { + ERR("Invalid JPEG!"); + return false; + } + + soi_marker_found = ParseSOI(); + if (!soi_marker_found) { + ERR("failed to find the SOI marker!"); + } + + while (!sos_marker_found && stream_ <= stream_end_) { + while ((*stream_ == 0xFF)) + stream_++; + marker = *stream_++; + chuck_len = swap_bytes(stream_); + next_chunck = stream_ + chuck_len; + + switch (marker) { + case SOF: + if (!ParseSOF()) + return false; + break; + case DHT: + if (!ParseDHT()) + return false; + dht_marker_found = true; + break; + case DQT: + if (!ParseDQT()) + return false; + dqt_marker_found = true; + break; + case DRI: + if (!ParseDRI()) + return false; + break; + case SOS: + if (!ParseSOS()) + return false; + sos_marker_found = true; + break; + default: + break; + } + stream_ = next_chunck; + } + + if (!dht_marker_found) { + ERR("didn't find any Huffman table!"); + return false; + } + if (!dqt_marker_found) { + ERR("didn't find any quantization table!"); + return false; + } + + if (!ParseEOI()) + return false; + + return true; +} + +bool JpegParser::ParseSOI() { + if (stream_ == nullptr) { + return false; + } + while (!(*stream_ == 0xFF && *(stream_ + 1) == SOI)) { + if (stream_ <= stream_end_) { + stream_++; + continue; + } else + return false; + } + stream_ += 2; + + return true; +} + +bool JpegParser::ParseSOF() { + uint32_t component_id, sampling_factor; + uint8_t quantiser_table_selector; + + if (stream_ == nullptr) { + return false; + } + + jpeg_stream_parameters_.picture_parameter_buffer.picture_height = swap_bytes(stream_ + 3); + jpeg_stream_parameters_.picture_parameter_buffer.picture_width = swap_bytes(stream_ + 5); + jpeg_stream_parameters_.picture_parameter_buffer.num_components = stream_[7]; + + if (jpeg_stream_parameters_.picture_parameter_buffer.num_components > NUM_COMPONENTS - 1) { + ERR("invalid number of JPEG components!"); + return false; + } + + stream_ += 8; + + for (int32_t i = 0; i < jpeg_stream_parameters_.picture_parameter_buffer.num_components; i++) { + component_id = *stream_++; + sampling_factor = *stream_++; + quantiser_table_selector = *stream_++; + + jpeg_stream_parameters_.picture_parameter_buffer.components[i].component_id = component_id; + if (quantiser_table_selector >= NUM_COMPONENTS) { + ERR("invalid number of the quantization table!"); + return false; + } + jpeg_stream_parameters_.picture_parameter_buffer.components[i].v_sampling_factor = sampling_factor & 0xF; + jpeg_stream_parameters_.picture_parameter_buffer.components[i].h_sampling_factor = sampling_factor >> 4; + jpeg_stream_parameters_.picture_parameter_buffer.components[i].quantiser_table_selector = quantiser_table_selector; + } + + uint8_t max_h_factor = jpeg_stream_parameters_.picture_parameter_buffer.components[0].h_sampling_factor; + uint8_t max_v_factor = jpeg_stream_parameters_.picture_parameter_buffer.components[0].v_sampling_factor; + + jpeg_stream_parameters_.slice_parameter_buffer.num_mcus = ((jpeg_stream_parameters_.picture_parameter_buffer.picture_width + max_h_factor * 8 - 1) / (max_h_factor * 8)) * + ((jpeg_stream_parameters_.picture_parameter_buffer.picture_height + max_v_factor * 8 - 1) / (max_v_factor * 8)); + + jpeg_stream_parameters_.chroma_subsampling = GetChromaSubsampling(jpeg_stream_parameters_.picture_parameter_buffer.components[0].h_sampling_factor, + jpeg_stream_parameters_.picture_parameter_buffer.components[1].h_sampling_factor, + jpeg_stream_parameters_.picture_parameter_buffer.components[2].h_sampling_factor, + jpeg_stream_parameters_.picture_parameter_buffer.components[0].v_sampling_factor, + jpeg_stream_parameters_.picture_parameter_buffer.components[1].v_sampling_factor, + jpeg_stream_parameters_.picture_parameter_buffer.components[2].v_sampling_factor); + return true; +} + +bool JpegParser::ParseDQT() { + int32_t quantization_table_index = 0; + const uint8_t *dqt_block_end; + + if (stream_ == nullptr) { + return false; + } + + dqt_block_end = stream_ + swap_bytes(stream_); + stream_ += 2; + + while (stream_ < dqt_block_end) { + quantization_table_index = *stream_++; + if (quantization_table_index >> 4) { + ERR("16 bits quantization table is not supported!"); + return false; + } + if (quantization_table_index >= 4) { + ERR("invalid number of quantization table!"); + return false; + } + + std::memcpy(jpeg_stream_parameters_.quantization_matrix_buffer.quantiser_table[quantization_table_index & 0x0F], stream_, 64); + jpeg_stream_parameters_.quantization_matrix_buffer.load_quantiser_table[quantization_table_index & 0x0F] = 1; + + stream_ += 64; + } + + return true; +} + +bool JpegParser::ParseDHT() { + uint32_t count, i; + int32_t length, index; + uint8_t ac_huffman_table, huffman_table_id; + + if (stream_ == nullptr) { + return false; + } + + length = swap_bytes(stream_) - 2; + stream_ += 2; + + while (length > 0) { + index = *stream_++; + + ac_huffman_table = index & 0xF0; + huffman_table_id = index & 0x0F; + + if (huffman_table_id >= HUFFMAN_TABLES) { + ERR("invlaid number of Huffman table!"); + return false; + } + + if (ac_huffman_table) { + std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].num_ac_codes, stream_, 16); + } else { + std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].num_dc_codes, stream_, 16); + } + + count = 0; + for (i = 0; i < 16; i++) { + count += *stream_++; + } + + if (ac_huffman_table) { + if (count > AC_HUFFMAN_TABLE_VALUES_SIZE) { + ERR("invalid AC Huffman table!"); + return false; + } + std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].ac_values, stream_, count); + jpeg_stream_parameters_.huffman_table_buffer.load_huffman_table[huffman_table_id] = 1; + } else { + if (count > DC_HUFFMAN_TABLE_VALUES_SIZE) { + ERR("invlaid DC Huffman table!") + return false; + } + std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].dc_values, stream_, count); + jpeg_stream_parameters_.huffman_table_buffer.load_huffman_table[huffman_table_id] = 1; + } + + length -= 1; + length -= 16; + length -= count; + stream_ += count; + } + + return true; +} + +bool JpegParser::ParseSOS() { + uint32_t component_id, table; + + if (stream_ == nullptr) { + return false; + } + + uint32_t num_components = stream_[2]; + + if (num_components > NUM_COMPONENTS - 1) { + ERR("invalid number of component!") + return false; + } + jpeg_stream_parameters_.slice_parameter_buffer.num_components = num_components; + + stream_ += 3; + for (int32_t i = 0; i < num_components; i++) { + component_id = *stream_++; + table = *stream_++; + jpeg_stream_parameters_.slice_parameter_buffer.components[i].component_selector = component_id; + jpeg_stream_parameters_.slice_parameter_buffer.components[i].dc_table_selector = ((table >> 4) & 0x0F); + jpeg_stream_parameters_.slice_parameter_buffer.components[i].ac_table_selector = (table & 0x0F); + + if ((table & 0xF) >= 4) { + ERR("invalid number of AC Huffman table!"); + return false; + } + if ((table >> 4) >= 4) { + ERR("invalid number of DC Huffman table!"); + return false; + } + if (component_id != jpeg_stream_parameters_.picture_parameter_buffer.components[i].component_id) { + ERR("component id mismatch between SOS and SOF marker!"); + return false; + } + } + stream_ += 3; + + return true; +} + + +bool JpegParser::ParseDRI() { + uint32_t length; + + if (stream_ == nullptr) { + return false; + } + + length = swap_bytes(stream_); + if (length != 4) { + ERR("invalid size for DRI marker"); + return false; + } + + jpeg_stream_parameters_.slice_parameter_buffer.restart_interval = swap_bytes(stream_ + 2); + + return true; +} + +bool JpegParser::ParseEOI() { + + if (stream_ == nullptr) { + return false; + } + + const uint8_t *stream_temp = stream_; + while (stream_temp <= stream_end_ && !(*stream_temp == 0xFF && *(stream_temp + 1) == EOI)) { + stream_temp++; + continue; + } + + jpeg_stream_parameters_.slice_parameter_buffer.slice_data_size = stream_temp - stream_; + jpeg_stream_parameters_.slice_data_buffer = stream_; + + return true; +} + +ChromaSubsampling JpegParser::GetChromaSubsampling(uint8_t c1_h_sampling_factor, uint8_t c2_h_sampling_factor, uint8_t c3_h_sampling_factor, + uint8_t c1_v_sampling_factor, uint8_t c2_v_sampling_factor, uint8_t c3_v_sampling_factor) { + + ChromaSubsampling subsampling; + + if ((c1_h_sampling_factor == 1 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 && + c1_v_sampling_factor == 1 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) || + (c1_h_sampling_factor == 2 && c2_h_sampling_factor == 2 && c3_h_sampling_factor == 2 && + c1_v_sampling_factor == 2 && c2_v_sampling_factor == 2 && c3_v_sampling_factor == 2) || + (c1_h_sampling_factor == 4 && c2_h_sampling_factor == 4 && c3_h_sampling_factor == 4 && + c1_v_sampling_factor == 4 && c2_v_sampling_factor == 4 && c3_v_sampling_factor == 4)) { + subsampling = CSS_444; + } else if (c1_h_sampling_factor == 1 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 && + c1_v_sampling_factor == 2 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) { + subsampling = CSS_440; + } else if ((c1_h_sampling_factor == 2 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 && + c1_v_sampling_factor == 1 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) || + (c1_h_sampling_factor == 2 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 && + c1_v_sampling_factor == 2 && c2_v_sampling_factor == 2 && c3_v_sampling_factor == 2) || + (c1_h_sampling_factor == 2 && c2_h_sampling_factor == 2 && c3_h_sampling_factor == 2 && + c1_v_sampling_factor == 2 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1)) { + subsampling = CSS_422; + } else if (c1_h_sampling_factor == 2 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 && + c1_v_sampling_factor == 2 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) { + subsampling = CSS_420; + } else if (c1_h_sampling_factor == 4 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 && + c1_v_sampling_factor == 1 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) { + subsampling = CSS_411; + } else if ((c1_h_sampling_factor == 1 && c2_h_sampling_factor == 0 && c3_h_sampling_factor == 0 && + c1_v_sampling_factor == 1 && c2_v_sampling_factor == 0 && c3_v_sampling_factor == 0) || + (c1_h_sampling_factor == 4 && c2_h_sampling_factor == 0 && c3_h_sampling_factor == 0 && + c1_v_sampling_factor == 4 && c2_v_sampling_factor == 0 && c3_v_sampling_factor == 0)) { + subsampling = CSS_400; + } else { + subsampling = CSS_UNKNOWN; + } + + return subsampling; +} diff --git a/projects/rocjpeg/src/rocjpeg_parser.h b/projects/rocjpeg/src/rocjpeg_parser.h new file mode 100644 index 0000000000..ff31ef7154 --- /dev/null +++ b/projects/rocjpeg/src/rocjpeg_parser.h @@ -0,0 +1,177 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +#ifndef ROC_JPEG_PARSER_H_ +#define ROC_JPEG_PARSER_H_ + +#include +#include +#include +#include "rocjpeg_commons.h" + +#pragma once + +#define NUM_COMPONENTS 4 +#define HUFFMAN_TABLES 2 +#define AC_HUFFMAN_TABLE_VALUES_SIZE 162 +#define DC_HUFFMAN_TABLE_VALUES_SIZE 12 +#define swap_bytes(x) (((x)[0] << 8) | (x)[1]) + +/***************************************************************/ +//! \enum enum JpegMarker +//! common JPEG markers +/***************************************************************/ +enum JpegMarkers { + SOI = 0xD8, /* Start Of Image */ + SOF = 0xC0, /* Start Of Frame for a baseline DCT-based JPEG. */ + DHT = 0xC4, /* Define Huffman Table */ + DQT = 0xDB, /* Define Quantization Table */ + DRI = 0xDD, /* Define Restart Interval */ + SOS = 0xDA, /* Start of Scan */ + EOI = 0xD9, /* End Of Image */ +}; + +/***************************************************************/ +//! \struct Picture parameter for JPEG decoding. +//! This structure holds information from the frame +//! header and definitions from additional segments. + /**************************************************************/ +typedef struct PictureParameterBufferType { + uint16_t picture_width; + uint16_t picture_height; + struct { + uint8_t component_id; + uint8_t h_sampling_factor; + uint8_t v_sampling_factor; + uint8_t quantiser_table_selector; + } components[255]; + uint8_t num_components; + uint8_t color_space; + uint32_t rotation; + uint32_t reserved[7]; +} PictureParameterBuffer; + +/***************************************************************/ +//! \struct Quantization table for JPEG decoding. +//! This structure holds the quantization tables. +//! The maximum number of quatization tables is four. +//! The #load_quantization_table array can be used as a hint to notify +//! which table(s) has valid values. +//! The #quantiser_table values are specified in zig-zag scan order. +/***************************************************************/ +typedef struct QuantizationMatrixBufferType { + uint8_t load_quantiser_table[4]; + uint8_t quantiser_table[4][64]; + uint32_t reserved[4]; +} QuantizationMatrixBuffer; + +/***************************************************************/ +//! \struct Huffman table for JPEG decoding. +//! This structure holds the Huffman tables. +//! The maximum number of Huffman tables is two. +//! The #load_huffman_table array can be used as a hint to notify the +//! which table(s) has valid values. +/***************************************************************/ +typedef struct HuffmanTableBufferType { + uint8_t load_huffman_table[2]; + struct { + uint8_t num_dc_codes[16]; + uint8_t dc_values[12]; + uint8_t num_ac_codes[16]; + uint8_t ac_values[162]; + uint8_t pad[2]; + } huffman_table[2]; + uint32_t reserved[4]; +} HuffmanTableBuffer; + +/***************************************************************/ +//! \struct Slice parameter for JPEG decoding. +//! This structure holds information from the scan header, and +//! definitions from additional segments. +/***************************************************************/ +typedef struct SliceParameterBufferType { + uint32_t slice_data_size; + uint32_t slice_data_offset; + uint32_t slice_data_flag; + uint32_t slice_horizontal_position; + uint32_t slice_vertical_position; + struct { + uint8_t component_selector; + uint8_t dc_table_selector; + uint8_t ac_table_selector; + } components[4]; + uint8_t num_components; + uint16_t restart_interval; + uint32_t num_mcus; + uint32_t reserved[4]; +} SliceParameterBuffer; + +/***************************************************************/ +//! \struct Enum identifies image chroma subsampling values stored inside JPEG input stream +/***************************************************************/ +typedef enum { + CSS_444 = 0, + CSS_440 = 1, + CSS_422 = 2, + CSS_420 = 3, + CSS_411 = 4, + CSS_400 = 5, + CSS_UNKNOWN = -1 +} ChromaSubsampling; + +/***************************************************************/ +//! \struct Jpeg stream parameters. +//! This structure holds all information from a JPEG stream for decoding +/***************************************************************/ +typedef struct JpegParameterBuffersType { + PictureParameterBuffer picture_parameter_buffer; + QuantizationMatrixBuffer quantization_matrix_buffer; + HuffmanTableBuffer huffman_table_buffer; + SliceParameterBuffer slice_parameter_buffer; + ChromaSubsampling chroma_subsampling; + const uint8_t* slice_data_buffer; +} JpegStreamParameters; + +class JpegParser { + public: + JpegParser(); + ~JpegParser(); + bool ParseJpegStream(const uint8_t* jpeg_stream, uint32_t jpeg_stream_size); + const JpegStreamParameters* GetJpegStreamParameters() const {return &jpeg_stream_parameters_;}; + private: + bool ParseSOI(); + bool ParseSOF(); + bool ParseDQT(); + bool ParseSOS(); + bool ParseDHT(); + bool ParseDRI(); + bool ParseEOI(); + ChromaSubsampling GetChromaSubsampling(uint8_t c1_h_sampling_factor, uint8_t c2_h_sampling_factor, uint8_t c3_h_sampling_factor, + uint8_t c1_v_sampling_factor, uint8_t c2_v_sampling_factor, uint8_t c3_v_sampling_factor); + const uint8_t *stream_; + const uint8_t *stream_end_; + uint32_t stream_length_; + JpegStreamParameters jpeg_stream_parameters_; +}; + +#endif // ROC_JPEG_PARSER_H_ \ No newline at end of file diff --git a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp new file mode 100644 index 0000000000..706f419208 --- /dev/null +++ b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp @@ -0,0 +1,301 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "rocjpeg_vaapi_decoder.h" + +RocJpegVappiDecoder::RocJpegVappiDecoder(int device_id) : device_id_{device_id}, drm_fd_{-1}, min_picture_width_{64}, min_picture_height_{64}, + max_picture_width_{4096}, max_picture_height_{4096}, va_display_{0}, va_config_attrib_{{}}, va_config_id_{0}, va_profile_{VAProfileJPEGBaseline}, + va_context_id_{0}, va_surface_ids_{}, va_picture_parameter_buf_id_{0}, va_quantization_matrix_buf_id_{0}, va_huffmantable_buf_id_{0}, + va_slice_param_buf_id_{0}, va_slice_data_buf_id_{0} {}; + +RocJpegVappiDecoder::~RocJpegVappiDecoder() { + if (drm_fd_ != -1) { + close(drm_fd_); + } + if (va_display_) { + RocJpegStatus rocjpeg_status = DestroyDataBuffers(); + if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { + ERR("Error: Failed to destroy VAAPI buffer"); + } + VAStatus va_status; + if (va_surface_ids_.size() > 0) { + va_status = vaDestroySurfaces(va_display_, va_surface_ids_.data(), va_surface_ids_.size()); + if (va_status != VA_STATUS_SUCCESS) { + ERR("ERROR: vaDestroySurfaces failed!"); + } + } + if (va_context_id_) { + va_status = vaDestroyContext(va_display_, va_context_id_); + if (va_status != VA_STATUS_SUCCESS) { + ERR("ERROR: vaDestroyContext failed!"); + } + } + if (va_config_id_) { + va_status = vaDestroyConfig(va_display_, va_config_id_); + if (va_status != VA_STATUS_SUCCESS) { + ERR("ERROR: vaDestroyConfig failed!"); + } + } + va_status = vaTerminate(va_display_); + if (va_status != VA_STATUS_SUCCESS) { + ERR("ERROR: vaTerminate failed!"); + } + + } +} + +RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string gcn_arch_name) { + // There are 8 renderDXXX per physical device on gfx940, gfx941, and gfx942 + int num_render_cards_per_device = ((gcn_arch_name.compare("gfx940") == 0) || + (gcn_arch_name.compare("gfx941") == 0) || + (gcn_arch_name.compare("gfx942") == 0)) ? 8 : 1; + std::string drm_node = "/dev/dri/renderD" + std::to_string(128 + device_id_ * num_render_cards_per_device); + CHECK_ROCJPEG(InitVAAPI(drm_node)); + CHECK_ROCJPEG(CreateDecoderConfig()); + + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus RocJpegVappiDecoder::InitVAAPI(std::string drm_node) { + drm_fd_ = open(drm_node.c_str(), O_RDWR); + if (drm_fd_ < 0) { + ERR("ERROR: failed to open drm node " + drm_node); + return ROCJPEG_STATUS_NOT_INITIALIZED; + } + va_display_ = vaGetDisplayDRM(drm_fd_); + if (!va_display_) { + ERR("ERROR: failed to create va_display!"); + return ROCJPEG_STATUS_NOT_INITIALIZED; + } + vaSetInfoCallback(va_display_, NULL, NULL); + int major_version = 0, minor_version = 0; + CHECK_VAAPI(vaInitialize(va_display_, &major_version, &minor_version)) + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus RocJpegVappiDecoder::CreateDecoderConfig() { + int max_num_entrypoints = vaMaxNumEntrypoints(va_display_); + std::vector jpeg_entrypoint_list; + jpeg_entrypoint_list.resize(max_num_entrypoints); + int num_entrypoints = 0; + CHECK_VAAPI(vaQueryConfigEntrypoints(va_display_, va_profile_, jpeg_entrypoint_list.data(), &num_entrypoints)); + bool hw_jpeg_decoder_supported = false; + if (num_entrypoints > 0) { + for (auto entry_point : jpeg_entrypoint_list) { + if (entry_point == VAEntrypointVLD) { + hw_jpeg_decoder_supported = true; + break; + } + } + } else { + return ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED; + } + + if (hw_jpeg_decoder_supported) { + va_config_attrib_.resize(3); + va_config_attrib_[0].type = VAConfigAttribRTFormat; + va_config_attrib_[1].type = VAConfigAttribMaxPictureWidth; + va_config_attrib_[2].type = VAConfigAttribMaxPictureHeight; + CHECK_VAAPI(vaGetConfigAttributes(va_display_, va_profile_, VAEntrypointVLD, va_config_attrib_.data(), va_config_attrib_.size())); + CHECK_VAAPI(vaCreateConfig(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib_[0], 1, &va_config_id_)); + if (va_config_attrib_[1].value != VA_ATTRIB_NOT_SUPPORTED) { + max_picture_width_ = va_config_attrib_[1].value; + } + if (va_config_attrib_[2].value != VA_ATTRIB_NOT_SUPPORTED) { + max_picture_height_ = va_config_attrib_[2].value; + } + return ROCJPEG_STATUS_SUCCESS; + } else { + return ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED; + } +} + +RocJpegStatus RocJpegVappiDecoder::DestroyDataBuffers() { + if (va_picture_parameter_buf_id_) { + CHECK_VAAPI(vaDestroyBuffer(va_display_, va_picture_parameter_buf_id_)); + va_picture_parameter_buf_id_ = 0; + } + if (va_quantization_matrix_buf_id_) { + CHECK_VAAPI(vaDestroyBuffer(va_display_, va_quantization_matrix_buf_id_)); + va_quantization_matrix_buf_id_ = 0; + } + if (va_huffmantable_buf_id_) { + CHECK_VAAPI(vaDestroyBuffer(va_display_, va_huffmantable_buf_id_)); + va_huffmantable_buf_id_ = 0; + } + if (va_slice_param_buf_id_) { + CHECK_VAAPI(vaDestroyBuffer(va_display_, va_slice_param_buf_id_)); + va_slice_param_buf_id_ = 0; + } + if (va_slice_data_buf_id_) { + CHECK_VAAPI(vaDestroyBuffer(va_display_, va_slice_data_buf_id_)); + va_slice_data_buf_id_ = 0; + } + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus RocJpegVappiDecoder::SubmitDecode(const JpegStreamParameters *jpeg_stream_params, uint32_t &surface_id) { + if (jpeg_stream_params == nullptr) { + return ROCJPEG_STATUS_INVALID_PARAMETER; + } + + if (sizeof(jpeg_stream_params->picture_parameter_buffer) != sizeof(VAPictureParameterBufferJPEGBaseline) || + sizeof(jpeg_stream_params->quantization_matrix_buffer) != sizeof(VAIQMatrixBufferJPEGBaseline) || + sizeof(jpeg_stream_params->huffman_table_buffer) != sizeof(VAHuffmanTableBufferJPEGBaseline) || + sizeof(jpeg_stream_params->slice_parameter_buffer) != sizeof(VASliceParameterBufferJPEGBaseline)) { + return ROCJPEG_STATUS_INVALID_PARAMETER; + } + + if (jpeg_stream_params->picture_parameter_buffer.picture_width < min_picture_width_ || + jpeg_stream_params->picture_parameter_buffer.picture_height < min_picture_height_ || + jpeg_stream_params->picture_parameter_buffer.picture_width > max_picture_width_ || + jpeg_stream_params->picture_parameter_buffer.picture_height > max_picture_height_) { + ERR("The JPEG image resolution is not supported!"); + return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED; + } + + uint8_t surface_format; + switch (jpeg_stream_params->chroma_subsampling) { + case CSS_444: + surface_format = VA_RT_FORMAT_YUV444; + break; + case CSS_422: + surface_format = VA_RT_FORMAT_YUV422; + break; + case CSS_420: + surface_format = VA_RT_FORMAT_YUV420; + break; + case CSS_400: + surface_format = VA_RT_FORMAT_YUV400; + break; + default: + ERR("ERROR: The chroma subsampling is not supported by the VCN hardware!"); + return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED; + break; + } + + VASurfaceID va_surface_id; + CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, jpeg_stream_params->picture_parameter_buffer.picture_width, jpeg_stream_params->picture_parameter_buffer.picture_height, &va_surface_id, 1, nullptr, 1)); + va_surface_ids_.push_back(va_surface_id); + surface_id = va_surface_id; + + if (va_context_id_) { + vaDestroyContext(va_display_, va_context_id_); + va_context_id_ = 0; + } + CHECK_VAAPI(vaCreateContext(va_display_, va_config_id_, jpeg_stream_params->picture_parameter_buffer.picture_width, jpeg_stream_params->picture_parameter_buffer.picture_height, VA_PROGRESSIVE, &va_surface_id, 1, &va_context_id_)); + + CHECK_ROCJPEG(DestroyDataBuffers()); + + CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VAPictureParameterBufferType, sizeof(VAPictureParameterBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->picture_parameter_buffer, &va_picture_parameter_buf_id_)); + CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VAIQMatrixBufferType, sizeof(VAIQMatrixBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->quantization_matrix_buffer, &va_quantization_matrix_buf_id_)); + CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VAHuffmanTableBufferType, sizeof(VAHuffmanTableBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->huffman_table_buffer, &va_huffmantable_buf_id_)); + CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VASliceParameterBufferType, sizeof(VASliceParameterBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->slice_parameter_buffer, &va_slice_param_buf_id_)); + CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VASliceDataBufferType, jpeg_stream_params->slice_parameter_buffer.slice_data_size, 1, (void *)jpeg_stream_params->slice_data_buffer, &va_slice_data_buf_id_)); + + CHECK_VAAPI(vaBeginPicture(va_display_, va_context_id_, va_surface_id)); + CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_picture_parameter_buf_id_, 1)); + CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_quantization_matrix_buf_id_, 1)); + CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_huffmantable_buf_id_, 1)); + CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_slice_param_buf_id_, 1)); + CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_slice_data_buf_id_, 1)); + CHECK_VAAPI(vaEndPicture(va_display_, va_context_id_)); + + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus RocJpegVappiDecoder::ExportSurface(VASurfaceID surface_id, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc) { + + bool is_surface_id_found = false; + int idx = 0; + for (idx = 0; idx < va_surface_ids_.size(); idx++) { + if (va_surface_ids_[idx] == surface_id) { + is_surface_id_found = true; + break; + } + } + if (!is_surface_id_found) { + return ROCJPEG_STATUS_INVALID_PARAMETER; + } + CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id, + VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, + VA_EXPORT_SURFACE_READ_ONLY | + VA_EXPORT_SURFACE_SEPARATE_LAYERS, + &va_drm_prime_surface_desc)); + + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus RocJpegVappiDecoder::SyncSurface(VASurfaceID surface_id) { + VASurfaceStatus surface_status; + bool is_surface_id_found = false; + int idx = 0; + + for (idx = 0; idx < va_surface_ids_.size(); idx++) { + if (va_surface_ids_[idx] == surface_id) { + is_surface_id_found = true; + break; + } + } + + if (!is_surface_id_found) { + return ROCJPEG_STATUS_INVALID_PARAMETER; + } + + CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, surface_id, &surface_status)); + while (surface_status != VASurfaceReady) { + VAStatus va_status = vaSyncSurface(va_display_, surface_id); + if (va_status != VA_STATUS_SUCCESS) { + if (va_status == 0x26 /*VA_STATUS_ERROR_TIMEDOUT*/) { + CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, surface_id, &surface_status)); + } else { + std::cout << "vaSyncSurface() failed with error code: 0x" << std::hex << va_status << + std::dec << "', status: " << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl; + return ROCJPEG_STATUS_RUNTIME_ERROR; + } + } else { + break; + } + } + return ROCJPEG_STATUS_SUCCESS; +} + +RocJpegStatus RocJpegVappiDecoder::ReleaseSurface(VASurfaceID surface_id) { + bool is_surface_id_found = false; + int idx = 0; + + for (idx = 0; idx < va_surface_ids_.size(); idx++) { + if (va_surface_ids_[idx] == surface_id) { + is_surface_id_found = true; + break; + } + } + + if (!is_surface_id_found) { + return ROCJPEG_STATUS_INVALID_PARAMETER; + } + + CHECK_VAAPI(vaDestroySurfaces(va_display_, &va_surface_ids_[idx], 1)); + va_surface_ids_.erase(va_surface_ids_.begin() + idx); + + return ROCJPEG_STATUS_SUCCESS; +} \ No newline at end of file diff --git a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.h b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.h new file mode 100644 index 0000000000..b622f0a6a8 --- /dev/null +++ b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.h @@ -0,0 +1,75 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef ROC_JPEG_VAAPI_DECODER_H_ +#define ROC_JPEG_VAAPI_DECODER_H_ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include "rocjpeg_commons.h" +#include "rocjpeg_parser.h" +#include "../api/rocjpeg.h" + +/*Note: va.h doesn't have VA_FOURCC_YUYV defined but vaExportSurfaceHandle returns 0x56595559 for packed YUYV for YUV 4:2:2*/ +#define ROCJPEG_FOURCC_YUYV 0x56595559 + +class RocJpegVappiDecoder { +public: + RocJpegVappiDecoder(int device_id = 0); + ~RocJpegVappiDecoder(); + RocJpegStatus InitializeDecoder(std::string gcn_arch_name); + RocJpegStatus SubmitDecode(const JpegStreamParameters *jpeg_stream_params, uint32_t &surface_id); + RocJpegStatus ExportSurface(VASurfaceID surface_id, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc); + RocJpegStatus SyncSurface(VASurfaceID surface_id); + RocJpegStatus ReleaseSurface(VASurfaceID surface_id); +private: + int device_id_; + int drm_fd_; + uint32_t min_picture_width_; + uint32_t min_picture_height_; + uint32_t max_picture_width_; + uint32_t max_picture_height_; + VADisplay va_display_; + std::vector va_config_attrib_; + VAConfigID va_config_id_; + VAProfile va_profile_; + VAContextID va_context_id_; + std::vector va_surface_ids_; + VABufferID va_picture_parameter_buf_id_; + VABufferID va_quantization_matrix_buf_id_; + VABufferID va_huffmantable_buf_id_; + VABufferID va_slice_param_buf_id_; + VABufferID va_slice_data_buf_id_; + RocJpegStatus InitVAAPI(std::string drm_node); + RocJpegStatus CreateDecoderConfig(); + RocJpegStatus DestroyDataBuffers(); +}; + +#endif // ROC_JPEG_VAAPI_DECODER_H_ \ No newline at end of file diff --git a/projects/rocjpeg/test/CMakeLists.txt b/projects/rocjpeg/test/CMakeLists.txt new file mode 100644 index 0000000000..b323826c78 --- /dev/null +++ b/projects/rocjpeg/test/CMakeLists.txt @@ -0,0 +1,99 @@ +# ############################################################################## +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# ############################################################################## +cmake_minimum_required(VERSION 3.5) + +project(ROCJPEG-test) + +# make test with CTest +enable_testing() +include(CTest) + +# ROCM Path +if(DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "${White}${PROJECT_NAME}: Default ROCm installation path${ColourReset}") +elseif(ROCM_PATH) + message("-- ${White}${PROJECT_NAME} :ROCM_PATH Set -- ${ROCM_PATH}${ColourReset}") +else() + set(ROCM_PATH /opt/rocm CACHE PATH "${White}${PROJECT_NAME}: Default ROCm installation path${ColourReset}") +endif() + +# find rocJPEG +find_library(ROCJPEG_LIBRARY NAMES ROCJPEG HINTS ${ROCM_PATH}/lib) +find_path(ROCJPEG_INCLUDE_DIR NAMES rocjpeg.h PATHS /opt/rocm/include/rocjpeg ${ROCM_PATH}/include/rocjpeg) + +if(ROCJPEG_LIBRARY AND ROCJPEG_INCLUDE_DIR) + set(ROCJPEG_FOUND TRUE) + message("-- ${White}Using ROCJPEG -- \n\tLibraries:${ROCJPEG_LIBRARY} \n\tIncludes:${ROCJPEG_INCLUDE_DIR}${ColourReset}") +endif() + +if(NOT ROCJPEG_FOUND) + message("-- ${Yellow}${PROJECT_NAME} requires rocJPEG. Install rocJPEG before running CTests") +endif() + +add_test( + NAME + jpeg-decode-fmt-unchanged + COMMAND + "${CMAKE_CTEST_COMMAND}" + --build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode" + "${CMAKE_CURRENT_BINARY_DIR}/jpegDecode" + --build-generator "${CMAKE_GENERATOR}" + --test-command "jpegdecode" + -i ${ROCM_PATH}/share/rocjpeg/images/ +) + +add_test( + NAME + jpeg-decode-fmt-yuv + COMMAND + "${CMAKE_CTEST_COMMAND}" + --build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode" + "${CMAKE_CURRENT_BINARY_DIR}/jpegDecode" + --build-generator "${CMAKE_GENERATOR}" + --test-command "jpegdecode" + -i ${ROCM_PATH}/share/rocjpeg/images/ -fmt yuv +) + +add_test( + NAME + jpeg-decode-fmt-y + COMMAND + "${CMAKE_CTEST_COMMAND}" + --build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode" + "${CMAKE_CURRENT_BINARY_DIR}/jpegDecode" + --build-generator "${CMAKE_GENERATOR}" + --test-command "jpegdecode" + -i ${ROCM_PATH}/share/rocjpeg/images/ -fmt y +) + +add_test( + NAME + jpeg-decode-fmt-rgbi + COMMAND + "${CMAKE_CTEST_COMMAND}" + --build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode" + "${CMAKE_CURRENT_BINARY_DIR}/jpegDecode" + --build-generator "${CMAKE_GENERATOR}" + --test-command "jpegdecode" + -i ${ROCM_PATH}/share/rocjpeg/images/ -fmt rgbi +) \ No newline at end of file