Initial commit of the rocJPEG library (#1)

* rocJPEG initial commit

* Add gitignore file

* clean up

* code clean up

* CMakeLists update

* Update README

* Update readme and setup script

* add docs folder

* update rocjepg headers

* update the rocjepg header

* update the copyright year

* Add nullptr check for all APIs

* update the hip kernels execution

* follow google c++ style guide

* code clean up

* use google c++ style guide for hip kernels

* add support for rocJpegGetErrorName API

* Add ParseCommandLine function

* update the rocjpeg sample

* Update the readme, LIBVA instructions

* udate the rocJPEG script

* Update CMAKE and libdrm

* Add support for ROCJPEG_OUTPUT_UNCHANGED and SaveImage function

* code  clean up

* update the drm dependencies

* remove the unused hipstream

* code clean up

* add -fmt option for selection the output format

* update the GetImageInfo API

* update the sample

* update the save image function based onn the surface and putput format

* add new sample images

* Put hip interop mem into separate functions and some code clean up

* Add support for CopyLuma and CopyChroma

* Add support for the yuv output format for NV12 surface

* Update HIP kernels

* add support for rgbi format conversion for NV12 and YUv444

* Update the status

* Add support for converting YUYV to RGB

* Add support for unpacking YUYV format

* Fix unpacking Y fron YUYV hip kernel

* Add suppoort for extractign packed YUYV to YUV planar

* hip code clean up

* code clean up

* Update the headers

* code clean up

* change hip kernel names

* hip kernel clean up

* hip clean up

* code clean up

* code clean up

* code clean up

* code clean up

* code clean up

* code clean up for jpege decoder class

* code clean up

* code clean up

* code clean up

* code clean up

* update the dockers

* code clean up

* code clean up

* hip kernels clean up

* remove unused hip kernels

* add additional test cases

* update the APIs

* add new hip yuv400torgbi kernel

* update yuv400torgbi kernel

* restructure files

* code clean up

* code clean up

* add jenkins

* code clean up

* code clean up

* update readme

* update docker's README

* make changes based on the reviewers comments

* make changes based on the reviewers comments

* return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED if the resolution of the jpeg is not supported for HW decoding

[ROCm/rocjpeg commit: 6ad4a76177]
Этот коммит содержится в:
Aryan Salmanpour
2024-03-25 17:05:31 -04:00
коммит произвёл GitHub
родитель 52b4d4dd5f
Коммит 929a892e8d
33 изменённых файлов: 4673 добавлений и 3 удалений
+5
Просмотреть файл
@@ -0,0 +1,5 @@
build/
samples/*/build
.vscode/
doxygen_output*
_doxygen/
+121
Просмотреть файл
@@ -0,0 +1,121 @@
// This file is for internal AMD use.
// If you are interested in running your own Jenkins, please raise a github issue for assistance.
def runCompileCommand(platform, project, jobName, boolean debug=false, boolean staticLibrary=false) {
project.paths.construct_build_prefix()
String buildTypeArg = debug ? '-DCMAKE_BUILD_TYPE=Debug' : '-DCMAKE_BUILD_TYPE=Release'
String buildTypeDir = debug ? 'debug' : 'release'
def command = """#!/usr/bin/env bash
set -ex
echo Build rocJPEG - ${buildTypeDir}
cd ${project.paths.project_build_prefix}
mkdir -p build/${buildTypeDir} && cd build/${buildTypeDir}
cmake ${buildTypeArg} ../..
make -j\$(nproc)
sudo make install
sudo make package
ldd -v /opt/rocm/lib/librocjpeg.so.so
"""
platform.runCommand(this, command)
}
def runTestCommand (platform, project) {
String libLocation = ''
if (platform.jenkinsLabel.contains('rhel')) {
libLocation = ':/usr/local/lib'
}
else if (platform.jenkinsLabel.contains('sles')) {
libLocation = ':/usr/local/lib'
}
def command = """#!/usr/bin/env bash
set -ex
export HOME=/home/jenkins
echo make test
cd ${project.paths.project_build_prefix}/build/release
LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib${libLocation} make test ARGS="-VV --rerun-failed --output-on-failure"
echo rocjpeg-sample - jpegDecode
mkdir -p rocjpeg-sample && cd rocjpeg-sample
cmake /opt/rocm/share/rocjpeg/samples/jpegDecode/
make -j8
LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib${libLocation} ./jpegdecode -i /opt/rocm/share/rocjpeg/images/
echo rocjpeg-test package verification
cd ../ && mkdir -p rocjpeg-test && cd rocjpeg-test
cmake /opt/rocm/share/rocjpeg/test/
LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib${libLocation} ctest -VV --rerun-failed --output-on-failure
echo rocjpeg conformance tests
"""
platform.runCommand(this, command)
// Unit tests - TBD
}
def runPackageCommand(platform, project) {
def packageHelper = platform.makePackage(platform.jenkinsLabel, "${project.paths.project_build_prefix}/build/release")
String packageType = ''
String packageInfo = ''
String packageDetail = ''
String osType = ''
String packageRunTime = ''
if (platform.jenkinsLabel.contains('centos') || platform.jenkinsLabel.contains('rhel') || platform.jenkinsLabel.contains('sles')) {
packageType = 'rpm'
packageInfo = 'rpm -qlp'
packageDetail = 'rpm -qi'
packageRunTime = 'rocjpeg-*'
if (platform.jenkinsLabel.contains('sles')) {
osType = 'sles'
}
else if (platform.jenkinsLabel.contains('rhel8')) {
osType = 'rhel8'
}
else if (platform.jenkinsLabel.contains('rhel9')) {
osType = 'rhel9'
}
}
else
{
packageType = 'deb'
packageInfo = 'dpkg -c'
packageDetail = 'dpkg -I'
packageRunTime = 'rocjpeg_*'
if (platform.jenkinsLabel.contains('ubuntu20')) {
osType = 'ubuntu20'
}
else if (platform.jenkinsLabel.contains('ubuntu22')) {
osType = 'ubuntu22'
}
}
def command = """#!/usr/bin/env bash
set -ex
export HOME=/home/jenkins
echo Make rocJPEG Package
cd ${project.paths.project_build_prefix}/build/release
sudo make package
mkdir -p package
mv rocjpeg-dev*.${packageType} package/${osType}-rocjpeg-dev.${packageType}
mv rocjpeg-test*.${packageType} package/${osType}-rocjpeg-test.${packageType}
mv ${packageRunTime}.${packageType} package/${osType}-rocjpeg.${packageType}
${packageDetail} package/${osType}-rocjpeg-dev.${packageType}
${packageDetail} package/${osType}-rocjpeg-test.${packageType}
${packageDetail} package/${osType}-rocjpeg.${packageType}
${packageInfo} package/${osType}-rocjpeg-dev.${packageType}
${packageInfo} package/${osType}-rocjpeg-test.${packageType}
${packageInfo} package/${osType}-rocjpeg.${packageType}
"""
platform.runCommand(this, command)
platform.archiveArtifacts(this, packageHelper[1])
}
return this
+78
Просмотреть файл
@@ -0,0 +1,78 @@
#!/usr/bin/env groovy
@Library('rocJenkins@pong') _
import com.amd.project.*
import com.amd.docker.*
def runCI =
{
nodeDetails, jobName->
def prj = new rocProject('rocJPEG', 'PreCheckin')
def nodes = new dockerNodes(nodeDetails, jobName, prj)
def commonGroovy
boolean formatCheck = false
def compileCommand =
{
platform, project->
commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy"
commonGroovy.runCompileCommand(platform, project, jobName)
}
def testCommand =
{
platform, project->
commonGroovy.runTestCommand(platform, project)
}
def packageCommand =
{
platform, project->
commonGroovy.runPackageCommand(platform, project)
}
buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand)
}
ci: {
String urlJobName = auxiliary.getTopJobName(env.BUILD_URL)
def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])]]
propertyList = auxiliary.appendPropertyList(propertyList)
def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu20:['gfx90a'], ubuntu22:['gfx1101'], sles15sp1:['gfx908'], rhel8:['gfx1030'], rhel9:['gfx942']])]
jobNameList = auxiliary.appendJobNameList(jobNameList)
propertyList.each
{
jobName, property->
if (urlJobName == jobName) {
properties(auxiliary.addCommonProperties(property))
}
}
jobNameList.each
{
jobName, nodeDetails->
if (urlJobName == jobName) {
stage(jobName) {
runCI(nodeDetails, jobName)
}
}
}
// For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901
if(!jobNameList.keySet().contains(urlJobName)) {
properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])]))
stage(urlJobName) {
runCI([ubuntu22:['gfx942']], urlJobName)
}
}
}
+319
Просмотреть файл
@@ -0,0 +1,319 @@
# ##############################################################################
# Copyright (c) 2024 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# ##############################################################################
cmake_minimum_required (VERSION 3.5)
set(VERSION "0.1.0")
set(CMAKE_CXX_STANDARD 17)
# Set Project Version and Language
project(rocjpeg VERSION ${VERSION} LANGUAGES CXX)
set(CMAKE_INSTALL_LIBDIR "lib" CACHE STRING "Library install directory")
set(CMAKE_INSTALL_INCLUDEDIR "include" CACHE STRING "Include install directory")
include(GNUInstallDirs)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR})
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR})
find_program(MAKE_NSIS_EXE makensis)
find_program(RPMBUILD_EXE rpmbuild)
find_program(DPKG_EXE dpkg)
# ROCM Path
if(DEFINED ENV{ROCM_PATH})
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path")
elseif(ROCM_PATH)
message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}")
else()
set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
endif()
# avoid setting the default installation path to /usr/local
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(CMAKE_INSTALL_PREFIX ${ROCM_PATH} CACHE PATH "rocJPEG default installation path" FORCE)
endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(DEFAULT_BUILD_TYPE "Release")
set(ENHANCED_MESSAGE ON)
if(ENHANCED_MESSAGE)
string(ASCII 27 Esc)
set(ColourReset "${Esc}[m")
set(Red "${Esc}[31m")
set(Green "${Esc}[32m")
set(Yellow "${Esc}[33m")
set(Blue "${Esc}[34m")
set(BoldBlue "${Esc}[1;34m")
set(Magenta "${Esc}[35m")
set(Cyan "${Esc}[36m")
set(White "${Esc}[37m")
endif()
message("-- ${BoldBlue}rocJPEG Version -- ${VERSION}${ColourReset}")
message("-- ${BoldBlue}rocJPEG Install Path -- ${CMAKE_INSTALL_PREFIX}${ColourReset}")
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip)
set(CMAKE_CXX_COMPILER ${ROCM_PATH}/llvm/bin/clang++)
# rocJPEG Default Options
option(BUILD_WITH_AMD_ADVANCE "Build rocJPEG for advanced AMD GPU Architecture" OFF)
# rocJPEG Build Type
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING "rocJPEG Default Build Type" FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release")
endif()
if(CMAKE_BUILD_TYPE MATCHES Debug)
# -O0 -- Don't Optimize output file
# -g -- generate debugging information, generate debugging information, dwarf-4 for making valgrind work
# -Og -- Optimize for debugging experience rather than speed or size
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -gdwarf-4 -Og")
else()
# -O3 -- Optimize output file
# -DNDEBUG -- turn off asserts
# -fPIC -- Generate position-independent code if possible
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -fPIC")
endif()
message("-- ${BoldBlue}rocJPEG Build Type -- ${CMAKE_BUILD_TYPE}${ColourReset}")
set(DEFAULT_AMDGPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102")
set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")
find_package(HIP QUIET)
find_package(Libva QUIET)
if(HIP_FOUND AND Libva_FOUND)
# HIP
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::device)
# LibVA
include_directories(${LIBVA_INCLUDE_DIR})
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${LIBVA_LIBRARY})
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${LIBVA_DRM_LIBRARY})
# local include files
include_directories(api src)
# source files
file(GLOB_RECURSE SOURCES "./src/*.cpp")
# rocJPEG.so
add_library(${PROJECT_NAME} SHARED ${SOURCES})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(${PROJECT_NAME} PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
# install rocJPEG libs -- {ROCM_PATH}/lib
install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime NAMELINK_SKIP)
install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT dev NAMELINK_ONLY)
install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan)
# install rocJPEG include files -- {ROCM_PATH}/include/rocJPEG
install(FILES api/rocjpeg.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} COMPONENT dev)
# install rocJPEG samples -- {ROCM_PATH}/share/rocJPEG
install(DIRECTORY cmake DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME} COMPONENT dev)
install(DIRECTORY samples/jpegDecode DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples COMPONENT dev)
install(DIRECTORY data/images DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/ COMPONENT dev)
# install license information - {ROCM_PATH}/share/doc/rocJPEG
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR} COMPONENT runtime)
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-asan COMPONENT asan)
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-dev COMPONENT dev)
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-test COMPONENT test)
# install test cmake
install(FILES test/CMakeLists.txt DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/test COMPONENT test)
message("-- ${White}AMD ROCm rocJPEG -- CMAKE_CXX_FLAGS:${CMAKE_CXX_FLAGS}${ColourReset}")
message("-- ${White}AMD ROCm rocJPEG -- Link Libraries: ${LINK_LIBRARY_LIST}${ColourReset}")
# make test with CTest
enable_testing()
include(CTest)
add_subdirectory(samples)
# set package information
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
set(CPACK_PACKAGE_LICENSE "MIT")
set(CPACK_PACKAGE_CONTACT "rocJPEG Support <mivisionx.support@amd.com>")
set(CPACK_PACKAGE_VENDOR "AMD ROCm")
set(CPACK_PACKAGE_GROUP "Development/Tools")
set(CPACK_PACKAGE_HOMEPAGE "https://github.com/ROCm/rocJPEG")
if(ENABLE_ASAN_PACKAGING)
set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "AMD rocJPEG address sanitizer libraries")
set(CPACK_RPM_PACKAGE_SUMMARY "AMD rocJPEG address sanitizer libraries")
else()
set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs")
set(CPACK_RPM_PACKAGE_SUMMARY "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs")
endif()
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}")
endif()
# Find Ubuntu 22.04 - Add libstdc++-12-dev package deps for Dev Package
file(READ "/etc/os-release" OS_RELEASE)
string(REGEX MATCH "22.04" UBUNTU_22_FOUND ${OS_RELEASE})
# Set the dependent packages
set(rocJPEG_DEBIAN_PACKAGE_LIST "rocm-hip-runtime, libdrm-amdgpu1, libva-amdgpu-drm2, mesa-amdgpu-va-drivers")
set(rocJPEG_RPM_PACKAGE_LIST "rocm-hip-runtime, libdrm-amdgpu, libva-amdgpu, mesa-amdgpu-dri-drivers")
set(rocJPEG_DEBIAN_DEV_PACKAGE_LIST "rocm-hip-runtime-dev, libva-amdgpu-dev, pkg-config")
if(UBUNTU_22_FOUND)
set(rocJPEG_DEBIAN_DEV_PACKAGE_LIST "${rocJPEG_DEBIAN_DEV_PACKAGE_LIST}, libstdc++-12-dev")
endif()
set(rocJPEG_RPM_DEV_PACKAGE_LIST "rocm-hip-runtime-devel, libva-amdgpu-devel, pkg-config")
# '%{?dist}' breaks manual builds on debian systems due to empty Provides
execute_process(
COMMAND rpm --eval %{?dist}
RESULT_VARIABLE PROC_RESULT
OUTPUT_VARIABLE EVAL_RESULT
OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "")
string(APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}")
endif()
# set package
set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${CPACK_PACKAGE_HOMEPAGE})
set(CPACK_RPM_PACKAGE_URL ${CPACK_PACKAGE_HOMEPAGE})
set(CPACK_RPM_PACKAGE_AUTOREQPROV "no")
# Debian package
set(CPACK_DEB_COMPONENT_INSTALL ON)
set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "rocm-core, ${rocJPEG_DEBIAN_PACKAGE_LIST}")
set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${PROJECT_NAME}-dev")
set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS
"rocm-core, ${PROJECT_NAME}, ${rocJPEG_DEBIAN_DEV_PACKAGE_LIST}")
# Debian package - specific variable for ASAN
set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" )
set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "rocm-core-asan, ${rocJPEG_DEBIAN_PACKAGE_LIST}" )
# Debian package - Test
set(CPACK_DEBIAN_TEST_PACKAGE_NAME "${PROJECT_NAME}-test" )
set(CPACK_DEBIAN_TEST_PACKAGE_DEPENDS "rocm-core, ${CPACK_DEBIAN_DEV_PACKAGE_NAME}" )
# RPM package
set(CPACK_RPM_COMPONENT_INSTALL ON)
set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core, ${rocJPEG_RPM_PACKAGE_LIST}")
set(CPACK_RPM_RUNTIME_PACKAGE_PROVIDES "${PROJECT_NAME}")
set(CPACK_RPM_RUNTIME_PACKAGE_OBSOLETES "${PROJECT_NAME}")
set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel")
set(CPACK_RPM_DEV_PACKAGE_REQUIRES "rocm-core, ${PROJECT_NAME}, ${rocJPEG_RPM_DEV_PACKAGE_LIST}")
set(CPACK_RPM_DEV_PACKAGE_PROVIDES "${PROJECT_NAME}-devel")
set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "${PROJECT_NAME}-devel")
set(CPACK_RPM_PACKAGE_LICENSE "MIT" )
# RPM package specific variable for ASAN
set(CPACK_RPM_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" )
set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan, ${rocJPEG_RPM_PACKAGE_LIST}" )
set(CPACK_RPM_ASAN_PACKAGE_PROVIDES "${PROJECT_NAME}-asan")
set(CPACK_RPM_ASAN_PACKAGE_OBSOLETES "${PROJECT_NAME}-asan")
# RPM package specific variable for Test
set(CPACK_RPM_TEST_PACKAGE_NAME "${PROJECT_NAME}-test" )
set(CPACK_RPM_TEST_PACKAGE_REQUIRES "rocm-core, ${CPACK_RPM_DEV_PACKAGE_NAME}" )
set(CPACK_RPM_TEST_PACKAGE_PROVIDES "${PROJECT_NAME}-test")
set(CPACK_RPM_TEST_PACKAGE_OBSOLETES "${PROJECT_NAME}-test")
if(NOT ROCM_DEP_ROCMCORE)
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core-asan," "" CPACK_RPM_ASAN_PACKAGE_REQUIRES ${CPACK_RPM_ASAN_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_DEV_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_RPM_TEST_PACKAGE_REQUIRES ${CPACK_RPM_TEST_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS})
string(REGEX REPLACE ",? ?rocm-core-asan," "" CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS})
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS})
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_DEBIAN_TEST_PACKAGE_DEPENDS ${CPACK_DEBIAN_TEST_PACKAGE_DEPENDS})
endif()
if(ENABLE_ASAN_PACKAGING)
# ASAN Package requires asan component with only libraries and license file
set(CPACK_COMPONENTS_ALL asan)
else()
set(CPACK_COMPONENTS_ALL runtime dev test)
endif()
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
set(CPACK_RPM_PACKAGE_RELEASE "local")
if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
endif()
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
endif()
set(CPACK_GENERATOR "TGZ;ZIP")
if(EXISTS ${MAKE_NSIS_EXE})
list(APPEND CPACK_GENERATOR "NSIS")
endif()
if(EXISTS ${RPMBUILD_EXE})
list(APPEND CPACK_GENERATOR "RPM")
message("-- ${White}AMD ROCm rocJPEG RunTime Package -- ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES}${ColourReset}")
message("-- ${White}AMD ROCm rocJPEG Dev Package -- ${CPACK_RPM_DEV_PACKAGE_REQUIRES}${ColourReset}")
endif()
if(EXISTS ${DPKG_EXE})
list(APPEND CPACK_GENERATOR "DEB")
message("-- ${White}AMD ROCm rocJPEG RunTime Package -- ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS}${ColourReset}")
message("-- ${White}AMD ROCm rocJPEG Dev Package -- ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS}${ColourReset}")
endif()
include(CPack)
cpack_add_component(runtime
DISPLAY_NAME "rocJPEG Runtime Package"
DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \
rocJPEG runtime package provides rocJPEG library and license.txt")
cpack_add_component(dev
DISPLAY_NAME "rocJPEG Develop Package"
DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \
rocJPEG develop package provides rocJPEG library, header files, samples, and license.txt")
cpack_add_component(asan
DISPLAY_NAME "rocJPEG ASAN Package"
DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \
rocJPEG ASAN package provides rocJPEG ASAN libraries")
cpack_add_component(test
DISPLAY_NAME "rocJPEG Test Package"
DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \
rocJPEG Test package provides rocJPEG Test Components")
else()
message("-- ${Red}AMD ROCm rocJPEG -- unmet dependencies${ColourReset}")
if(NOT HIP_FOUND)
message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install rocm-hip-runtime-dev!")
endif()
if(NOT Libva_FOUND)
message(FATAL_ERROR "-- ERROR!: libva Not Found - please install libva-amdgpu-dev(DBEAIN)/libva-amdgpu-devel(RPM) package!")
endif()
endif()
+1 -1
Просмотреть файл
@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023 ROCm Software Platform
Copyright (c) 2024 ROCm Software Platform
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
+202 -2
Просмотреть файл
@@ -1,2 +1,202 @@
# rocJPEG
rocJPEG is a high-performance jpeg decode SDK for decoding images using a hardware-accelerated jpeg decoder on AMDs GPUs.
[![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
<p align="center"><img width="70%" src="docs/data/AMD_rocJPEG_Logo.png" /></p>
rocJPEG is a high performance JPEG decode SDK for AMD GPUs. Using the rocJPEG API, you can access the JPEG decoding features available on your GPU.
## Supported JPEG chroma subsampling
* YUV 4:4:4
* YUV 4:2:2
* YUV 4:2:0
* YUV 4:0:0
## Prerequisites
* Linux distribution
* Ubuntu - `20.04` / `22.04`
* RHEL - `8` / `9`
* SLES - `15-SP5`
* [ROCm supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)
> [!IMPORTANT]
> `gfx908` or higher GPU required
* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html): Required usecase - rocm
> [!IMPORTANT]
> `sudo amdgpu-install --usecase=rocm`
* AMD Multimedia packages
```shell
sudo apt install libva-amdgpu-dev libdrm-amdgpu1 mesa-amdgpu-va-drivers
```
> [!NOTE]
> RPM Packages for `RHEL`/`SLES` - `libva-amdgpu-devel libdrm-amdgpu mesa-amdgpu-dri-drivers`
* CMake `3.5` or later
```shell
sudo apt install cmake
```
* [pkg-config](https://en.wikipedia.org/wiki/Pkg-config)
```shell
sudo apt install pkg-config
```
> [!IMPORTANT]
> * If using Ubuntu 22.04, you must install `libstdc++-12-dev`
>
> ```shell
> sudo apt install libstdc++-12-dev
> ```
>[!NOTE]
> * All package installs are shown with the `apt` package manager. Use the appropriate package manager for your operating system.
> * To install rocDecode with minimum requirements, follow the [quick-start](./docs/install/quick-start.rst) instructions
#### Prerequisites setup script for Linux
For your convenience, we provide the setup script,
[rocJPEG-setup.py](rocJPEG-setup.py) which installs all required dependencies. Run this script only once.
**Usage:**
```shell
python rocJPEG-setup.py --rocm_path [ ROCm Installation Path - optional (default:/opt/rocm)]
--developer [ Setup Developer Options - optional (default:ON) [options:ON/OFF]]
```
**NOTE:** This script only needs to be executed once.
## Installation instructions
The installation process uses the following steps:
* [ROCm-supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) install verification
* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=rocm`
* Use either [Package install](#package-install) or [Source install](#source-install) as described below.
### Package install
Install rocJPEG runtime, development, and test packages.
* Runtime package - `rocjpeg` only provides the rocjpeg library `librocjpeg.so`
* Development package - `rocjpeg-dev`/`rocjpeg-devel` provides the library, header files, and samples
* Test package - `rocjpeg-test` provides CTest to verify installation
#### Ubuntu
```shell
sudo apt install rocjpeg rocjpeg-dev rocjpeg-test
```
#### RHEL
```shell
sudo yum install rocjpeg rocjpeg-devel rocjpeg-test
```
#### SLES
```shell
sudo zypper install rocjpeg rocjpeg-devel rocjpeg-test
```
>[!NOTE]
> Package install auto installs all dependencies.
### Source install
```shell
git clone https://github.com/ROCm/rocJPEG.git
cd rocJPEG
mkdir build && cd build
cmake ../
make -j8
sudo make install
```
#### Run tests
```shell
make test
```
**NOTE:** run tests with verbose option `make test ARGS="-VV"`
#### Make package
```shell
sudo make package
```
## Verify installation
The installer will copy
* Libraries into `/opt/rocm/lib`
* Header files into `/opt/rocm/include/rocjpeg`
* Samples folder into `/opt/rocm/share/rocjpeg`
* Documents folder into `/opt/rocm/share/doc/rocjpeg`
### Using sample application
To verify your installation using a sample application, run:
```shell
mkdir rocjpeg-sample && cd rocjpeg-sample
cmake /opt/rocm/share/rocjpeg/samples/jpegDecode/
make -j8
./videodecode -i /opt/rocm/share/rocjpeg/image/mug_420.jpg
```
### Using test package
To verify your installation using the `rocjpeg-test` package, run:
```shell
mkdir rocjpeg-test && cd rocjpeg-test
cmake /opt/rocm/share/rocjpeg/test/
ctest -VV
```
## Samples
The tool provides a few samples to decode JPEG images [here](samples/). Please refer to the individual folders to build and run the samples.
You can access samples to decode your images in our
[GitHub repository](https://github.com/ROCm/rocJPEG/tree/develop/samples). Refer to the
individual folders to build and run the samples.
## Docker
You can find rocDecode Docker containers in our
[GitHub repository](https://github.com/ROCm/rocJPEG/tree/develop/docker).
## Documentation
Run the following code to build our documentation locally.
```shell
cd docs
pip3 install -r sphinx/requirements.txt
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
```
For more information on documentation builds, refer to the
[Building documentation](https://rocm.docs.amd.com/en/latest/contribute/building.html)
page.
## Tested configurations
* Linux
* Ubuntu - `20.04` / `22.04`
* RHEL - `8` / `9`
* SLES - `15-SP5`
* ROCm:
* rocm-core - `6.1.0.60100-62`
* amdgpu-core - `1:6.1.60100-1741643`
* rocJPEG Setup Script - `V1.0`
+233
Просмотреть файл
@@ -0,0 +1,233 @@
/* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef ROC_JPEG_H
#define ROC_JPEG_H
#define ROCJPEGAPI
#pragma once
#include "hip/hip_runtime.h"
/*****************************************************************************************************************/
//! \file rocjpeg.h
//! \brief The AMD rocJPEG Library.
//! \defgroup group_amd_rocjepg rocJPEG: AMD ROCm JPEG Decode API
//! \brief rocJPEG API is a toolkit to decode JPEG images using a hardware-accelerated JPEG decoder on AMDs GPUs.
/******************************************************************************************************************/
#if defined(__cplusplus)
extern "C" {
#endif // __cplusplus
//! \def
//! \ingroup group_amd_rocjpeg
//! Maximum number of channels rocJPEG supports
#define ROCJPEG_MAX_COMPONENT 4
/*****************************************************/
//! \enum RocJpegStatus
//! \ingroup group_amd_rocjpeg
//! rocJPEG return status enums
//! These enums are used in all API calls to rocJPEG
/*****************************************************/
typedef enum {
ROCJPEG_STATUS_SUCCESS = 0,
ROCJPEG_STATUS_NOT_INITIALIZED = -1,
ROCJPEG_STATUS_INVALID_PARAMETER = -2,
ROCJPEG_STATUS_BAD_JPEG = -3,
ROCJPEG_STATUS_JPEG_NOT_SUPPORTED = -4,
ROCJPEG_STATUS_OUTOF_MEMORY = -5,
ROCJPEG_STATUS_EXECUTION_FAILED = -6,
ROCJPEG_STATUS_ARCH_MISMATCH = -7,
ROCJPEG_STATUS_INTERNAL_ERROR = -8,
ROCJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED = -9,
ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED = -10,
ROCJPEG_STATUS_RUNTIME_ERROR = -11,
ROCJPEG_STATUS_NOT_IMPLEMENTED = -12,
} RocJpegStatus;
/*****************************************************/
//! \enum RocJpegChromaSubsampling
//! \ingroup group_amd_rocjpeg
//! RocJpegChromaSubsampling enum identifies image chroma subsampling values stored inside JPEG input stream
/*****************************************************/
typedef enum {
ROCJPEG_CSS_444 = 0,
ROCJPEG_CSS_440 = 1,
ROCJPEG_CSS_422 = 2,
ROCJPEG_CSS_420 = 3,
ROCJPEG_CSS_411 = 4,
ROCJPEG_CSS_400 = 5,
ROCJPEG_CSS_UNKNOWN = -1
} RocJpegChromaSubsampling;
/*****************************************************/
//! \struct RocJpegImage
//! \ingroup group_amd_rocjpeg
//! this structure is jpeg image descriptor used to return the decoded output image. User must allocate device
//! memories for each channel for this structure and pass it to the decoder API.
//! the decoder APIs then copies the decode image to this struct based on the requested output format (see RocJpegOutputFormat).
/*****************************************************/
typedef struct {
uint8_t* channel[ROCJPEG_MAX_COMPONENT];
uint32_t pitch[ROCJPEG_MAX_COMPONENT]; // pitch of each channel
} RocJpegImage;
/*****************************************************/
//! \enum RocJpegOutputFormat
//! \ingroup group_amd_rocjpeg
//! RocJpegOutputFormat enum specifies what type of output user wants for image decoding
/*****************************************************/
typedef enum {
// return native unchanged decoded YUV image from the VCN JPEG deocder.
// For ROCJPEG_CSS_444 write Y, U, and V to first, second, and third channels of RocJpegImage
// For ROCJPEG_CSS_422 write YUYV (packed) to first channel of RocJpegImage
// For ROCJPEG_CSS_420 write Y to first channel and UV (interleaved) to second channel of RocJpegImage
// For ROCJPEG_CSS_400 write Y to first channel of RocJpegImage
ROCJPEG_OUTPUT_NATIVE = 0,
// extract Y, U, and V channels from the decoded YUV image from the VCN JPEG deocder and write into first, second, and thrid channel of RocJpegImage.
// For ROCJPEG_CSS_400 write Y to first channel of RocJpegImage
ROCJPEG_OUTPUT_YUV_PLANAR = 1,
// return luma component (Y) and write to first channel of RocJpegImage
ROCJPEG_OUTPUT_Y = 2,
// convert to interleaved RGB using HIP kernels and write to first channel of RocJpegImage
ROCJPEG_OUTPUT_RGB = 3,
// maximum allowed value
ROCJPEG_OUTPUT_FORMAT_MAX = 4
} RocJpegOutputFormat;
/*****************************************************/
//! \enum RocJpegBackend
//! \ingroup group_amd_rocjpeg
//! RocJpegBackend enum specifies what type of backend to use for JPEG decoding
//! ROCJPEG_BACKEND_HARDWARE : supports baseline JPEG bitstream using VCN hardware-accelarted JPEG decoder
//! ROCJPEG_BACKEND_HYBRID : uses CPU for Huffman decode and GPU for IDCT using HIP kernesl. This mode doesn't use VCN JPEG hardware decoder
/*****************************************************/
typedef enum {
ROCJPEG_BACKEND_HARDWARE = 0,
ROCJPEG_BACKEND_HYBRID = 1
} RocJpegBackend;
/*****************************************************/
// Opaque library handle identifier.
//struct RocJpegDecoderHandle;
//typedef struct RocJpegDecoderHandle* RocJpegHandle;
//! Used in subsequent API calls after rocJpegCreate
/*****************************************************/
typedef void *RocJpegHandle;
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle);
//! \ingroup group_amd_rocjpeg
//! Create the decoder object based on backend and device_id. A handle to the created decoder is returned
//! Initalization of rocjpeg handle. This handle is used for all consecutive calls
//! IN backend : Backend to use.
//! IN device_id : the GPU device id for which a decoder should be created. For example, use 0 for the first GPU device,
//! and 1 for the second GPU device on the system, etc.
//! IN/OUT handle : rocjpeg handle, jpeg decoder instance to use for
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle);
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle);
//! \ingroup group_amd_rocjpeg
//! Release the decoder object and resources.
//! IN/OUT handle: instance handle to release
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle);
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights);
//! \ingroup group_amd_rocjpeg
//! Retrieve the image info, including channel, width and height of each component, and chroma subsampling.
//! If less than ROCJPEG_MAX_COMPONENT channels are encoded, then zeros would be set to absent channels information
//! If the image is 3-channel, all three groups are valid.
//! IN handle : rocJpeg handle
//! IN data : Pointer to the buffer containing the jpeg stream data to be decoded.
//! IN length : Length of the jpeg image buffer.
//! OUT num_component : Number of channels in the decoded output image
//! OUT subsampling : Chroma subsampling used in this JPEG, see RocJpegChromaSubsampling.
//! OUT widths : pointer to ROCJPEG_MAX_COMPONENT of uint32_t, returns width of each channel.
//! OUT heights : pointer to ROCJPEG_MAX_COMPONENT of uint32_t, returns height of each channel.
//! \return ROCJPEG_STATUS_SUCCESS if successful
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights);
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination, hipStream_t stream);
//! \ingroup group_amd_rocjpeg
//! Decodes single image based on the backend used to create the rocJpeg handle in rocJpegCreate API.
//! Destination buffers should be large enough to be able to store output of specified format. These buffers should be pre-allocted by the user in the device memories.
//! For each color plane (channel) sizes could be retrieved for image using rocJpegGetImageInfo API
//! and minimum required memory buffer for each plane is plane_height * plane_pitch where plane_pitch >= plane_width for
//! planar output formats and plane_pitch >= plane_width * num_components for interleaved output format.
//! IN handle : rocJpeg handle
//! IN data : Pointer to the buffer containing the jpeg stream to be decoded.
//! IN length : Length of the jpeg image buffer.
//! IN output_format : Output data format. See RocJpegOutputFormat for description
//! IN/OUT destination : Pointer to structure with information about output buffers. See RocJpegImage description.
//! \return ROCJPEG_STATUS_SUCCESS if successful
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination);
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format);
//! \ingroup group_amd_rocjpeg
//! Resets and initializes batch decoder for working on the batches of specified size
//! Should be called once for decoding batches of this specific size, also use to reset failed batches
//! IN/OUT handle : Library handle
//! IN batch_size : Size of the batch
//! IN max_cpu_threads : Maximum number of CPU threads that will be processing this batch
//! IN output_format : Output data format. Will be the same for every image in batch
//! \return ROCJPEG_STATUS_SUCCESS if successful
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format);
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations, hipStream_t stream);
//! \ingroup group_amd_rocjpeg
//! Decodes batch of images. Output buffers should be large enough to be able to store
//! outputs of specified format, see single image decoding description for details. Call to
//! rocjpegDecodeBatchedInitialize() is required prior to this call, batch size is expected to be the same as
//! parameter to this batch initialization function.
//!
//! IN/OUT handle : Library handle
//! INT/OUT jpeg_handle : Decoded jpeg image state handle
//! IN data : Array of size batch_size of pointers to the input buffers containing the jpeg images to be decoded.
//! IN lengths : Array of size batch_size with lengths of the jpeg images' buffers in the batch.
//! IN/OUT destinations : Array of size batch_size with pointers to structure with information about output buffers,
//! \return ROCJPEG_STATUS_SUCCESS if successful
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations);
/*****************************************************************************************************/
//! \fn extern const char* ROCDECAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status);
//! \ingroup group_amd_rocjpeg
//! Return name of the specified error code in text form.
/*****************************************************************************************************/
extern const char* ROCJPEGAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status);
#if defined(__cplusplus)
}
#endif
#endif // ROC_JPEG_H
+49
Просмотреть файл
@@ -0,0 +1,49 @@
################################################################################
# Copyright (c) 2024 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
################################################################################
find_library(LIBVA_LIBRARY NAMES va HINTS /opt/amdgpu/lib/x86_64-linux-gnu /opt/amdgpu/lib64 NO_DEFAULT_PATH)
find_library(LIBVA_DRM_LIBRARY NAMES va-drm HINTS /opt/amdgpu/lib/x86_64-linux-gnu /opt/amdgpu/lib64 NO_DEFAULT_PATH)
find_path(LIBVA_INCLUDE_DIR NAMES va/va.h PATHS /opt/amdgpu/include NO_DEFAULT_PATH)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Libva DEFAULT_MSG LIBVA_INCLUDE_DIR LIBVA_LIBRARY)
mark_as_advanced(LIBVA_INCLUDE_DIR LIBVA_LIBRARY LIBVA_DRM_LIBRARY)
if(Libva_FOUND)
if(NOT TARGET Libva::va)
add_library(Libva::va UNKNOWN IMPORTED)
set_target_properties(Libva::va PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LIBVA_INCLUDE_DIR}"
IMPORTED_LOCATION "${LIBVA_LIBRARY}")
endif()
if(NOT TARGET Libva::va_drm)
add_library(Libva::va_drm UNKNOWN IMPORTED)
set_target_properties(Libva::va_drm PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LIBVA_INCLUDE_DIR}"
IMPORTED_LOCATION "${LIBVA_DRM_LIBRARY}")
endif()
message("-- ${White}Using Libva -- \n\tLibraries:${LIBVA_LIBRARY} \n\tIncludes:${LIBVA_INCLUDE_DIR}${ColourReset}")
message("-- ${White}Using Libva-drm -- \n\tLibraries:${LIBVA_DRM_LIBRARY}${ColourReset}")
else()
if(Libva_FIND_REQUIRED)
message(FATAL_ERROR "{Red}FindLibva -- Libva NOT FOUND${ColourReset}")
endif()
endif()
Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 1.8 MiB

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 2.2 MiB

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 181 KiB

+13
Просмотреть файл
@@ -0,0 +1,13 @@
# rocJEPG Docker
## Build - dockerfiles
```
sudo docker build -f {DOCKER_FILE_NAME}.dockerfile -t {DOCKER_IMAGE_NAME} .
```
## Run - docker
```
sudo docker run -it --device=/dev/kfd --device=/dev/dri --cap-add=SYS_RAWIO --device=/dev/mem --group-add video --network host --privileged {DOCKER_IMAGE_NAME}
```
+17
Просмотреть файл
@@ -0,0 +1,17 @@
FROM ubuntu:20.04
# install base dependencies
RUN apt-get update -y
#RUN apt-get dist-upgrade -y
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git apt-utils sudo vainfo dialog
# install ROCm
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget keyboard-configuration && \
wget https://repo.radeon.com/amdgpu-install/6.1/ubuntu/focal/amdgpu-install_6.1.60100-1_all.deb && \
sudo apt-get install ./amdgpu-install_6.1.60100-1_all.deb && \
sudo amdgpu-install -y --usecase=rocm
WORKDIR /workspace
# install rocJPEG package
RUN DEBIAN_FRONTEND=noninteractive sudo apt install -y rocjpeg rocjpeg-dev rocjpeg-test
+17
Просмотреть файл
@@ -0,0 +1,17 @@
FROM ubuntu:22.04
# install base dependencies
RUN apt-get update -y
#RUN apt-get dist-upgrade -y
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git apt-utils sudo vainfo dialog libstdc++-12-dev
# install ROCm
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget keyboard-configuration && \
wget https://repo.radeon.com/amdgpu-install/6.1/ubuntu/jammy/amdgpu-install_6.1.60100-1_all.deb && \
sudo apt-get install ./amdgpu-install_6.1.60100-1_all.deb && \
sudo amdgpu-install -y --usecase=rocm
WORKDIR /workspace
# install rocJPEG package
RUN DEBIAN_FRONTEND=noninteractive sudo apt install -y rocjpeg rocjpeg-dev rocjpeg-test
Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 5.8 KiB

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 30 KiB

+136
Просмотреть файл
@@ -0,0 +1,136 @@
# Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import os
import sys
import argparse
import platform
if sys.version_info[0] < 3:
import commands
else:
import subprocess
__copyright__ = "Copyright (c) 2024, AMD ROCm rocJPEG"
__version__ = "1.0"
__email__ = "mivisionx.support@amd.com"
__status__ = "Shipping"
# Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--rocm_path', type=str, default='/opt/rocm',
help='ROCm Installation Path - optional (default:/opt/rocm) - ROCm Installation Required')
parser.add_argument('--developer', type=str, default='ON',
help='Setup Developer Options - optional (default:ON) [options:ON/OFF]')
args = parser.parse_args()
developerInstall = args.developer.upper()
ROCM_PATH = args.rocm_path
if "ROCM_PATH" in os.environ:
ROCM_PATH = os.environ.get('ROCM_PATH')
print("\nROCm PATH set to -- "+ROCM_PATH+"\n")
# check ROCm installation
if os.path.exists(ROCM_PATH):
print("\nROCm Installation Found -- "+ROCM_PATH+"\n")
os.system('echo ROCm Info -- && '+ROCM_PATH+'/bin/rocminfo')
else:
print(
"WARNING: If ROCm installed, set ROCm Path with \"--rocm_path\" option for full installation [Default:/opt/rocm]\n")
print("ERROR: rocJPEG Setup requires ROCm install\n")
exit(-1)
if developerInstall not in ('OFF', 'ON'):
print(
"ERROR: Developer Option Not Supported - [Supported Options: OFF or ON]\n")
exit()
# get platfrom info
platfromInfo = platform.platform()
# sudo requirement check
sudoLocation = ''
userName = ''
if sys.version_info[0] < 3:
status, sudoLocation = commands.getstatusoutput("which sudo")
if sudoLocation != '/usr/bin/sudo':
status, userName = commands.getstatusoutput("whoami")
else:
status, sudoLocation = subprocess.getstatusoutput("which sudo")
if sudoLocation != '/usr/bin/sudo':
status, userName = subprocess.getstatusoutput("whoami")
# setup for Linux
linuxSystemInstall = ''
linuxCMake = 'cmake'
linuxSystemInstall_check = ''
linuxFlag = ''
if "centos" in platfromInfo or "redhat" in platfromInfo or os.path.exists('/usr/bin/yum'):
linuxSystemInstall = 'yum -y'
linuxSystemInstall_check = '--nogpgcheck'
if "centos-7" in platfromInfo or "redhat-7" in platfromInfo:
linuxCMake = 'cmake3'
os.system(linuxSystemInstall+' install cmake3')
if not "centos" in platfromInfo or not "redhat" in platfromInfo:
platfromInfo = platfromInfo+'-redhat'
elif "Ubuntu" in platfromInfo or os.path.exists('/usr/bin/apt-get'):
linuxSystemInstall = 'apt-get -y'
linuxSystemInstall_check = '--allow-unauthenticated'
linuxFlag = '-S'
if not "Ubuntu" in platfromInfo:
platfromInfo = platfromInfo+'-Ubuntu'
elif os.path.exists('/usr/bin/zypper'):
linuxSystemInstall = 'zypper -n'
linuxSystemInstall_check = '--no-gpg-checks'
platfromInfo = platfromInfo+'-SLES'
else:
print("\nrocJPEG Setup on "+platfromInfo+" is unsupported\n")
print("\nrocJPEG Setup Supported on: Ubuntu 20/22; CentOS 7/8; RedHat 8/9; & SLES 15 SP4\n")
exit(-1)
# rocJPEG Setup
print("\nrocJPEG Setup on: "+platfromInfo+"\n")
print("\nrocJPEG Dependencies Installation with rocJPEG-setup.py V-"+__version__+"\n")
if userName == 'root':
os.system(linuxSystemInstall+' update')
os.system(linuxSystemInstall+' install sudo')
# install pre-reqs
os.system('sudo -v')
os.system(linuxSystemInstall+' update')
os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
linuxSystemInstall_check+' install gcc cmake pkg-config')
# rocJPEG Core - VA/DRM Requirements
if "Ubuntu" in platfromInfo:
os.system('sudo -v')
os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
' install vainfo libdrm-amdgpu1 libva-amdgpu-dev mesa-amdgpu-va-drivers')
if "22.04" in platform.version():
os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
' install libstdc++-12-dev')
else:
os.system('sudo -v')
os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
' install libdrm-amdgpu libva-amdgpu-devel mesa-amdgpu-dri-drivers')
print("\nrocJPEG Dependencies Installed with rocJPEG-setup.py V-"+__version__+"\n")
+71
Просмотреть файл
@@ -0,0 +1,71 @@
# ##############################################################################
# Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# ##############################################################################
cmake_minimum_required(VERSION 3.5)
add_test(
NAME
jpeg-decode-fmt-unchanged
COMMAND
"${CMAKE_CTEST_COMMAND}"
--build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode"
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
--build-generator "${CMAKE_GENERATOR}"
--test-command "jpegdecode"
-i ${CMAKE_SOURCE_DIR}/data/images/
)
add_test(
NAME
jpeg-decode-fmt-yuv
COMMAND
"${CMAKE_CTEST_COMMAND}"
--build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode"
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
--build-generator "${CMAKE_GENERATOR}"
--test-command "jpegdecode"
-i ${CMAKE_SOURCE_DIR}/data/images/ -fmt yuv
)
add_test(
NAME
jpeg-decode-fmt-y
COMMAND
"${CMAKE_CTEST_COMMAND}"
--build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode"
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
--build-generator "${CMAKE_GENERATOR}"
--test-command "jpegdecode"
-i ${CMAKE_SOURCE_DIR}/data/images/ -fmt y
)
add_test(
NAME
jpeg-decode-fmt-rgbi
COMMAND
"${CMAKE_CTEST_COMMAND}"
--build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode"
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
--build-generator "${CMAKE_GENERATOR}"
--test-command "jpegdecode"
-i ${CMAKE_SOURCE_DIR}/data/images/ -fmt rgbi
)
+73
Просмотреть файл
@@ -0,0 +1,73 @@
################################################################################
# Copyright (c) 2024 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
################################################################################
cmake_minimum_required (VERSION 3.0)
project(jpegdecode)
set(CMAKE_CXX_STANDARD 17)
# ROCM Path
if(DEFINED ENV{ROCM_PATH})
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path")
elseif(ROCM_PATH)
message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}")
else()
set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
endif()
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake)
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
set(CMAKE_CXX_COMPILER ${ROCM_PATH}/llvm/bin/clang++)
set(DEFAULT_AMDGPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102")
set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")
find_package(HIP QUIET)
# find rocJPEG
find_library(ROCJPEG_LIBRARY NAMES rocjpeg HINTS {ROCM_PATH}/lib)
find_path(ROCJPEG_INCLUDE_DIR NAMES rocjpeg.h PATHS /opt/rocm/include/rocjpeg {ROCM_PATH}/include/rocjpeg)
if(ROCJPEG_LIBRARY AND ROCJPEG_INCLUDE_DIR)
set(ROCJPEG_FOUND TRUE)
message("-- ${White}Using rocJPEG -- \n\tLibraries:${ROCJPEG_LIBRARY} \n\tIncludes:${ROCJPEG_INCLUDE_DIR}${ColourReset}")
endif()
if(HIP_FOUND AND ROCJPEG_FOUND)
# HIP
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::device)
# rocJPEG
include_directories (${ROCJPEG_INCLUDE_DIR})
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCJPEG_LIBRARY})
list(APPEND SOURCES ${PROJECT_SOURCE_DIR} jpegdecode.cpp)
add_executable(${PROJECT_NAME} ${SOURCES})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
else()
message("-- ERROR!: ${PROJECT_NAME} excluded! please install all the dependencies and try again!")
if (NOT HIP_FOUND)
message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install ROCm and HIP!")
endif()
if (NOT ROCJPEG_FOUND)
message(FATAL_ERROR "-- ERROR!: rocDecode Not Found! - please install rocDecode!")
endif()
endif()
+25
Просмотреть файл
@@ -0,0 +1,25 @@
# JPEG decode sample
The jpeg decode sample illustrates decoding a JPEG images using rocJPEG library to get the individual decoded images in one of the supported output format (i.e., unchanged, yuv, y, rgbi). This sample can be configured with a device ID and optionally able to dump the output to a file.
## Prerequisites:
* Install [rocJPEG](../../README.md#build-and-install-instructions)
## Build
```shell
mkdir jpeg_decode_sample && cd jpeg_decode_sample
cmake ../
make -j
```
## Run
```shell
./jpegdecode -i <Path to single image or directory of images - [required]>
-be <Select rocJPEG backend (0 for ROCJPEG_BACKEND_HARDWARE, using VCN hardware-accelarated JPEG decoder, 1 ROCJPEG_BACKEND_HYBRID, using CPU and GPU HIP kernles for JPEG decoding) [optional]>
-fmt <Select rocJPEG output format for decoding, one of the [native, yuv, y, rgbi] [optional - default: native]>
-o <Output file path or directory - Write decoded images based on the selected outfut format to this file or directory [optional]>
-d <GPU device id (0 for the first GPU device, 1 for the second GPU device, etc.) [optional - default: 0]>
```
+531
Просмотреть файл
@@ -0,0 +1,531 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <iostream>
#include <unistd.h>
#include <vector>
#include <string>
#include <chrono>
#include <sys/stat.h>
#include <libgen.h>
#include <filesystem>
#include <fstream>
#include "rocjpeg.h"
#define CHECK_ROCJPEG(call) { \
RocJpegStatus rocjpeg_status = (call); \
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { \
std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) << " at " << __FILE__ << ":" << __LINE__ << std::endl;\
exit(1); \
} \
}
#define CHECK_HIP(call) { \
hipError_t hip_status = (call); \
if (hip_status != hipSuccess) { \
std::cout << "rocJPEG failure: '#" << hip_status << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\
exit(1); \
} \
}
void ShowHelpAndExit(const char *option = NULL) {
std::cout << "Options:" << std::endl
<< "-i Path to single image or directory of images - required" << std::endl
<< "-be Select rocJPEG backend (0 for ROCJPEG_BACKEND_HARDWARE, using VCN hardware-accelarated JPEG decoder, 1 ROCJPEG_BACKEND_HYBRID, using CPU and GPU HIP kernles for JPEG decoding); optional; default: 0" << std::endl
<< "-fmt Select rocJPEG output format for decoding, one of the [native, yuv, y, rgb]; optional; default: native" << std::endl
<< "-o Output file path or directory - Write decoded images based on the selected outfut format to this file or directory; optional;" << std::endl
<< "-d GPU device id (0 for the first GPU device, 1 for the second GPU device, etc.); optional; default: 0" << std::endl;
exit(0);
}
void ParseCommandLine(std::string &input_path, std::string &output_file_path, int &dump_output_frames, int &device_id, RocJpegBackend &rocjpeg_backend, RocJpegOutputFormat &output_format, int argc, char *argv[]) {
if(argc <= 1) {
ShowHelpAndExit();
}
for (int i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-h")) {
ShowHelpAndExit();
}
if (!strcmp(argv[i], "-i")) {
if (++i == argc) {
ShowHelpAndExit("-i");
}
input_path = argv[i];
continue;
}
if (!strcmp(argv[i], "-o")) {
if (++i == argc) {
ShowHelpAndExit("-o");
}
output_file_path = argv[i];
dump_output_frames = 1;
continue;
}
if (!strcmp(argv[i], "-d")) {
if (++i == argc) {
ShowHelpAndExit("-d");
}
device_id = atoi(argv[i]);
continue;
}
if (!strcmp(argv[i], "-be")) {
if (++i == argc) {
ShowHelpAndExit("-be");
}
rocjpeg_backend = static_cast<RocJpegBackend>(atoi(argv[i]));
continue;
}
if (!strcmp(argv[i], "-fmt")) {
if (++i == argc) {
ShowHelpAndExit("-fmt");
}
std::string selected_output_format = argv[i];
if (selected_output_format == "native") {
output_format = ROCJPEG_OUTPUT_NATIVE;
} else if (selected_output_format == "yuv") {
output_format = ROCJPEG_OUTPUT_YUV_PLANAR;
} else if (selected_output_format == "y") {
output_format = ROCJPEG_OUTPUT_Y;
} else if (selected_output_format == "rgb") {
output_format = ROCJPEG_OUTPUT_RGB;
} else {
ShowHelpAndExit(argv[i]);
}
continue;
}
ShowHelpAndExit(argv[i]);
}
}
void SaveImage(std::string output_file_name, RocJpegImage *output_image, uint32_t img_width, uint32_t img_height, RocJpegChromaSubsampling subsampling, RocJpegOutputFormat output_format) {
uint8_t *hst_ptr = nullptr;
FILE *fp;
hipError_t hip_status = hipSuccess;
if (output_image == nullptr || output_image->channel[0] == nullptr || output_image->pitch[0] == 0) {
return;
}
uint32_t widths[ROCJPEG_MAX_COMPONENT] = {};
uint32_t heights[ROCJPEG_MAX_COMPONENT] = {};
switch (output_format) {
case ROCJPEG_OUTPUT_NATIVE:
switch (subsampling) {
case ROCJPEG_CSS_444:
widths[2] = widths[1] = widths[0] = img_width;
heights[2] = heights[1] = heights[0] = img_height;
break;
case ROCJPEG_CSS_422:
widths[0] = img_width * 2;
heights[0] = img_height;
break;
case ROCJPEG_CSS_420:
widths[1] = widths[0] = img_width;
heights[0] = img_height;
heights[1] = img_height >> 1;
break;
case ROCJPEG_CSS_400:
widths[0] = img_width;
heights[0] = img_height;
break;
default:
std::cout << "Unknown chroma subsampling!" << std::endl;
return;
}
break;
case ROCJPEG_OUTPUT_YUV_PLANAR:
switch (subsampling) {
case ROCJPEG_CSS_444:
widths[2] = widths[1] = widths[0] = img_width;
heights[2] = heights[1] = heights[0] = img_height;
break;
case ROCJPEG_CSS_422:
widths[0] = img_width;
widths[2] = widths[1] = widths[0] >> 1;
heights[2] = heights[1] = heights[0] = img_height;
break;
case ROCJPEG_CSS_420:
widths[0] = img_width;
widths[2] = widths[1] = widths[0] >> 1;
heights[0] = img_height;
heights[2] = heights[1] = img_height >> 1;
break;
case ROCJPEG_CSS_400:
widths[0] = img_width;
heights[0] = img_height;
break;
default:
std::cout << "Unknown chroma subsampling!" << std::endl;
return;
}
break;
case ROCJPEG_OUTPUT_Y:
widths[0] = img_width;
heights[0] = img_height;
break;
case ROCJPEG_OUTPUT_RGB:
widths[0] = img_width * 3;
heights[0] = img_height;
break;
default:
std::cout << "Unknown output format!" << std::endl;
return;
}
uint32_t channel0_size = output_image->pitch[0] * heights[0];
uint32_t channel1_size = output_image->pitch[1] * heights[1];
uint32_t channel2_size = output_image->pitch[2] * heights[2];
uint32_t output_image_size = channel0_size + channel1_size + channel2_size;
if (hst_ptr == nullptr) {
hst_ptr = new uint8_t [output_image_size];
}
CHECK_HIP(hipMemcpyDtoH((void *)hst_ptr, output_image->channel[0], channel0_size));
uint8_t *tmp_hst_ptr = hst_ptr;
fp = fopen(output_file_name.c_str(), "wb");
if (fp) {
// write channel0
if (widths[0] == output_image->pitch[0]) {
fwrite(hst_ptr, 1, channel0_size, fp);
} else {
for (int i = 0; i < heights[0]; i++) {
fwrite(tmp_hst_ptr, 1, widths[0], fp);
tmp_hst_ptr += output_image->pitch[0];
}
}
// write channel1
if (channel1_size != 0 && output_image->channel[1] != nullptr) {
uint8_t *channel1_hst_ptr = hst_ptr + channel0_size;
CHECK_HIP(hipMemcpyDtoH((void *)channel1_hst_ptr, output_image->channel[1], channel1_size));
if (widths[1] == output_image->pitch[1]) {
fwrite(channel1_hst_ptr, 1, channel1_size, fp);
} else {
for (int i = 0; i < heights[1]; i++) {
fwrite(channel1_hst_ptr, 1, widths[1], fp);
channel1_hst_ptr += output_image->pitch[1];
}
}
}
// write channel2
if (channel2_size != 0 && output_image->channel[2] != nullptr) {
uint8_t *channel2_hst_ptr = hst_ptr + channel0_size + channel1_size;
CHECK_HIP(hipMemcpyDtoH((void *)channel2_hst_ptr, output_image->channel[2], channel2_size));
if (widths[2] == output_image->pitch[2]) {
fwrite(channel2_hst_ptr, 1, channel2_size, fp);
} else {
for (int i = 0; i < heights[2]; i++) {
fwrite(channel2_hst_ptr, 1, widths[2], fp);
channel2_hst_ptr += output_image->pitch[2];
}
}
}
fclose(fp);
}
if (hst_ptr != nullptr) {
delete [] hst_ptr;
hst_ptr = nullptr;
tmp_hst_ptr = nullptr;
}
}
bool GetFilePaths(std::string &input_path, std::vector<std::string> &file_paths, bool &is_dir, bool &is_file) {
is_dir = std::filesystem::is_directory(input_path);
is_file = std::filesystem::is_regular_file(input_path);
if (is_dir) {
for (const auto &entry : std::filesystem::directory_iterator(input_path))
file_paths.push_back(entry.path());
} else if (is_file) {
file_paths.push_back(input_path);
} else {
std::cerr << "ERROR: the input path is not valid!" << std::endl;
return false;
}
return true;
}
bool InitHipDevice(int device_id) {
int num_devices;
hipDeviceProp_t hip_dev_prop;
CHECK_HIP(hipGetDeviceCount(&num_devices));
if (num_devices < 1) {
std::cerr << "ERROR: didn't find any GPU!" << std::endl;
return false;
}
if (device_id >= num_devices) {
std::cerr << "ERROR: the requested device_id is not found!" << std::endl;
return false;
}
CHECK_HIP(hipSetDevice(device_id));
CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop, device_id));
std::cout << "info: Using GPU device " << device_id << ": " << hip_dev_prop.name << "[" << hip_dev_prop.gcnArchName << "] on PCI bus " <<
std::setfill('0') << std::setw(2) << std::right << std::hex << hip_dev_prop.pciBusID << ":" << std::setfill('0') << std::setw(2) <<
std::right << std::hex << hip_dev_prop.pciDomainID << "." << hip_dev_prop.pciDeviceID << std::dec << std::endl;
return true;
}
int main(int argc, char **argv) {
int device_id = 0;
int dump_output_frames = 0;
uint8_t num_components;
uint32_t widths[ROCJPEG_MAX_COMPONENT] = {};
uint32_t heights[ROCJPEG_MAX_COMPONENT] = {};
uint32_t channel_sizes[ROCJPEG_MAX_COMPONENT] = {};
uint32_t num_channels = 0;
int total_images_all = 0;
double time_per_image_all = 0;
double m_pixels_all = 0;
double image_per_sec_all = 0;
std::string chroma_sub_sampling = "";
std::string input_path, output_file_path;
std::vector<std::string> file_paths = {};
bool is_dir = false;
bool is_file = false;
RocJpegChromaSubsampling subsampling;
RocJpegBackend rocjpeg_backend = ROCJPEG_BACKEND_HARDWARE;
RocJpegHandle rocjpeg_handle = nullptr;
RocJpegImage output_image = {};
RocJpegOutputFormat output_format = ROCJPEG_OUTPUT_NATIVE;
ParseCommandLine(input_path, output_file_path, dump_output_frames, device_id, rocjpeg_backend, output_format, argc, argv);
if (!GetFilePaths(input_path, file_paths, is_dir, is_file)) {
std::cerr << "Failed to get input file paths!" << std::endl;
return -1;
}
if (!InitHipDevice(device_id)) {
std::cerr << "Failed to initialize HIP!" << std::endl;
return -1;
}
CHECK_ROCJPEG(rocJpegCreate(rocjpeg_backend, device_id, &rocjpeg_handle));
int counter = 0;
std::vector<std::vector<char>> file_data(file_paths.size());
std::vector<size_t> file_sizes(file_paths.size());
for (auto file_path : file_paths) {
std::string base_file_name = file_path.substr(file_path.find_last_of("/\\") + 1);
int image_count = 0;
// Read an image from disk.
std::ifstream input(file_path.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
if (!(input.is_open())) {
std::cerr << "ERROR: Cannot open image: " << file_path << std::endl;
return 0;
}
// Get the size
std::streamsize file_size = input.tellg();
input.seekg(0, std::ios::beg);
// resize if buffer is too small
if (file_data[counter].size() < file_size) {
file_data[counter].resize(file_size);
}
if (!input.read(file_data[counter].data(), file_size)) {
std::cerr << "Cannot read from file: " << file_path << std::endl;
return 0;
}
file_sizes[counter] = file_size;
CHECK_ROCJPEG(rocJpegGetImageInfo(rocjpeg_handle, reinterpret_cast<uint8_t*>(file_data[counter].data()), file_size, &num_components, &subsampling, widths, heights));
std::cout << "info: input file name: " << base_file_name << std::endl;
std::cout << "info: input image resolution: " << widths[0] << "x" << heights[0] << std::endl;
switch (subsampling) {
case ROCJPEG_CSS_444:
chroma_sub_sampling = "YUV 4:4:4";
break;
case ROCJPEG_CSS_440:
chroma_sub_sampling = "YUV 4:4:0";
break;
case ROCJPEG_CSS_422:
chroma_sub_sampling = "YUV 4:2:2";
break;
case ROCJPEG_CSS_420:
chroma_sub_sampling = "YUV 4:2:0";
break;
case ROCJPEG_CSS_411:
chroma_sub_sampling = "YUV 4:1:1";
break;
case ROCJPEG_CSS_400:
chroma_sub_sampling = "YUV 4:0:0";
break;
case ROCJPEG_CSS_UNKNOWN:
std::cout << "info: Unknown chroma subsampling" << std::endl;
return EXIT_FAILURE;
}
std::cout << "info: chroma subsampling: " + chroma_sub_sampling << std::endl;
if (subsampling == ROCJPEG_CSS_440 || subsampling == ROCJPEG_CSS_411) {
std::cout << "The chroma sub-sampling is not supported by VCN Hardware" << std::endl;
if (is_dir) {
std::cout << std::endl;
continue;
} else
return EXIT_FAILURE;
}
switch (output_format) {
case ROCJPEG_OUTPUT_NATIVE:
switch (subsampling) {
case ROCJPEG_CSS_444:
num_channels = 3;
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = widths[0];
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = output_image.pitch[0] * heights[0];
break;
case ROCJPEG_CSS_422:
num_channels = 1;
output_image.pitch[0] = widths[0] * 2;
channel_sizes[0] = output_image.pitch[0] * heights[0];
break;
case ROCJPEG_CSS_420:
num_channels = 2;
output_image.pitch[1] = output_image.pitch[0] = widths[0];
channel_sizes[0] = output_image.pitch[0] * heights[0];
channel_sizes[1] = output_image.pitch[1] * (heights[0] >> 1);
break;
case ROCJPEG_CSS_400:
num_channels = 1;
output_image.pitch[0] = widths[0];
channel_sizes[0] = output_image.pitch[0] * heights[0];
break;
default:
std::cout << "Unknown chroma subsampling!" << std::endl;
return EXIT_FAILURE;
}
break;
case ROCJPEG_OUTPUT_YUV_PLANAR:
if (subsampling == ROCJPEG_CSS_400) {
num_channels = 1;
output_image.pitch[0] = widths[0];
channel_sizes[0] = output_image.pitch[0] * heights[0];
} else {
num_channels = 3;
output_image.pitch[0] = widths[0];
output_image.pitch[1] = widths[1];
output_image.pitch[2] = widths[2];
channel_sizes[0] = output_image.pitch[0] * heights[0];
channel_sizes[1] = output_image.pitch[1] * heights[1];
channel_sizes[2] = output_image.pitch[2] * heights[2];
}
break;
case ROCJPEG_OUTPUT_Y:
num_channels = 1;
output_image.pitch[0] = widths[0];
channel_sizes[0] = output_image.pitch[0] * heights[0];
break;
case ROCJPEG_OUTPUT_RGB:
num_channels = 1;
output_image.pitch[0] = widths[0] * 3;
channel_sizes[0] = output_image.pitch[0] * heights[0];
break;
default:
std::cout << "Unknown output format!" << std::endl;
return EXIT_FAILURE;
}
// allocate memory for each channel
for (int i = 0; i < num_channels; i++) {
CHECK_HIP(hipMalloc(&output_image.channel[i], channel_sizes[i]));
}
std::cout << "info: decoding started, please wait! ... " << std::endl;
auto start_time = std::chrono::high_resolution_clock::now();
CHECK_ROCJPEG(rocJpegDecode(rocjpeg_handle, reinterpret_cast<uint8_t*>(file_data[counter].data()), file_size, output_format, &output_image));
auto end_time = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> decoder_time = end_time - start_time;
double time_per_image = decoder_time.count() * 1000;
double ips = (1 / time_per_image) * 1000;
double mpixels = ((double)widths[0] * (double)heights[0] / 1000000) * ips;
image_count++;
if (dump_output_frames) {
std::string::size_type const p(base_file_name.find_last_of('.'));
std::string file_name_no_ext = base_file_name.substr(0, p);
std::string file_extension;
switch (output_format) {
case ROCJPEG_OUTPUT_NATIVE:
file_extension = "native";
break;
case ROCJPEG_OUTPUT_YUV_PLANAR:
file_extension = "yuv";
break;
case ROCJPEG_OUTPUT_Y:
file_extension = "y";
break;
case ROCJPEG_OUTPUT_RGB:
file_extension = "rgb";
break;
default:
file_extension = "";
break;
}
std::string file_name_for_saving = output_file_path + "//" + file_name_no_ext + "_" + std::to_string(widths[0]) + "x"
+ std::to_string(heights[0]) + "." + file_extension;
std::string image_save_path = is_dir ? file_name_for_saving : output_file_path;
SaveImage(image_save_path, &output_image, widths[0], heights[0], subsampling, output_format);
}
for (int i = 0; i < num_channels; i++) {
if (output_image.channel[i] != nullptr) {
CHECK_HIP(hipFree((void*)output_image.channel[i]));
output_image.channel[i] = nullptr;
output_image.pitch[i] = 0;
}
}
std::cout << "info: total decoded images: " << image_count << std::endl;
std::cout << "info: average processing time per image (ms): " << time_per_image << std::endl;
std::cout << "info: average images per sec: " << (1 / time_per_image) * 1000 << std::endl;
std::cout << "info: total elapsed time (s): " << decoder_time.count() << std::endl;
if (is_dir) {
std::cout << std::endl;
total_images_all += image_count;
time_per_image_all += time_per_image;
image_per_sec_all += ips;
m_pixels_all += mpixels;
}
counter++;
}
if (is_dir) {
std::cout << "info: total decoded images: " << total_images_all << std::endl;
if (total_images_all) {
std::cout << "info: average processing time per image (ms): " << time_per_image_all / total_images_all << std::endl;
std::cout << "info: average decoded images per sec: " << image_per_sec_all / total_images_all << std::endl;
std::cout << "info: average decoded mpixels per sec: " << m_pixels_all / total_images_all << std::endl;
}
std::cout << std::endl;
}
CHECK_ROCJPEG(rocJpegDestroy(rocjpeg_handle));
std::cout << "info: decoding completed!" << std::endl;
return 0;
}
+171
Просмотреть файл
@@ -0,0 +1,171 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "rocjpeg_api_handle.h"
#include "rocjpeg_commons.h"
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle)
//! Create the decoder object based on backend and device_id. A handle to the created decoder is returned
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle) {
if (handle == nullptr) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
RocJpegHandle rocjpeg_handle = nullptr;
try {
rocjpeg_handle = new RocJpegDecoderHandle(backend, device_id);
} catch(const std::exception& e) {
ERR(STR("Failed to init the rocJPEG handle, ") + STR(e.what()));
return ROCJPEG_STATUS_NOT_INITIALIZED;
}
*handle = rocjpeg_handle;
return static_cast<RocJpegDecoderHandle *>(rocjpeg_handle)->rocjpeg_decoder->InitializeDecoder();
}
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle)
//! Release the decoder object and resources.
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle) {
if (handle == nullptr) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
auto rocjpeg_handle = static_cast<RocJpegDecoderHandle*>(handle);
delete rocjpeg_handle;
return ROCJPEG_STATUS_SUCCESS;
}
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length,
//! int *num_components, RocJpegChromaSubsampling *subsampling, int *widths, int *heights)
//! Retrieve the image info, including channel, width and height of each component, and chroma subsampling.
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length, uint8_t *num_components,
RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights) {
if (handle == nullptr || data == nullptr || num_components == nullptr ||
subsampling == nullptr || widths == nullptr || heights == nullptr) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS;
auto rocjpeg_handle = static_cast<RocJpegDecoderHandle*>(handle);
try {
rocjpeg_status = rocjpeg_handle->rocjpeg_decoder->GetImageInfo(data, length, num_components, subsampling, widths, heights);
} catch (const std::exception& e) {
rocjpeg_handle->CaptureError(e.what());
ERR(e.what());
return ROCJPEG_STATUS_RUNTIME_ERROR;
}
return rocjpeg_status;
}
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination, hipStream_t stream);
//! \ingroup group_amd_rocjpeg
//! Decodes single image based on the backend used to create the rocJpeg handle in rocJpegCreate API.
//! Destination buffers should be large enough to be able to store output of specified format. These buffers should be pre-allocted by the user in the device memories.
//! For each color plane (channel) sizes could be retrieved for image using rocJpegGetImageInfo API
//! and minimum required memory buffer for each plane is plane_height * plane_pitch where plane_pitch >= plane_width for
//! planar output formats and plane_pitch >= plane_width * num_components for interleaved output format.
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format,
RocJpegImage *destination) {
if (handle == nullptr || data == nullptr) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS;
auto rocjpeg_handle = static_cast<RocJpegDecoderHandle*>(handle);
try {
rocjpeg_status = rocjpeg_handle->rocjpeg_decoder->Decode(data, length, output_format, destination);
} catch (const std::exception& e) {
rocjpeg_handle->CaptureError(e.what());
ERR(e.what());
return ROCJPEG_STATUS_RUNTIME_ERROR;
}
return rocjpeg_status;
}
/*****************************************************************************************************/
//! \fn extern const char* ROCDECAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status);
//! \ingroup group_amd_rocjpeg
//! Return name of the specified error code in text form.
/*****************************************************************************************************/
extern const char* ROCJPEGAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status) {
switch (rocjpeg_status) {
case ROCJPEG_STATUS_SUCCESS:
return "ROCJPEG_STATUS_SUCCESS";
case ROCJPEG_STATUS_NOT_INITIALIZED:
return "ROCJPEG_STATUS_NOT_INITIALIZED";
case ROCJPEG_STATUS_INVALID_PARAMETER:
return "ROCJPEG_STATUS_INVALID_PARAMETER";
case ROCJPEG_STATUS_BAD_JPEG:
return "ROCJPEG_STATUS_BAD_JPEG";
case ROCJPEG_STATUS_JPEG_NOT_SUPPORTED:
return "ROCJPEG_STATUS_JPEG_NOT_SUPPORTED";
case ROCJPEG_STATUS_EXECUTION_FAILED:
return "ROCJPEG_STATUS_EXECUTION_FAILED";
case ROCJPEG_STATUS_ARCH_MISMATCH:
return "ROCJPEG_STATUS_ARCH_MISMATCH";
case ROCJPEG_STATUS_INTERNAL_ERROR:
return "ROCJPEG_STATUS_INTERNAL_ERROR";
case ROCJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED:
return "ROCJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED";
case ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED:
return "ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED";
case ROCJPEG_STATUS_RUNTIME_ERROR:
return "ROCJPEG_STATUS_RUNTIME_ERROR";
case ROCJPEG_STATUS_OUTOF_MEMORY:
return "ROCJPEG_STATUS_OUTOF_MEMORY";
case ROCJPEG_STATUS_NOT_IMPLEMENTED:
return "ROCJPEG_STATUS_NOT_IMPLEMENTED";
default:
return "UNKNOWN_ERROR";
}
}
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format);
//! \ingroup group_amd_rocjpeg
//! Resets and initializes batch decoder for working on the batches of specified size
//! Should be called once for decoding batches of this specific size, also use to reset failed batches
//! \return ROCJPEG_STATUS_SUCCESS if successful
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format) {
return ROCJPEG_STATUS_NOT_IMPLEMENTED;
}
/*****************************************************************************************************/
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations, hipStream_t stream);
//! \ingroup group_amd_rocjpeg
//! Decodes batch of images. Output buffers should be large enough to be able to store
//! outputs of specified format, see single image decoding description for details. Call to
//! rocjpegDecodeBatchedInitialize() is required prior to this call, batch size is expected to be the same as
//! parameter to this batch initialization function.
//! \return ROCJPEG_STATUS_SUCCESS if successful
/*****************************************************************************************************/
RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations) {
return ROCJPEG_STATUS_NOT_IMPLEMENTED;
}
+50
Просмотреть файл
@@ -0,0 +1,50 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef ROC_JPEG_HANDLE_H
#define ROC_JPEG_HANDLE_H
#pragma once
#include <memory>
#include <string>
#include "rocjpeg_decoder.h"
/**
* @brief RocJpegHandle class
*
*/
class RocJpegDecoderHandle {
public:
explicit RocJpegDecoderHandle(RocJpegBackend backend, int device_id) : rocjpeg_decoder(std::make_shared<ROCJpegDecoder>(backend, device_id)) {};
~RocJpegDecoderHandle() { ClearErrors(); }
std::shared_ptr<ROCJpegDecoder> rocjpeg_decoder;
bool NoError() { return error_.empty(); }
const char* ErrorMsg() { return error_.c_str(); }
void CaptureError(const std::string& err_msg) { error_ = err_msg; }
private:
void ClearErrors() { error_ = "";}
std::string error_;
};
#endif //ROC_JPEG_HANDLE_H
+92
Просмотреть файл
@@ -0,0 +1,92 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef ROC_JPEG_COMMON_H_
#define ROC_JPEG_COMMON_H_
#pragma once
#include <stdexcept>
#include <exception>
#include <string>
#include <iostream>
#include <cstring>
#define TOSTR(X) std::to_string(static_cast<int>(X))
#define STR(X) std::string(X)
#if DBGINFO
#define INFO(X) std::clog << "[INF] " << " {" << __func__ <<"} " << " " << X << std::endl;
#else
#define INFO(X) ;
#endif
#define ERR(X) std::cerr << "[ERR] " << " {" << __func__ <<"} " << " " << X << std::endl;
#define CHECK_VAAPI(call) { \
VAStatus va_status = (call); \
if (va_status != VA_STATUS_SUCCESS) { \
std::cerr << "VAAPI failure: " << #call << " failed with status: " << std::hex << "0x" << va_status << std::dec << " = '" << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\
return ROCJPEG_STATUS_EXECUTION_FAILED; \
} \
}
#define CHECK_HIP(call) { \
hipError_t hip_status = (call); \
if (hip_status != hipSuccess) { \
std::cerr << "HIP failure: 'status: " << hipGetErrorName(hip_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\
return ROCJPEG_STATUS_EXECUTION_FAILED; \
} \
}
#define CHECK_ROCJPEG(call) { \
RocJpegStatus rocjpeg_status = (call); \
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { \
std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) << " at " << __FILE__ << ":" << __LINE__ << std::endl;\
return rocjpeg_status; \
} \
}
static bool GetEnv(const char *name, char *value, size_t valueSize) {
const char *v = getenv(name);
if (v) {
strncpy(value, v, valueSize);
value[valueSize - 1] = 0;
}
return v ? true : false;
}
static inline int align(int value, int alignment) {
return (value + alignment - 1) & ~(alignment - 1);
}
class RocJpegException : public std::exception {
public:
explicit RocJpegException(const std::string& message):message_(message){}
virtual const char* what() const throw() override {
return message_.c_str();
}
private:
std::string message_;
};
#define THROW(X) throw RocJpegException(" { "+std::string(__func__)+" } " + X);
#endif //ROC_JPEG_COMMON_H_
+331
Просмотреть файл
@@ -0,0 +1,331 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "rocjpeg_decoder.h"
ROCJpegDecoder::ROCJpegDecoder(RocJpegBackend backend, int device_id) :
num_devices_{0}, device_id_ {device_id}, hip_stream_ {0}, backend_{backend}, hip_interop_{} {}
ROCJpegDecoder::~ROCJpegDecoder() {
if (hip_stream_) {
hipError_t hip_status = hipStreamDestroy(hip_stream_);
}
}
RocJpegStatus ROCJpegDecoder::InitHIP(int device_id) {
hipError_t hip_status = hipSuccess;
CHECK_HIP(hipGetDeviceCount(&num_devices_));
if (num_devices_ < 1) {
ERR("ERROR: Failed to find any GPU!");
return ROCJPEG_STATUS_NOT_INITIALIZED;
}
if (device_id >= num_devices_) {
ERR("ERROR: the requested device_id is not found!");
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
CHECK_HIP(hipSetDevice(device_id));
CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop_, device_id));
CHECK_HIP(hipStreamCreate(&hip_stream_));
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::InitializeDecoder() {
RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS;
rocjpeg_status = InitHIP(device_id_);
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) {
ERR("ERROR: Failed to initilize the HIP!");
return rocjpeg_status;
}
if (backend_ == ROCJPEG_BACKEND_HARDWARE) {
rocjpeg_status = jpeg_vaapi_decoder_.InitializeDecoder(hip_dev_prop_.gcnArchName);
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) {
ERR("ERROR: Failed to initialize the VA-API JPEG decoder!");
return rocjpeg_status;
}
} else if (backend_ == ROCJPEG_BACKEND_HYBRID) {
return ROCJPEG_STATUS_NOT_IMPLEMENTED;
}
return rocjpeg_status;
}
RocJpegStatus ROCJpegDecoder::Decode(const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination) {
std::lock_guard<std::mutex> lock(mutex_);
RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS;
if (!jpeg_parser_.ParseJpegStream(data, length)) {
ERR("ERROR: Failed to parse the jpeg stream!");
return ROCJPEG_STATUS_BAD_JPEG;
}
const JpegStreamParameters *jpeg_stream_params = jpeg_parser_.GetJpegStreamParameters();
VASurfaceID current_surface_id;
CHECK_ROCJPEG(jpeg_vaapi_decoder_.SubmitDecode(jpeg_stream_params, current_surface_id));
if (destination != nullptr) {
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
CHECK_ROCJPEG(jpeg_vaapi_decoder_.SyncSurface(current_surface_id));
CHECK_ROCJPEG(jpeg_vaapi_decoder_.ExportSurface(current_surface_id, va_drm_prime_surface_desc));
CHECK_ROCJPEG(GetHipInteropMem(va_drm_prime_surface_desc));
uint16_t chroma_height = 0;
CHECK_ROCJPEG(GetChromaHeight(jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height));
switch (output_format) {
case ROCJPEG_OUTPUT_NATIVE:
// copy the native decoded output buffers from interop memory directly to the destination buffers
CHECK_ROCJPEG(CopyLuma(destination, jpeg_stream_params->picture_parameter_buffer.picture_height));
CHECK_ROCJPEG(CopyChroma(destination, chroma_height));
break;
case ROCJPEG_OUTPUT_YUV_PLANAR:
CHECK_ROCJPEG(GetPlanarYUVOutputFormat(jpeg_stream_params->picture_parameter_buffer.picture_width,
jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height, destination));
break;
case ROCJPEG_OUTPUT_Y:
CHECK_ROCJPEG(GetYOutputFormat(jpeg_stream_params->picture_parameter_buffer.picture_width,
jpeg_stream_params->picture_parameter_buffer.picture_height, destination));
break;
case ROCJPEG_OUTPUT_RGB:
CHECK_ROCJPEG(ColorConvertToRGB(jpeg_stream_params->picture_parameter_buffer.picture_width,
jpeg_stream_params->picture_parameter_buffer.picture_height, destination));
break;
default:
break;
}
CHECK_HIP(hipStreamSynchronize(hip_stream_));
CHECK_ROCJPEG(ReleaseHipInteropMem(current_surface_id));
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::GetImageInfo(const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights){
std::lock_guard<std::mutex> lock(mutex_);
if (widths == nullptr || heights == nullptr || num_components == nullptr) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
if (!jpeg_parser_.ParseJpegStream(data, length)) {
ERR("ERROR: jpeg parser failed!");
return ROCJPEG_STATUS_BAD_JPEG;
}
const JpegStreamParameters *jpeg_stream_params = jpeg_parser_.GetJpegStreamParameters();
*num_components = jpeg_stream_params->picture_parameter_buffer.num_components;
widths[0] = jpeg_stream_params->picture_parameter_buffer.picture_width;
heights[0] = jpeg_stream_params->picture_parameter_buffer.picture_height;
widths[3] = 0;
heights[3] = 0;
switch (jpeg_stream_params->chroma_subsampling) {
case CSS_444:
*subsampling = ROCJPEG_CSS_444;
widths[2] = widths[1] = widths[0];
heights[2] = heights[1] = heights[0];
break;
case CSS_422:
*subsampling = ROCJPEG_CSS_422;
widths[2] = widths[1] = widths[0] >> 1;
heights[2] = heights[1] = heights[0];
break;
case CSS_420:
*subsampling = ROCJPEG_CSS_420;
widths[2] = widths[1] = widths[0] >> 1;
heights[2] = heights[1] = heights[0] >> 1;
break;
case CSS_400:
*subsampling = ROCJPEG_CSS_400;
widths[3] = widths[2] = widths[1] = 0;
heights[3] = heights[2] = heights[1] = 0;
break;
case CSS_411:
*subsampling = ROCJPEG_CSS_411;
widths[2] = widths[1] = widths[0] >> 2;
heights[2] = heights[1] = heights[0];
break;
case CSS_440:
*subsampling = ROCJPEG_CSS_440;
widths[2] = widths[1] = widths[0] >> 1;
heights[2] = heights[1] = heights[0] >> 1;
break;
default:
*subsampling = ROCJPEG_CSS_UNKNOWN;
break;
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::GetHipInteropMem(VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc) {
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipImportExternalMemory(&hip_interop_.hip_ext_mem, &external_mem_handle_desc));
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&hip_interop_.hip_mapped_device_mem, hip_interop_.hip_ext_mem, &external_mem_buffer_desc));
hip_interop_.surface_format = va_drm_prime_surface_desc.fourcc;
hip_interop_.width = va_drm_prime_surface_desc.width;
hip_interop_.height = va_drm_prime_surface_desc.height;
hip_interop_.offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
hip_interop_.offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
hip_interop_.offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
hip_interop_.pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
hip_interop_.pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
hip_interop_.pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
hip_interop_.num_layers = va_drm_prime_surface_desc.num_layers;
for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
close(va_drm_prime_surface_desc.objects[i].fd);
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::ReleaseHipInteropMem(VASurfaceID current_surface_id) {
if (hip_interop_.hip_mapped_device_mem != nullptr) {
CHECK_HIP(hipFree(hip_interop_.hip_mapped_device_mem));
}
if (hip_interop_.hip_ext_mem != nullptr) {
CHECK_HIP(hipDestroyExternalMemory(hip_interop_.hip_ext_mem));
}
memset((void*)&hip_interop_, 0, sizeof(hip_interop_));
CHECK_ROCJPEG(jpeg_vaapi_decoder_.ReleaseSurface(current_surface_id));
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::CopyLuma(RocJpegImage *destination, uint16_t picture_height) {
if (hip_interop_.pitch[0] != 0 && destination->pitch[0] != 0 && destination->channel[0] != nullptr) {
if (destination->pitch[0] == hip_interop_.pitch[0]) {
uint32_t luma_size = destination->pitch[0] * picture_height;
CHECK_HIP(hipMemcpyDtoDAsync(destination->channel[0], hip_interop_.hip_mapped_device_mem, luma_size, hip_stream_));
} else {
CHECK_HIP(hipMemcpy2DAsync(destination->channel[0], destination->pitch[0], hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0],
destination->pitch[0], picture_height, hipMemcpyDeviceToDevice, hip_stream_));
}
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::CopyChroma(RocJpegImage *destination, uint16_t chroma_height) {
// copy channel1
if (hip_interop_.pitch[1] != 0 && destination->pitch[1] != 0 && destination->channel[1] != nullptr) {
uint32_t chroma_size = destination->pitch[1] * chroma_height;
uint8_t *layer1_mem = hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1];
if (destination->pitch[1] == hip_interop_.pitch[1]) {
CHECK_HIP(hipMemcpyDtoDAsync(destination->channel[1], layer1_mem, chroma_size, hip_stream_));
} else {
CHECK_HIP(hipMemcpy2DAsync(destination->channel[1], destination->pitch[1], layer1_mem, hip_interop_.pitch[1],
destination->pitch[1], chroma_height, hipMemcpyDeviceToDevice, hip_stream_));
}
}
// copy channel2
if (hip_interop_.pitch[2] != 0 && destination->pitch[2] != 0 && destination->channel[2] != nullptr) {
uint32_t chroma_size = destination->pitch[2] * chroma_height;
uint8_t *layer2_mem = hip_interop_.hip_mapped_device_mem + hip_interop_.offset[2];
if (destination->pitch[2] == hip_interop_.pitch[2]) {
CHECK_HIP(hipMemcpyDtoDAsync(destination->channel[2], layer2_mem, chroma_size, hip_stream_));
} else {
CHECK_HIP(hipMemcpy2DAsync(destination->channel[2], destination->pitch[2], layer2_mem, hip_interop_.pitch[2],
destination->pitch[2], chroma_height, hipMemcpyDeviceToDevice, hip_stream_));
}
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::GetChromaHeight(uint16_t picture_height, uint16_t &chroma_height) {
switch (hip_interop_.surface_format) {
case VA_FOURCC_NV12: /*NV12: two-plane 8-bit YUV 4:2:0*/
chroma_height = picture_height >> 1;
break;
case VA_FOURCC_444P: /*444P: three-plane 8-bit YUV 4:4:4*/
chroma_height = picture_height;
break;
case VA_FOURCC_Y800: /*Y800: one-plane 8-bit greyscale YUV 4:0:0*/
chroma_height = 0;
break;
case ROCJPEG_FOURCC_YUYV: /*YUYV: one-plane packed 8-bit YUV 4:2:2. Four bytes per pair of pixels: Y, U, Y, V*/
chroma_height = picture_height;
break;
default:
return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED;
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::ColorConvertToRGB(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) {
switch (hip_interop_.surface_format) {
case VA_FOURCC_444P:
ColorConvertYUV444ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0], hip_interop_.offset[1]);
break;
case ROCJPEG_FOURCC_YUYV:
ColorConvertYUYVToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
break;
case VA_FOURCC_NV12:
ColorConvertNV12ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0],
hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1], hip_interop_.pitch[1]);
break;
case VA_FOURCC_Y800:
ColorConvertYUV400ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
break;
default:
ERR("ERROR! surface format is not supported!");
return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED;
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::GetPlanarYUVOutputFormat(uint32_t picture_width, uint32_t picture_height, uint16_t chroma_height, RocJpegImage *destination) {
if (hip_interop_.surface_format == ROCJPEG_FOURCC_YUYV) {
// Extract the packed YUYV and copy them into the first, second, and thrid channels of the destination.
ConvertPackedYUYVToPlanarYUV(hip_stream_, picture_width, picture_height, destination->channel[0], destination->channel[1], destination->channel[2],
destination->pitch[0], destination->pitch[1], hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
} else {
CHECK_ROCJPEG(CopyLuma(destination, picture_height));
if (hip_interop_.surface_format == VA_FOURCC_NV12) {
// Extract the interleaved UV channels and copy them into the second and thrid channels of the destination.
ConvertInterleavedUVToPlanarUV(hip_stream_, picture_width >> 1, picture_height >> 1, destination->channel[1], destination->channel[2],
destination->pitch[1], hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1] , hip_interop_.pitch[1]);
} else {
CHECK_ROCJPEG(CopyChroma(destination, chroma_height));
}
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::GetYOutputFormat(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) {
if (hip_interop_.surface_format == ROCJPEG_FOURCC_YUYV) {
ExtractYFromPackedYUYV(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
} else {
CHECK_ROCJPEG(CopyLuma(destination, picture_height));
}
return ROCJPEG_STATUS_SUCCESS;
}
+76
Просмотреть файл
@@ -0,0 +1,76 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef ROC_JPEG_DECODER_H_
#define ROC_JPEG_DECODER_H_
#include <unistd.h>
#include <vector>
#include <hip/hip_runtime.h>
#include <mutex>
#include <queue>
#include "../api/rocjpeg.h"
#include "rocjpeg_parser.h"
#include "rocjpeg_commons.h"
#include "rocjpeg_vaapi_decoder.h"
#include "rocjpeg_hip_kernels.h"
struct HipInteropDeviceMem {
hipExternalMemory_t hip_ext_mem; // Interface to the vaapi-hip interop
uint8_t* hip_mapped_device_mem; // Mapped device memory for the YUV plane
uint32_t surface_format; // Pixel format fourcc of the whole surface
uint32_t width; // Width of the surface in pixels.
uint32_t height; // Height of the surface in pixels.
uint32_t offset[3]; // Offset of each plane
uint32_t pitch[3]; // Pitch of each plane
uint32_t num_layers; // Number of layers making up the surface
};
class ROCJpegDecoder {
public:
ROCJpegDecoder(RocJpegBackend backend = ROCJPEG_BACKEND_HARDWARE, int device_id = 0);
~ROCJpegDecoder();
RocJpegStatus InitializeDecoder();
RocJpegStatus GetImageInfo(const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights);
RocJpegStatus Decode(const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination);
private:
RocJpegStatus InitHIP(int device_id);
RocJpegStatus GetHipInteropMem(VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc);
RocJpegStatus ReleaseHipInteropMem(VASurfaceID current_surface_id);
RocJpegStatus GetChromaHeight(uint16_t picture_height, uint16_t &chroma_height);
RocJpegStatus CopyLuma(RocJpegImage *destination, uint16_t picture_height);
RocJpegStatus CopyChroma(RocJpegImage *destination, uint16_t chroma_height);
RocJpegStatus ColorConvertToRGB(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination);
RocJpegStatus GetPlanarYUVOutputFormat(uint32_t picture_width, uint32_t picture_height, uint16_t chroma_height, RocJpegImage *destination);
RocJpegStatus GetYOutputFormat(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination);
int num_devices_;
int device_id_;
hipDeviceProp_t hip_dev_prop_;
hipStream_t hip_stream_;
std::mutex mutex_;
JpegParser jpeg_parser_;
RocJpegBackend backend_;
RocJpegVappiDecoder jpeg_vaapi_decoder_;
HipInteropDeviceMem hip_interop_;
};
#endif //ROC_JPEG_DECODER_H_
+961
Просмотреть файл
@@ -0,0 +1,961 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "rocjpeg_hip_kernels.h"
__device__ __forceinline__ uint32_t hipPack(float4 src) {
return __builtin_amdgcn_cvt_pk_u8_f32(src.w, 3,
__builtin_amdgcn_cvt_pk_u8_f32(src.z, 2,
__builtin_amdgcn_cvt_pk_u8_f32(src.y, 1,
__builtin_amdgcn_cvt_pk_u8_f32(src.x, 0, 0))));
}
__device__ __forceinline__ float hipUnpack0(uint32_t src) {
return (float)(src & 0xFF);
}
__device__ __forceinline__ float hipUnpack1(uint32_t src) {
return (float)((src >> 8) & 0xFF);
}
__device__ __forceinline__ float hipUnpack2(uint32_t src) {
return (float)((src >> 16) & 0xFF);
}
__device__ __forceinline__ float hipUnpack3(uint32_t src) {
return (float)((src >> 24) & 0xFF);
}
__device__ __forceinline__ float4 hipUnpack(uint32_t src) {
return make_float4(hipUnpack0(src), hipUnpack1(src), hipUnpack2(src), hipUnpack3(src));
}
__global__ void ColorConvertYUV444ToRGBKernel(uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_y_image, const uint8_t *src_u_image, const uint8_t *src_v_image,
uint32_t src_yuv_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_yuv_image_stride_in_bytes_comp) {
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
if ((x < dst_width_comp) && (y < dst_height_comp)) {
uint32_t src_y0_idx = y * src_yuv_image_stride_in_bytes_comp + (x << 3);
uint32_t src_y1_idx = src_y0_idx + src_yuv_image_stride_in_bytes;
uint2 y0 = *((uint2 *)(&src_y_image[src_y0_idx]));
uint2 y1 = *((uint2 *)(&src_y_image[src_y1_idx]));
uint2 u0 = *((uint2 *)(&src_u_image[src_y0_idx]));
uint2 u1 = *((uint2 *)(&src_u_image[src_y1_idx]));
uint2 v0 = *((uint2 *)(&src_v_image[src_y0_idx]));
uint2 v1 = *((uint2 *)(&src_v_image[src_y1_idx]));
uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24);
uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes;
float2 cr = make_float2( 0.0000f, 1.5748f);
float2 cg = make_float2(-0.1873f, -0.4681f);
float2 cb = make_float2( 1.8556f, 0.0000f);
float3 yuv;
DUINT6 rgb0, rgb1;
float4 f;
yuv.x = hipUnpack0(y0.x);
yuv.y = hipUnpack0(u0.x);
yuv.z = hipUnpack0(v0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(y0.x);
yuv.y = hipUnpack1(u0.x);
yuv.z = hipUnpack1(v0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
rgb0.data[0] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(y0.x);
yuv.y = hipUnpack2(u0.x);
yuv.z = hipUnpack2(v0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
rgb0.data[1] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(y0.x);
yuv.y = hipUnpack3(u0.x);
yuv.z = hipUnpack3(v0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
rgb0.data[2] = hipPack(f);
yuv.x = hipUnpack0(y0.y);
yuv.y = hipUnpack0(u0.y);
yuv.z = hipUnpack0(v0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(y0.y);
yuv.y = hipUnpack1(u0.y);
yuv.z = hipUnpack1(v0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
rgb0.data[3] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(y0.y);
yuv.y = hipUnpack2(u0.y);
yuv.z = hipUnpack2(v0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
rgb0.data[4] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(y0.y);
yuv.y = hipUnpack3(u0.y);
yuv.z = hipUnpack3(v0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
rgb0.data[5] = hipPack(f);
yuv.x = hipUnpack0(y1.x);
yuv.y = hipUnpack0(u1.x);
yuv.z = hipUnpack0(v1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(y1.x);
yuv.y = hipUnpack1(u1.x);
yuv.z = hipUnpack1(v1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
rgb1.data[0] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(y1.x);
yuv.y = hipUnpack2(u1.x);
yuv.z = hipUnpack2(v1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
rgb1.data[1] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(y1.x);
yuv.y = hipUnpack3(u1.x);
yuv.z = hipUnpack3(v1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
rgb1.data[2] = hipPack(f);
yuv.x = hipUnpack0(y1.y);
yuv.y = hipUnpack0(u1.y);
yuv.z = hipUnpack0(v1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(y1.y);
yuv.y = hipUnpack1(u1.y);
yuv.z = hipUnpack1(v1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
rgb1.data[3] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(y1.y);
yuv.y = hipUnpack2(u1.y);
yuv.z = hipUnpack2(v1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
rgb1.data[4] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(y1.y);
yuv.y = hipUnpack3(u1.y);
yuv.z = hipUnpack3(v1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
rgb1.data[5] = hipPack(f);
*((DUINT6 *)(&dst_image[rgb0_idx])) = rgb0;
*((DUINT6 *)(&dst_image[rgb1_idx])) = rgb1;
}
}
void ColorConvertYUV444ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image,
uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset) {
int32_t local_threads_x = 16;
int32_t local_threads_y = 4;
int32_t global_threads_x = (dst_width + 7) >> 3;
int32_t global_threads_y = (dst_height + 1) >> 1;
uint32_t dst_width_comp = (dst_width + 7) / 8;
uint32_t dst_height_comp = (dst_height + 1) / 2;
uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2;
uint32_t src_yuv_image_stride_in_bytes_comp = src_yuv_image_stride_in_bytes * 2;
ColorConvertYUV444ToRGBKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, (uint8_t *)dst_image,
dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_yuv_image, src_yuv_image + src_u_image_offset,
src_yuv_image + (src_u_image_offset * 2), src_yuv_image_stride_in_bytes,
dst_width_comp, dst_height_comp, src_yuv_image_stride_in_bytes_comp);
}
__global__ void ColorConvertYUYVToRGBKernel(uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp,
const uint8_t *src_image, uint32_t src_image_stride_in_bytes, uint32_t src_image_stride_in_bytes_comp,
uint32_t dst_width_comp, uint32_t dst_height_comp) {
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
if ((x < dst_width_comp) && (y < dst_height_comp)) {
uint32_t l0_idx = y * src_image_stride_in_bytes_comp + (x << 4);
uint32_t l1_idx = l0_idx + src_image_stride_in_bytes;
uint4 l0 = *((uint4 *)(&src_image[l0_idx]));
uint4 l1 = *((uint4 *)(&src_image[l1_idx]));
uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24);
uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes;
float4 f;
uint2 py0, py1;
uint2 pu0, pu1;
uint2 pv0, pv1;
py0.x = hipPack(make_float4(hipUnpack0(l0.x), hipUnpack2(l0.x), hipUnpack0(l0.y), hipUnpack2(l0.y)));
py0.y = hipPack(make_float4(hipUnpack0(l0.z), hipUnpack2(l0.z), hipUnpack0(l0.w), hipUnpack2(l0.w)));
py1.x = hipPack(make_float4(hipUnpack0(l1.x), hipUnpack2(l1.x), hipUnpack0(l1.y), hipUnpack2(l1.y)));
py1.y = hipPack(make_float4(hipUnpack0(l1.z), hipUnpack2(l1.z), hipUnpack0(l1.w), hipUnpack2(l1.w)));
pu0.x = hipPack(make_float4(hipUnpack1(l0.x), hipUnpack1(l0.x), hipUnpack1(l0.y), hipUnpack1(l0.y)));
pu0.y = hipPack(make_float4(hipUnpack1(l0.z), hipUnpack1(l0.z), hipUnpack1(l0.w), hipUnpack1(l0.w)));
pu1.x = hipPack(make_float4(hipUnpack1(l1.x), hipUnpack1(l1.x), hipUnpack1(l1.y), hipUnpack1(l1.y)));
pu1.y = hipPack(make_float4(hipUnpack1(l1.z), hipUnpack1(l1.z), hipUnpack1(l1.w), hipUnpack1(l1.w)));
pv0.x = hipPack(make_float4(hipUnpack3(l0.x), hipUnpack3(l0.x), hipUnpack3(l0.y), hipUnpack3(l0.y)));
pv0.y = hipPack(make_float4(hipUnpack3(l0.z), hipUnpack3(l0.z), hipUnpack3(l0.w), hipUnpack3(l0.w)));
pv1.x = hipPack(make_float4(hipUnpack3(l1.x), hipUnpack3(l1.x), hipUnpack3(l1.y), hipUnpack3(l1.y)));
pv1.y = hipPack(make_float4(hipUnpack3(l1.z), hipUnpack3(l1.z), hipUnpack3(l1.w), hipUnpack3(l1.w)));
float2 cr = make_float2( 0.0000f, 1.5748f);
float2 cg = make_float2(-0.1873f, -0.4681f);
float2 cb = make_float2( 1.8556f, 0.0000f);
float3 yuv;
DUINT6 prgb0, prgb1;
yuv.x = hipUnpack0(py0.x);
yuv.y = hipUnpack0(pu0.x);
yuv.z = hipUnpack0(pv0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(py0.x);
yuv.y = hipUnpack1(pu0.x);
yuv.z = hipUnpack1(pv0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
prgb0.data[0] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(py0.x);
yuv.y = hipUnpack2(pu0.x);
yuv.z = hipUnpack2(pv0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
prgb0.data[1] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(py0.x);
yuv.y = hipUnpack3(pu0.x);
yuv.z = hipUnpack3(pv0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
prgb0.data[2] = hipPack(f);
yuv.x = hipUnpack0(py0.y);
yuv.y = hipUnpack0(pu0.y);
yuv.z = hipUnpack0(pv0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(py0.y);
yuv.y = hipUnpack1(pu0.y);
yuv.z = hipUnpack1(pv0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
prgb0.data[3] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(py0.y);
yuv.y = hipUnpack2(pu0.y);
yuv.z = hipUnpack2(pv0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
prgb0.data[4] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(py0.y);
yuv.y = hipUnpack3(pu0.y);
yuv.z = hipUnpack3(pv0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
prgb0.data[5] = hipPack(f);
yuv.x = hipUnpack0(py1.x);
yuv.y = hipUnpack0(pu1.x);
yuv.z = hipUnpack0(pv1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(py1.x);
yuv.y = hipUnpack1(pu1.x);
yuv.z = hipUnpack1(pv1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
prgb1.data[0] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(py1.x);
yuv.y = hipUnpack2(pu1.x);
yuv.z = hipUnpack2(pv1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
prgb1.data[1] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(py1.x);
yuv.y = hipUnpack3(pu1.x);
yuv.z = hipUnpack3(pv1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
prgb1.data[2] = hipPack(f);
yuv.x = hipUnpack0(py1.y);
yuv.y = hipUnpack0(pu1.y);
yuv.z = hipUnpack0(pv1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(py1.y);
yuv.y = hipUnpack1(pu1.y);
yuv.z = hipUnpack1(pv1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
prgb1.data[3] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(py1.y);
yuv.y = hipUnpack2(pu1.y);
yuv.z = hipUnpack2(pv1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
prgb1.data[4] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(py1.y);
yuv.y = hipUnpack3(pu1.y);
yuv.z = hipUnpack3(pv1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
prgb1.data[5] = hipPack(f);
*((DUINT6 *)(&dst_image[rgb0_idx])) = prgb0;
*((DUINT6 *)(&dst_image[rgb1_idx])) = prgb1;
}
}
void ColorConvertYUYVToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
const uint8_t *src_image, uint32_t src_image_stride_in_bytes) {
int32_t local_threads_x = 16;
int32_t local_threads_y = 4;
int32_t global_threads_x = (dst_width + 7) >> 3;
int32_t global_threads_y = (dst_height + 1) >> 1;
uint32_t dst_width_comp = (dst_width + 7) / 8;
uint32_t dst_height_comp = (dst_height + 1) / 2;
uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2;
uint32_t src_image_stride_in_bytes_comp = src_image_stride_in_bytes * 2;
ColorConvertYUYVToRGBKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, (uint8_t *)dst_image,
dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_image, src_image_stride_in_bytes,
src_image_stride_in_bytes_comp, dst_width_comp, dst_height_comp);
}
__global__ void ColorConvertNV12ToRGBKernel(uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp,
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes,
const uint8_t *src_chroma_image, uint32_t src_chroma_image_stride_in_bytes,
uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_luma_image_stride_in_bytes_comp) {
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
if ((x < dst_width_comp) && (y < dst_height_comp)) {
uint32_t src_y0_idx = y * src_luma_image_stride_in_bytes_comp + (x << 3);
uint32_t src_y1_idx = src_y0_idx + src_luma_image_stride_in_bytes;
uint32_t src_uv_idx = y * src_chroma_image_stride_in_bytes + (x << 3);
uint2 y0 = *((uint2 *)(&src_luma_image[src_y0_idx]));
uint2 y1 = *((uint2 *)(&src_luma_image[src_y1_idx]));
uint2 uv = *((uint2 *)(&src_chroma_image[src_uv_idx]));
uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24);
uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes;
float4 f;
uint2 u0, u1;
uint2 v0, v1;
f.x = hipUnpack0(uv.x);
f.y = f.x;
f.z = hipUnpack2(uv.x);
f.w = f.z;
u0.x = hipPack(f);
f.x = hipUnpack0(uv.y);
f.y = f.x;
f.z = hipUnpack2(uv.y);
f.w = f.z;
u0.y = hipPack(f);
u1.x = u0.x;
u1.y = u0.y;
f.x = hipUnpack1(uv.x);
f.y = f.x;
f.z = hipUnpack3(uv.x);
f.w = f.z;
v0.x = hipPack(f);
f.x = hipUnpack1(uv.y);
f.y = f.x;
f.z = hipUnpack3(uv.y);
f.w = f.z;
v0.y = hipPack(f);
v1.x = v0.x;
v1.y = v0.y;
float2 cr = make_float2( 0.0000f, 1.5748f);
float2 cg = make_float2(-0.1873f, -0.4681f);
float2 cb = make_float2( 1.8556f, 0.0000f);
float3 yuv;
DUINT6 rgb0, rgb1;
yuv.x = hipUnpack0(y0.x);
yuv.y = hipUnpack0(u0.x);
yuv.z = hipUnpack0(v0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(y0.x);
yuv.y = hipUnpack1(u0.x);
yuv.z = hipUnpack1(v0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
rgb0.data[0] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(y0.x);
yuv.y = hipUnpack2(u0.x);
yuv.z = hipUnpack2(v0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
rgb0.data[1] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(y0.x);
yuv.y = hipUnpack3(u0.x);
yuv.z = hipUnpack3(v0.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
rgb0.data[2] = hipPack(f);
yuv.x = hipUnpack0(y0.y);
yuv.y = hipUnpack0(u0.y);
yuv.z = hipUnpack0(v0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(y0.y);
yuv.y = hipUnpack1(u0.y);
yuv.z = hipUnpack1(v0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
rgb0.data[3] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(y0.y);
yuv.y = hipUnpack2(u0.y);
yuv.z = hipUnpack2(v0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
rgb0.data[4] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(y0.y);
yuv.y = hipUnpack3(u0.y);
yuv.z = hipUnpack3(v0.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
rgb0.data[5] = hipPack(f);
yuv.x = hipUnpack0(y1.x);
yuv.y = hipUnpack0(u1.x);
yuv.z = hipUnpack0(v1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(y1.x);
yuv.y = hipUnpack1(u1.x);
yuv.z = hipUnpack1(v1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
rgb1.data[0] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(y1.x);
yuv.y = hipUnpack2(u1.x);
yuv.z = hipUnpack2(v1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
rgb1.data[1] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(y1.x);
yuv.y = hipUnpack3(u1.x);
yuv.z = hipUnpack3(v1.x);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
rgb1.data[2] = hipPack(f);
yuv.x = hipUnpack0(y1.y);
yuv.y = hipUnpack0(u1.y);
yuv.z = hipUnpack0(v1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.x = fmaf(cr.y, yuv.z, yuv.x);
f.y = fmaf(cg.x, yuv.y, yuv.x);
f.y = fmaf(cg.y, yuv.z, f.y);
f.z = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack1(y1.y);
yuv.y = hipUnpack1(u1.y);
yuv.z = hipUnpack1(v1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.w = fmaf(cr.y, yuv.z, yuv.x);
rgb1.data[3] = hipPack(f);
f.x = fmaf(cg.x, yuv.y, yuv.x);
f.x = fmaf(cg.y, yuv.z, f.x);
f.y = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack2(y1.y);
yuv.y = hipUnpack2(u1.y);
yuv.z = hipUnpack2(v1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.z = fmaf(cr.y, yuv.z, yuv.x);
f.w = fmaf(cg.x, yuv.y, yuv.x);
f.w = fmaf(cg.y, yuv.z, f.w);
rgb1.data[4] = hipPack(f);
f.x = fmaf(cb.x, yuv.y, yuv.x);
yuv.x = hipUnpack3(y1.y);
yuv.y = hipUnpack3(u1.y);
yuv.z = hipUnpack3(v1.y);
yuv.y -= 128.0f;
yuv.z -= 128.0f;
f.y = fmaf(cr.y, yuv.z, yuv.x);
f.z = fmaf(cg.x, yuv.y, yuv.x);
f.z = fmaf(cg.y, yuv.z, f.z);
f.w = fmaf(cb.x, yuv.y, yuv.x);
rgb1.data[5] = hipPack(f);
*((DUINT6 *)(&dst_image[rgb0_idx])) = rgb0;
*((DUINT6 *)(&dst_image[rgb1_idx])) = rgb1;
}
}
void ColorConvertNV12ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes,
const uint8_t *src_chroma_image, uint32_t src_chroma_image_stride_in_bytes) {
int32_t local_threads_x = 16;
int32_t local_threads_y = 4;
int32_t global_threads_x = (dst_width + 7) >> 3;
int32_t global_threads_y = (dst_height + 1) >> 1;
uint32_t dst_width_comp = (dst_width + 7) / 8;
uint32_t dst_height_comp = (dst_height + 1) / 2;
uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2;
uint32_t src_luma_image_stride_in_bytes_comp = src_luma_image_stride_in_bytes * 2;
ColorConvertNV12ToRGBKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, dst_image_stride_in_bytes,
dst_image_stride_in_bytes_comp, src_luma_image, src_luma_image_stride_in_bytes, src_chroma_image,
src_chroma_image_stride_in_bytes, dst_width_comp, dst_height_comp, src_luma_image_stride_in_bytes_comp);
}
__global__ void ColorConvertYUV400ToRGBKernel(uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp,
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes,
uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_luma_image_stride_in_bytes_comp) {
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
if ((x < dst_width_comp) && (y < dst_height_comp)) {
uint32_t src_y0_idx = y * src_luma_image_stride_in_bytes_comp + (x << 3);
uint32_t src_y1_idx = src_y0_idx + src_luma_image_stride_in_bytes;
uint2 y0 = *((uint2 *)(&src_luma_image[src_y0_idx]));
uint2 y1 = *((uint2 *)(&src_luma_image[src_y1_idx]));
uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24);
uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes;
DUINT6 rgb0, rgb1;
uint8_t y0_b0, y0_b1, y0_b2, y0_b3, y0_b4, y0_b5, y0_b6, y0_b7;
uint8_t y1_b0, y1_b1, y1_b2, y1_b3, y1_b4, y1_b5, y1_b6, y1_b7;
y0_b0 = hipUnpack0(y0.x);
y0_b1 = hipUnpack1(y0.x);
y0_b2 = hipUnpack2(y0.x);
y0_b3 = hipUnpack3(y0.x);
y0_b4 = hipUnpack0(y0.y);
y0_b5 = hipUnpack1(y0.y);
y0_b6 = hipUnpack2(y0.y);
y0_b7 = hipUnpack3(y0.y);
y1_b0 = hipUnpack0(y1.x);
y1_b1 = hipUnpack1(y1.x);
y1_b2 = hipUnpack2(y1.x);
y1_b3 = hipUnpack3(y1.x);
y1_b4 = hipUnpack0(y1.y);
y1_b5 = hipUnpack1(y1.y);
y1_b6 = hipUnpack2(y1.y);
y1_b7 = hipUnpack3(y1.y);
rgb0.data[0] = hipPack(make_float4(y0_b0, y0_b0, y0_b0, y0_b1));
rgb0.data[1] = hipPack(make_float4(y0_b1, y0_b1, y0_b2, y0_b2));
rgb0.data[2] = hipPack(make_float4(y0_b2, y0_b3, y0_b3, y0_b3));
rgb0.data[3] = hipPack(make_float4(y0_b4, y0_b4, y0_b4, y0_b5));
rgb0.data[4] = hipPack(make_float4(y0_b5, y0_b5, y0_b6, y0_b6));
rgb0.data[5] = hipPack(make_float4(y0_b6, y0_b7, y0_b7, y0_b7));
rgb1.data[0] = hipPack(make_float4(y1_b0, y1_b0, y1_b0, y1_b1));
rgb1.data[1] = hipPack(make_float4(y1_b1, y1_b1, y1_b2, y1_b2));
rgb1.data[2] = hipPack(make_float4(y1_b2, y1_b3, y1_b3, y1_b3));
rgb1.data[3] = hipPack(make_float4(y1_b4, y1_b4, y1_b4, y1_b5));
rgb1.data[4] = hipPack(make_float4(y1_b5, y1_b5, y1_b6, y1_b6));
rgb1.data[5] = hipPack(make_float4(y1_b6, y1_b7, y1_b7, y1_b7));
*((DUINT6 *)(&dst_image[rgb0_idx])) = rgb0;
*((DUINT6 *)(&dst_image[rgb1_idx])) = rgb1;
}
}
void ColorConvertYUV400ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes){
int32_t local_threads_x = 16;
int32_t local_threads_y = 4;
int32_t global_threads_x = (dst_width + 7) >> 3;
int32_t global_threads_y = (dst_height + 1) >> 1;
uint32_t dst_width_comp = (dst_width + 7) / 8;
uint32_t dst_height_comp = (dst_height + 1) / 2;
uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2;
uint32_t src_luma_image_stride_in_bytes_comp = src_luma_image_stride_in_bytes * 2;
ColorConvertYUV400ToRGBKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, dst_image_stride_in_bytes,
dst_image_stride_in_bytes_comp, src_luma_image, src_luma_image_stride_in_bytes, dst_width_comp, dst_height_comp,
src_luma_image_stride_in_bytes_comp);
}
__global__ void ConvertInterleavedUVToPlanarUVKernel(uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image1, uint8_t *dst_image2, uint32_t dst_image_stride_in_bytes,
const uint8_t *src_image, uint32_t src_image_stride_in_bytes) {
int32_t x = (hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x) * 8;
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
if (x >= dst_width || y >= dst_height) {
return;
}
uint32_t src_idx = y * src_image_stride_in_bytes + x + x;
uint32_t dst_idx = y * dst_image_stride_in_bytes + x;
uint4 src = *((uint4 *)(&src_image[src_idx]));
uint2 dst1, dst2;
dst1.x = hipPack(make_float4(hipUnpack0(src.x), hipUnpack2(src.x), hipUnpack0(src.y), hipUnpack2(src.y)));
dst1.y = hipPack(make_float4(hipUnpack0(src.z), hipUnpack2(src.z), hipUnpack0(src.w), hipUnpack2(src.w)));
dst2.x = hipPack(make_float4(hipUnpack1(src.x), hipUnpack3(src.x), hipUnpack1(src.y), hipUnpack3(src.y)));
dst2.y = hipPack(make_float4(hipUnpack1(src.z), hipUnpack3(src.z), hipUnpack1(src.w), hipUnpack3(src.w)));
*((uint2 *)(&dst_image1[dst_idx])) = dst1;
*((uint2 *)(&dst_image2[dst_idx])) = dst2;
}
void ConvertInterleavedUVToPlanarUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image1, uint8_t *dst_image2, uint32_t dst_image_stride_in_bytes,
const uint8_t *src_image1, uint32_t src_image1_stride_in_bytes) {
int32_t local_threads_x = 16, local_threads_y = 16;
int32_t global_threads_x = (dst_width + 7) >> 3;
int32_t global_threads_y = dst_height;
ConvertInterleavedUVToPlanarUVKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image1, dst_image2,
dst_image_stride_in_bytes, src_image1, src_image1_stride_in_bytes);
}
__global__ void ExtractYFromPackedYUYVKernel(uint32_t dst_width, uint32_t dst_height,
uint8_t *destination_y, uint32_t dst_luma_stride_in_bytes,
const uint8_t *src_image, uint32_t src_image_stride_in_bytes,
uint32_t dst_width_comp) {
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
if (x < dst_width_comp && y < dst_height) {
uint32_t src_idx = y * src_image_stride_in_bytes + (x << 4);
uint32_t dst_idx = y * dst_luma_stride_in_bytes + (x << 3);
uint4 src = *((uint4 *)(&src_image[src_idx]));
uint2 dst_y;
dst_y.x = hipPack(make_float4(hipUnpack0(src.x), hipUnpack2(src.x), hipUnpack0(src.y), hipUnpack2(src.y)));
dst_y.y = hipPack(make_float4(hipUnpack0(src.z), hipUnpack2(src.z), hipUnpack0(src.w), hipUnpack2(src.w)));
*((uint2 *)(&destination_y[dst_idx])) = dst_y;
}
}
void ExtractYFromPackedYUYV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *destination_y, uint32_t dst_luma_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes) {
int32_t local_threads_x = 16;
int32_t local_threads_y = 4;
int32_t global_threads_x = (dst_width + 7) >> 3;
int32_t global_threads_y = dst_height;
uint32_t dst_width_comp = (dst_width + 7) / 8;
ExtractYFromPackedYUYVKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, destination_y,
dst_luma_stride_in_bytes, src_image, src_image_stride_in_bytes, dst_width_comp);
}
__global__ void ConvertPackedYUYVToPlanarYUVKernel(uint32_t dst_width, uint32_t dst_height,
uint8_t *destination_y, uint8_t *destination_u, uint8_t *destination_v, uint32_t dst_luma_stride_in_bytes, uint32_t dst_chroma_stride_in_bytes,
const uint8_t *src_image, uint32_t src_image_stride_in_bytes,
uint32_t dst_width_comp) {
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
if ((x < dst_width_comp && y < dst_height)) {
uint32_t src_idx = y * src_image_stride_in_bytes + (x << 4);
uint32_t dst_y_idx = y * dst_luma_stride_in_bytes + (x << 3);
uint32_t dst_uv_idx = y * dst_chroma_stride_in_bytes + (x << 2);
uint4 src = *((uint4 *)(&src_image[src_idx]));
uint2 dst_y;
uint32_t dst_u, dst_v;
dst_y.x = hipPack(make_float4(hipUnpack0(src.x), hipUnpack2(src.x), hipUnpack0(src.y), hipUnpack2(src.y)));
dst_y.y = hipPack(make_float4(hipUnpack0(src.z), hipUnpack2(src.z), hipUnpack0(src.w), hipUnpack2(src.w)));
dst_u = hipPack(make_float4(hipUnpack1(src.x), hipUnpack1(src.y), hipUnpack1(src.z), hipUnpack1(src.w)));
dst_v = hipPack(make_float4(hipUnpack3(src.x), hipUnpack3(src.y), hipUnpack3(src.z), hipUnpack3(src.w)));
*((uint2 *)(&destination_y[dst_y_idx])) = dst_y;
*((uint32_t *)(&destination_u[dst_uv_idx])) = dst_u;
*((uint32_t *)(&destination_v[dst_uv_idx])) = dst_v;
}
}
void ConvertPackedYUYVToPlanarYUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *destination_y, uint8_t *destination_u, uint8_t *destination_v, uint32_t dst_luma_stride_in_bytes, uint32_t dst_chroma_stride_in_bytes,
const uint8_t *src_image, uint32_t src_image_stride_in_bytes) {
int32_t local_threads_x = 16;
int32_t local_threads_y = 4;
int32_t global_threads_x = (dst_width + 7) >> 3;
int32_t global_threads_y = dst_height;
uint32_t dst_width_comp = (dst_width + 7) / 8;
ConvertPackedYUYVToPlanarYUVKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, destination_y, destination_u,
destination_v, dst_luma_stride_in_bytes, dst_chroma_stride_in_bytes, src_image, src_image_stride_in_bytes, dst_width_comp);
}
+62
Просмотреть файл
@@ -0,0 +1,62 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef ROC_JPEG_HIP_KERNELS_H_
#define ROC_JPEG_HIP_KERNELS_H_
#pragma once
#include <hip/hip_runtime.h>
void ColorConvertYUV444ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image,
uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset);
void ColorConvertYUYVToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
const uint8_t *src_image, uint32_t src_image_stride_in_bytes);
void ColorConvertNV12ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes,
const uint8_t *src_chroma_image, uint32_t src_chroma_image_stride_in_bytes);
void ColorConvertYUV400ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes);
void ConvertInterleavedUVToPlanarUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *dst_image1, uint8_t *dst_image2, uint32_t dst_image_stride_in_bytes,
const uint8_t *src_image1, uint32_t src_image1_stride_in_bytes);
void ExtractYFromPackedYUYV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *destination_y, uint32_t dst_luma_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes);
void ConvertPackedYUYVToPlanarYUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
uint8_t *destination_y, uint8_t *destination_u, uint8_t *destination_v, uint32_t dst_luma_stride_in_bytes,
uint32_t dst_chroma_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes);
typedef struct UINT6TYPE {
uint data[6];
} DUINT6;
#endif //ROC_JPEG_HIP_KERNELS_H_
+387
Просмотреть файл
@@ -0,0 +1,387 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "rocjpeg_parser.h"
JpegParser::JpegParser() : stream_{nullptr}, stream_end_{nullptr}, stream_length_{0},
jpeg_stream_parameters_{{}} {
}
JpegParser::~JpegParser() {
stream_ = nullptr;
stream_end_ = nullptr;
stream_length_ = 0;
}
bool JpegParser::ParseJpegStream(const uint8_t *jpeg_stream, uint32_t jpeg_stream_size) {
if (jpeg_stream == nullptr) {
ERR("invalid argument!");
return false;
}
stream_ = jpeg_stream;
stream_length_ = jpeg_stream_size;
stream_end_ = stream_ + stream_length_;
jpeg_stream_parameters_ = {};
bool soi_marker_found = false;
bool sos_marker_found = false;
bool dht_marker_found = false;
bool dqt_marker_found = false;
uint8_t marker;
const uint8_t *next_chunck;
int32_t chuck_len;
// The first two bytes of a JPEG must be 0XFFD8
if (*stream_ != 0xFF || *(stream_ + 1) != SOI) {
ERR("Invalid JPEG!");
return false;
}
soi_marker_found = ParseSOI();
if (!soi_marker_found) {
ERR("failed to find the SOI marker!");
}
while (!sos_marker_found && stream_ <= stream_end_) {
while ((*stream_ == 0xFF))
stream_++;
marker = *stream_++;
chuck_len = swap_bytes(stream_);
next_chunck = stream_ + chuck_len;
switch (marker) {
case SOF:
if (!ParseSOF())
return false;
break;
case DHT:
if (!ParseDHT())
return false;
dht_marker_found = true;
break;
case DQT:
if (!ParseDQT())
return false;
dqt_marker_found = true;
break;
case DRI:
if (!ParseDRI())
return false;
break;
case SOS:
if (!ParseSOS())
return false;
sos_marker_found = true;
break;
default:
break;
}
stream_ = next_chunck;
}
if (!dht_marker_found) {
ERR("didn't find any Huffman table!");
return false;
}
if (!dqt_marker_found) {
ERR("didn't find any quantization table!");
return false;
}
if (!ParseEOI())
return false;
return true;
}
bool JpegParser::ParseSOI() {
if (stream_ == nullptr) {
return false;
}
while (!(*stream_ == 0xFF && *(stream_ + 1) == SOI)) {
if (stream_ <= stream_end_) {
stream_++;
continue;
} else
return false;
}
stream_ += 2;
return true;
}
bool JpegParser::ParseSOF() {
uint32_t component_id, sampling_factor;
uint8_t quantiser_table_selector;
if (stream_ == nullptr) {
return false;
}
jpeg_stream_parameters_.picture_parameter_buffer.picture_height = swap_bytes(stream_ + 3);
jpeg_stream_parameters_.picture_parameter_buffer.picture_width = swap_bytes(stream_ + 5);
jpeg_stream_parameters_.picture_parameter_buffer.num_components = stream_[7];
if (jpeg_stream_parameters_.picture_parameter_buffer.num_components > NUM_COMPONENTS - 1) {
ERR("invalid number of JPEG components!");
return false;
}
stream_ += 8;
for (int32_t i = 0; i < jpeg_stream_parameters_.picture_parameter_buffer.num_components; i++) {
component_id = *stream_++;
sampling_factor = *stream_++;
quantiser_table_selector = *stream_++;
jpeg_stream_parameters_.picture_parameter_buffer.components[i].component_id = component_id;
if (quantiser_table_selector >= NUM_COMPONENTS) {
ERR("invalid number of the quantization table!");
return false;
}
jpeg_stream_parameters_.picture_parameter_buffer.components[i].v_sampling_factor = sampling_factor & 0xF;
jpeg_stream_parameters_.picture_parameter_buffer.components[i].h_sampling_factor = sampling_factor >> 4;
jpeg_stream_parameters_.picture_parameter_buffer.components[i].quantiser_table_selector = quantiser_table_selector;
}
uint8_t max_h_factor = jpeg_stream_parameters_.picture_parameter_buffer.components[0].h_sampling_factor;
uint8_t max_v_factor = jpeg_stream_parameters_.picture_parameter_buffer.components[0].v_sampling_factor;
jpeg_stream_parameters_.slice_parameter_buffer.num_mcus = ((jpeg_stream_parameters_.picture_parameter_buffer.picture_width + max_h_factor * 8 - 1) / (max_h_factor * 8)) *
((jpeg_stream_parameters_.picture_parameter_buffer.picture_height + max_v_factor * 8 - 1) / (max_v_factor * 8));
jpeg_stream_parameters_.chroma_subsampling = GetChromaSubsampling(jpeg_stream_parameters_.picture_parameter_buffer.components[0].h_sampling_factor,
jpeg_stream_parameters_.picture_parameter_buffer.components[1].h_sampling_factor,
jpeg_stream_parameters_.picture_parameter_buffer.components[2].h_sampling_factor,
jpeg_stream_parameters_.picture_parameter_buffer.components[0].v_sampling_factor,
jpeg_stream_parameters_.picture_parameter_buffer.components[1].v_sampling_factor,
jpeg_stream_parameters_.picture_parameter_buffer.components[2].v_sampling_factor);
return true;
}
bool JpegParser::ParseDQT() {
int32_t quantization_table_index = 0;
const uint8_t *dqt_block_end;
if (stream_ == nullptr) {
return false;
}
dqt_block_end = stream_ + swap_bytes(stream_);
stream_ += 2;
while (stream_ < dqt_block_end) {
quantization_table_index = *stream_++;
if (quantization_table_index >> 4) {
ERR("16 bits quantization table is not supported!");
return false;
}
if (quantization_table_index >= 4) {
ERR("invalid number of quantization table!");
return false;
}
std::memcpy(jpeg_stream_parameters_.quantization_matrix_buffer.quantiser_table[quantization_table_index & 0x0F], stream_, 64);
jpeg_stream_parameters_.quantization_matrix_buffer.load_quantiser_table[quantization_table_index & 0x0F] = 1;
stream_ += 64;
}
return true;
}
bool JpegParser::ParseDHT() {
uint32_t count, i;
int32_t length, index;
uint8_t ac_huffman_table, huffman_table_id;
if (stream_ == nullptr) {
return false;
}
length = swap_bytes(stream_) - 2;
stream_ += 2;
while (length > 0) {
index = *stream_++;
ac_huffman_table = index & 0xF0;
huffman_table_id = index & 0x0F;
if (huffman_table_id >= HUFFMAN_TABLES) {
ERR("invlaid number of Huffman table!");
return false;
}
if (ac_huffman_table) {
std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].num_ac_codes, stream_, 16);
} else {
std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].num_dc_codes, stream_, 16);
}
count = 0;
for (i = 0; i < 16; i++) {
count += *stream_++;
}
if (ac_huffman_table) {
if (count > AC_HUFFMAN_TABLE_VALUES_SIZE) {
ERR("invalid AC Huffman table!");
return false;
}
std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].ac_values, stream_, count);
jpeg_stream_parameters_.huffman_table_buffer.load_huffman_table[huffman_table_id] = 1;
} else {
if (count > DC_HUFFMAN_TABLE_VALUES_SIZE) {
ERR("invlaid DC Huffman table!")
return false;
}
std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].dc_values, stream_, count);
jpeg_stream_parameters_.huffman_table_buffer.load_huffman_table[huffman_table_id] = 1;
}
length -= 1;
length -= 16;
length -= count;
stream_ += count;
}
return true;
}
bool JpegParser::ParseSOS() {
uint32_t component_id, table;
if (stream_ == nullptr) {
return false;
}
uint32_t num_components = stream_[2];
if (num_components > NUM_COMPONENTS - 1) {
ERR("invalid number of component!")
return false;
}
jpeg_stream_parameters_.slice_parameter_buffer.num_components = num_components;
stream_ += 3;
for (int32_t i = 0; i < num_components; i++) {
component_id = *stream_++;
table = *stream_++;
jpeg_stream_parameters_.slice_parameter_buffer.components[i].component_selector = component_id;
jpeg_stream_parameters_.slice_parameter_buffer.components[i].dc_table_selector = ((table >> 4) & 0x0F);
jpeg_stream_parameters_.slice_parameter_buffer.components[i].ac_table_selector = (table & 0x0F);
if ((table & 0xF) >= 4) {
ERR("invalid number of AC Huffman table!");
return false;
}
if ((table >> 4) >= 4) {
ERR("invalid number of DC Huffman table!");
return false;
}
if (component_id != jpeg_stream_parameters_.picture_parameter_buffer.components[i].component_id) {
ERR("component id mismatch between SOS and SOF marker!");
return false;
}
}
stream_ += 3;
return true;
}
bool JpegParser::ParseDRI() {
uint32_t length;
if (stream_ == nullptr) {
return false;
}
length = swap_bytes(stream_);
if (length != 4) {
ERR("invalid size for DRI marker");
return false;
}
jpeg_stream_parameters_.slice_parameter_buffer.restart_interval = swap_bytes(stream_ + 2);
return true;
}
bool JpegParser::ParseEOI() {
if (stream_ == nullptr) {
return false;
}
const uint8_t *stream_temp = stream_;
while (stream_temp <= stream_end_ && !(*stream_temp == 0xFF && *(stream_temp + 1) == EOI)) {
stream_temp++;
continue;
}
jpeg_stream_parameters_.slice_parameter_buffer.slice_data_size = stream_temp - stream_;
jpeg_stream_parameters_.slice_data_buffer = stream_;
return true;
}
ChromaSubsampling JpegParser::GetChromaSubsampling(uint8_t c1_h_sampling_factor, uint8_t c2_h_sampling_factor, uint8_t c3_h_sampling_factor,
uint8_t c1_v_sampling_factor, uint8_t c2_v_sampling_factor, uint8_t c3_v_sampling_factor) {
ChromaSubsampling subsampling;
if ((c1_h_sampling_factor == 1 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
c1_v_sampling_factor == 1 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) ||
(c1_h_sampling_factor == 2 && c2_h_sampling_factor == 2 && c3_h_sampling_factor == 2 &&
c1_v_sampling_factor == 2 && c2_v_sampling_factor == 2 && c3_v_sampling_factor == 2) ||
(c1_h_sampling_factor == 4 && c2_h_sampling_factor == 4 && c3_h_sampling_factor == 4 &&
c1_v_sampling_factor == 4 && c2_v_sampling_factor == 4 && c3_v_sampling_factor == 4)) {
subsampling = CSS_444;
} else if (c1_h_sampling_factor == 1 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
c1_v_sampling_factor == 2 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) {
subsampling = CSS_440;
} else if ((c1_h_sampling_factor == 2 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
c1_v_sampling_factor == 1 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) ||
(c1_h_sampling_factor == 2 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
c1_v_sampling_factor == 2 && c2_v_sampling_factor == 2 && c3_v_sampling_factor == 2) ||
(c1_h_sampling_factor == 2 && c2_h_sampling_factor == 2 && c3_h_sampling_factor == 2 &&
c1_v_sampling_factor == 2 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1)) {
subsampling = CSS_422;
} else if (c1_h_sampling_factor == 2 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
c1_v_sampling_factor == 2 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) {
subsampling = CSS_420;
} else if (c1_h_sampling_factor == 4 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
c1_v_sampling_factor == 1 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) {
subsampling = CSS_411;
} else if ((c1_h_sampling_factor == 1 && c2_h_sampling_factor == 0 && c3_h_sampling_factor == 0 &&
c1_v_sampling_factor == 1 && c2_v_sampling_factor == 0 && c3_v_sampling_factor == 0) ||
(c1_h_sampling_factor == 4 && c2_h_sampling_factor == 0 && c3_h_sampling_factor == 0 &&
c1_v_sampling_factor == 4 && c2_v_sampling_factor == 0 && c3_v_sampling_factor == 0)) {
subsampling = CSS_400;
} else {
subsampling = CSS_UNKNOWN;
}
return subsampling;
}
+177
Просмотреть файл
@@ -0,0 +1,177 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef ROC_JPEG_PARSER_H_
#define ROC_JPEG_PARSER_H_
#include <stdint.h>
#include <iostream>
#include <cstring>
#include "rocjpeg_commons.h"
#pragma once
#define NUM_COMPONENTS 4
#define HUFFMAN_TABLES 2
#define AC_HUFFMAN_TABLE_VALUES_SIZE 162
#define DC_HUFFMAN_TABLE_VALUES_SIZE 12
#define swap_bytes(x) (((x)[0] << 8) | (x)[1])
/***************************************************************/
//! \enum enum JpegMarker
//! common JPEG markers
/***************************************************************/
enum JpegMarkers {
SOI = 0xD8, /* Start Of Image */
SOF = 0xC0, /* Start Of Frame for a baseline DCT-based JPEG. */
DHT = 0xC4, /* Define Huffman Table */
DQT = 0xDB, /* Define Quantization Table */
DRI = 0xDD, /* Define Restart Interval */
SOS = 0xDA, /* Start of Scan */
EOI = 0xD9, /* End Of Image */
};
/***************************************************************/
//! \struct Picture parameter for JPEG decoding.
//! This structure holds information from the frame
//! header and definitions from additional segments.
/**************************************************************/
typedef struct PictureParameterBufferType {
uint16_t picture_width;
uint16_t picture_height;
struct {
uint8_t component_id;
uint8_t h_sampling_factor;
uint8_t v_sampling_factor;
uint8_t quantiser_table_selector;
} components[255];
uint8_t num_components;
uint8_t color_space;
uint32_t rotation;
uint32_t reserved[7];
} PictureParameterBuffer;
/***************************************************************/
//! \struct Quantization table for JPEG decoding.
//! This structure holds the quantization tables.
//! The maximum number of quatization tables is four.
//! The #load_quantization_table array can be used as a hint to notify
//! which table(s) has valid values.
//! The #quantiser_table values are specified in zig-zag scan order.
/***************************************************************/
typedef struct QuantizationMatrixBufferType {
uint8_t load_quantiser_table[4];
uint8_t quantiser_table[4][64];
uint32_t reserved[4];
} QuantizationMatrixBuffer;
/***************************************************************/
//! \struct Huffman table for JPEG decoding.
//! This structure holds the Huffman tables.
//! The maximum number of Huffman tables is two.
//! The #load_huffman_table array can be used as a hint to notify the
//! which table(s) has valid values.
/***************************************************************/
typedef struct HuffmanTableBufferType {
uint8_t load_huffman_table[2];
struct {
uint8_t num_dc_codes[16];
uint8_t dc_values[12];
uint8_t num_ac_codes[16];
uint8_t ac_values[162];
uint8_t pad[2];
} huffman_table[2];
uint32_t reserved[4];
} HuffmanTableBuffer;
/***************************************************************/
//! \struct Slice parameter for JPEG decoding.
//! This structure holds information from the scan header, and
//! definitions from additional segments.
/***************************************************************/
typedef struct SliceParameterBufferType {
uint32_t slice_data_size;
uint32_t slice_data_offset;
uint32_t slice_data_flag;
uint32_t slice_horizontal_position;
uint32_t slice_vertical_position;
struct {
uint8_t component_selector;
uint8_t dc_table_selector;
uint8_t ac_table_selector;
} components[4];
uint8_t num_components;
uint16_t restart_interval;
uint32_t num_mcus;
uint32_t reserved[4];
} SliceParameterBuffer;
/***************************************************************/
//! \struct Enum identifies image chroma subsampling values stored inside JPEG input stream
/***************************************************************/
typedef enum {
CSS_444 = 0,
CSS_440 = 1,
CSS_422 = 2,
CSS_420 = 3,
CSS_411 = 4,
CSS_400 = 5,
CSS_UNKNOWN = -1
} ChromaSubsampling;
/***************************************************************/
//! \struct Jpeg stream parameters.
//! This structure holds all information from a JPEG stream for decoding
/***************************************************************/
typedef struct JpegParameterBuffersType {
PictureParameterBuffer picture_parameter_buffer;
QuantizationMatrixBuffer quantization_matrix_buffer;
HuffmanTableBuffer huffman_table_buffer;
SliceParameterBuffer slice_parameter_buffer;
ChromaSubsampling chroma_subsampling;
const uint8_t* slice_data_buffer;
} JpegStreamParameters;
class JpegParser {
public:
JpegParser();
~JpegParser();
bool ParseJpegStream(const uint8_t* jpeg_stream, uint32_t jpeg_stream_size);
const JpegStreamParameters* GetJpegStreamParameters() const {return &jpeg_stream_parameters_;};
private:
bool ParseSOI();
bool ParseSOF();
bool ParseDQT();
bool ParseSOS();
bool ParseDHT();
bool ParseDRI();
bool ParseEOI();
ChromaSubsampling GetChromaSubsampling(uint8_t c1_h_sampling_factor, uint8_t c2_h_sampling_factor, uint8_t c3_h_sampling_factor,
uint8_t c1_v_sampling_factor, uint8_t c2_v_sampling_factor, uint8_t c3_v_sampling_factor);
const uint8_t *stream_;
const uint8_t *stream_end_;
uint32_t stream_length_;
JpegStreamParameters jpeg_stream_parameters_;
};
#endif // ROC_JPEG_PARSER_H_
+301
Просмотреть файл
@@ -0,0 +1,301 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "rocjpeg_vaapi_decoder.h"
RocJpegVappiDecoder::RocJpegVappiDecoder(int device_id) : device_id_{device_id}, drm_fd_{-1}, min_picture_width_{64}, min_picture_height_{64},
max_picture_width_{4096}, max_picture_height_{4096}, va_display_{0}, va_config_attrib_{{}}, va_config_id_{0}, va_profile_{VAProfileJPEGBaseline},
va_context_id_{0}, va_surface_ids_{}, va_picture_parameter_buf_id_{0}, va_quantization_matrix_buf_id_{0}, va_huffmantable_buf_id_{0},
va_slice_param_buf_id_{0}, va_slice_data_buf_id_{0} {};
RocJpegVappiDecoder::~RocJpegVappiDecoder() {
if (drm_fd_ != -1) {
close(drm_fd_);
}
if (va_display_) {
RocJpegStatus rocjpeg_status = DestroyDataBuffers();
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) {
ERR("Error: Failed to destroy VAAPI buffer");
}
VAStatus va_status;
if (va_surface_ids_.size() > 0) {
va_status = vaDestroySurfaces(va_display_, va_surface_ids_.data(), va_surface_ids_.size());
if (va_status != VA_STATUS_SUCCESS) {
ERR("ERROR: vaDestroySurfaces failed!");
}
}
if (va_context_id_) {
va_status = vaDestroyContext(va_display_, va_context_id_);
if (va_status != VA_STATUS_SUCCESS) {
ERR("ERROR: vaDestroyContext failed!");
}
}
if (va_config_id_) {
va_status = vaDestroyConfig(va_display_, va_config_id_);
if (va_status != VA_STATUS_SUCCESS) {
ERR("ERROR: vaDestroyConfig failed!");
}
}
va_status = vaTerminate(va_display_);
if (va_status != VA_STATUS_SUCCESS) {
ERR("ERROR: vaTerminate failed!");
}
}
}
RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string gcn_arch_name) {
// There are 8 renderDXXX per physical device on gfx940, gfx941, and gfx942
int num_render_cards_per_device = ((gcn_arch_name.compare("gfx940") == 0) ||
(gcn_arch_name.compare("gfx941") == 0) ||
(gcn_arch_name.compare("gfx942") == 0)) ? 8 : 1;
std::string drm_node = "/dev/dri/renderD" + std::to_string(128 + device_id_ * num_render_cards_per_device);
CHECK_ROCJPEG(InitVAAPI(drm_node));
CHECK_ROCJPEG(CreateDecoderConfig());
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus RocJpegVappiDecoder::InitVAAPI(std::string drm_node) {
drm_fd_ = open(drm_node.c_str(), O_RDWR);
if (drm_fd_ < 0) {
ERR("ERROR: failed to open drm node " + drm_node);
return ROCJPEG_STATUS_NOT_INITIALIZED;
}
va_display_ = vaGetDisplayDRM(drm_fd_);
if (!va_display_) {
ERR("ERROR: failed to create va_display!");
return ROCJPEG_STATUS_NOT_INITIALIZED;
}
vaSetInfoCallback(va_display_, NULL, NULL);
int major_version = 0, minor_version = 0;
CHECK_VAAPI(vaInitialize(va_display_, &major_version, &minor_version))
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus RocJpegVappiDecoder::CreateDecoderConfig() {
int max_num_entrypoints = vaMaxNumEntrypoints(va_display_);
std::vector<VAEntrypoint> jpeg_entrypoint_list;
jpeg_entrypoint_list.resize(max_num_entrypoints);
int num_entrypoints = 0;
CHECK_VAAPI(vaQueryConfigEntrypoints(va_display_, va_profile_, jpeg_entrypoint_list.data(), &num_entrypoints));
bool hw_jpeg_decoder_supported = false;
if (num_entrypoints > 0) {
for (auto entry_point : jpeg_entrypoint_list) {
if (entry_point == VAEntrypointVLD) {
hw_jpeg_decoder_supported = true;
break;
}
}
} else {
return ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED;
}
if (hw_jpeg_decoder_supported) {
va_config_attrib_.resize(3);
va_config_attrib_[0].type = VAConfigAttribRTFormat;
va_config_attrib_[1].type = VAConfigAttribMaxPictureWidth;
va_config_attrib_[2].type = VAConfigAttribMaxPictureHeight;
CHECK_VAAPI(vaGetConfigAttributes(va_display_, va_profile_, VAEntrypointVLD, va_config_attrib_.data(), va_config_attrib_.size()));
CHECK_VAAPI(vaCreateConfig(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib_[0], 1, &va_config_id_));
if (va_config_attrib_[1].value != VA_ATTRIB_NOT_SUPPORTED) {
max_picture_width_ = va_config_attrib_[1].value;
}
if (va_config_attrib_[2].value != VA_ATTRIB_NOT_SUPPORTED) {
max_picture_height_ = va_config_attrib_[2].value;
}
return ROCJPEG_STATUS_SUCCESS;
} else {
return ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED;
}
}
RocJpegStatus RocJpegVappiDecoder::DestroyDataBuffers() {
if (va_picture_parameter_buf_id_) {
CHECK_VAAPI(vaDestroyBuffer(va_display_, va_picture_parameter_buf_id_));
va_picture_parameter_buf_id_ = 0;
}
if (va_quantization_matrix_buf_id_) {
CHECK_VAAPI(vaDestroyBuffer(va_display_, va_quantization_matrix_buf_id_));
va_quantization_matrix_buf_id_ = 0;
}
if (va_huffmantable_buf_id_) {
CHECK_VAAPI(vaDestroyBuffer(va_display_, va_huffmantable_buf_id_));
va_huffmantable_buf_id_ = 0;
}
if (va_slice_param_buf_id_) {
CHECK_VAAPI(vaDestroyBuffer(va_display_, va_slice_param_buf_id_));
va_slice_param_buf_id_ = 0;
}
if (va_slice_data_buf_id_) {
CHECK_VAAPI(vaDestroyBuffer(va_display_, va_slice_data_buf_id_));
va_slice_data_buf_id_ = 0;
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus RocJpegVappiDecoder::SubmitDecode(const JpegStreamParameters *jpeg_stream_params, uint32_t &surface_id) {
if (jpeg_stream_params == nullptr) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
if (sizeof(jpeg_stream_params->picture_parameter_buffer) != sizeof(VAPictureParameterBufferJPEGBaseline) ||
sizeof(jpeg_stream_params->quantization_matrix_buffer) != sizeof(VAIQMatrixBufferJPEGBaseline) ||
sizeof(jpeg_stream_params->huffman_table_buffer) != sizeof(VAHuffmanTableBufferJPEGBaseline) ||
sizeof(jpeg_stream_params->slice_parameter_buffer) != sizeof(VASliceParameterBufferJPEGBaseline)) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
if (jpeg_stream_params->picture_parameter_buffer.picture_width < min_picture_width_ ||
jpeg_stream_params->picture_parameter_buffer.picture_height < min_picture_height_ ||
jpeg_stream_params->picture_parameter_buffer.picture_width > max_picture_width_ ||
jpeg_stream_params->picture_parameter_buffer.picture_height > max_picture_height_) {
ERR("The JPEG image resolution is not supported!");
return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED;
}
uint8_t surface_format;
switch (jpeg_stream_params->chroma_subsampling) {
case CSS_444:
surface_format = VA_RT_FORMAT_YUV444;
break;
case CSS_422:
surface_format = VA_RT_FORMAT_YUV422;
break;
case CSS_420:
surface_format = VA_RT_FORMAT_YUV420;
break;
case CSS_400:
surface_format = VA_RT_FORMAT_YUV400;
break;
default:
ERR("ERROR: The chroma subsampling is not supported by the VCN hardware!");
return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED;
break;
}
VASurfaceID va_surface_id;
CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, jpeg_stream_params->picture_parameter_buffer.picture_width, jpeg_stream_params->picture_parameter_buffer.picture_height, &va_surface_id, 1, nullptr, 1));
va_surface_ids_.push_back(va_surface_id);
surface_id = va_surface_id;
if (va_context_id_) {
vaDestroyContext(va_display_, va_context_id_);
va_context_id_ = 0;
}
CHECK_VAAPI(vaCreateContext(va_display_, va_config_id_, jpeg_stream_params->picture_parameter_buffer.picture_width, jpeg_stream_params->picture_parameter_buffer.picture_height, VA_PROGRESSIVE, &va_surface_id, 1, &va_context_id_));
CHECK_ROCJPEG(DestroyDataBuffers());
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VAPictureParameterBufferType, sizeof(VAPictureParameterBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->picture_parameter_buffer, &va_picture_parameter_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VAIQMatrixBufferType, sizeof(VAIQMatrixBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->quantization_matrix_buffer, &va_quantization_matrix_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VAHuffmanTableBufferType, sizeof(VAHuffmanTableBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->huffman_table_buffer, &va_huffmantable_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VASliceParameterBufferType, sizeof(VASliceParameterBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->slice_parameter_buffer, &va_slice_param_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VASliceDataBufferType, jpeg_stream_params->slice_parameter_buffer.slice_data_size, 1, (void *)jpeg_stream_params->slice_data_buffer, &va_slice_data_buf_id_));
CHECK_VAAPI(vaBeginPicture(va_display_, va_context_id_, va_surface_id));
CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_picture_parameter_buf_id_, 1));
CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_quantization_matrix_buf_id_, 1));
CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_huffmantable_buf_id_, 1));
CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_slice_param_buf_id_, 1));
CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_slice_data_buf_id_, 1));
CHECK_VAAPI(vaEndPicture(va_display_, va_context_id_));
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus RocJpegVappiDecoder::ExportSurface(VASurfaceID surface_id, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc) {
bool is_surface_id_found = false;
int idx = 0;
for (idx = 0; idx < va_surface_ids_.size(); idx++) {
if (va_surface_ids_[idx] == surface_id) {
is_surface_id_found = true;
break;
}
}
if (!is_surface_id_found) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id,
VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
VA_EXPORT_SURFACE_READ_ONLY |
VA_EXPORT_SURFACE_SEPARATE_LAYERS,
&va_drm_prime_surface_desc));
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus RocJpegVappiDecoder::SyncSurface(VASurfaceID surface_id) {
VASurfaceStatus surface_status;
bool is_surface_id_found = false;
int idx = 0;
for (idx = 0; idx < va_surface_ids_.size(); idx++) {
if (va_surface_ids_[idx] == surface_id) {
is_surface_id_found = true;
break;
}
}
if (!is_surface_id_found) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, surface_id, &surface_status));
while (surface_status != VASurfaceReady) {
VAStatus va_status = vaSyncSurface(va_display_, surface_id);
if (va_status != VA_STATUS_SUCCESS) {
if (va_status == 0x26 /*VA_STATUS_ERROR_TIMEDOUT*/) {
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, surface_id, &surface_status));
} else {
std::cout << "vaSyncSurface() failed with error code: 0x" << std::hex << va_status <<
std::dec << "', status: " << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;
return ROCJPEG_STATUS_RUNTIME_ERROR;
}
} else {
break;
}
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus RocJpegVappiDecoder::ReleaseSurface(VASurfaceID surface_id) {
bool is_surface_id_found = false;
int idx = 0;
for (idx = 0; idx < va_surface_ids_.size(); idx++) {
if (va_surface_ids_[idx] == surface_id) {
is_surface_id_found = true;
break;
}
}
if (!is_surface_id_found) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
CHECK_VAAPI(vaDestroySurfaces(va_display_, &va_surface_ids_[idx], 1));
va_surface_ids_.erase(va_surface_ids_.begin() + idx);
return ROCJPEG_STATUS_SUCCESS;
}
+75
Просмотреть файл
@@ -0,0 +1,75 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef ROC_JPEG_VAAPI_DECODER_H_
#define ROC_JPEG_VAAPI_DECODER_H_
#pragma once
#include <iostream>
#include <vector>
#include <string>
#include <fcntl.h>
#include <unistd.h>
#include <va/va.h>
#include <va/va_drm.h>
#include <va/va_drmcommon.h>
#include "rocjpeg_commons.h"
#include "rocjpeg_parser.h"
#include "../api/rocjpeg.h"
/*Note: va.h doesn't have VA_FOURCC_YUYV defined but vaExportSurfaceHandle returns 0x56595559 for packed YUYV for YUV 4:2:2*/
#define ROCJPEG_FOURCC_YUYV 0x56595559
class RocJpegVappiDecoder {
public:
RocJpegVappiDecoder(int device_id = 0);
~RocJpegVappiDecoder();
RocJpegStatus InitializeDecoder(std::string gcn_arch_name);
RocJpegStatus SubmitDecode(const JpegStreamParameters *jpeg_stream_params, uint32_t &surface_id);
RocJpegStatus ExportSurface(VASurfaceID surface_id, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc);
RocJpegStatus SyncSurface(VASurfaceID surface_id);
RocJpegStatus ReleaseSurface(VASurfaceID surface_id);
private:
int device_id_;
int drm_fd_;
uint32_t min_picture_width_;
uint32_t min_picture_height_;
uint32_t max_picture_width_;
uint32_t max_picture_height_;
VADisplay va_display_;
std::vector<VAConfigAttrib> va_config_attrib_;
VAConfigID va_config_id_;
VAProfile va_profile_;
VAContextID va_context_id_;
std::vector<VASurfaceID> va_surface_ids_;
VABufferID va_picture_parameter_buf_id_;
VABufferID va_quantization_matrix_buf_id_;
VABufferID va_huffmantable_buf_id_;
VABufferID va_slice_param_buf_id_;
VABufferID va_slice_data_buf_id_;
RocJpegStatus InitVAAPI(std::string drm_node);
RocJpegStatus CreateDecoderConfig();
RocJpegStatus DestroyDataBuffers();
};
#endif // ROC_JPEG_VAAPI_DECODER_H_
+99
Просмотреть файл
@@ -0,0 +1,99 @@
# ##############################################################################
# Copyright (c) 2024 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# ##############################################################################
cmake_minimum_required(VERSION 3.5)
project(ROCJPEG-test)
# make test with CTest
enable_testing()
include(CTest)
# ROCM Path
if(DEFINED ENV{ROCM_PATH})
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "${White}${PROJECT_NAME}: Default ROCm installation path${ColourReset}")
elseif(ROCM_PATH)
message("-- ${White}${PROJECT_NAME} :ROCM_PATH Set -- ${ROCM_PATH}${ColourReset}")
else()
set(ROCM_PATH /opt/rocm CACHE PATH "${White}${PROJECT_NAME}: Default ROCm installation path${ColourReset}")
endif()
# find rocJPEG
find_library(ROCJPEG_LIBRARY NAMES ROCJPEG HINTS ${ROCM_PATH}/lib)
find_path(ROCJPEG_INCLUDE_DIR NAMES rocjpeg.h PATHS /opt/rocm/include/rocjpeg ${ROCM_PATH}/include/rocjpeg)
if(ROCJPEG_LIBRARY AND ROCJPEG_INCLUDE_DIR)
set(ROCJPEG_FOUND TRUE)
message("-- ${White}Using ROCJPEG -- \n\tLibraries:${ROCJPEG_LIBRARY} \n\tIncludes:${ROCJPEG_INCLUDE_DIR}${ColourReset}")
endif()
if(NOT ROCJPEG_FOUND)
message("-- ${Yellow}${PROJECT_NAME} requires rocJPEG. Install rocJPEG before running CTests")
endif()
add_test(
NAME
jpeg-decode-fmt-unchanged
COMMAND
"${CMAKE_CTEST_COMMAND}"
--build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode"
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
--build-generator "${CMAKE_GENERATOR}"
--test-command "jpegdecode"
-i ${ROCM_PATH}/share/rocjpeg/images/
)
add_test(
NAME
jpeg-decode-fmt-yuv
COMMAND
"${CMAKE_CTEST_COMMAND}"
--build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode"
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
--build-generator "${CMAKE_GENERATOR}"
--test-command "jpegdecode"
-i ${ROCM_PATH}/share/rocjpeg/images/ -fmt yuv
)
add_test(
NAME
jpeg-decode-fmt-y
COMMAND
"${CMAKE_CTEST_COMMAND}"
--build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode"
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
--build-generator "${CMAKE_GENERATOR}"
--test-command "jpegdecode"
-i ${ROCM_PATH}/share/rocjpeg/images/ -fmt y
)
add_test(
NAME
jpeg-decode-fmt-rgbi
COMMAND
"${CMAKE_CTEST_COMMAND}"
--build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode"
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
--build-generator "${CMAKE_GENERATOR}"
--test-command "jpegdecode"
-i ${ROCM_PATH}/share/rocjpeg/images/ -fmt rgbi
)