Initial commit of the rocJPEG library (#1)
* rocJPEG initial commit * Add gitignore file * clean up * code clean up * CMakeLists update * Update README * Update readme and setup script * add docs folder * update rocjepg headers * update the rocjepg header * update the copyright year * Add nullptr check for all APIs * update the hip kernels execution * follow google c++ style guide * code clean up * use google c++ style guide for hip kernels * add support for rocJpegGetErrorName API * Add ParseCommandLine function * update the rocjpeg sample * Update the readme, LIBVA instructions * udate the rocJPEG script * Update CMAKE and libdrm * Add support for ROCJPEG_OUTPUT_UNCHANGED and SaveImage function * code clean up * update the drm dependencies * remove the unused hipstream * code clean up * add -fmt option for selection the output format * update the GetImageInfo API * update the sample * update the save image function based onn the surface and putput format * add new sample images * Put hip interop mem into separate functions and some code clean up * Add support for CopyLuma and CopyChroma * Add support for the yuv output format for NV12 surface * Update HIP kernels * add support for rgbi format conversion for NV12 and YUv444 * Update the status * Add support for converting YUYV to RGB * Add support for unpacking YUYV format * Fix unpacking Y fron YUYV hip kernel * Add suppoort for extractign packed YUYV to YUV planar * hip code clean up * code clean up * Update the headers * code clean up * change hip kernel names * hip kernel clean up * hip clean up * code clean up * code clean up * code clean up * code clean up * code clean up * code clean up for jpege decoder class * code clean up * code clean up * code clean up * code clean up * update the dockers * code clean up * code clean up * hip kernels clean up * remove unused hip kernels * add additional test cases * update the APIs * add new hip yuv400torgbi kernel * update yuv400torgbi kernel * restructure files * code clean up * code clean up * add jenkins * code clean up * code clean up * update readme * update docker's README * make changes based on the reviewers comments * make changes based on the reviewers comments * return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED if the resolution of the jpeg is not supported for HW decoding
This commit is contained in:
zatwierdzone przez
GitHub
rodzic
acdfe62307
commit
6ad4a76177
@@ -0,0 +1,5 @@
|
||||
build/
|
||||
samples/*/build
|
||||
.vscode/
|
||||
doxygen_output*
|
||||
_doxygen/
|
||||
@@ -0,0 +1,121 @@
|
||||
// This file is for internal AMD use.
|
||||
// If you are interested in running your own Jenkins, please raise a github issue for assistance.
|
||||
|
||||
def runCompileCommand(platform, project, jobName, boolean debug=false, boolean staticLibrary=false) {
|
||||
project.paths.construct_build_prefix()
|
||||
|
||||
String buildTypeArg = debug ? '-DCMAKE_BUILD_TYPE=Debug' : '-DCMAKE_BUILD_TYPE=Release'
|
||||
String buildTypeDir = debug ? 'debug' : 'release'
|
||||
|
||||
def command = """#!/usr/bin/env bash
|
||||
set -ex
|
||||
echo Build rocJPEG - ${buildTypeDir}
|
||||
cd ${project.paths.project_build_prefix}
|
||||
mkdir -p build/${buildTypeDir} && cd build/${buildTypeDir}
|
||||
cmake ${buildTypeArg} ../..
|
||||
make -j\$(nproc)
|
||||
sudo make install
|
||||
sudo make package
|
||||
ldd -v /opt/rocm/lib/librocjpeg.so.so
|
||||
"""
|
||||
|
||||
platform.runCommand(this, command)
|
||||
}
|
||||
|
||||
def runTestCommand (platform, project) {
|
||||
|
||||
String libLocation = ''
|
||||
|
||||
if (platform.jenkinsLabel.contains('rhel')) {
|
||||
libLocation = ':/usr/local/lib'
|
||||
}
|
||||
else if (platform.jenkinsLabel.contains('sles')) {
|
||||
libLocation = ':/usr/local/lib'
|
||||
}
|
||||
|
||||
def command = """#!/usr/bin/env bash
|
||||
set -ex
|
||||
export HOME=/home/jenkins
|
||||
echo make test
|
||||
cd ${project.paths.project_build_prefix}/build/release
|
||||
LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib${libLocation} make test ARGS="-VV --rerun-failed --output-on-failure"
|
||||
echo rocjpeg-sample - jpegDecode
|
||||
mkdir -p rocjpeg-sample && cd rocjpeg-sample
|
||||
cmake /opt/rocm/share/rocjpeg/samples/jpegDecode/
|
||||
make -j8
|
||||
LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib${libLocation} ./jpegdecode -i /opt/rocm/share/rocjpeg/images/
|
||||
echo rocjpeg-test package verification
|
||||
cd ../ && mkdir -p rocjpeg-test && cd rocjpeg-test
|
||||
cmake /opt/rocm/share/rocjpeg/test/
|
||||
LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/rocm/lib${libLocation} ctest -VV --rerun-failed --output-on-failure
|
||||
echo rocjpeg conformance tests
|
||||
"""
|
||||
|
||||
platform.runCommand(this, command)
|
||||
// Unit tests - TBD
|
||||
}
|
||||
|
||||
def runPackageCommand(platform, project) {
|
||||
|
||||
def packageHelper = platform.makePackage(platform.jenkinsLabel, "${project.paths.project_build_prefix}/build/release")
|
||||
|
||||
String packageType = ''
|
||||
String packageInfo = ''
|
||||
String packageDetail = ''
|
||||
String osType = ''
|
||||
String packageRunTime = ''
|
||||
|
||||
if (platform.jenkinsLabel.contains('centos') || platform.jenkinsLabel.contains('rhel') || platform.jenkinsLabel.contains('sles')) {
|
||||
packageType = 'rpm'
|
||||
packageInfo = 'rpm -qlp'
|
||||
packageDetail = 'rpm -qi'
|
||||
packageRunTime = 'rocjpeg-*'
|
||||
|
||||
if (platform.jenkinsLabel.contains('sles')) {
|
||||
osType = 'sles'
|
||||
}
|
||||
else if (platform.jenkinsLabel.contains('rhel8')) {
|
||||
osType = 'rhel8'
|
||||
}
|
||||
else if (platform.jenkinsLabel.contains('rhel9')) {
|
||||
osType = 'rhel9'
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
packageType = 'deb'
|
||||
packageInfo = 'dpkg -c'
|
||||
packageDetail = 'dpkg -I'
|
||||
packageRunTime = 'rocjpeg_*'
|
||||
|
||||
if (platform.jenkinsLabel.contains('ubuntu20')) {
|
||||
osType = 'ubuntu20'
|
||||
}
|
||||
else if (platform.jenkinsLabel.contains('ubuntu22')) {
|
||||
osType = 'ubuntu22'
|
||||
}
|
||||
}
|
||||
|
||||
def command = """#!/usr/bin/env bash
|
||||
set -ex
|
||||
export HOME=/home/jenkins
|
||||
echo Make rocJPEG Package
|
||||
cd ${project.paths.project_build_prefix}/build/release
|
||||
sudo make package
|
||||
mkdir -p package
|
||||
mv rocjpeg-dev*.${packageType} package/${osType}-rocjpeg-dev.${packageType}
|
||||
mv rocjpeg-test*.${packageType} package/${osType}-rocjpeg-test.${packageType}
|
||||
mv ${packageRunTime}.${packageType} package/${osType}-rocjpeg.${packageType}
|
||||
${packageDetail} package/${osType}-rocjpeg-dev.${packageType}
|
||||
${packageDetail} package/${osType}-rocjpeg-test.${packageType}
|
||||
${packageDetail} package/${osType}-rocjpeg.${packageType}
|
||||
${packageInfo} package/${osType}-rocjpeg-dev.${packageType}
|
||||
${packageInfo} package/${osType}-rocjpeg-test.${packageType}
|
||||
${packageInfo} package/${osType}-rocjpeg.${packageType}
|
||||
"""
|
||||
|
||||
platform.runCommand(this, command)
|
||||
platform.archiveArtifacts(this, packageHelper[1])
|
||||
}
|
||||
|
||||
return this
|
||||
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env groovy
|
||||
@Library('rocJenkins@pong') _
|
||||
import com.amd.project.*
|
||||
import com.amd.docker.*
|
||||
|
||||
def runCI =
|
||||
{
|
||||
nodeDetails, jobName->
|
||||
|
||||
def prj = new rocProject('rocJPEG', 'PreCheckin')
|
||||
|
||||
def nodes = new dockerNodes(nodeDetails, jobName, prj)
|
||||
|
||||
def commonGroovy
|
||||
|
||||
boolean formatCheck = false
|
||||
|
||||
def compileCommand =
|
||||
{
|
||||
platform, project->
|
||||
|
||||
commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy"
|
||||
commonGroovy.runCompileCommand(platform, project, jobName)
|
||||
}
|
||||
|
||||
|
||||
def testCommand =
|
||||
{
|
||||
platform, project->
|
||||
|
||||
commonGroovy.runTestCommand(platform, project)
|
||||
}
|
||||
|
||||
def packageCommand =
|
||||
{
|
||||
platform, project->
|
||||
|
||||
commonGroovy.runPackageCommand(platform, project)
|
||||
}
|
||||
|
||||
buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand)
|
||||
}
|
||||
|
||||
ci: {
|
||||
String urlJobName = auxiliary.getTopJobName(env.BUILD_URL)
|
||||
|
||||
def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])]]
|
||||
propertyList = auxiliary.appendPropertyList(propertyList)
|
||||
|
||||
def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu20:['gfx90a'], ubuntu22:['gfx1101'], sles15sp1:['gfx908'], rhel8:['gfx1030'], rhel9:['gfx942']])]
|
||||
jobNameList = auxiliary.appendJobNameList(jobNameList)
|
||||
|
||||
propertyList.each
|
||||
{
|
||||
jobName, property->
|
||||
if (urlJobName == jobName) {
|
||||
properties(auxiliary.addCommonProperties(property))
|
||||
}
|
||||
}
|
||||
|
||||
jobNameList.each
|
||||
{
|
||||
jobName, nodeDetails->
|
||||
if (urlJobName == jobName) {
|
||||
stage(jobName) {
|
||||
runCI(nodeDetails, jobName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901
|
||||
if(!jobNameList.keySet().contains(urlJobName)) {
|
||||
properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])]))
|
||||
stage(urlJobName) {
|
||||
runCI([ubuntu22:['gfx942']], urlJobName)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,319 @@
|
||||
# ##############################################################################
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
# ##############################################################################
|
||||
|
||||
cmake_minimum_required (VERSION 3.5)
|
||||
|
||||
set(VERSION "0.1.0")
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
# Set Project Version and Language
|
||||
project(rocjpeg VERSION ${VERSION} LANGUAGES CXX)
|
||||
|
||||
set(CMAKE_INSTALL_LIBDIR "lib" CACHE STRING "Library install directory")
|
||||
set(CMAKE_INSTALL_INCLUDEDIR "include" CACHE STRING "Include install directory")
|
||||
include(GNUInstallDirs)
|
||||
|
||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR})
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR})
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR})
|
||||
|
||||
find_program(MAKE_NSIS_EXE makensis)
|
||||
find_program(RPMBUILD_EXE rpmbuild)
|
||||
find_program(DPKG_EXE dpkg)
|
||||
|
||||
# ROCM Path
|
||||
if(DEFINED ENV{ROCM_PATH})
|
||||
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path")
|
||||
elseif(ROCM_PATH)
|
||||
message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}")
|
||||
else()
|
||||
set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
|
||||
endif()
|
||||
# avoid setting the default installation path to /usr/local
|
||||
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
set(CMAKE_INSTALL_PREFIX ${ROCM_PATH} CACHE PATH "rocJPEG default installation path" FORCE)
|
||||
endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
|
||||
set(DEFAULT_BUILD_TYPE "Release")
|
||||
set(ENHANCED_MESSAGE ON)
|
||||
|
||||
if(ENHANCED_MESSAGE)
|
||||
string(ASCII 27 Esc)
|
||||
set(ColourReset "${Esc}[m")
|
||||
set(Red "${Esc}[31m")
|
||||
set(Green "${Esc}[32m")
|
||||
set(Yellow "${Esc}[33m")
|
||||
set(Blue "${Esc}[34m")
|
||||
set(BoldBlue "${Esc}[1;34m")
|
||||
set(Magenta "${Esc}[35m")
|
||||
set(Cyan "${Esc}[36m")
|
||||
set(White "${Esc}[37m")
|
||||
endif()
|
||||
message("-- ${BoldBlue}rocJPEG Version -- ${VERSION}${ColourReset}")
|
||||
message("-- ${BoldBlue}rocJPEG Install Path -- ${CMAKE_INSTALL_PREFIX}${ColourReset}")
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip)
|
||||
set(CMAKE_CXX_COMPILER ${ROCM_PATH}/llvm/bin/clang++)
|
||||
|
||||
# rocJPEG Default Options
|
||||
option(BUILD_WITH_AMD_ADVANCE "Build rocJPEG for advanced AMD GPU Architecture" OFF)
|
||||
|
||||
# rocJPEG Build Type
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING "rocJPEG Default Build Type" FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release")
|
||||
endif()
|
||||
if(CMAKE_BUILD_TYPE MATCHES Debug)
|
||||
# -O0 -- Don't Optimize output file
|
||||
# -g -- generate debugging information, generate debugging information, dwarf-4 for making valgrind work
|
||||
# -Og -- Optimize for debugging experience rather than speed or size
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -gdwarf-4 -Og")
|
||||
else()
|
||||
# -O3 -- Optimize output file
|
||||
# -DNDEBUG -- turn off asserts
|
||||
# -fPIC -- Generate position-independent code if possible
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -fPIC")
|
||||
endif()
|
||||
message("-- ${BoldBlue}rocJPEG Build Type -- ${CMAKE_BUILD_TYPE}${ColourReset}")
|
||||
set(DEFAULT_AMDGPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102")
|
||||
set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")
|
||||
|
||||
find_package(HIP QUIET)
|
||||
find_package(Libva QUIET)
|
||||
|
||||
if(HIP_FOUND AND Libva_FOUND)
|
||||
|
||||
# HIP
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::device)
|
||||
# LibVA
|
||||
include_directories(${LIBVA_INCLUDE_DIR})
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${LIBVA_LIBRARY})
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${LIBVA_DRM_LIBRARY})
|
||||
|
||||
# local include files
|
||||
include_directories(api src)
|
||||
# source files
|
||||
file(GLOB_RECURSE SOURCES "./src/*.cpp")
|
||||
# rocJPEG.so
|
||||
add_library(${PROJECT_NAME} SHARED ${SOURCES})
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
|
||||
target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
|
||||
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE CXX)
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
|
||||
|
||||
# install rocJPEG libs -- {ROCM_PATH}/lib
|
||||
install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime NAMELINK_SKIP)
|
||||
install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT dev NAMELINK_ONLY)
|
||||
install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan)
|
||||
# install rocJPEG include files -- {ROCM_PATH}/include/rocJPEG
|
||||
install(FILES api/rocjpeg.h
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} COMPONENT dev)
|
||||
# install rocJPEG samples -- {ROCM_PATH}/share/rocJPEG
|
||||
install(DIRECTORY cmake DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME} COMPONENT dev)
|
||||
|
||||
install(DIRECTORY samples/jpegDecode DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples COMPONENT dev)
|
||||
install(DIRECTORY data/images DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/ COMPONENT dev)
|
||||
# install license information - {ROCM_PATH}/share/doc/rocJPEG
|
||||
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
|
||||
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR} COMPONENT runtime)
|
||||
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-asan COMPONENT asan)
|
||||
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-dev COMPONENT dev)
|
||||
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-test COMPONENT test)
|
||||
# install test cmake
|
||||
install(FILES test/CMakeLists.txt DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/test COMPONENT test)
|
||||
|
||||
message("-- ${White}AMD ROCm rocJPEG -- CMAKE_CXX_FLAGS:${CMAKE_CXX_FLAGS}${ColourReset}")
|
||||
message("-- ${White}AMD ROCm rocJPEG -- Link Libraries: ${LINK_LIBRARY_LIST}${ColourReset}")
|
||||
|
||||
# make test with CTest
|
||||
enable_testing()
|
||||
include(CTest)
|
||||
add_subdirectory(samples)
|
||||
|
||||
# set package information
|
||||
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
|
||||
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
|
||||
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
|
||||
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
|
||||
set(CPACK_PACKAGE_LICENSE "MIT")
|
||||
set(CPACK_PACKAGE_CONTACT "rocJPEG Support <mivisionx.support@amd.com>")
|
||||
set(CPACK_PACKAGE_VENDOR "AMD ROCm")
|
||||
set(CPACK_PACKAGE_GROUP "Development/Tools")
|
||||
set(CPACK_PACKAGE_HOMEPAGE "https://github.com/ROCm/rocJPEG")
|
||||
|
||||
if(ENABLE_ASAN_PACKAGING)
|
||||
set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "AMD rocJPEG address sanitizer libraries")
|
||||
set(CPACK_RPM_PACKAGE_SUMMARY "AMD rocJPEG address sanitizer libraries")
|
||||
else()
|
||||
set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs")
|
||||
set(CPACK_RPM_PACKAGE_SUMMARY "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs")
|
||||
endif()
|
||||
|
||||
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
|
||||
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}")
|
||||
endif()
|
||||
|
||||
# Find Ubuntu 22.04 - Add libstdc++-12-dev package deps for Dev Package
|
||||
file(READ "/etc/os-release" OS_RELEASE)
|
||||
string(REGEX MATCH "22.04" UBUNTU_22_FOUND ${OS_RELEASE})
|
||||
|
||||
# Set the dependent packages
|
||||
set(rocJPEG_DEBIAN_PACKAGE_LIST "rocm-hip-runtime, libdrm-amdgpu1, libva-amdgpu-drm2, mesa-amdgpu-va-drivers")
|
||||
set(rocJPEG_RPM_PACKAGE_LIST "rocm-hip-runtime, libdrm-amdgpu, libva-amdgpu, mesa-amdgpu-dri-drivers")
|
||||
set(rocJPEG_DEBIAN_DEV_PACKAGE_LIST "rocm-hip-runtime-dev, libva-amdgpu-dev, pkg-config")
|
||||
if(UBUNTU_22_FOUND)
|
||||
set(rocJPEG_DEBIAN_DEV_PACKAGE_LIST "${rocJPEG_DEBIAN_DEV_PACKAGE_LIST}, libstdc++-12-dev")
|
||||
endif()
|
||||
set(rocJPEG_RPM_DEV_PACKAGE_LIST "rocm-hip-runtime-devel, libva-amdgpu-devel, pkg-config")
|
||||
|
||||
# '%{?dist}' breaks manual builds on debian systems due to empty Provides
|
||||
execute_process(
|
||||
COMMAND rpm --eval %{?dist}
|
||||
RESULT_VARIABLE PROC_RESULT
|
||||
OUTPUT_VARIABLE EVAL_RESULT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
if(PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "")
|
||||
string(APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}")
|
||||
endif()
|
||||
|
||||
# set package
|
||||
set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
|
||||
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${CPACK_PACKAGE_HOMEPAGE})
|
||||
set(CPACK_RPM_PACKAGE_URL ${CPACK_PACKAGE_HOMEPAGE})
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQPROV "no")
|
||||
# Debian package
|
||||
set(CPACK_DEB_COMPONENT_INSTALL ON)
|
||||
set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
|
||||
set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "rocm-core, ${rocJPEG_DEBIAN_PACKAGE_LIST}")
|
||||
set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${PROJECT_NAME}-dev")
|
||||
set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS
|
||||
"rocm-core, ${PROJECT_NAME}, ${rocJPEG_DEBIAN_DEV_PACKAGE_LIST}")
|
||||
# Debian package - specific variable for ASAN
|
||||
set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" )
|
||||
set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "rocm-core-asan, ${rocJPEG_DEBIAN_PACKAGE_LIST}" )
|
||||
# Debian package - Test
|
||||
set(CPACK_DEBIAN_TEST_PACKAGE_NAME "${PROJECT_NAME}-test" )
|
||||
set(CPACK_DEBIAN_TEST_PACKAGE_DEPENDS "rocm-core, ${CPACK_DEBIAN_DEV_PACKAGE_NAME}" )
|
||||
# RPM package
|
||||
set(CPACK_RPM_COMPONENT_INSTALL ON)
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core, ${rocJPEG_RPM_PACKAGE_LIST}")
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_PROVIDES "${PROJECT_NAME}")
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_OBSOLETES "${PROJECT_NAME}")
|
||||
set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel")
|
||||
set(CPACK_RPM_DEV_PACKAGE_REQUIRES "rocm-core, ${PROJECT_NAME}, ${rocJPEG_RPM_DEV_PACKAGE_LIST}")
|
||||
set(CPACK_RPM_DEV_PACKAGE_PROVIDES "${PROJECT_NAME}-devel")
|
||||
set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "${PROJECT_NAME}-devel")
|
||||
set(CPACK_RPM_PACKAGE_LICENSE "MIT" )
|
||||
# RPM package specific variable for ASAN
|
||||
set(CPACK_RPM_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" )
|
||||
set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan, ${rocJPEG_RPM_PACKAGE_LIST}" )
|
||||
set(CPACK_RPM_ASAN_PACKAGE_PROVIDES "${PROJECT_NAME}-asan")
|
||||
set(CPACK_RPM_ASAN_PACKAGE_OBSOLETES "${PROJECT_NAME}-asan")
|
||||
# RPM package specific variable for Test
|
||||
set(CPACK_RPM_TEST_PACKAGE_NAME "${PROJECT_NAME}-test" )
|
||||
set(CPACK_RPM_TEST_PACKAGE_REQUIRES "rocm-core, ${CPACK_RPM_DEV_PACKAGE_NAME}" )
|
||||
set(CPACK_RPM_TEST_PACKAGE_PROVIDES "${PROJECT_NAME}-test")
|
||||
set(CPACK_RPM_TEST_PACKAGE_OBSOLETES "${PROJECT_NAME}-test")
|
||||
|
||||
if(NOT ROCM_DEP_ROCMCORE)
|
||||
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core-asan," "" CPACK_RPM_ASAN_PACKAGE_REQUIRES ${CPACK_RPM_ASAN_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_DEV_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_RPM_TEST_PACKAGE_REQUIRES ${CPACK_RPM_TEST_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS})
|
||||
string(REGEX REPLACE ",? ?rocm-core-asan," "" CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS})
|
||||
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS})
|
||||
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_DEBIAN_TEST_PACKAGE_DEPENDS ${CPACK_DEBIAN_TEST_PACKAGE_DEPENDS})
|
||||
endif()
|
||||
|
||||
if(ENABLE_ASAN_PACKAGING)
|
||||
# ASAN Package requires asan component with only libraries and license file
|
||||
set(CPACK_COMPONENTS_ALL asan)
|
||||
else()
|
||||
set(CPACK_COMPONENTS_ALL runtime dev test)
|
||||
endif()
|
||||
|
||||
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
|
||||
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
|
||||
set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
|
||||
set(CPACK_RPM_PACKAGE_RELEASE "local")
|
||||
|
||||
if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
|
||||
set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
|
||||
endif()
|
||||
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
|
||||
set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
|
||||
endif()
|
||||
|
||||
set(CPACK_GENERATOR "TGZ;ZIP")
|
||||
if(EXISTS ${MAKE_NSIS_EXE})
|
||||
list(APPEND CPACK_GENERATOR "NSIS")
|
||||
endif()
|
||||
if(EXISTS ${RPMBUILD_EXE})
|
||||
list(APPEND CPACK_GENERATOR "RPM")
|
||||
message("-- ${White}AMD ROCm rocJPEG RunTime Package -- ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES}${ColourReset}")
|
||||
message("-- ${White}AMD ROCm rocJPEG Dev Package -- ${CPACK_RPM_DEV_PACKAGE_REQUIRES}${ColourReset}")
|
||||
endif()
|
||||
if(EXISTS ${DPKG_EXE})
|
||||
list(APPEND CPACK_GENERATOR "DEB")
|
||||
message("-- ${White}AMD ROCm rocJPEG RunTime Package -- ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS}${ColourReset}")
|
||||
message("-- ${White}AMD ROCm rocJPEG Dev Package -- ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS}${ColourReset}")
|
||||
endif()
|
||||
|
||||
include(CPack)
|
||||
|
||||
cpack_add_component(runtime
|
||||
DISPLAY_NAME "rocJPEG Runtime Package"
|
||||
DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \
|
||||
rocJPEG runtime package provides rocJPEG library and license.txt")
|
||||
|
||||
cpack_add_component(dev
|
||||
DISPLAY_NAME "rocJPEG Develop Package"
|
||||
DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \
|
||||
rocJPEG develop package provides rocJPEG library, header files, samples, and license.txt")
|
||||
|
||||
cpack_add_component(asan
|
||||
DISPLAY_NAME "rocJPEG ASAN Package"
|
||||
DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \
|
||||
rocJPEG ASAN package provides rocJPEG ASAN libraries")
|
||||
|
||||
cpack_add_component(test
|
||||
DISPLAY_NAME "rocJPEG Test Package"
|
||||
DESCRIPTION "AMD rocJPEG is a high performance JPEG decode SDK for AMD GPUs. \
|
||||
rocJPEG Test package provides rocJPEG Test Components")
|
||||
|
||||
else()
|
||||
message("-- ${Red}AMD ROCm rocJPEG -- unmet dependencies${ColourReset}")
|
||||
if(NOT HIP_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install rocm-hip-runtime-dev!")
|
||||
endif()
|
||||
if(NOT Libva_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: libva Not Found - please install libva-amdgpu-dev(DBEAIN)/libva-amdgpu-devel(RPM) package!")
|
||||
endif()
|
||||
endif()
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 ROCm Software Platform
|
||||
Copyright (c) 2024 ROCm Software Platform
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
+202
-2
@@ -1,2 +1,202 @@
|
||||
# rocJPEG
|
||||
rocJPEG is a high-performance jpeg decode SDK for decoding images using a hardware-accelerated jpeg decoder on AMD’s GPUs.
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
|
||||
|
||||
<p align="center"><img width="70%" src="docs/data/AMD_rocJPEG_Logo.png" /></p>
|
||||
|
||||
rocJPEG is a high performance JPEG decode SDK for AMD GPUs. Using the rocJPEG API, you can access the JPEG decoding features available on your GPU.
|
||||
|
||||
## Supported JPEG chroma subsampling
|
||||
|
||||
* YUV 4:4:4
|
||||
* YUV 4:2:2
|
||||
* YUV 4:2:0
|
||||
* YUV 4:0:0
|
||||
|
||||
## Prerequisites
|
||||
|
||||
* Linux distribution
|
||||
* Ubuntu - `20.04` / `22.04`
|
||||
* RHEL - `8` / `9`
|
||||
* SLES - `15-SP5`
|
||||
|
||||
* [ROCm supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)
|
||||
> [!IMPORTANT]
|
||||
> `gfx908` or higher GPU required
|
||||
|
||||
* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html): Required usecase - rocm
|
||||
> [!IMPORTANT]
|
||||
> `sudo amdgpu-install --usecase=rocm`
|
||||
|
||||
* AMD Multimedia packages
|
||||
```shell
|
||||
sudo apt install libva-amdgpu-dev libdrm-amdgpu1 mesa-amdgpu-va-drivers
|
||||
```
|
||||
> [!NOTE]
|
||||
> RPM Packages for `RHEL`/`SLES` - `libva-amdgpu-devel libdrm-amdgpu mesa-amdgpu-dri-drivers`
|
||||
|
||||
* CMake `3.5` or later
|
||||
|
||||
```shell
|
||||
sudo apt install cmake
|
||||
```
|
||||
|
||||
* [pkg-config](https://en.wikipedia.org/wiki/Pkg-config)
|
||||
|
||||
```shell
|
||||
sudo apt install pkg-config
|
||||
```
|
||||
|
||||
> [!IMPORTANT]
|
||||
> * If using Ubuntu 22.04, you must install `libstdc++-12-dev`
|
||||
>
|
||||
> ```shell
|
||||
> sudo apt install libstdc++-12-dev
|
||||
> ```
|
||||
|
||||
>[!NOTE]
|
||||
> * All package installs are shown with the `apt` package manager. Use the appropriate package manager for your operating system.
|
||||
> * To install rocDecode with minimum requirements, follow the [quick-start](./docs/install/quick-start.rst) instructions
|
||||
|
||||
|
||||
#### Prerequisites setup script for Linux
|
||||
For your convenience, we provide the setup script,
|
||||
[rocJPEG-setup.py](rocJPEG-setup.py) which installs all required dependencies. Run this script only once.
|
||||
|
||||
**Usage:**
|
||||
|
||||
```shell
|
||||
python rocJPEG-setup.py --rocm_path [ ROCm Installation Path - optional (default:/opt/rocm)]
|
||||
--developer [ Setup Developer Options - optional (default:ON) [options:ON/OFF]]
|
||||
```
|
||||
|
||||
**NOTE:** This script only needs to be executed once.
|
||||
|
||||
## Installation instructions
|
||||
|
||||
The installation process uses the following steps:
|
||||
|
||||
* [ROCm-supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) install verification
|
||||
|
||||
* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=rocm`
|
||||
|
||||
* Use either [Package install](#package-install) or [Source install](#source-install) as described below.
|
||||
|
||||
### Package install
|
||||
|
||||
Install rocJPEG runtime, development, and test packages.
|
||||
|
||||
* Runtime package - `rocjpeg` only provides the rocjpeg library `librocjpeg.so`
|
||||
* Development package - `rocjpeg-dev`/`rocjpeg-devel` provides the library, header files, and samples
|
||||
* Test package - `rocjpeg-test` provides CTest to verify installation
|
||||
|
||||
#### Ubuntu
|
||||
|
||||
```shell
|
||||
sudo apt install rocjpeg rocjpeg-dev rocjpeg-test
|
||||
```
|
||||
|
||||
#### RHEL
|
||||
|
||||
```shell
|
||||
sudo yum install rocjpeg rocjpeg-devel rocjpeg-test
|
||||
```
|
||||
|
||||
#### SLES
|
||||
|
||||
```shell
|
||||
sudo zypper install rocjpeg rocjpeg-devel rocjpeg-test
|
||||
```
|
||||
>[!NOTE]
|
||||
> Package install auto installs all dependencies.
|
||||
|
||||
### Source install
|
||||
|
||||
```shell
|
||||
git clone https://github.com/ROCm/rocJPEG.git
|
||||
cd rocJPEG
|
||||
mkdir build && cd build
|
||||
cmake ../
|
||||
make -j8
|
||||
sudo make install
|
||||
```
|
||||
|
||||
#### Run tests
|
||||
|
||||
```shell
|
||||
make test
|
||||
```
|
||||
|
||||
**NOTE:** run tests with verbose option `make test ARGS="-VV"`
|
||||
|
||||
#### Make package
|
||||
|
||||
```shell
|
||||
sudo make package
|
||||
```
|
||||
|
||||
## Verify installation
|
||||
|
||||
The installer will copy
|
||||
|
||||
* Libraries into `/opt/rocm/lib`
|
||||
* Header files into `/opt/rocm/include/rocjpeg`
|
||||
* Samples folder into `/opt/rocm/share/rocjpeg`
|
||||
* Documents folder into `/opt/rocm/share/doc/rocjpeg`
|
||||
|
||||
### Using sample application
|
||||
|
||||
To verify your installation using a sample application, run:
|
||||
|
||||
```shell
|
||||
mkdir rocjpeg-sample && cd rocjpeg-sample
|
||||
cmake /opt/rocm/share/rocjpeg/samples/jpegDecode/
|
||||
make -j8
|
||||
./videodecode -i /opt/rocm/share/rocjpeg/image/mug_420.jpg
|
||||
```
|
||||
|
||||
### Using test package
|
||||
|
||||
To verify your installation using the `rocjpeg-test` package, run:
|
||||
|
||||
```shell
|
||||
mkdir rocjpeg-test && cd rocjpeg-test
|
||||
cmake /opt/rocm/share/rocjpeg/test/
|
||||
ctest -VV
|
||||
```
|
||||
|
||||
## Samples
|
||||
|
||||
The tool provides a few samples to decode JPEG images [here](samples/). Please refer to the individual folders to build and run the samples.
|
||||
You can access samples to decode your images in our
|
||||
[GitHub repository](https://github.com/ROCm/rocJPEG/tree/develop/samples). Refer to the
|
||||
individual folders to build and run the samples.
|
||||
|
||||
## Docker
|
||||
|
||||
You can find rocDecode Docker containers in our
|
||||
[GitHub repository](https://github.com/ROCm/rocJPEG/tree/develop/docker).
|
||||
|
||||
## Documentation
|
||||
|
||||
Run the following code to build our documentation locally.
|
||||
|
||||
```shell
|
||||
cd docs
|
||||
pip3 install -r sphinx/requirements.txt
|
||||
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
|
||||
```
|
||||
|
||||
For more information on documentation builds, refer to the
|
||||
[Building documentation](https://rocm.docs.amd.com/en/latest/contribute/building.html)
|
||||
page.
|
||||
|
||||
## Tested configurations
|
||||
|
||||
* Linux
|
||||
* Ubuntu - `20.04` / `22.04`
|
||||
* RHEL - `8` / `9`
|
||||
* SLES - `15-SP5`
|
||||
* ROCm:
|
||||
* rocm-core - `6.1.0.60100-62`
|
||||
* amdgpu-core - `1:6.1.60100-1741643`
|
||||
* rocJPEG Setup Script - `V1.0`
|
||||
@@ -0,0 +1,233 @@
|
||||
/* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ROC_JPEG_H
|
||||
#define ROC_JPEG_H
|
||||
|
||||
#define ROCJPEGAPI
|
||||
|
||||
#pragma once
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
/*****************************************************************************************************************/
|
||||
//! \file rocjpeg.h
|
||||
//! \brief The AMD rocJPEG Library.
|
||||
//! \defgroup group_amd_rocjepg rocJPEG: AMD ROCm JPEG Decode API
|
||||
//! \brief rocJPEG API is a toolkit to decode JPEG images using a hardware-accelerated JPEG decoder on AMD’s GPUs.
|
||||
/******************************************************************************************************************/
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
//! \def
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Maximum number of channels rocJPEG supports
|
||||
#define ROCJPEG_MAX_COMPONENT 4
|
||||
|
||||
/*****************************************************/
|
||||
//! \enum RocJpegStatus
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! rocJPEG return status enums
|
||||
//! These enums are used in all API calls to rocJPEG
|
||||
/*****************************************************/
|
||||
typedef enum {
|
||||
ROCJPEG_STATUS_SUCCESS = 0,
|
||||
ROCJPEG_STATUS_NOT_INITIALIZED = -1,
|
||||
ROCJPEG_STATUS_INVALID_PARAMETER = -2,
|
||||
ROCJPEG_STATUS_BAD_JPEG = -3,
|
||||
ROCJPEG_STATUS_JPEG_NOT_SUPPORTED = -4,
|
||||
ROCJPEG_STATUS_OUTOF_MEMORY = -5,
|
||||
ROCJPEG_STATUS_EXECUTION_FAILED = -6,
|
||||
ROCJPEG_STATUS_ARCH_MISMATCH = -7,
|
||||
ROCJPEG_STATUS_INTERNAL_ERROR = -8,
|
||||
ROCJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED = -9,
|
||||
ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED = -10,
|
||||
ROCJPEG_STATUS_RUNTIME_ERROR = -11,
|
||||
ROCJPEG_STATUS_NOT_IMPLEMENTED = -12,
|
||||
} RocJpegStatus;
|
||||
|
||||
/*****************************************************/
|
||||
//! \enum RocJpegChromaSubsampling
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! RocJpegChromaSubsampling enum identifies image chroma subsampling values stored inside JPEG input stream
|
||||
/*****************************************************/
|
||||
typedef enum {
|
||||
ROCJPEG_CSS_444 = 0,
|
||||
ROCJPEG_CSS_440 = 1,
|
||||
ROCJPEG_CSS_422 = 2,
|
||||
ROCJPEG_CSS_420 = 3,
|
||||
ROCJPEG_CSS_411 = 4,
|
||||
ROCJPEG_CSS_400 = 5,
|
||||
ROCJPEG_CSS_UNKNOWN = -1
|
||||
} RocJpegChromaSubsampling;
|
||||
|
||||
/*****************************************************/
|
||||
//! \struct RocJpegImage
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! this structure is jpeg image descriptor used to return the decoded output image. User must allocate device
|
||||
//! memories for each channel for this structure and pass it to the decoder API.
|
||||
//! the decoder APIs then copies the decode image to this struct based on the requested output format (see RocJpegOutputFormat).
|
||||
/*****************************************************/
|
||||
typedef struct {
|
||||
uint8_t* channel[ROCJPEG_MAX_COMPONENT];
|
||||
uint32_t pitch[ROCJPEG_MAX_COMPONENT]; // pitch of each channel
|
||||
} RocJpegImage;
|
||||
|
||||
/*****************************************************/
|
||||
//! \enum RocJpegOutputFormat
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! RocJpegOutputFormat enum specifies what type of output user wants for image decoding
|
||||
/*****************************************************/
|
||||
typedef enum {
|
||||
// return native unchanged decoded YUV image from the VCN JPEG deocder.
|
||||
// For ROCJPEG_CSS_444 write Y, U, and V to first, second, and third channels of RocJpegImage
|
||||
// For ROCJPEG_CSS_422 write YUYV (packed) to first channel of RocJpegImage
|
||||
// For ROCJPEG_CSS_420 write Y to first channel and UV (interleaved) to second channel of RocJpegImage
|
||||
// For ROCJPEG_CSS_400 write Y to first channel of RocJpegImage
|
||||
ROCJPEG_OUTPUT_NATIVE = 0,
|
||||
// extract Y, U, and V channels from the decoded YUV image from the VCN JPEG deocder and write into first, second, and thrid channel of RocJpegImage.
|
||||
// For ROCJPEG_CSS_400 write Y to first channel of RocJpegImage
|
||||
ROCJPEG_OUTPUT_YUV_PLANAR = 1,
|
||||
// return luma component (Y) and write to first channel of RocJpegImage
|
||||
ROCJPEG_OUTPUT_Y = 2,
|
||||
// convert to interleaved RGB using HIP kernels and write to first channel of RocJpegImage
|
||||
ROCJPEG_OUTPUT_RGB = 3,
|
||||
// maximum allowed value
|
||||
ROCJPEG_OUTPUT_FORMAT_MAX = 4
|
||||
} RocJpegOutputFormat;
|
||||
|
||||
/*****************************************************/
|
||||
//! \enum RocJpegBackend
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! RocJpegBackend enum specifies what type of backend to use for JPEG decoding
|
||||
//! ROCJPEG_BACKEND_HARDWARE : supports baseline JPEG bitstream using VCN hardware-accelarted JPEG decoder
|
||||
//! ROCJPEG_BACKEND_HYBRID : uses CPU for Huffman decode and GPU for IDCT using HIP kernesl. This mode doesn't use VCN JPEG hardware decoder
|
||||
/*****************************************************/
|
||||
typedef enum {
|
||||
ROCJPEG_BACKEND_HARDWARE = 0,
|
||||
ROCJPEG_BACKEND_HYBRID = 1
|
||||
} RocJpegBackend;
|
||||
|
||||
/*****************************************************/
|
||||
// Opaque library handle identifier.
|
||||
//struct RocJpegDecoderHandle;
|
||||
//typedef struct RocJpegDecoderHandle* RocJpegHandle;
|
||||
//! Used in subsequent API calls after rocJpegCreate
|
||||
/*****************************************************/
|
||||
typedef void *RocJpegHandle;
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle);
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Create the decoder object based on backend and device_id. A handle to the created decoder is returned
|
||||
//! Initalization of rocjpeg handle. This handle is used for all consecutive calls
|
||||
//! IN backend : Backend to use.
|
||||
//! IN device_id : the GPU device id for which a decoder should be created. For example, use 0 for the first GPU device,
|
||||
//! and 1 for the second GPU device on the system, etc.
|
||||
//! IN/OUT handle : rocjpeg handle, jpeg decoder instance to use for
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle);
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle);
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Release the decoder object and resources.
|
||||
//! IN/OUT handle: instance handle to release
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle);
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights);
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Retrieve the image info, including channel, width and height of each component, and chroma subsampling.
|
||||
//! If less than ROCJPEG_MAX_COMPONENT channels are encoded, then zeros would be set to absent channels information
|
||||
//! If the image is 3-channel, all three groups are valid.
|
||||
//! IN handle : rocJpeg handle
|
||||
//! IN data : Pointer to the buffer containing the jpeg stream data to be decoded.
|
||||
//! IN length : Length of the jpeg image buffer.
|
||||
//! OUT num_component : Number of channels in the decoded output image
|
||||
//! OUT subsampling : Chroma subsampling used in this JPEG, see RocJpegChromaSubsampling.
|
||||
//! OUT widths : pointer to ROCJPEG_MAX_COMPONENT of uint32_t, returns width of each channel.
|
||||
//! OUT heights : pointer to ROCJPEG_MAX_COMPONENT of uint32_t, returns height of each channel.
|
||||
//! \return ROCJPEG_STATUS_SUCCESS if successful
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights);
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination, hipStream_t stream);
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Decodes single image based on the backend used to create the rocJpeg handle in rocJpegCreate API.
|
||||
//! Destination buffers should be large enough to be able to store output of specified format. These buffers should be pre-allocted by the user in the device memories.
|
||||
//! For each color plane (channel) sizes could be retrieved for image using rocJpegGetImageInfo API
|
||||
//! and minimum required memory buffer for each plane is plane_height * plane_pitch where plane_pitch >= plane_width for
|
||||
//! planar output formats and plane_pitch >= plane_width * num_components for interleaved output format.
|
||||
//! IN handle : rocJpeg handle
|
||||
//! IN data : Pointer to the buffer containing the jpeg stream to be decoded.
|
||||
//! IN length : Length of the jpeg image buffer.
|
||||
//! IN output_format : Output data format. See RocJpegOutputFormat for description
|
||||
//! IN/OUT destination : Pointer to structure with information about output buffers. See RocJpegImage description.
|
||||
//! \return ROCJPEG_STATUS_SUCCESS if successful
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination);
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format);
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Resets and initializes batch decoder for working on the batches of specified size
|
||||
//! Should be called once for decoding batches of this specific size, also use to reset failed batches
|
||||
//! IN/OUT handle : Library handle
|
||||
//! IN batch_size : Size of the batch
|
||||
//! IN max_cpu_threads : Maximum number of CPU threads that will be processing this batch
|
||||
//! IN output_format : Output data format. Will be the same for every image in batch
|
||||
//! \return ROCJPEG_STATUS_SUCCESS if successful
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format);
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations, hipStream_t stream);
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Decodes batch of images. Output buffers should be large enough to be able to store
|
||||
//! outputs of specified format, see single image decoding description for details. Call to
|
||||
//! rocjpegDecodeBatchedInitialize() is required prior to this call, batch size is expected to be the same as
|
||||
//! parameter to this batch initialization function.
|
||||
//!
|
||||
//! IN/OUT handle : Library handle
|
||||
//! INT/OUT jpeg_handle : Decoded jpeg image state handle
|
||||
//! IN data : Array of size batch_size of pointers to the input buffers containing the jpeg images to be decoded.
|
||||
//! IN lengths : Array of size batch_size with lengths of the jpeg images' buffers in the batch.
|
||||
//! IN/OUT destinations : Array of size batch_size with pointers to structure with information about output buffers,
|
||||
//! \return ROCJPEG_STATUS_SUCCESS if successful
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations);
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn extern const char* ROCDECAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status);
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Return name of the specified error code in text form.
|
||||
/*****************************************************************************************************/
|
||||
extern const char* ROCJPEGAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // ROC_JPEG_H
|
||||
@@ -0,0 +1,49 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
################################################################################
|
||||
|
||||
find_library(LIBVA_LIBRARY NAMES va HINTS /opt/amdgpu/lib/x86_64-linux-gnu /opt/amdgpu/lib64 NO_DEFAULT_PATH)
|
||||
find_library(LIBVA_DRM_LIBRARY NAMES va-drm HINTS /opt/amdgpu/lib/x86_64-linux-gnu /opt/amdgpu/lib64 NO_DEFAULT_PATH)
|
||||
find_path(LIBVA_INCLUDE_DIR NAMES va/va.h PATHS /opt/amdgpu/include NO_DEFAULT_PATH)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(Libva DEFAULT_MSG LIBVA_INCLUDE_DIR LIBVA_LIBRARY)
|
||||
mark_as_advanced(LIBVA_INCLUDE_DIR LIBVA_LIBRARY LIBVA_DRM_LIBRARY)
|
||||
|
||||
if(Libva_FOUND)
|
||||
if(NOT TARGET Libva::va)
|
||||
add_library(Libva::va UNKNOWN IMPORTED)
|
||||
set_target_properties(Libva::va PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LIBVA_INCLUDE_DIR}"
|
||||
IMPORTED_LOCATION "${LIBVA_LIBRARY}")
|
||||
endif()
|
||||
if(NOT TARGET Libva::va_drm)
|
||||
add_library(Libva::va_drm UNKNOWN IMPORTED)
|
||||
set_target_properties(Libva::va_drm PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LIBVA_INCLUDE_DIR}"
|
||||
IMPORTED_LOCATION "${LIBVA_DRM_LIBRARY}")
|
||||
endif()
|
||||
message("-- ${White}Using Libva -- \n\tLibraries:${LIBVA_LIBRARY} \n\tIncludes:${LIBVA_INCLUDE_DIR}${ColourReset}")
|
||||
message("-- ${White}Using Libva-drm -- \n\tLibraries:${LIBVA_DRM_LIBRARY}${ColourReset}")
|
||||
else()
|
||||
if(Libva_FIND_REQUIRED)
|
||||
message(FATAL_ERROR "{Red}FindLibva -- Libva NOT FOUND${ColourReset}")
|
||||
endif()
|
||||
endif()
|
||||
Plik binarny nie jest wyświetlany.
|
Po Szerokość: | Wysokość: | Rozmiar: 1.8 MiB |
Plik binarny nie jest wyświetlany.
|
Po Szerokość: | Wysokość: | Rozmiar: 2.2 MiB |
Plik binarny nie jest wyświetlany.
|
Po Szerokość: | Wysokość: | Rozmiar: 181 KiB |
@@ -0,0 +1,13 @@
|
||||
# rocJEPG Docker
|
||||
|
||||
## Build - dockerfiles
|
||||
|
||||
```
|
||||
sudo docker build -f {DOCKER_FILE_NAME}.dockerfile -t {DOCKER_IMAGE_NAME} .
|
||||
```
|
||||
|
||||
## Run - docker
|
||||
|
||||
```
|
||||
sudo docker run -it --device=/dev/kfd --device=/dev/dri --cap-add=SYS_RAWIO --device=/dev/mem --group-add video --network host --privileged {DOCKER_IMAGE_NAME}
|
||||
```
|
||||
@@ -0,0 +1,17 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
# install base dependencies
|
||||
RUN apt-get update -y
|
||||
#RUN apt-get dist-upgrade -y
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git apt-utils sudo vainfo dialog
|
||||
|
||||
# install ROCm
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget keyboard-configuration && \
|
||||
wget https://repo.radeon.com/amdgpu-install/6.1/ubuntu/focal/amdgpu-install_6.1.60100-1_all.deb && \
|
||||
sudo apt-get install ./amdgpu-install_6.1.60100-1_all.deb && \
|
||||
sudo amdgpu-install -y --usecase=rocm
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
# install rocJPEG package
|
||||
RUN DEBIAN_FRONTEND=noninteractive sudo apt install -y rocjpeg rocjpeg-dev rocjpeg-test
|
||||
@@ -0,0 +1,17 @@
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# install base dependencies
|
||||
RUN apt-get update -y
|
||||
#RUN apt-get dist-upgrade -y
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git apt-utils sudo vainfo dialog libstdc++-12-dev
|
||||
|
||||
# install ROCm
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget keyboard-configuration && \
|
||||
wget https://repo.radeon.com/amdgpu-install/6.1/ubuntu/jammy/amdgpu-install_6.1.60100-1_all.deb && \
|
||||
sudo apt-get install ./amdgpu-install_6.1.60100-1_all.deb && \
|
||||
sudo amdgpu-install -y --usecase=rocm
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
# install rocJPEG package
|
||||
RUN DEBIAN_FRONTEND=noninteractive sudo apt install -y rocjpeg rocjpeg-dev rocjpeg-test
|
||||
Plik binarny nie jest wyświetlany.
|
Po Szerokość: | Wysokość: | Rozmiar: 5.8 KiB |
Plik binarny nie jest wyświetlany.
|
Po Szerokość: | Wysokość: | Rozmiar: 30 KiB |
@@ -0,0 +1,136 @@
|
||||
# Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import platform
|
||||
if sys.version_info[0] < 3:
|
||||
import commands
|
||||
else:
|
||||
import subprocess
|
||||
|
||||
__copyright__ = "Copyright (c) 2024, AMD ROCm rocJPEG"
|
||||
__version__ = "1.0"
|
||||
__email__ = "mivisionx.support@amd.com"
|
||||
__status__ = "Shipping"
|
||||
|
||||
# Arguments
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--rocm_path', type=str, default='/opt/rocm',
|
||||
help='ROCm Installation Path - optional (default:/opt/rocm) - ROCm Installation Required')
|
||||
parser.add_argument('--developer', type=str, default='ON',
|
||||
help='Setup Developer Options - optional (default:ON) [options:ON/OFF]')
|
||||
|
||||
args = parser.parse_args()
|
||||
developerInstall = args.developer.upper()
|
||||
|
||||
ROCM_PATH = args.rocm_path
|
||||
|
||||
if "ROCM_PATH" in os.environ:
|
||||
ROCM_PATH = os.environ.get('ROCM_PATH')
|
||||
print("\nROCm PATH set to -- "+ROCM_PATH+"\n")
|
||||
|
||||
# check ROCm installation
|
||||
if os.path.exists(ROCM_PATH):
|
||||
print("\nROCm Installation Found -- "+ROCM_PATH+"\n")
|
||||
os.system('echo ROCm Info -- && '+ROCM_PATH+'/bin/rocminfo')
|
||||
else:
|
||||
print(
|
||||
"WARNING: If ROCm installed, set ROCm Path with \"--rocm_path\" option for full installation [Default:/opt/rocm]\n")
|
||||
print("ERROR: rocJPEG Setup requires ROCm install\n")
|
||||
exit(-1)
|
||||
|
||||
if developerInstall not in ('OFF', 'ON'):
|
||||
print(
|
||||
"ERROR: Developer Option Not Supported - [Supported Options: OFF or ON]\n")
|
||||
exit()
|
||||
|
||||
# get platfrom info
|
||||
platfromInfo = platform.platform()
|
||||
|
||||
# sudo requirement check
|
||||
sudoLocation = ''
|
||||
userName = ''
|
||||
if sys.version_info[0] < 3:
|
||||
status, sudoLocation = commands.getstatusoutput("which sudo")
|
||||
if sudoLocation != '/usr/bin/sudo':
|
||||
status, userName = commands.getstatusoutput("whoami")
|
||||
else:
|
||||
status, sudoLocation = subprocess.getstatusoutput("which sudo")
|
||||
if sudoLocation != '/usr/bin/sudo':
|
||||
status, userName = subprocess.getstatusoutput("whoami")
|
||||
|
||||
# setup for Linux
|
||||
linuxSystemInstall = ''
|
||||
linuxCMake = 'cmake'
|
||||
linuxSystemInstall_check = ''
|
||||
linuxFlag = ''
|
||||
if "centos" in platfromInfo or "redhat" in platfromInfo or os.path.exists('/usr/bin/yum'):
|
||||
linuxSystemInstall = 'yum -y'
|
||||
linuxSystemInstall_check = '--nogpgcheck'
|
||||
if "centos-7" in platfromInfo or "redhat-7" in platfromInfo:
|
||||
linuxCMake = 'cmake3'
|
||||
os.system(linuxSystemInstall+' install cmake3')
|
||||
if not "centos" in platfromInfo or not "redhat" in platfromInfo:
|
||||
platfromInfo = platfromInfo+'-redhat'
|
||||
elif "Ubuntu" in platfromInfo or os.path.exists('/usr/bin/apt-get'):
|
||||
linuxSystemInstall = 'apt-get -y'
|
||||
linuxSystemInstall_check = '--allow-unauthenticated'
|
||||
linuxFlag = '-S'
|
||||
if not "Ubuntu" in platfromInfo:
|
||||
platfromInfo = platfromInfo+'-Ubuntu'
|
||||
elif os.path.exists('/usr/bin/zypper'):
|
||||
linuxSystemInstall = 'zypper -n'
|
||||
linuxSystemInstall_check = '--no-gpg-checks'
|
||||
platfromInfo = platfromInfo+'-SLES'
|
||||
else:
|
||||
print("\nrocJPEG Setup on "+platfromInfo+" is unsupported\n")
|
||||
print("\nrocJPEG Setup Supported on: Ubuntu 20/22; CentOS 7/8; RedHat 8/9; & SLES 15 SP4\n")
|
||||
exit(-1)
|
||||
|
||||
# rocJPEG Setup
|
||||
print("\nrocJPEG Setup on: "+platfromInfo+"\n")
|
||||
print("\nrocJPEG Dependencies Installation with rocJPEG-setup.py V-"+__version__+"\n")
|
||||
|
||||
if userName == 'root':
|
||||
os.system(linuxSystemInstall+' update')
|
||||
os.system(linuxSystemInstall+' install sudo')
|
||||
|
||||
# install pre-reqs
|
||||
os.system('sudo -v')
|
||||
os.system(linuxSystemInstall+' update')
|
||||
os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
|
||||
linuxSystemInstall_check+' install gcc cmake pkg-config')
|
||||
|
||||
# rocJPEG Core - VA/DRM Requirements
|
||||
if "Ubuntu" in platfromInfo:
|
||||
os.system('sudo -v')
|
||||
os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
|
||||
' install vainfo libdrm-amdgpu1 libva-amdgpu-dev mesa-amdgpu-va-drivers')
|
||||
if "22.04" in platform.version():
|
||||
os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
|
||||
' install libstdc++-12-dev')
|
||||
else:
|
||||
os.system('sudo -v')
|
||||
os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
|
||||
' install libdrm-amdgpu libva-amdgpu-devel mesa-amdgpu-dri-drivers')
|
||||
|
||||
print("\nrocJPEG Dependencies Installed with rocJPEG-setup.py V-"+__version__+"\n")
|
||||
@@ -0,0 +1,71 @@
|
||||
# ##############################################################################
|
||||
# Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
# ##############################################################################
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
|
||||
add_test(
|
||||
NAME
|
||||
jpeg-decode-fmt-unchanged
|
||||
COMMAND
|
||||
"${CMAKE_CTEST_COMMAND}"
|
||||
--build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
|
||||
--build-generator "${CMAKE_GENERATOR}"
|
||||
--test-command "jpegdecode"
|
||||
-i ${CMAKE_SOURCE_DIR}/data/images/
|
||||
)
|
||||
|
||||
add_test(
|
||||
NAME
|
||||
jpeg-decode-fmt-yuv
|
||||
COMMAND
|
||||
"${CMAKE_CTEST_COMMAND}"
|
||||
--build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
|
||||
--build-generator "${CMAKE_GENERATOR}"
|
||||
--test-command "jpegdecode"
|
||||
-i ${CMAKE_SOURCE_DIR}/data/images/ -fmt yuv
|
||||
)
|
||||
|
||||
add_test(
|
||||
NAME
|
||||
jpeg-decode-fmt-y
|
||||
COMMAND
|
||||
"${CMAKE_CTEST_COMMAND}"
|
||||
--build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
|
||||
--build-generator "${CMAKE_GENERATOR}"
|
||||
--test-command "jpegdecode"
|
||||
-i ${CMAKE_SOURCE_DIR}/data/images/ -fmt y
|
||||
)
|
||||
|
||||
add_test(
|
||||
NAME
|
||||
jpeg-decode-fmt-rgbi
|
||||
COMMAND
|
||||
"${CMAKE_CTEST_COMMAND}"
|
||||
--build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecode"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
|
||||
--build-generator "${CMAKE_GENERATOR}"
|
||||
--test-command "jpegdecode"
|
||||
-i ${CMAKE_SOURCE_DIR}/data/images/ -fmt rgbi
|
||||
)
|
||||
@@ -0,0 +1,73 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
################################################################################
|
||||
|
||||
cmake_minimum_required (VERSION 3.0)
|
||||
project(jpegdecode)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
# ROCM Path
|
||||
if(DEFINED ENV{ROCM_PATH})
|
||||
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path")
|
||||
elseif(ROCM_PATH)
|
||||
message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}")
|
||||
else()
|
||||
set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
|
||||
endif()
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake)
|
||||
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
|
||||
set(CMAKE_CXX_COMPILER ${ROCM_PATH}/llvm/bin/clang++)
|
||||
|
||||
set(DEFAULT_AMDGPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102")
|
||||
set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")
|
||||
|
||||
find_package(HIP QUIET)
|
||||
|
||||
# find rocJPEG
|
||||
find_library(ROCJPEG_LIBRARY NAMES rocjpeg HINTS {ROCM_PATH}/lib)
|
||||
find_path(ROCJPEG_INCLUDE_DIR NAMES rocjpeg.h PATHS /opt/rocm/include/rocjpeg {ROCM_PATH}/include/rocjpeg)
|
||||
|
||||
if(ROCJPEG_LIBRARY AND ROCJPEG_INCLUDE_DIR)
|
||||
set(ROCJPEG_FOUND TRUE)
|
||||
message("-- ${White}Using rocJPEG -- \n\tLibraries:${ROCJPEG_LIBRARY} \n\tIncludes:${ROCJPEG_INCLUDE_DIR}${ColourReset}")
|
||||
endif()
|
||||
|
||||
if(HIP_FOUND AND ROCJPEG_FOUND)
|
||||
# HIP
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::device)
|
||||
# rocJPEG
|
||||
include_directories (${ROCJPEG_INCLUDE_DIR})
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCJPEG_LIBRARY})
|
||||
list(APPEND SOURCES ${PROJECT_SOURCE_DIR} jpegdecode.cpp)
|
||||
add_executable(${PROJECT_NAME} ${SOURCES})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
|
||||
target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
|
||||
else()
|
||||
message("-- ERROR!: ${PROJECT_NAME} excluded! please install all the dependencies and try again!")
|
||||
if (NOT HIP_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install ROCm and HIP!")
|
||||
endif()
|
||||
if (NOT ROCJPEG_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: rocDecode Not Found! - please install rocDecode!")
|
||||
endif()
|
||||
endif()
|
||||
@@ -0,0 +1,25 @@
|
||||
# JPEG decode sample
|
||||
|
||||
The jpeg decode sample illustrates decoding a JPEG images using rocJPEG library to get the individual decoded images in one of the supported output format (i.e., unchanged, yuv, y, rgbi). This sample can be configured with a device ID and optionally able to dump the output to a file.
|
||||
|
||||
## Prerequisites:
|
||||
|
||||
* Install [rocJPEG](../../README.md#build-and-install-instructions)
|
||||
|
||||
## Build
|
||||
|
||||
```shell
|
||||
mkdir jpeg_decode_sample && cd jpeg_decode_sample
|
||||
cmake ../
|
||||
make -j
|
||||
```
|
||||
|
||||
## Run
|
||||
|
||||
```shell
|
||||
./jpegdecode -i <Path to single image or directory of images - [required]>
|
||||
-be <Select rocJPEG backend (0 for ROCJPEG_BACKEND_HARDWARE, using VCN hardware-accelarated JPEG decoder, 1 ROCJPEG_BACKEND_HYBRID, using CPU and GPU HIP kernles for JPEG decoding) [optional]>
|
||||
-fmt <Select rocJPEG output format for decoding, one of the [native, yuv, y, rgbi] [optional - default: native]>
|
||||
-o <Output file path or directory - Write decoded images based on the selected outfut format to this file or directory [optional]>
|
||||
-d <GPU device id (0 for the first GPU device, 1 for the second GPU device, etc.) [optional - default: 0]>
|
||||
```
|
||||
@@ -0,0 +1,531 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <chrono>
|
||||
#include <sys/stat.h>
|
||||
#include <libgen.h>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include "rocjpeg.h"
|
||||
|
||||
#define CHECK_ROCJPEG(call) { \
|
||||
RocJpegStatus rocjpeg_status = (call); \
|
||||
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { \
|
||||
std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) << " at " << __FILE__ << ":" << __LINE__ << std::endl;\
|
||||
exit(1); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHECK_HIP(call) { \
|
||||
hipError_t hip_status = (call); \
|
||||
if (hip_status != hipSuccess) { \
|
||||
std::cout << "rocJPEG failure: '#" << hip_status << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\
|
||||
exit(1); \
|
||||
} \
|
||||
}
|
||||
|
||||
void ShowHelpAndExit(const char *option = NULL) {
|
||||
std::cout << "Options:" << std::endl
|
||||
<< "-i Path to single image or directory of images - required" << std::endl
|
||||
<< "-be Select rocJPEG backend (0 for ROCJPEG_BACKEND_HARDWARE, using VCN hardware-accelarated JPEG decoder, 1 ROCJPEG_BACKEND_HYBRID, using CPU and GPU HIP kernles for JPEG decoding); optional; default: 0" << std::endl
|
||||
<< "-fmt Select rocJPEG output format for decoding, one of the [native, yuv, y, rgb]; optional; default: native" << std::endl
|
||||
<< "-o Output file path or directory - Write decoded images based on the selected outfut format to this file or directory; optional;" << std::endl
|
||||
<< "-d GPU device id (0 for the first GPU device, 1 for the second GPU device, etc.); optional; default: 0" << std::endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void ParseCommandLine(std::string &input_path, std::string &output_file_path, int &dump_output_frames, int &device_id, RocJpegBackend &rocjpeg_backend, RocJpegOutputFormat &output_format, int argc, char *argv[]) {
|
||||
if(argc <= 1) {
|
||||
ShowHelpAndExit();
|
||||
}
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (!strcmp(argv[i], "-h")) {
|
||||
ShowHelpAndExit();
|
||||
}
|
||||
if (!strcmp(argv[i], "-i")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-i");
|
||||
}
|
||||
input_path = argv[i];
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-o")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-o");
|
||||
}
|
||||
output_file_path = argv[i];
|
||||
dump_output_frames = 1;
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-d")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-d");
|
||||
}
|
||||
device_id = atoi(argv[i]);
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-be")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-be");
|
||||
}
|
||||
rocjpeg_backend = static_cast<RocJpegBackend>(atoi(argv[i]));
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-fmt")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-fmt");
|
||||
}
|
||||
std::string selected_output_format = argv[i];
|
||||
if (selected_output_format == "native") {
|
||||
output_format = ROCJPEG_OUTPUT_NATIVE;
|
||||
} else if (selected_output_format == "yuv") {
|
||||
output_format = ROCJPEG_OUTPUT_YUV_PLANAR;
|
||||
} else if (selected_output_format == "y") {
|
||||
output_format = ROCJPEG_OUTPUT_Y;
|
||||
} else if (selected_output_format == "rgb") {
|
||||
output_format = ROCJPEG_OUTPUT_RGB;
|
||||
} else {
|
||||
ShowHelpAndExit(argv[i]);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
ShowHelpAndExit(argv[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void SaveImage(std::string output_file_name, RocJpegImage *output_image, uint32_t img_width, uint32_t img_height, RocJpegChromaSubsampling subsampling, RocJpegOutputFormat output_format) {
|
||||
|
||||
uint8_t *hst_ptr = nullptr;
|
||||
FILE *fp;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
if (output_image == nullptr || output_image->channel[0] == nullptr || output_image->pitch[0] == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t widths[ROCJPEG_MAX_COMPONENT] = {};
|
||||
uint32_t heights[ROCJPEG_MAX_COMPONENT] = {};
|
||||
|
||||
switch (output_format) {
|
||||
case ROCJPEG_OUTPUT_NATIVE:
|
||||
switch (subsampling) {
|
||||
case ROCJPEG_CSS_444:
|
||||
widths[2] = widths[1] = widths[0] = img_width;
|
||||
heights[2] = heights[1] = heights[0] = img_height;
|
||||
break;
|
||||
case ROCJPEG_CSS_422:
|
||||
widths[0] = img_width * 2;
|
||||
heights[0] = img_height;
|
||||
break;
|
||||
case ROCJPEG_CSS_420:
|
||||
widths[1] = widths[0] = img_width;
|
||||
heights[0] = img_height;
|
||||
heights[1] = img_height >> 1;
|
||||
break;
|
||||
case ROCJPEG_CSS_400:
|
||||
widths[0] = img_width;
|
||||
heights[0] = img_height;
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unknown chroma subsampling!" << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_YUV_PLANAR:
|
||||
switch (subsampling) {
|
||||
case ROCJPEG_CSS_444:
|
||||
widths[2] = widths[1] = widths[0] = img_width;
|
||||
heights[2] = heights[1] = heights[0] = img_height;
|
||||
break;
|
||||
case ROCJPEG_CSS_422:
|
||||
widths[0] = img_width;
|
||||
widths[2] = widths[1] = widths[0] >> 1;
|
||||
heights[2] = heights[1] = heights[0] = img_height;
|
||||
break;
|
||||
case ROCJPEG_CSS_420:
|
||||
widths[0] = img_width;
|
||||
widths[2] = widths[1] = widths[0] >> 1;
|
||||
heights[0] = img_height;
|
||||
heights[2] = heights[1] = img_height >> 1;
|
||||
break;
|
||||
case ROCJPEG_CSS_400:
|
||||
widths[0] = img_width;
|
||||
heights[0] = img_height;
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unknown chroma subsampling!" << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_Y:
|
||||
widths[0] = img_width;
|
||||
heights[0] = img_height;
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_RGB:
|
||||
widths[0] = img_width * 3;
|
||||
heights[0] = img_height;
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unknown output format!" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t channel0_size = output_image->pitch[0] * heights[0];
|
||||
uint32_t channel1_size = output_image->pitch[1] * heights[1];
|
||||
uint32_t channel2_size = output_image->pitch[2] * heights[2];
|
||||
|
||||
uint32_t output_image_size = channel0_size + channel1_size + channel2_size;
|
||||
|
||||
if (hst_ptr == nullptr) {
|
||||
hst_ptr = new uint8_t [output_image_size];
|
||||
}
|
||||
|
||||
CHECK_HIP(hipMemcpyDtoH((void *)hst_ptr, output_image->channel[0], channel0_size));
|
||||
|
||||
uint8_t *tmp_hst_ptr = hst_ptr;
|
||||
fp = fopen(output_file_name.c_str(), "wb");
|
||||
if (fp) {
|
||||
// write channel0
|
||||
if (widths[0] == output_image->pitch[0]) {
|
||||
fwrite(hst_ptr, 1, channel0_size, fp);
|
||||
} else {
|
||||
for (int i = 0; i < heights[0]; i++) {
|
||||
fwrite(tmp_hst_ptr, 1, widths[0], fp);
|
||||
tmp_hst_ptr += output_image->pitch[0];
|
||||
}
|
||||
}
|
||||
// write channel1
|
||||
if (channel1_size != 0 && output_image->channel[1] != nullptr) {
|
||||
uint8_t *channel1_hst_ptr = hst_ptr + channel0_size;
|
||||
CHECK_HIP(hipMemcpyDtoH((void *)channel1_hst_ptr, output_image->channel[1], channel1_size));
|
||||
if (widths[1] == output_image->pitch[1]) {
|
||||
fwrite(channel1_hst_ptr, 1, channel1_size, fp);
|
||||
} else {
|
||||
for (int i = 0; i < heights[1]; i++) {
|
||||
fwrite(channel1_hst_ptr, 1, widths[1], fp);
|
||||
channel1_hst_ptr += output_image->pitch[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
// write channel2
|
||||
if (channel2_size != 0 && output_image->channel[2] != nullptr) {
|
||||
uint8_t *channel2_hst_ptr = hst_ptr + channel0_size + channel1_size;
|
||||
CHECK_HIP(hipMemcpyDtoH((void *)channel2_hst_ptr, output_image->channel[2], channel2_size));
|
||||
if (widths[2] == output_image->pitch[2]) {
|
||||
fwrite(channel2_hst_ptr, 1, channel2_size, fp);
|
||||
} else {
|
||||
for (int i = 0; i < heights[2]; i++) {
|
||||
fwrite(channel2_hst_ptr, 1, widths[2], fp);
|
||||
channel2_hst_ptr += output_image->pitch[2];
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
if (hst_ptr != nullptr) {
|
||||
delete [] hst_ptr;
|
||||
hst_ptr = nullptr;
|
||||
tmp_hst_ptr = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
bool GetFilePaths(std::string &input_path, std::vector<std::string> &file_paths, bool &is_dir, bool &is_file) {
|
||||
is_dir = std::filesystem::is_directory(input_path);
|
||||
is_file = std::filesystem::is_regular_file(input_path);
|
||||
if (is_dir) {
|
||||
for (const auto &entry : std::filesystem::directory_iterator(input_path))
|
||||
file_paths.push_back(entry.path());
|
||||
} else if (is_file) {
|
||||
file_paths.push_back(input_path);
|
||||
} else {
|
||||
std::cerr << "ERROR: the input path is not valid!" << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool InitHipDevice(int device_id) {
|
||||
int num_devices;
|
||||
hipDeviceProp_t hip_dev_prop;
|
||||
CHECK_HIP(hipGetDeviceCount(&num_devices));
|
||||
if (num_devices < 1) {
|
||||
std::cerr << "ERROR: didn't find any GPU!" << std::endl;
|
||||
return false;
|
||||
}
|
||||
if (device_id >= num_devices) {
|
||||
std::cerr << "ERROR: the requested device_id is not found!" << std::endl;
|
||||
return false;
|
||||
}
|
||||
CHECK_HIP(hipSetDevice(device_id));
|
||||
CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop, device_id));
|
||||
|
||||
std::cout << "info: Using GPU device " << device_id << ": " << hip_dev_prop.name << "[" << hip_dev_prop.gcnArchName << "] on PCI bus " <<
|
||||
std::setfill('0') << std::setw(2) << std::right << std::hex << hip_dev_prop.pciBusID << ":" << std::setfill('0') << std::setw(2) <<
|
||||
std::right << std::hex << hip_dev_prop.pciDomainID << "." << hip_dev_prop.pciDeviceID << std::dec << std::endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
int main(int argc, char **argv) {
|
||||
int device_id = 0;
|
||||
int dump_output_frames = 0;
|
||||
uint8_t num_components;
|
||||
uint32_t widths[ROCJPEG_MAX_COMPONENT] = {};
|
||||
uint32_t heights[ROCJPEG_MAX_COMPONENT] = {};
|
||||
uint32_t channel_sizes[ROCJPEG_MAX_COMPONENT] = {};
|
||||
uint32_t num_channels = 0;
|
||||
int total_images_all = 0;
|
||||
double time_per_image_all = 0;
|
||||
double m_pixels_all = 0;
|
||||
double image_per_sec_all = 0;
|
||||
std::string chroma_sub_sampling = "";
|
||||
std::string input_path, output_file_path;
|
||||
std::vector<std::string> file_paths = {};
|
||||
bool is_dir = false;
|
||||
bool is_file = false;
|
||||
RocJpegChromaSubsampling subsampling;
|
||||
RocJpegBackend rocjpeg_backend = ROCJPEG_BACKEND_HARDWARE;
|
||||
RocJpegHandle rocjpeg_handle = nullptr;
|
||||
RocJpegImage output_image = {};
|
||||
RocJpegOutputFormat output_format = ROCJPEG_OUTPUT_NATIVE;
|
||||
|
||||
ParseCommandLine(input_path, output_file_path, dump_output_frames, device_id, rocjpeg_backend, output_format, argc, argv);
|
||||
if (!GetFilePaths(input_path, file_paths, is_dir, is_file)) {
|
||||
std::cerr << "Failed to get input file paths!" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
if (!InitHipDevice(device_id)) {
|
||||
std::cerr << "Failed to initialize HIP!" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
CHECK_ROCJPEG(rocJpegCreate(rocjpeg_backend, device_id, &rocjpeg_handle));
|
||||
|
||||
int counter = 0;
|
||||
std::vector<std::vector<char>> file_data(file_paths.size());
|
||||
std::vector<size_t> file_sizes(file_paths.size());
|
||||
|
||||
for (auto file_path : file_paths) {
|
||||
std::string base_file_name = file_path.substr(file_path.find_last_of("/\\") + 1);
|
||||
int image_count = 0;
|
||||
|
||||
// Read an image from disk.
|
||||
std::ifstream input(file_path.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
|
||||
if (!(input.is_open())) {
|
||||
std::cerr << "ERROR: Cannot open image: " << file_path << std::endl;
|
||||
return 0;
|
||||
}
|
||||
// Get the size
|
||||
std::streamsize file_size = input.tellg();
|
||||
input.seekg(0, std::ios::beg);
|
||||
// resize if buffer is too small
|
||||
if (file_data[counter].size() < file_size) {
|
||||
file_data[counter].resize(file_size);
|
||||
}
|
||||
if (!input.read(file_data[counter].data(), file_size)) {
|
||||
std::cerr << "Cannot read from file: " << file_path << std::endl;
|
||||
return 0;
|
||||
}
|
||||
file_sizes[counter] = file_size;
|
||||
|
||||
CHECK_ROCJPEG(rocJpegGetImageInfo(rocjpeg_handle, reinterpret_cast<uint8_t*>(file_data[counter].data()), file_size, &num_components, &subsampling, widths, heights));
|
||||
|
||||
std::cout << "info: input file name: " << base_file_name << std::endl;
|
||||
std::cout << "info: input image resolution: " << widths[0] << "x" << heights[0] << std::endl;
|
||||
|
||||
switch (subsampling) {
|
||||
case ROCJPEG_CSS_444:
|
||||
chroma_sub_sampling = "YUV 4:4:4";
|
||||
break;
|
||||
case ROCJPEG_CSS_440:
|
||||
chroma_sub_sampling = "YUV 4:4:0";
|
||||
break;
|
||||
case ROCJPEG_CSS_422:
|
||||
chroma_sub_sampling = "YUV 4:2:2";
|
||||
break;
|
||||
case ROCJPEG_CSS_420:
|
||||
chroma_sub_sampling = "YUV 4:2:0";
|
||||
break;
|
||||
case ROCJPEG_CSS_411:
|
||||
chroma_sub_sampling = "YUV 4:1:1";
|
||||
break;
|
||||
case ROCJPEG_CSS_400:
|
||||
chroma_sub_sampling = "YUV 4:0:0";
|
||||
break;
|
||||
case ROCJPEG_CSS_UNKNOWN:
|
||||
std::cout << "info: Unknown chroma subsampling" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
std::cout << "info: chroma subsampling: " + chroma_sub_sampling << std::endl;
|
||||
|
||||
if (subsampling == ROCJPEG_CSS_440 || subsampling == ROCJPEG_CSS_411) {
|
||||
std::cout << "The chroma sub-sampling is not supported by VCN Hardware" << std::endl;
|
||||
if (is_dir) {
|
||||
std::cout << std::endl;
|
||||
continue;
|
||||
} else
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
switch (output_format) {
|
||||
case ROCJPEG_OUTPUT_NATIVE:
|
||||
switch (subsampling) {
|
||||
case ROCJPEG_CSS_444:
|
||||
num_channels = 3;
|
||||
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = widths[0];
|
||||
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = output_image.pitch[0] * heights[0];
|
||||
break;
|
||||
case ROCJPEG_CSS_422:
|
||||
num_channels = 1;
|
||||
output_image.pitch[0] = widths[0] * 2;
|
||||
channel_sizes[0] = output_image.pitch[0] * heights[0];
|
||||
break;
|
||||
case ROCJPEG_CSS_420:
|
||||
num_channels = 2;
|
||||
output_image.pitch[1] = output_image.pitch[0] = widths[0];
|
||||
channel_sizes[0] = output_image.pitch[0] * heights[0];
|
||||
channel_sizes[1] = output_image.pitch[1] * (heights[0] >> 1);
|
||||
break;
|
||||
case ROCJPEG_CSS_400:
|
||||
num_channels = 1;
|
||||
output_image.pitch[0] = widths[0];
|
||||
channel_sizes[0] = output_image.pitch[0] * heights[0];
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unknown chroma subsampling!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_YUV_PLANAR:
|
||||
if (subsampling == ROCJPEG_CSS_400) {
|
||||
num_channels = 1;
|
||||
output_image.pitch[0] = widths[0];
|
||||
channel_sizes[0] = output_image.pitch[0] * heights[0];
|
||||
} else {
|
||||
num_channels = 3;
|
||||
output_image.pitch[0] = widths[0];
|
||||
output_image.pitch[1] = widths[1];
|
||||
output_image.pitch[2] = widths[2];
|
||||
channel_sizes[0] = output_image.pitch[0] * heights[0];
|
||||
channel_sizes[1] = output_image.pitch[1] * heights[1];
|
||||
channel_sizes[2] = output_image.pitch[2] * heights[2];
|
||||
}
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_Y:
|
||||
num_channels = 1;
|
||||
output_image.pitch[0] = widths[0];
|
||||
channel_sizes[0] = output_image.pitch[0] * heights[0];
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_RGB:
|
||||
num_channels = 1;
|
||||
output_image.pitch[0] = widths[0] * 3;
|
||||
channel_sizes[0] = output_image.pitch[0] * heights[0];
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unknown output format!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
// allocate memory for each channel
|
||||
for (int i = 0; i < num_channels; i++) {
|
||||
CHECK_HIP(hipMalloc(&output_image.channel[i], channel_sizes[i]));
|
||||
}
|
||||
|
||||
std::cout << "info: decoding started, please wait! ... " << std::endl;
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
CHECK_ROCJPEG(rocJpegDecode(rocjpeg_handle, reinterpret_cast<uint8_t*>(file_data[counter].data()), file_size, output_format, &output_image));
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double> decoder_time = end_time - start_time;
|
||||
double time_per_image = decoder_time.count() * 1000;
|
||||
double ips = (1 / time_per_image) * 1000;
|
||||
double mpixels = ((double)widths[0] * (double)heights[0] / 1000000) * ips;
|
||||
image_count++;
|
||||
|
||||
if (dump_output_frames) {
|
||||
std::string::size_type const p(base_file_name.find_last_of('.'));
|
||||
std::string file_name_no_ext = base_file_name.substr(0, p);
|
||||
std::string file_extension;
|
||||
switch (output_format) {
|
||||
case ROCJPEG_OUTPUT_NATIVE:
|
||||
file_extension = "native";
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_YUV_PLANAR:
|
||||
file_extension = "yuv";
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_Y:
|
||||
file_extension = "y";
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_RGB:
|
||||
file_extension = "rgb";
|
||||
break;
|
||||
default:
|
||||
file_extension = "";
|
||||
break;
|
||||
}
|
||||
|
||||
std::string file_name_for_saving = output_file_path + "//" + file_name_no_ext + "_" + std::to_string(widths[0]) + "x"
|
||||
+ std::to_string(heights[0]) + "." + file_extension;
|
||||
std::string image_save_path = is_dir ? file_name_for_saving : output_file_path;
|
||||
SaveImage(image_save_path, &output_image, widths[0], heights[0], subsampling, output_format);
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_channels; i++) {
|
||||
if (output_image.channel[i] != nullptr) {
|
||||
CHECK_HIP(hipFree((void*)output_image.channel[i]));
|
||||
output_image.channel[i] = nullptr;
|
||||
output_image.pitch[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "info: total decoded images: " << image_count << std::endl;
|
||||
std::cout << "info: average processing time per image (ms): " << time_per_image << std::endl;
|
||||
std::cout << "info: average images per sec: " << (1 / time_per_image) * 1000 << std::endl;
|
||||
std::cout << "info: total elapsed time (s): " << decoder_time.count() << std::endl;
|
||||
|
||||
if (is_dir) {
|
||||
std::cout << std::endl;
|
||||
total_images_all += image_count;
|
||||
time_per_image_all += time_per_image;
|
||||
image_per_sec_all += ips;
|
||||
m_pixels_all += mpixels;
|
||||
}
|
||||
counter++;
|
||||
}
|
||||
|
||||
if (is_dir) {
|
||||
std::cout << "info: total decoded images: " << total_images_all << std::endl;
|
||||
if (total_images_all) {
|
||||
std::cout << "info: average processing time per image (ms): " << time_per_image_all / total_images_all << std::endl;
|
||||
std::cout << "info: average decoded images per sec: " << image_per_sec_all / total_images_all << std::endl;
|
||||
std::cout << "info: average decoded mpixels per sec: " << m_pixels_all / total_images_all << std::endl;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
CHECK_ROCJPEG(rocJpegDestroy(rocjpeg_handle));
|
||||
std::cout << "info: decoding completed!" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "rocjpeg_api_handle.h"
|
||||
#include "rocjpeg_commons.h"
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle)
|
||||
//! Create the decoder object based on backend and device_id. A handle to the created decoder is returned
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegCreate(RocJpegBackend backend, int device_id, RocJpegHandle *handle) {
|
||||
if (handle == nullptr) {
|
||||
return ROCJPEG_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
RocJpegHandle rocjpeg_handle = nullptr;
|
||||
try {
|
||||
rocjpeg_handle = new RocJpegDecoderHandle(backend, device_id);
|
||||
} catch(const std::exception& e) {
|
||||
ERR(STR("Failed to init the rocJPEG handle, ") + STR(e.what()));
|
||||
return ROCJPEG_STATUS_NOT_INITIALIZED;
|
||||
}
|
||||
*handle = rocjpeg_handle;
|
||||
return static_cast<RocJpegDecoderHandle *>(rocjpeg_handle)->rocjpeg_decoder->InitializeDecoder();
|
||||
}
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle)
|
||||
//! Release the decoder object and resources.
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegDestroy(RocJpegHandle handle) {
|
||||
if (handle == nullptr) {
|
||||
return ROCJPEG_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
auto rocjpeg_handle = static_cast<RocJpegDecoderHandle*>(handle);
|
||||
delete rocjpeg_handle;
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length,
|
||||
//! int *num_components, RocJpegChromaSubsampling *subsampling, int *widths, int *heights)
|
||||
//! Retrieve the image info, including channel, width and height of each component, and chroma subsampling.
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegGetImageInfo(RocJpegHandle handle, const uint8_t *data, size_t length, uint8_t *num_components,
|
||||
RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights) {
|
||||
if (handle == nullptr || data == nullptr || num_components == nullptr ||
|
||||
subsampling == nullptr || widths == nullptr || heights == nullptr) {
|
||||
return ROCJPEG_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS;
|
||||
auto rocjpeg_handle = static_cast<RocJpegDecoderHandle*>(handle);
|
||||
try {
|
||||
rocjpeg_status = rocjpeg_handle->rocjpeg_decoder->GetImageInfo(data, length, num_components, subsampling, widths, heights);
|
||||
} catch (const std::exception& e) {
|
||||
rocjpeg_handle->CaptureError(e.what());
|
||||
ERR(e.what());
|
||||
return ROCJPEG_STATUS_RUNTIME_ERROR;
|
||||
}
|
||||
|
||||
return rocjpeg_status;
|
||||
}
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination, hipStream_t stream);
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Decodes single image based on the backend used to create the rocJpeg handle in rocJpegCreate API.
|
||||
//! Destination buffers should be large enough to be able to store output of specified format. These buffers should be pre-allocted by the user in the device memories.
|
||||
//! For each color plane (channel) sizes could be retrieved for image using rocJpegGetImageInfo API
|
||||
//! and minimum required memory buffer for each plane is plane_height * plane_pitch where plane_pitch >= plane_width for
|
||||
//! planar output formats and plane_pitch >= plane_width * num_components for interleaved output format.
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegDecode(RocJpegHandle handle, const uint8_t *data, size_t length, RocJpegOutputFormat output_format,
|
||||
RocJpegImage *destination) {
|
||||
|
||||
if (handle == nullptr || data == nullptr) {
|
||||
return ROCJPEG_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS;
|
||||
auto rocjpeg_handle = static_cast<RocJpegDecoderHandle*>(handle);
|
||||
try {
|
||||
rocjpeg_status = rocjpeg_handle->rocjpeg_decoder->Decode(data, length, output_format, destination);
|
||||
} catch (const std::exception& e) {
|
||||
rocjpeg_handle->CaptureError(e.what());
|
||||
ERR(e.what());
|
||||
return ROCJPEG_STATUS_RUNTIME_ERROR;
|
||||
}
|
||||
|
||||
return rocjpeg_status;
|
||||
}
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn extern const char* ROCDECAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status);
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Return name of the specified error code in text form.
|
||||
/*****************************************************************************************************/
|
||||
extern const char* ROCJPEGAPI rocJpegGetErrorName(RocJpegStatus rocjpeg_status) {
|
||||
switch (rocjpeg_status) {
|
||||
case ROCJPEG_STATUS_SUCCESS:
|
||||
return "ROCJPEG_STATUS_SUCCESS";
|
||||
case ROCJPEG_STATUS_NOT_INITIALIZED:
|
||||
return "ROCJPEG_STATUS_NOT_INITIALIZED";
|
||||
case ROCJPEG_STATUS_INVALID_PARAMETER:
|
||||
return "ROCJPEG_STATUS_INVALID_PARAMETER";
|
||||
case ROCJPEG_STATUS_BAD_JPEG:
|
||||
return "ROCJPEG_STATUS_BAD_JPEG";
|
||||
case ROCJPEG_STATUS_JPEG_NOT_SUPPORTED:
|
||||
return "ROCJPEG_STATUS_JPEG_NOT_SUPPORTED";
|
||||
case ROCJPEG_STATUS_EXECUTION_FAILED:
|
||||
return "ROCJPEG_STATUS_EXECUTION_FAILED";
|
||||
case ROCJPEG_STATUS_ARCH_MISMATCH:
|
||||
return "ROCJPEG_STATUS_ARCH_MISMATCH";
|
||||
case ROCJPEG_STATUS_INTERNAL_ERROR:
|
||||
return "ROCJPEG_STATUS_INTERNAL_ERROR";
|
||||
case ROCJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED:
|
||||
return "ROCJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED";
|
||||
case ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED:
|
||||
return "ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED";
|
||||
case ROCJPEG_STATUS_RUNTIME_ERROR:
|
||||
return "ROCJPEG_STATUS_RUNTIME_ERROR";
|
||||
case ROCJPEG_STATUS_OUTOF_MEMORY:
|
||||
return "ROCJPEG_STATUS_OUTOF_MEMORY";
|
||||
case ROCJPEG_STATUS_NOT_IMPLEMENTED:
|
||||
return "ROCJPEG_STATUS_NOT_IMPLEMENTED";
|
||||
default:
|
||||
return "UNKNOWN_ERROR";
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format);
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Resets and initializes batch decoder for working on the batches of specified size
|
||||
//! Should be called once for decoding batches of this specific size, also use to reset failed batches
|
||||
//! \return ROCJPEG_STATUS_SUCCESS if successful
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegDecodeBatchedInitialize(RocJpegHandle handle, int batch_size, int max_cpu_threads, RocJpegOutputFormat output_format) {
|
||||
return ROCJPEG_STATUS_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
/*****************************************************************************************************/
|
||||
//! \fn RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations, hipStream_t stream);
|
||||
//! \ingroup group_amd_rocjpeg
|
||||
//! Decodes batch of images. Output buffers should be large enough to be able to store
|
||||
//! outputs of specified format, see single image decoding description for details. Call to
|
||||
//! rocjpegDecodeBatchedInitialize() is required prior to this call, batch size is expected to be the same as
|
||||
//! parameter to this batch initialization function.
|
||||
//! \return ROCJPEG_STATUS_SUCCESS if successful
|
||||
/*****************************************************************************************************/
|
||||
RocJpegStatus ROCJPEGAPI rocJpegDecodeBatched(RocJpegHandle handle, const uint8_t *data, const size_t *lengths, RocJpegImage *destinations) {
|
||||
return ROCJPEG_STATUS_NOT_IMPLEMENTED;
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ROC_JPEG_HANDLE_H
|
||||
#define ROC_JPEG_HANDLE_H
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "rocjpeg_decoder.h"
|
||||
|
||||
/**
|
||||
* @brief RocJpegHandle class
|
||||
*
|
||||
*/
|
||||
class RocJpegDecoderHandle {
|
||||
public:
|
||||
explicit RocJpegDecoderHandle(RocJpegBackend backend, int device_id) : rocjpeg_decoder(std::make_shared<ROCJpegDecoder>(backend, device_id)) {};
|
||||
~RocJpegDecoderHandle() { ClearErrors(); }
|
||||
std::shared_ptr<ROCJpegDecoder> rocjpeg_decoder;
|
||||
bool NoError() { return error_.empty(); }
|
||||
const char* ErrorMsg() { return error_.c_str(); }
|
||||
void CaptureError(const std::string& err_msg) { error_ = err_msg; }
|
||||
private:
|
||||
void ClearErrors() { error_ = "";}
|
||||
std::string error_;
|
||||
};
|
||||
|
||||
#endif //ROC_JPEG_HANDLE_H
|
||||
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ROC_JPEG_COMMON_H_
|
||||
#define ROC_JPEG_COMMON_H_
|
||||
|
||||
#pragma once
|
||||
#include <stdexcept>
|
||||
#include <exception>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
|
||||
#define TOSTR(X) std::to_string(static_cast<int>(X))
|
||||
#define STR(X) std::string(X)
|
||||
|
||||
#if DBGINFO
|
||||
#define INFO(X) std::clog << "[INF] " << " {" << __func__ <<"} " << " " << X << std::endl;
|
||||
#else
|
||||
#define INFO(X) ;
|
||||
#endif
|
||||
#define ERR(X) std::cerr << "[ERR] " << " {" << __func__ <<"} " << " " << X << std::endl;
|
||||
|
||||
#define CHECK_VAAPI(call) { \
|
||||
VAStatus va_status = (call); \
|
||||
if (va_status != VA_STATUS_SUCCESS) { \
|
||||
std::cerr << "VAAPI failure: " << #call << " failed with status: " << std::hex << "0x" << va_status << std::dec << " = '" << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\
|
||||
return ROCJPEG_STATUS_EXECUTION_FAILED; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHECK_HIP(call) { \
|
||||
hipError_t hip_status = (call); \
|
||||
if (hip_status != hipSuccess) { \
|
||||
std::cerr << "HIP failure: 'status: " << hipGetErrorName(hip_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\
|
||||
return ROCJPEG_STATUS_EXECUTION_FAILED; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHECK_ROCJPEG(call) { \
|
||||
RocJpegStatus rocjpeg_status = (call); \
|
||||
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { \
|
||||
std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) << " at " << __FILE__ << ":" << __LINE__ << std::endl;\
|
||||
return rocjpeg_status; \
|
||||
} \
|
||||
}
|
||||
|
||||
static bool GetEnv(const char *name, char *value, size_t valueSize) {
|
||||
const char *v = getenv(name);
|
||||
if (v) {
|
||||
strncpy(value, v, valueSize);
|
||||
value[valueSize - 1] = 0;
|
||||
}
|
||||
return v ? true : false;
|
||||
}
|
||||
|
||||
static inline int align(int value, int alignment) {
|
||||
return (value + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
class RocJpegException : public std::exception {
|
||||
public:
|
||||
explicit RocJpegException(const std::string& message):message_(message){}
|
||||
virtual const char* what() const throw() override {
|
||||
return message_.c_str();
|
||||
}
|
||||
private:
|
||||
std::string message_;
|
||||
};
|
||||
|
||||
#define THROW(X) throw RocJpegException(" { "+std::string(__func__)+" } " + X);
|
||||
|
||||
#endif //ROC_JPEG_COMMON_H_
|
||||
@@ -0,0 +1,331 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "rocjpeg_decoder.h"
|
||||
|
||||
ROCJpegDecoder::ROCJpegDecoder(RocJpegBackend backend, int device_id) :
|
||||
num_devices_{0}, device_id_ {device_id}, hip_stream_ {0}, backend_{backend}, hip_interop_{} {}
|
||||
|
||||
ROCJpegDecoder::~ROCJpegDecoder() {
|
||||
if (hip_stream_) {
|
||||
hipError_t hip_status = hipStreamDestroy(hip_stream_);
|
||||
}
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::InitHIP(int device_id) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
CHECK_HIP(hipGetDeviceCount(&num_devices_));
|
||||
if (num_devices_ < 1) {
|
||||
ERR("ERROR: Failed to find any GPU!");
|
||||
return ROCJPEG_STATUS_NOT_INITIALIZED;
|
||||
}
|
||||
if (device_id >= num_devices_) {
|
||||
ERR("ERROR: the requested device_id is not found!");
|
||||
return ROCJPEG_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
CHECK_HIP(hipSetDevice(device_id));
|
||||
CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop_, device_id));
|
||||
CHECK_HIP(hipStreamCreate(&hip_stream_));
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::InitializeDecoder() {
|
||||
RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS;
|
||||
rocjpeg_status = InitHIP(device_id_);
|
||||
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) {
|
||||
ERR("ERROR: Failed to initilize the HIP!");
|
||||
return rocjpeg_status;
|
||||
}
|
||||
if (backend_ == ROCJPEG_BACKEND_HARDWARE) {
|
||||
rocjpeg_status = jpeg_vaapi_decoder_.InitializeDecoder(hip_dev_prop_.gcnArchName);
|
||||
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) {
|
||||
ERR("ERROR: Failed to initialize the VA-API JPEG decoder!");
|
||||
return rocjpeg_status;
|
||||
}
|
||||
} else if (backend_ == ROCJPEG_BACKEND_HYBRID) {
|
||||
return ROCJPEG_STATUS_NOT_IMPLEMENTED;
|
||||
}
|
||||
return rocjpeg_status;
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::Decode(const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS;
|
||||
|
||||
if (!jpeg_parser_.ParseJpegStream(data, length)) {
|
||||
ERR("ERROR: Failed to parse the jpeg stream!");
|
||||
return ROCJPEG_STATUS_BAD_JPEG;
|
||||
}
|
||||
|
||||
const JpegStreamParameters *jpeg_stream_params = jpeg_parser_.GetJpegStreamParameters();
|
||||
VASurfaceID current_surface_id;
|
||||
CHECK_ROCJPEG(jpeg_vaapi_decoder_.SubmitDecode(jpeg_stream_params, current_surface_id));
|
||||
|
||||
if (destination != nullptr) {
|
||||
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
|
||||
CHECK_ROCJPEG(jpeg_vaapi_decoder_.SyncSurface(current_surface_id));
|
||||
CHECK_ROCJPEG(jpeg_vaapi_decoder_.ExportSurface(current_surface_id, va_drm_prime_surface_desc));
|
||||
CHECK_ROCJPEG(GetHipInteropMem(va_drm_prime_surface_desc));
|
||||
|
||||
uint16_t chroma_height = 0;
|
||||
CHECK_ROCJPEG(GetChromaHeight(jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height));
|
||||
|
||||
switch (output_format) {
|
||||
case ROCJPEG_OUTPUT_NATIVE:
|
||||
// copy the native decoded output buffers from interop memory directly to the destination buffers
|
||||
CHECK_ROCJPEG(CopyLuma(destination, jpeg_stream_params->picture_parameter_buffer.picture_height));
|
||||
CHECK_ROCJPEG(CopyChroma(destination, chroma_height));
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_YUV_PLANAR:
|
||||
CHECK_ROCJPEG(GetPlanarYUVOutputFormat(jpeg_stream_params->picture_parameter_buffer.picture_width,
|
||||
jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height, destination));
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_Y:
|
||||
CHECK_ROCJPEG(GetYOutputFormat(jpeg_stream_params->picture_parameter_buffer.picture_width,
|
||||
jpeg_stream_params->picture_parameter_buffer.picture_height, destination));
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_RGB:
|
||||
CHECK_ROCJPEG(ColorConvertToRGB(jpeg_stream_params->picture_parameter_buffer.picture_width,
|
||||
jpeg_stream_params->picture_parameter_buffer.picture_height, destination));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
CHECK_HIP(hipStreamSynchronize(hip_stream_));
|
||||
|
||||
CHECK_ROCJPEG(ReleaseHipInteropMem(current_surface_id));
|
||||
}
|
||||
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::GetImageInfo(const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights){
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
if (widths == nullptr || heights == nullptr || num_components == nullptr) {
|
||||
return ROCJPEG_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
if (!jpeg_parser_.ParseJpegStream(data, length)) {
|
||||
ERR("ERROR: jpeg parser failed!");
|
||||
return ROCJPEG_STATUS_BAD_JPEG;
|
||||
}
|
||||
const JpegStreamParameters *jpeg_stream_params = jpeg_parser_.GetJpegStreamParameters();
|
||||
*num_components = jpeg_stream_params->picture_parameter_buffer.num_components;
|
||||
widths[0] = jpeg_stream_params->picture_parameter_buffer.picture_width;
|
||||
heights[0] = jpeg_stream_params->picture_parameter_buffer.picture_height;
|
||||
widths[3] = 0;
|
||||
heights[3] = 0;
|
||||
|
||||
switch (jpeg_stream_params->chroma_subsampling) {
|
||||
case CSS_444:
|
||||
*subsampling = ROCJPEG_CSS_444;
|
||||
widths[2] = widths[1] = widths[0];
|
||||
heights[2] = heights[1] = heights[0];
|
||||
break;
|
||||
case CSS_422:
|
||||
*subsampling = ROCJPEG_CSS_422;
|
||||
widths[2] = widths[1] = widths[0] >> 1;
|
||||
heights[2] = heights[1] = heights[0];
|
||||
break;
|
||||
case CSS_420:
|
||||
*subsampling = ROCJPEG_CSS_420;
|
||||
widths[2] = widths[1] = widths[0] >> 1;
|
||||
heights[2] = heights[1] = heights[0] >> 1;
|
||||
break;
|
||||
case CSS_400:
|
||||
*subsampling = ROCJPEG_CSS_400;
|
||||
widths[3] = widths[2] = widths[1] = 0;
|
||||
heights[3] = heights[2] = heights[1] = 0;
|
||||
break;
|
||||
case CSS_411:
|
||||
*subsampling = ROCJPEG_CSS_411;
|
||||
widths[2] = widths[1] = widths[0] >> 2;
|
||||
heights[2] = heights[1] = heights[0];
|
||||
break;
|
||||
case CSS_440:
|
||||
*subsampling = ROCJPEG_CSS_440;
|
||||
widths[2] = widths[1] = widths[0] >> 1;
|
||||
heights[2] = heights[1] = heights[0] >> 1;
|
||||
break;
|
||||
default:
|
||||
*subsampling = ROCJPEG_CSS_UNKNOWN;
|
||||
break;
|
||||
}
|
||||
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::GetHipInteropMem(VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc) {
|
||||
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
|
||||
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
|
||||
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
|
||||
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
|
||||
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
|
||||
CHECK_HIP(hipImportExternalMemory(&hip_interop_.hip_ext_mem, &external_mem_handle_desc));
|
||||
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&hip_interop_.hip_mapped_device_mem, hip_interop_.hip_ext_mem, &external_mem_buffer_desc));
|
||||
|
||||
hip_interop_.surface_format = va_drm_prime_surface_desc.fourcc;
|
||||
hip_interop_.width = va_drm_prime_surface_desc.width;
|
||||
hip_interop_.height = va_drm_prime_surface_desc.height;
|
||||
hip_interop_.offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
|
||||
hip_interop_.offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
|
||||
hip_interop_.offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
|
||||
hip_interop_.pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
|
||||
hip_interop_.pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
|
||||
hip_interop_.pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
|
||||
hip_interop_.num_layers = va_drm_prime_surface_desc.num_layers;
|
||||
|
||||
for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
|
||||
close(va_drm_prime_surface_desc.objects[i].fd);
|
||||
}
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::ReleaseHipInteropMem(VASurfaceID current_surface_id) {
|
||||
if (hip_interop_.hip_mapped_device_mem != nullptr) {
|
||||
CHECK_HIP(hipFree(hip_interop_.hip_mapped_device_mem));
|
||||
}
|
||||
if (hip_interop_.hip_ext_mem != nullptr) {
|
||||
CHECK_HIP(hipDestroyExternalMemory(hip_interop_.hip_ext_mem));
|
||||
}
|
||||
memset((void*)&hip_interop_, 0, sizeof(hip_interop_));
|
||||
|
||||
CHECK_ROCJPEG(jpeg_vaapi_decoder_.ReleaseSurface(current_surface_id));
|
||||
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::CopyLuma(RocJpegImage *destination, uint16_t picture_height) {
|
||||
if (hip_interop_.pitch[0] != 0 && destination->pitch[0] != 0 && destination->channel[0] != nullptr) {
|
||||
if (destination->pitch[0] == hip_interop_.pitch[0]) {
|
||||
uint32_t luma_size = destination->pitch[0] * picture_height;
|
||||
CHECK_HIP(hipMemcpyDtoDAsync(destination->channel[0], hip_interop_.hip_mapped_device_mem, luma_size, hip_stream_));
|
||||
} else {
|
||||
CHECK_HIP(hipMemcpy2DAsync(destination->channel[0], destination->pitch[0], hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0],
|
||||
destination->pitch[0], picture_height, hipMemcpyDeviceToDevice, hip_stream_));
|
||||
}
|
||||
}
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::CopyChroma(RocJpegImage *destination, uint16_t chroma_height) {
|
||||
// copy channel1
|
||||
if (hip_interop_.pitch[1] != 0 && destination->pitch[1] != 0 && destination->channel[1] != nullptr) {
|
||||
uint32_t chroma_size = destination->pitch[1] * chroma_height;
|
||||
uint8_t *layer1_mem = hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1];
|
||||
if (destination->pitch[1] == hip_interop_.pitch[1]) {
|
||||
CHECK_HIP(hipMemcpyDtoDAsync(destination->channel[1], layer1_mem, chroma_size, hip_stream_));
|
||||
} else {
|
||||
CHECK_HIP(hipMemcpy2DAsync(destination->channel[1], destination->pitch[1], layer1_mem, hip_interop_.pitch[1],
|
||||
destination->pitch[1], chroma_height, hipMemcpyDeviceToDevice, hip_stream_));
|
||||
}
|
||||
}
|
||||
// copy channel2
|
||||
if (hip_interop_.pitch[2] != 0 && destination->pitch[2] != 0 && destination->channel[2] != nullptr) {
|
||||
uint32_t chroma_size = destination->pitch[2] * chroma_height;
|
||||
uint8_t *layer2_mem = hip_interop_.hip_mapped_device_mem + hip_interop_.offset[2];
|
||||
if (destination->pitch[2] == hip_interop_.pitch[2]) {
|
||||
CHECK_HIP(hipMemcpyDtoDAsync(destination->channel[2], layer2_mem, chroma_size, hip_stream_));
|
||||
} else {
|
||||
CHECK_HIP(hipMemcpy2DAsync(destination->channel[2], destination->pitch[2], layer2_mem, hip_interop_.pitch[2],
|
||||
destination->pitch[2], chroma_height, hipMemcpyDeviceToDevice, hip_stream_));
|
||||
}
|
||||
}
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::GetChromaHeight(uint16_t picture_height, uint16_t &chroma_height) {
|
||||
switch (hip_interop_.surface_format) {
|
||||
case VA_FOURCC_NV12: /*NV12: two-plane 8-bit YUV 4:2:0*/
|
||||
chroma_height = picture_height >> 1;
|
||||
break;
|
||||
case VA_FOURCC_444P: /*444P: three-plane 8-bit YUV 4:4:4*/
|
||||
chroma_height = picture_height;
|
||||
break;
|
||||
case VA_FOURCC_Y800: /*Y800: one-plane 8-bit greyscale YUV 4:0:0*/
|
||||
chroma_height = 0;
|
||||
break;
|
||||
case ROCJPEG_FOURCC_YUYV: /*YUYV: one-plane packed 8-bit YUV 4:2:2. Four bytes per pair of pixels: Y, U, Y, V*/
|
||||
chroma_height = picture_height;
|
||||
break;
|
||||
default:
|
||||
return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED;
|
||||
}
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::ColorConvertToRGB(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) {
|
||||
switch (hip_interop_.surface_format) {
|
||||
case VA_FOURCC_444P:
|
||||
ColorConvertYUV444ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
|
||||
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0], hip_interop_.offset[1]);
|
||||
break;
|
||||
case ROCJPEG_FOURCC_YUYV:
|
||||
ColorConvertYUYVToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
|
||||
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
|
||||
break;
|
||||
case VA_FOURCC_NV12:
|
||||
ColorConvertNV12ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
|
||||
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0],
|
||||
hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1], hip_interop_.pitch[1]);
|
||||
break;
|
||||
case VA_FOURCC_Y800:
|
||||
ColorConvertYUV400ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
|
||||
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
|
||||
break;
|
||||
default:
|
||||
ERR("ERROR! surface format is not supported!");
|
||||
return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED;
|
||||
}
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::GetPlanarYUVOutputFormat(uint32_t picture_width, uint32_t picture_height, uint16_t chroma_height, RocJpegImage *destination) {
|
||||
if (hip_interop_.surface_format == ROCJPEG_FOURCC_YUYV) {
|
||||
// Extract the packed YUYV and copy them into the first, second, and thrid channels of the destination.
|
||||
ConvertPackedYUYVToPlanarYUV(hip_stream_, picture_width, picture_height, destination->channel[0], destination->channel[1], destination->channel[2],
|
||||
destination->pitch[0], destination->pitch[1], hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
|
||||
} else {
|
||||
CHECK_ROCJPEG(CopyLuma(destination, picture_height));
|
||||
if (hip_interop_.surface_format == VA_FOURCC_NV12) {
|
||||
// Extract the interleaved UV channels and copy them into the second and thrid channels of the destination.
|
||||
ConvertInterleavedUVToPlanarUV(hip_stream_, picture_width >> 1, picture_height >> 1, destination->channel[1], destination->channel[2],
|
||||
destination->pitch[1], hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1] , hip_interop_.pitch[1]);
|
||||
} else {
|
||||
CHECK_ROCJPEG(CopyChroma(destination, chroma_height));
|
||||
}
|
||||
}
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus ROCJpegDecoder::GetYOutputFormat(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) {
|
||||
if (hip_interop_.surface_format == ROCJPEG_FOURCC_YUYV) {
|
||||
ExtractYFromPackedYUYV(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
|
||||
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
|
||||
} else {
|
||||
CHECK_ROCJPEG(CopyLuma(destination, picture_height));
|
||||
}
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ROC_JPEG_DECODER_H_
|
||||
#define ROC_JPEG_DECODER_H_
|
||||
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include "../api/rocjpeg.h"
|
||||
#include "rocjpeg_parser.h"
|
||||
#include "rocjpeg_commons.h"
|
||||
#include "rocjpeg_vaapi_decoder.h"
|
||||
#include "rocjpeg_hip_kernels.h"
|
||||
|
||||
struct HipInteropDeviceMem {
|
||||
hipExternalMemory_t hip_ext_mem; // Interface to the vaapi-hip interop
|
||||
uint8_t* hip_mapped_device_mem; // Mapped device memory for the YUV plane
|
||||
uint32_t surface_format; // Pixel format fourcc of the whole surface
|
||||
uint32_t width; // Width of the surface in pixels.
|
||||
uint32_t height; // Height of the surface in pixels.
|
||||
uint32_t offset[3]; // Offset of each plane
|
||||
uint32_t pitch[3]; // Pitch of each plane
|
||||
uint32_t num_layers; // Number of layers making up the surface
|
||||
};
|
||||
|
||||
class ROCJpegDecoder {
|
||||
public:
|
||||
ROCJpegDecoder(RocJpegBackend backend = ROCJPEG_BACKEND_HARDWARE, int device_id = 0);
|
||||
~ROCJpegDecoder();
|
||||
RocJpegStatus InitializeDecoder();
|
||||
RocJpegStatus GetImageInfo(const uint8_t *data, size_t length, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights);
|
||||
RocJpegStatus Decode(const uint8_t *data, size_t length, RocJpegOutputFormat output_format, RocJpegImage *destination);
|
||||
private:
|
||||
RocJpegStatus InitHIP(int device_id);
|
||||
RocJpegStatus GetHipInteropMem(VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc);
|
||||
RocJpegStatus ReleaseHipInteropMem(VASurfaceID current_surface_id);
|
||||
RocJpegStatus GetChromaHeight(uint16_t picture_height, uint16_t &chroma_height);
|
||||
RocJpegStatus CopyLuma(RocJpegImage *destination, uint16_t picture_height);
|
||||
RocJpegStatus CopyChroma(RocJpegImage *destination, uint16_t chroma_height);
|
||||
RocJpegStatus ColorConvertToRGB(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination);
|
||||
RocJpegStatus GetPlanarYUVOutputFormat(uint32_t picture_width, uint32_t picture_height, uint16_t chroma_height, RocJpegImage *destination);
|
||||
RocJpegStatus GetYOutputFormat(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination);
|
||||
int num_devices_;
|
||||
int device_id_;
|
||||
hipDeviceProp_t hip_dev_prop_;
|
||||
hipStream_t hip_stream_;
|
||||
std::mutex mutex_;
|
||||
JpegParser jpeg_parser_;
|
||||
RocJpegBackend backend_;
|
||||
RocJpegVappiDecoder jpeg_vaapi_decoder_;
|
||||
HipInteropDeviceMem hip_interop_;
|
||||
};
|
||||
|
||||
#endif //ROC_JPEG_DECODER_H_
|
||||
@@ -0,0 +1,961 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "rocjpeg_hip_kernels.h"
|
||||
|
||||
__device__ __forceinline__ uint32_t hipPack(float4 src) {
|
||||
return __builtin_amdgcn_cvt_pk_u8_f32(src.w, 3,
|
||||
__builtin_amdgcn_cvt_pk_u8_f32(src.z, 2,
|
||||
__builtin_amdgcn_cvt_pk_u8_f32(src.y, 1,
|
||||
__builtin_amdgcn_cvt_pk_u8_f32(src.x, 0, 0))));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ float hipUnpack0(uint32_t src) {
|
||||
return (float)(src & 0xFF);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ float hipUnpack1(uint32_t src) {
|
||||
return (float)((src >> 8) & 0xFF);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ float hipUnpack2(uint32_t src) {
|
||||
return (float)((src >> 16) & 0xFF);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ float hipUnpack3(uint32_t src) {
|
||||
return (float)((src >> 24) & 0xFF);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ float4 hipUnpack(uint32_t src) {
|
||||
return make_float4(hipUnpack0(src), hipUnpack1(src), hipUnpack2(src), hipUnpack3(src));
|
||||
}
|
||||
|
||||
__global__ void ColorConvertYUV444ToRGBKernel(uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
|
||||
uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_y_image, const uint8_t *src_u_image, const uint8_t *src_v_image,
|
||||
uint32_t src_yuv_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_yuv_image_stride_in_bytes_comp) {
|
||||
|
||||
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
if ((x < dst_width_comp) && (y < dst_height_comp)) {
|
||||
uint32_t src_y0_idx = y * src_yuv_image_stride_in_bytes_comp + (x << 3);
|
||||
uint32_t src_y1_idx = src_y0_idx + src_yuv_image_stride_in_bytes;
|
||||
|
||||
|
||||
uint2 y0 = *((uint2 *)(&src_y_image[src_y0_idx]));
|
||||
uint2 y1 = *((uint2 *)(&src_y_image[src_y1_idx]));
|
||||
|
||||
uint2 u0 = *((uint2 *)(&src_u_image[src_y0_idx]));
|
||||
uint2 u1 = *((uint2 *)(&src_u_image[src_y1_idx]));
|
||||
|
||||
uint2 v0 = *((uint2 *)(&src_v_image[src_y0_idx]));
|
||||
uint2 v1 = *((uint2 *)(&src_v_image[src_y1_idx]));
|
||||
|
||||
uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24);
|
||||
uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes;
|
||||
|
||||
float2 cr = make_float2( 0.0000f, 1.5748f);
|
||||
float2 cg = make_float2(-0.1873f, -0.4681f);
|
||||
float2 cb = make_float2( 1.8556f, 0.0000f);
|
||||
float3 yuv;
|
||||
DUINT6 rgb0, rgb1;
|
||||
float4 f;
|
||||
|
||||
yuv.x = hipUnpack0(y0.x);
|
||||
yuv.y = hipUnpack0(u0.x);
|
||||
yuv.z = hipUnpack0(v0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(y0.x);
|
||||
yuv.y = hipUnpack1(u0.x);
|
||||
yuv.z = hipUnpack1(v0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
rgb0.data[0] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(y0.x);
|
||||
yuv.y = hipUnpack2(u0.x);
|
||||
yuv.z = hipUnpack2(v0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
rgb0.data[1] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(y0.x);
|
||||
yuv.y = hipUnpack3(u0.x);
|
||||
yuv.z = hipUnpack3(v0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
rgb0.data[2] = hipPack(f);
|
||||
|
||||
yuv.x = hipUnpack0(y0.y);
|
||||
yuv.y = hipUnpack0(u0.y);
|
||||
yuv.z = hipUnpack0(v0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(y0.y);
|
||||
yuv.y = hipUnpack1(u0.y);
|
||||
yuv.z = hipUnpack1(v0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
rgb0.data[3] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(y0.y);
|
||||
yuv.y = hipUnpack2(u0.y);
|
||||
yuv.z = hipUnpack2(v0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
rgb0.data[4] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(y0.y);
|
||||
yuv.y = hipUnpack3(u0.y);
|
||||
yuv.z = hipUnpack3(v0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
rgb0.data[5] = hipPack(f);
|
||||
|
||||
yuv.x = hipUnpack0(y1.x);
|
||||
yuv.y = hipUnpack0(u1.x);
|
||||
yuv.z = hipUnpack0(v1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(y1.x);
|
||||
yuv.y = hipUnpack1(u1.x);
|
||||
yuv.z = hipUnpack1(v1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
rgb1.data[0] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(y1.x);
|
||||
yuv.y = hipUnpack2(u1.x);
|
||||
yuv.z = hipUnpack2(v1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
rgb1.data[1] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(y1.x);
|
||||
yuv.y = hipUnpack3(u1.x);
|
||||
yuv.z = hipUnpack3(v1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
rgb1.data[2] = hipPack(f);
|
||||
|
||||
yuv.x = hipUnpack0(y1.y);
|
||||
yuv.y = hipUnpack0(u1.y);
|
||||
yuv.z = hipUnpack0(v1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(y1.y);
|
||||
yuv.y = hipUnpack1(u1.y);
|
||||
yuv.z = hipUnpack1(v1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
rgb1.data[3] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(y1.y);
|
||||
yuv.y = hipUnpack2(u1.y);
|
||||
yuv.z = hipUnpack2(v1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
rgb1.data[4] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(y1.y);
|
||||
yuv.y = hipUnpack3(u1.y);
|
||||
yuv.z = hipUnpack3(v1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
rgb1.data[5] = hipPack(f);
|
||||
|
||||
*((DUINT6 *)(&dst_image[rgb0_idx])) = rgb0;
|
||||
*((DUINT6 *)(&dst_image[rgb1_idx])) = rgb1;
|
||||
}
|
||||
}
|
||||
|
||||
void ColorConvertYUV444ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image,
|
||||
uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset) {
|
||||
|
||||
int32_t local_threads_x = 16;
|
||||
int32_t local_threads_y = 4;
|
||||
int32_t global_threads_x = (dst_width + 7) >> 3;
|
||||
int32_t global_threads_y = (dst_height + 1) >> 1;
|
||||
|
||||
uint32_t dst_width_comp = (dst_width + 7) / 8;
|
||||
uint32_t dst_height_comp = (dst_height + 1) / 2;
|
||||
uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2;
|
||||
uint32_t src_yuv_image_stride_in_bytes_comp = src_yuv_image_stride_in_bytes * 2;
|
||||
|
||||
ColorConvertYUV444ToRGBKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
|
||||
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, (uint8_t *)dst_image,
|
||||
dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_yuv_image, src_yuv_image + src_u_image_offset,
|
||||
src_yuv_image + (src_u_image_offset * 2), src_yuv_image_stride_in_bytes,
|
||||
dst_width_comp, dst_height_comp, src_yuv_image_stride_in_bytes_comp);
|
||||
}
|
||||
|
||||
__global__ void ColorConvertYUYVToRGBKernel(uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp,
|
||||
const uint8_t *src_image, uint32_t src_image_stride_in_bytes, uint32_t src_image_stride_in_bytes_comp,
|
||||
uint32_t dst_width_comp, uint32_t dst_height_comp) {
|
||||
|
||||
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
if ((x < dst_width_comp) && (y < dst_height_comp)) {
|
||||
uint32_t l0_idx = y * src_image_stride_in_bytes_comp + (x << 4);
|
||||
uint32_t l1_idx = l0_idx + src_image_stride_in_bytes;
|
||||
uint4 l0 = *((uint4 *)(&src_image[l0_idx]));
|
||||
uint4 l1 = *((uint4 *)(&src_image[l1_idx]));
|
||||
|
||||
uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24);
|
||||
uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes;
|
||||
|
||||
float4 f;
|
||||
|
||||
uint2 py0, py1;
|
||||
uint2 pu0, pu1;
|
||||
uint2 pv0, pv1;
|
||||
|
||||
py0.x = hipPack(make_float4(hipUnpack0(l0.x), hipUnpack2(l0.x), hipUnpack0(l0.y), hipUnpack2(l0.y)));
|
||||
py0.y = hipPack(make_float4(hipUnpack0(l0.z), hipUnpack2(l0.z), hipUnpack0(l0.w), hipUnpack2(l0.w)));
|
||||
py1.x = hipPack(make_float4(hipUnpack0(l1.x), hipUnpack2(l1.x), hipUnpack0(l1.y), hipUnpack2(l1.y)));
|
||||
py1.y = hipPack(make_float4(hipUnpack0(l1.z), hipUnpack2(l1.z), hipUnpack0(l1.w), hipUnpack2(l1.w)));
|
||||
pu0.x = hipPack(make_float4(hipUnpack1(l0.x), hipUnpack1(l0.x), hipUnpack1(l0.y), hipUnpack1(l0.y)));
|
||||
pu0.y = hipPack(make_float4(hipUnpack1(l0.z), hipUnpack1(l0.z), hipUnpack1(l0.w), hipUnpack1(l0.w)));
|
||||
pu1.x = hipPack(make_float4(hipUnpack1(l1.x), hipUnpack1(l1.x), hipUnpack1(l1.y), hipUnpack1(l1.y)));
|
||||
pu1.y = hipPack(make_float4(hipUnpack1(l1.z), hipUnpack1(l1.z), hipUnpack1(l1.w), hipUnpack1(l1.w)));
|
||||
pv0.x = hipPack(make_float4(hipUnpack3(l0.x), hipUnpack3(l0.x), hipUnpack3(l0.y), hipUnpack3(l0.y)));
|
||||
pv0.y = hipPack(make_float4(hipUnpack3(l0.z), hipUnpack3(l0.z), hipUnpack3(l0.w), hipUnpack3(l0.w)));
|
||||
pv1.x = hipPack(make_float4(hipUnpack3(l1.x), hipUnpack3(l1.x), hipUnpack3(l1.y), hipUnpack3(l1.y)));
|
||||
pv1.y = hipPack(make_float4(hipUnpack3(l1.z), hipUnpack3(l1.z), hipUnpack3(l1.w), hipUnpack3(l1.w)));
|
||||
|
||||
float2 cr = make_float2( 0.0000f, 1.5748f);
|
||||
float2 cg = make_float2(-0.1873f, -0.4681f);
|
||||
float2 cb = make_float2( 1.8556f, 0.0000f);
|
||||
float3 yuv;
|
||||
DUINT6 prgb0, prgb1;
|
||||
|
||||
yuv.x = hipUnpack0(py0.x);
|
||||
yuv.y = hipUnpack0(pu0.x);
|
||||
yuv.z = hipUnpack0(pv0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(py0.x);
|
||||
yuv.y = hipUnpack1(pu0.x);
|
||||
yuv.z = hipUnpack1(pv0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
prgb0.data[0] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(py0.x);
|
||||
yuv.y = hipUnpack2(pu0.x);
|
||||
yuv.z = hipUnpack2(pv0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
prgb0.data[1] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(py0.x);
|
||||
yuv.y = hipUnpack3(pu0.x);
|
||||
yuv.z = hipUnpack3(pv0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
prgb0.data[2] = hipPack(f);
|
||||
|
||||
yuv.x = hipUnpack0(py0.y);
|
||||
yuv.y = hipUnpack0(pu0.y);
|
||||
yuv.z = hipUnpack0(pv0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(py0.y);
|
||||
yuv.y = hipUnpack1(pu0.y);
|
||||
yuv.z = hipUnpack1(pv0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
prgb0.data[3] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(py0.y);
|
||||
yuv.y = hipUnpack2(pu0.y);
|
||||
yuv.z = hipUnpack2(pv0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
prgb0.data[4] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(py0.y);
|
||||
yuv.y = hipUnpack3(pu0.y);
|
||||
yuv.z = hipUnpack3(pv0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
prgb0.data[5] = hipPack(f);
|
||||
|
||||
yuv.x = hipUnpack0(py1.x);
|
||||
yuv.y = hipUnpack0(pu1.x);
|
||||
yuv.z = hipUnpack0(pv1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(py1.x);
|
||||
yuv.y = hipUnpack1(pu1.x);
|
||||
yuv.z = hipUnpack1(pv1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
prgb1.data[0] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(py1.x);
|
||||
yuv.y = hipUnpack2(pu1.x);
|
||||
yuv.z = hipUnpack2(pv1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
prgb1.data[1] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(py1.x);
|
||||
yuv.y = hipUnpack3(pu1.x);
|
||||
yuv.z = hipUnpack3(pv1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
prgb1.data[2] = hipPack(f);
|
||||
|
||||
yuv.x = hipUnpack0(py1.y);
|
||||
yuv.y = hipUnpack0(pu1.y);
|
||||
yuv.z = hipUnpack0(pv1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(py1.y);
|
||||
yuv.y = hipUnpack1(pu1.y);
|
||||
yuv.z = hipUnpack1(pv1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
prgb1.data[3] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(py1.y);
|
||||
yuv.y = hipUnpack2(pu1.y);
|
||||
yuv.z = hipUnpack2(pv1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
prgb1.data[4] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(py1.y);
|
||||
yuv.y = hipUnpack3(pu1.y);
|
||||
yuv.z = hipUnpack3(pv1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
prgb1.data[5] = hipPack(f);
|
||||
|
||||
*((DUINT6 *)(&dst_image[rgb0_idx])) = prgb0;
|
||||
*((DUINT6 *)(&dst_image[rgb1_idx])) = prgb1;
|
||||
}
|
||||
}
|
||||
|
||||
void ColorConvertYUYVToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
|
||||
const uint8_t *src_image, uint32_t src_image_stride_in_bytes) {
|
||||
int32_t local_threads_x = 16;
|
||||
int32_t local_threads_y = 4;
|
||||
int32_t global_threads_x = (dst_width + 7) >> 3;
|
||||
int32_t global_threads_y = (dst_height + 1) >> 1;
|
||||
|
||||
uint32_t dst_width_comp = (dst_width + 7) / 8;
|
||||
uint32_t dst_height_comp = (dst_height + 1) / 2;
|
||||
uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2;
|
||||
uint32_t src_image_stride_in_bytes_comp = src_image_stride_in_bytes * 2;
|
||||
|
||||
ColorConvertYUYVToRGBKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
|
||||
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, (uint8_t *)dst_image,
|
||||
dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_image, src_image_stride_in_bytes,
|
||||
src_image_stride_in_bytes_comp, dst_width_comp, dst_height_comp);
|
||||
}
|
||||
|
||||
__global__ void ColorConvertNV12ToRGBKernel(uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp,
|
||||
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes,
|
||||
const uint8_t *src_chroma_image, uint32_t src_chroma_image_stride_in_bytes,
|
||||
uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_luma_image_stride_in_bytes_comp) {
|
||||
|
||||
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
if ((x < dst_width_comp) && (y < dst_height_comp)) {
|
||||
uint32_t src_y0_idx = y * src_luma_image_stride_in_bytes_comp + (x << 3);
|
||||
uint32_t src_y1_idx = src_y0_idx + src_luma_image_stride_in_bytes;
|
||||
uint32_t src_uv_idx = y * src_chroma_image_stride_in_bytes + (x << 3);
|
||||
uint2 y0 = *((uint2 *)(&src_luma_image[src_y0_idx]));
|
||||
uint2 y1 = *((uint2 *)(&src_luma_image[src_y1_idx]));
|
||||
uint2 uv = *((uint2 *)(&src_chroma_image[src_uv_idx]));
|
||||
|
||||
uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24);
|
||||
uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes;
|
||||
|
||||
float4 f;
|
||||
uint2 u0, u1;
|
||||
uint2 v0, v1;
|
||||
|
||||
f.x = hipUnpack0(uv.x);
|
||||
f.y = f.x;
|
||||
f.z = hipUnpack2(uv.x);
|
||||
f.w = f.z;
|
||||
u0.x = hipPack(f);
|
||||
|
||||
f.x = hipUnpack0(uv.y);
|
||||
f.y = f.x;
|
||||
f.z = hipUnpack2(uv.y);
|
||||
f.w = f.z;
|
||||
u0.y = hipPack(f);
|
||||
|
||||
u1.x = u0.x;
|
||||
u1.y = u0.y;
|
||||
|
||||
f.x = hipUnpack1(uv.x);
|
||||
f.y = f.x;
|
||||
f.z = hipUnpack3(uv.x);
|
||||
f.w = f.z;
|
||||
v0.x = hipPack(f);
|
||||
|
||||
f.x = hipUnpack1(uv.y);
|
||||
f.y = f.x;
|
||||
f.z = hipUnpack3(uv.y);
|
||||
f.w = f.z;
|
||||
v0.y = hipPack(f);
|
||||
|
||||
v1.x = v0.x;
|
||||
v1.y = v0.y;
|
||||
|
||||
float2 cr = make_float2( 0.0000f, 1.5748f);
|
||||
float2 cg = make_float2(-0.1873f, -0.4681f);
|
||||
float2 cb = make_float2( 1.8556f, 0.0000f);
|
||||
float3 yuv;
|
||||
DUINT6 rgb0, rgb1;
|
||||
|
||||
yuv.x = hipUnpack0(y0.x);
|
||||
yuv.y = hipUnpack0(u0.x);
|
||||
yuv.z = hipUnpack0(v0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(y0.x);
|
||||
yuv.y = hipUnpack1(u0.x);
|
||||
yuv.z = hipUnpack1(v0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
rgb0.data[0] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(y0.x);
|
||||
yuv.y = hipUnpack2(u0.x);
|
||||
yuv.z = hipUnpack2(v0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
rgb0.data[1] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(y0.x);
|
||||
yuv.y = hipUnpack3(u0.x);
|
||||
yuv.z = hipUnpack3(v0.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
rgb0.data[2] = hipPack(f);
|
||||
|
||||
yuv.x = hipUnpack0(y0.y);
|
||||
yuv.y = hipUnpack0(u0.y);
|
||||
yuv.z = hipUnpack0(v0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(y0.y);
|
||||
yuv.y = hipUnpack1(u0.y);
|
||||
yuv.z = hipUnpack1(v0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
rgb0.data[3] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(y0.y);
|
||||
yuv.y = hipUnpack2(u0.y);
|
||||
yuv.z = hipUnpack2(v0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
rgb0.data[4] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(y0.y);
|
||||
yuv.y = hipUnpack3(u0.y);
|
||||
yuv.z = hipUnpack3(v0.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
rgb0.data[5] = hipPack(f);
|
||||
|
||||
yuv.x = hipUnpack0(y1.x);
|
||||
yuv.y = hipUnpack0(u1.x);
|
||||
yuv.z = hipUnpack0(v1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(y1.x);
|
||||
yuv.y = hipUnpack1(u1.x);
|
||||
yuv.z = hipUnpack1(v1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
rgb1.data[0] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(y1.x);
|
||||
yuv.y = hipUnpack2(u1.x);
|
||||
yuv.z = hipUnpack2(v1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
rgb1.data[1] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(y1.x);
|
||||
yuv.y = hipUnpack3(u1.x);
|
||||
yuv.z = hipUnpack3(v1.x);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
rgb1.data[2] = hipPack(f);
|
||||
|
||||
yuv.x = hipUnpack0(y1.y);
|
||||
yuv.y = hipUnpack0(u1.y);
|
||||
yuv.z = hipUnpack0(v1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.x = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.y = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.y = fmaf(cg.y, yuv.z, f.y);
|
||||
f.z = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack1(y1.y);
|
||||
yuv.y = hipUnpack1(u1.y);
|
||||
yuv.z = hipUnpack1(v1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.w = fmaf(cr.y, yuv.z, yuv.x);
|
||||
rgb1.data[3] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.x = fmaf(cg.y, yuv.z, f.x);
|
||||
f.y = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack2(y1.y);
|
||||
yuv.y = hipUnpack2(u1.y);
|
||||
yuv.z = hipUnpack2(v1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.z = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.w = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.w = fmaf(cg.y, yuv.z, f.w);
|
||||
rgb1.data[4] = hipPack(f);
|
||||
|
||||
f.x = fmaf(cb.x, yuv.y, yuv.x);
|
||||
yuv.x = hipUnpack3(y1.y);
|
||||
yuv.y = hipUnpack3(u1.y);
|
||||
yuv.z = hipUnpack3(v1.y);
|
||||
yuv.y -= 128.0f;
|
||||
yuv.z -= 128.0f;
|
||||
f.y = fmaf(cr.y, yuv.z, yuv.x);
|
||||
f.z = fmaf(cg.x, yuv.y, yuv.x);
|
||||
f.z = fmaf(cg.y, yuv.z, f.z);
|
||||
f.w = fmaf(cb.x, yuv.y, yuv.x);
|
||||
rgb1.data[5] = hipPack(f);
|
||||
|
||||
*((DUINT6 *)(&dst_image[rgb0_idx])) = rgb0;
|
||||
*((DUINT6 *)(&dst_image[rgb1_idx])) = rgb1;
|
||||
}
|
||||
}
|
||||
|
||||
void ColorConvertNV12ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
|
||||
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes,
|
||||
const uint8_t *src_chroma_image, uint32_t src_chroma_image_stride_in_bytes) {
|
||||
int32_t local_threads_x = 16;
|
||||
int32_t local_threads_y = 4;
|
||||
int32_t global_threads_x = (dst_width + 7) >> 3;
|
||||
int32_t global_threads_y = (dst_height + 1) >> 1;
|
||||
|
||||
uint32_t dst_width_comp = (dst_width + 7) / 8;
|
||||
uint32_t dst_height_comp = (dst_height + 1) / 2;
|
||||
uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2;
|
||||
uint32_t src_luma_image_stride_in_bytes_comp = src_luma_image_stride_in_bytes * 2;
|
||||
|
||||
ColorConvertNV12ToRGBKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
|
||||
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, dst_image_stride_in_bytes,
|
||||
dst_image_stride_in_bytes_comp, src_luma_image, src_luma_image_stride_in_bytes, src_chroma_image,
|
||||
src_chroma_image_stride_in_bytes, dst_width_comp, dst_height_comp, src_luma_image_stride_in_bytes_comp);
|
||||
}
|
||||
|
||||
__global__ void ColorConvertYUV400ToRGBKernel(uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp,
|
||||
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes,
|
||||
uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_luma_image_stride_in_bytes_comp) {
|
||||
|
||||
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
if ((x < dst_width_comp) && (y < dst_height_comp)) {
|
||||
uint32_t src_y0_idx = y * src_luma_image_stride_in_bytes_comp + (x << 3);
|
||||
uint32_t src_y1_idx = src_y0_idx + src_luma_image_stride_in_bytes;
|
||||
|
||||
uint2 y0 = *((uint2 *)(&src_luma_image[src_y0_idx]));
|
||||
uint2 y1 = *((uint2 *)(&src_luma_image[src_y1_idx]));
|
||||
|
||||
uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24);
|
||||
uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes;
|
||||
|
||||
DUINT6 rgb0, rgb1;
|
||||
|
||||
uint8_t y0_b0, y0_b1, y0_b2, y0_b3, y0_b4, y0_b5, y0_b6, y0_b7;
|
||||
uint8_t y1_b0, y1_b1, y1_b2, y1_b3, y1_b4, y1_b5, y1_b6, y1_b7;
|
||||
|
||||
y0_b0 = hipUnpack0(y0.x);
|
||||
y0_b1 = hipUnpack1(y0.x);
|
||||
y0_b2 = hipUnpack2(y0.x);
|
||||
y0_b3 = hipUnpack3(y0.x);
|
||||
y0_b4 = hipUnpack0(y0.y);
|
||||
y0_b5 = hipUnpack1(y0.y);
|
||||
y0_b6 = hipUnpack2(y0.y);
|
||||
y0_b7 = hipUnpack3(y0.y);
|
||||
|
||||
y1_b0 = hipUnpack0(y1.x);
|
||||
y1_b1 = hipUnpack1(y1.x);
|
||||
y1_b2 = hipUnpack2(y1.x);
|
||||
y1_b3 = hipUnpack3(y1.x);
|
||||
y1_b4 = hipUnpack0(y1.y);
|
||||
y1_b5 = hipUnpack1(y1.y);
|
||||
y1_b6 = hipUnpack2(y1.y);
|
||||
y1_b7 = hipUnpack3(y1.y);
|
||||
|
||||
rgb0.data[0] = hipPack(make_float4(y0_b0, y0_b0, y0_b0, y0_b1));
|
||||
rgb0.data[1] = hipPack(make_float4(y0_b1, y0_b1, y0_b2, y0_b2));
|
||||
rgb0.data[2] = hipPack(make_float4(y0_b2, y0_b3, y0_b3, y0_b3));
|
||||
rgb0.data[3] = hipPack(make_float4(y0_b4, y0_b4, y0_b4, y0_b5));
|
||||
rgb0.data[4] = hipPack(make_float4(y0_b5, y0_b5, y0_b6, y0_b6));
|
||||
rgb0.data[5] = hipPack(make_float4(y0_b6, y0_b7, y0_b7, y0_b7));
|
||||
|
||||
rgb1.data[0] = hipPack(make_float4(y1_b0, y1_b0, y1_b0, y1_b1));
|
||||
rgb1.data[1] = hipPack(make_float4(y1_b1, y1_b1, y1_b2, y1_b2));
|
||||
rgb1.data[2] = hipPack(make_float4(y1_b2, y1_b3, y1_b3, y1_b3));
|
||||
rgb1.data[3] = hipPack(make_float4(y1_b4, y1_b4, y1_b4, y1_b5));
|
||||
rgb1.data[4] = hipPack(make_float4(y1_b5, y1_b5, y1_b6, y1_b6));
|
||||
rgb1.data[5] = hipPack(make_float4(y1_b6, y1_b7, y1_b7, y1_b7));
|
||||
|
||||
*((DUINT6 *)(&dst_image[rgb0_idx])) = rgb0;
|
||||
*((DUINT6 *)(&dst_image[rgb1_idx])) = rgb1;
|
||||
}
|
||||
}
|
||||
|
||||
void ColorConvertYUV400ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
|
||||
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes){
|
||||
|
||||
int32_t local_threads_x = 16;
|
||||
int32_t local_threads_y = 4;
|
||||
int32_t global_threads_x = (dst_width + 7) >> 3;
|
||||
int32_t global_threads_y = (dst_height + 1) >> 1;
|
||||
|
||||
uint32_t dst_width_comp = (dst_width + 7) / 8;
|
||||
uint32_t dst_height_comp = (dst_height + 1) / 2;
|
||||
uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2;
|
||||
uint32_t src_luma_image_stride_in_bytes_comp = src_luma_image_stride_in_bytes * 2;
|
||||
|
||||
ColorConvertYUV400ToRGBKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
|
||||
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, dst_image_stride_in_bytes,
|
||||
dst_image_stride_in_bytes_comp, src_luma_image, src_luma_image_stride_in_bytes, dst_width_comp, dst_height_comp,
|
||||
src_luma_image_stride_in_bytes_comp);
|
||||
|
||||
}
|
||||
|
||||
|
||||
__global__ void ConvertInterleavedUVToPlanarUVKernel(uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image1, uint8_t *dst_image2, uint32_t dst_image_stride_in_bytes,
|
||||
const uint8_t *src_image, uint32_t src_image_stride_in_bytes) {
|
||||
|
||||
int32_t x = (hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x) * 8;
|
||||
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
if (x >= dst_width || y >= dst_height) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t src_idx = y * src_image_stride_in_bytes + x + x;
|
||||
uint32_t dst_idx = y * dst_image_stride_in_bytes + x;
|
||||
|
||||
uint4 src = *((uint4 *)(&src_image[src_idx]));
|
||||
uint2 dst1, dst2;
|
||||
|
||||
dst1.x = hipPack(make_float4(hipUnpack0(src.x), hipUnpack2(src.x), hipUnpack0(src.y), hipUnpack2(src.y)));
|
||||
dst1.y = hipPack(make_float4(hipUnpack0(src.z), hipUnpack2(src.z), hipUnpack0(src.w), hipUnpack2(src.w)));
|
||||
dst2.x = hipPack(make_float4(hipUnpack1(src.x), hipUnpack3(src.x), hipUnpack1(src.y), hipUnpack3(src.y)));
|
||||
dst2.y = hipPack(make_float4(hipUnpack1(src.z), hipUnpack3(src.z), hipUnpack1(src.w), hipUnpack3(src.w)));
|
||||
|
||||
*((uint2 *)(&dst_image1[dst_idx])) = dst1;
|
||||
*((uint2 *)(&dst_image2[dst_idx])) = dst2;
|
||||
|
||||
}
|
||||
void ConvertInterleavedUVToPlanarUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image1, uint8_t *dst_image2, uint32_t dst_image_stride_in_bytes,
|
||||
const uint8_t *src_image1, uint32_t src_image1_stride_in_bytes) {
|
||||
int32_t local_threads_x = 16, local_threads_y = 16;
|
||||
int32_t global_threads_x = (dst_width + 7) >> 3;
|
||||
int32_t global_threads_y = dst_height;
|
||||
|
||||
ConvertInterleavedUVToPlanarUVKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
|
||||
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image1, dst_image2,
|
||||
dst_image_stride_in_bytes, src_image1, src_image1_stride_in_bytes);
|
||||
|
||||
}
|
||||
|
||||
__global__ void ExtractYFromPackedYUYVKernel(uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *destination_y, uint32_t dst_luma_stride_in_bytes,
|
||||
const uint8_t *src_image, uint32_t src_image_stride_in_bytes,
|
||||
uint32_t dst_width_comp) {
|
||||
|
||||
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
if (x < dst_width_comp && y < dst_height) {
|
||||
uint32_t src_idx = y * src_image_stride_in_bytes + (x << 4);
|
||||
uint32_t dst_idx = y * dst_luma_stride_in_bytes + (x << 3);
|
||||
|
||||
uint4 src = *((uint4 *)(&src_image[src_idx]));
|
||||
uint2 dst_y;
|
||||
dst_y.x = hipPack(make_float4(hipUnpack0(src.x), hipUnpack2(src.x), hipUnpack0(src.y), hipUnpack2(src.y)));
|
||||
dst_y.y = hipPack(make_float4(hipUnpack0(src.z), hipUnpack2(src.z), hipUnpack0(src.w), hipUnpack2(src.w)));
|
||||
|
||||
*((uint2 *)(&destination_y[dst_idx])) = dst_y;
|
||||
}
|
||||
}
|
||||
|
||||
void ExtractYFromPackedYUYV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *destination_y, uint32_t dst_luma_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes) {
|
||||
int32_t local_threads_x = 16;
|
||||
int32_t local_threads_y = 4;
|
||||
int32_t global_threads_x = (dst_width + 7) >> 3;
|
||||
int32_t global_threads_y = dst_height;
|
||||
|
||||
uint32_t dst_width_comp = (dst_width + 7) / 8;
|
||||
|
||||
ExtractYFromPackedYUYVKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
|
||||
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, destination_y,
|
||||
dst_luma_stride_in_bytes, src_image, src_image_stride_in_bytes, dst_width_comp);
|
||||
}
|
||||
|
||||
__global__ void ConvertPackedYUYVToPlanarYUVKernel(uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *destination_y, uint8_t *destination_u, uint8_t *destination_v, uint32_t dst_luma_stride_in_bytes, uint32_t dst_chroma_stride_in_bytes,
|
||||
const uint8_t *src_image, uint32_t src_image_stride_in_bytes,
|
||||
uint32_t dst_width_comp) {
|
||||
|
||||
int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
if ((x < dst_width_comp && y < dst_height)) {
|
||||
uint32_t src_idx = y * src_image_stride_in_bytes + (x << 4);
|
||||
uint32_t dst_y_idx = y * dst_luma_stride_in_bytes + (x << 3);
|
||||
uint32_t dst_uv_idx = y * dst_chroma_stride_in_bytes + (x << 2);
|
||||
|
||||
uint4 src = *((uint4 *)(&src_image[src_idx]));
|
||||
uint2 dst_y;
|
||||
uint32_t dst_u, dst_v;
|
||||
|
||||
dst_y.x = hipPack(make_float4(hipUnpack0(src.x), hipUnpack2(src.x), hipUnpack0(src.y), hipUnpack2(src.y)));
|
||||
dst_y.y = hipPack(make_float4(hipUnpack0(src.z), hipUnpack2(src.z), hipUnpack0(src.w), hipUnpack2(src.w)));
|
||||
dst_u = hipPack(make_float4(hipUnpack1(src.x), hipUnpack1(src.y), hipUnpack1(src.z), hipUnpack1(src.w)));
|
||||
dst_v = hipPack(make_float4(hipUnpack3(src.x), hipUnpack3(src.y), hipUnpack3(src.z), hipUnpack3(src.w)));
|
||||
|
||||
*((uint2 *)(&destination_y[dst_y_idx])) = dst_y;
|
||||
*((uint32_t *)(&destination_u[dst_uv_idx])) = dst_u;
|
||||
*((uint32_t *)(&destination_v[dst_uv_idx])) = dst_v;
|
||||
}
|
||||
}
|
||||
|
||||
void ConvertPackedYUYVToPlanarYUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *destination_y, uint8_t *destination_u, uint8_t *destination_v, uint32_t dst_luma_stride_in_bytes, uint32_t dst_chroma_stride_in_bytes,
|
||||
const uint8_t *src_image, uint32_t src_image_stride_in_bytes) {
|
||||
|
||||
int32_t local_threads_x = 16;
|
||||
int32_t local_threads_y = 4;
|
||||
int32_t global_threads_x = (dst_width + 7) >> 3;
|
||||
int32_t global_threads_y = dst_height;
|
||||
uint32_t dst_width_comp = (dst_width + 7) / 8;
|
||||
|
||||
ConvertPackedYUYVToPlanarYUVKernel<<<dim3(ceil(static_cast<float>(global_threads_x) / local_threads_x), ceil(static_cast<float>(global_threads_y) / local_threads_y)),
|
||||
dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, destination_y, destination_u,
|
||||
destination_v, dst_luma_stride_in_bytes, dst_chroma_stride_in_bytes, src_image, src_image_stride_in_bytes, dst_width_comp);
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ROC_JPEG_HIP_KERNELS_H_
|
||||
#define ROC_JPEG_HIP_KERNELS_H_
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
void ColorConvertYUV444ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image,
|
||||
uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset);
|
||||
|
||||
void ColorConvertYUYVToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
|
||||
const uint8_t *src_image, uint32_t src_image_stride_in_bytes);
|
||||
|
||||
void ColorConvertNV12ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
|
||||
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes,
|
||||
const uint8_t *src_chroma_image, uint32_t src_chroma_image_stride_in_bytes);
|
||||
|
||||
void ColorConvertYUV400ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image, uint32_t dst_image_stride_in_bytes,
|
||||
const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes);
|
||||
|
||||
void ConvertInterleavedUVToPlanarUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *dst_image1, uint8_t *dst_image2, uint32_t dst_image_stride_in_bytes,
|
||||
const uint8_t *src_image1, uint32_t src_image1_stride_in_bytes);
|
||||
|
||||
void ExtractYFromPackedYUYV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *destination_y, uint32_t dst_luma_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes);
|
||||
|
||||
void ConvertPackedYUYVToPlanarYUV(hipStream_t stream, uint32_t dst_width, uint32_t dst_height,
|
||||
uint8_t *destination_y, uint8_t *destination_u, uint8_t *destination_v, uint32_t dst_luma_stride_in_bytes,
|
||||
uint32_t dst_chroma_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes);
|
||||
|
||||
typedef struct UINT6TYPE {
|
||||
uint data[6];
|
||||
} DUINT6;
|
||||
|
||||
#endif //ROC_JPEG_HIP_KERNELS_H_
|
||||
@@ -0,0 +1,387 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "rocjpeg_parser.h"
|
||||
|
||||
JpegParser::JpegParser() : stream_{nullptr}, stream_end_{nullptr}, stream_length_{0},
|
||||
jpeg_stream_parameters_{{}} {
|
||||
}
|
||||
|
||||
JpegParser::~JpegParser() {
|
||||
stream_ = nullptr;
|
||||
stream_end_ = nullptr;
|
||||
stream_length_ = 0;
|
||||
}
|
||||
|
||||
bool JpegParser::ParseJpegStream(const uint8_t *jpeg_stream, uint32_t jpeg_stream_size) {
|
||||
if (jpeg_stream == nullptr) {
|
||||
ERR("invalid argument!");
|
||||
return false;
|
||||
}
|
||||
|
||||
stream_ = jpeg_stream;
|
||||
stream_length_ = jpeg_stream_size;
|
||||
stream_end_ = stream_ + stream_length_;
|
||||
|
||||
jpeg_stream_parameters_ = {};
|
||||
bool soi_marker_found = false;
|
||||
bool sos_marker_found = false;
|
||||
bool dht_marker_found = false;
|
||||
bool dqt_marker_found = false;
|
||||
uint8_t marker;
|
||||
const uint8_t *next_chunck;
|
||||
int32_t chuck_len;
|
||||
|
||||
// The first two bytes of a JPEG must be 0XFFD8
|
||||
if (*stream_ != 0xFF || *(stream_ + 1) != SOI) {
|
||||
ERR("Invalid JPEG!");
|
||||
return false;
|
||||
}
|
||||
|
||||
soi_marker_found = ParseSOI();
|
||||
if (!soi_marker_found) {
|
||||
ERR("failed to find the SOI marker!");
|
||||
}
|
||||
|
||||
while (!sos_marker_found && stream_ <= stream_end_) {
|
||||
while ((*stream_ == 0xFF))
|
||||
stream_++;
|
||||
marker = *stream_++;
|
||||
chuck_len = swap_bytes(stream_);
|
||||
next_chunck = stream_ + chuck_len;
|
||||
|
||||
switch (marker) {
|
||||
case SOF:
|
||||
if (!ParseSOF())
|
||||
return false;
|
||||
break;
|
||||
case DHT:
|
||||
if (!ParseDHT())
|
||||
return false;
|
||||
dht_marker_found = true;
|
||||
break;
|
||||
case DQT:
|
||||
if (!ParseDQT())
|
||||
return false;
|
||||
dqt_marker_found = true;
|
||||
break;
|
||||
case DRI:
|
||||
if (!ParseDRI())
|
||||
return false;
|
||||
break;
|
||||
case SOS:
|
||||
if (!ParseSOS())
|
||||
return false;
|
||||
sos_marker_found = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
stream_ = next_chunck;
|
||||
}
|
||||
|
||||
if (!dht_marker_found) {
|
||||
ERR("didn't find any Huffman table!");
|
||||
return false;
|
||||
}
|
||||
if (!dqt_marker_found) {
|
||||
ERR("didn't find any quantization table!");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ParseEOI())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool JpegParser::ParseSOI() {
|
||||
if (stream_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
while (!(*stream_ == 0xFF && *(stream_ + 1) == SOI)) {
|
||||
if (stream_ <= stream_end_) {
|
||||
stream_++;
|
||||
continue;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
stream_ += 2;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool JpegParser::ParseSOF() {
|
||||
uint32_t component_id, sampling_factor;
|
||||
uint8_t quantiser_table_selector;
|
||||
|
||||
if (stream_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.picture_height = swap_bytes(stream_ + 3);
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.picture_width = swap_bytes(stream_ + 5);
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.num_components = stream_[7];
|
||||
|
||||
if (jpeg_stream_parameters_.picture_parameter_buffer.num_components > NUM_COMPONENTS - 1) {
|
||||
ERR("invalid number of JPEG components!");
|
||||
return false;
|
||||
}
|
||||
|
||||
stream_ += 8;
|
||||
|
||||
for (int32_t i = 0; i < jpeg_stream_parameters_.picture_parameter_buffer.num_components; i++) {
|
||||
component_id = *stream_++;
|
||||
sampling_factor = *stream_++;
|
||||
quantiser_table_selector = *stream_++;
|
||||
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.components[i].component_id = component_id;
|
||||
if (quantiser_table_selector >= NUM_COMPONENTS) {
|
||||
ERR("invalid number of the quantization table!");
|
||||
return false;
|
||||
}
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.components[i].v_sampling_factor = sampling_factor & 0xF;
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.components[i].h_sampling_factor = sampling_factor >> 4;
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.components[i].quantiser_table_selector = quantiser_table_selector;
|
||||
}
|
||||
|
||||
uint8_t max_h_factor = jpeg_stream_parameters_.picture_parameter_buffer.components[0].h_sampling_factor;
|
||||
uint8_t max_v_factor = jpeg_stream_parameters_.picture_parameter_buffer.components[0].v_sampling_factor;
|
||||
|
||||
jpeg_stream_parameters_.slice_parameter_buffer.num_mcus = ((jpeg_stream_parameters_.picture_parameter_buffer.picture_width + max_h_factor * 8 - 1) / (max_h_factor * 8)) *
|
||||
((jpeg_stream_parameters_.picture_parameter_buffer.picture_height + max_v_factor * 8 - 1) / (max_v_factor * 8));
|
||||
|
||||
jpeg_stream_parameters_.chroma_subsampling = GetChromaSubsampling(jpeg_stream_parameters_.picture_parameter_buffer.components[0].h_sampling_factor,
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.components[1].h_sampling_factor,
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.components[2].h_sampling_factor,
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.components[0].v_sampling_factor,
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.components[1].v_sampling_factor,
|
||||
jpeg_stream_parameters_.picture_parameter_buffer.components[2].v_sampling_factor);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool JpegParser::ParseDQT() {
|
||||
int32_t quantization_table_index = 0;
|
||||
const uint8_t *dqt_block_end;
|
||||
|
||||
if (stream_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
dqt_block_end = stream_ + swap_bytes(stream_);
|
||||
stream_ += 2;
|
||||
|
||||
while (stream_ < dqt_block_end) {
|
||||
quantization_table_index = *stream_++;
|
||||
if (quantization_table_index >> 4) {
|
||||
ERR("16 bits quantization table is not supported!");
|
||||
return false;
|
||||
}
|
||||
if (quantization_table_index >= 4) {
|
||||
ERR("invalid number of quantization table!");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::memcpy(jpeg_stream_parameters_.quantization_matrix_buffer.quantiser_table[quantization_table_index & 0x0F], stream_, 64);
|
||||
jpeg_stream_parameters_.quantization_matrix_buffer.load_quantiser_table[quantization_table_index & 0x0F] = 1;
|
||||
|
||||
stream_ += 64;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool JpegParser::ParseDHT() {
|
||||
uint32_t count, i;
|
||||
int32_t length, index;
|
||||
uint8_t ac_huffman_table, huffman_table_id;
|
||||
|
||||
if (stream_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
length = swap_bytes(stream_) - 2;
|
||||
stream_ += 2;
|
||||
|
||||
while (length > 0) {
|
||||
index = *stream_++;
|
||||
|
||||
ac_huffman_table = index & 0xF0;
|
||||
huffman_table_id = index & 0x0F;
|
||||
|
||||
if (huffman_table_id >= HUFFMAN_TABLES) {
|
||||
ERR("invlaid number of Huffman table!");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ac_huffman_table) {
|
||||
std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].num_ac_codes, stream_, 16);
|
||||
} else {
|
||||
std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].num_dc_codes, stream_, 16);
|
||||
}
|
||||
|
||||
count = 0;
|
||||
for (i = 0; i < 16; i++) {
|
||||
count += *stream_++;
|
||||
}
|
||||
|
||||
if (ac_huffman_table) {
|
||||
if (count > AC_HUFFMAN_TABLE_VALUES_SIZE) {
|
||||
ERR("invalid AC Huffman table!");
|
||||
return false;
|
||||
}
|
||||
std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].ac_values, stream_, count);
|
||||
jpeg_stream_parameters_.huffman_table_buffer.load_huffman_table[huffman_table_id] = 1;
|
||||
} else {
|
||||
if (count > DC_HUFFMAN_TABLE_VALUES_SIZE) {
|
||||
ERR("invlaid DC Huffman table!")
|
||||
return false;
|
||||
}
|
||||
std::memcpy(jpeg_stream_parameters_.huffman_table_buffer.huffman_table[huffman_table_id].dc_values, stream_, count);
|
||||
jpeg_stream_parameters_.huffman_table_buffer.load_huffman_table[huffman_table_id] = 1;
|
||||
}
|
||||
|
||||
length -= 1;
|
||||
length -= 16;
|
||||
length -= count;
|
||||
stream_ += count;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool JpegParser::ParseSOS() {
|
||||
uint32_t component_id, table;
|
||||
|
||||
if (stream_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t num_components = stream_[2];
|
||||
|
||||
if (num_components > NUM_COMPONENTS - 1) {
|
||||
ERR("invalid number of component!")
|
||||
return false;
|
||||
}
|
||||
jpeg_stream_parameters_.slice_parameter_buffer.num_components = num_components;
|
||||
|
||||
stream_ += 3;
|
||||
for (int32_t i = 0; i < num_components; i++) {
|
||||
component_id = *stream_++;
|
||||
table = *stream_++;
|
||||
jpeg_stream_parameters_.slice_parameter_buffer.components[i].component_selector = component_id;
|
||||
jpeg_stream_parameters_.slice_parameter_buffer.components[i].dc_table_selector = ((table >> 4) & 0x0F);
|
||||
jpeg_stream_parameters_.slice_parameter_buffer.components[i].ac_table_selector = (table & 0x0F);
|
||||
|
||||
if ((table & 0xF) >= 4) {
|
||||
ERR("invalid number of AC Huffman table!");
|
||||
return false;
|
||||
}
|
||||
if ((table >> 4) >= 4) {
|
||||
ERR("invalid number of DC Huffman table!");
|
||||
return false;
|
||||
}
|
||||
if (component_id != jpeg_stream_parameters_.picture_parameter_buffer.components[i].component_id) {
|
||||
ERR("component id mismatch between SOS and SOF marker!");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
stream_ += 3;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool JpegParser::ParseDRI() {
|
||||
uint32_t length;
|
||||
|
||||
if (stream_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
length = swap_bytes(stream_);
|
||||
if (length != 4) {
|
||||
ERR("invalid size for DRI marker");
|
||||
return false;
|
||||
}
|
||||
|
||||
jpeg_stream_parameters_.slice_parameter_buffer.restart_interval = swap_bytes(stream_ + 2);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool JpegParser::ParseEOI() {
|
||||
|
||||
if (stream_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const uint8_t *stream_temp = stream_;
|
||||
while (stream_temp <= stream_end_ && !(*stream_temp == 0xFF && *(stream_temp + 1) == EOI)) {
|
||||
stream_temp++;
|
||||
continue;
|
||||
}
|
||||
|
||||
jpeg_stream_parameters_.slice_parameter_buffer.slice_data_size = stream_temp - stream_;
|
||||
jpeg_stream_parameters_.slice_data_buffer = stream_;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
ChromaSubsampling JpegParser::GetChromaSubsampling(uint8_t c1_h_sampling_factor, uint8_t c2_h_sampling_factor, uint8_t c3_h_sampling_factor,
|
||||
uint8_t c1_v_sampling_factor, uint8_t c2_v_sampling_factor, uint8_t c3_v_sampling_factor) {
|
||||
|
||||
ChromaSubsampling subsampling;
|
||||
|
||||
if ((c1_h_sampling_factor == 1 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
|
||||
c1_v_sampling_factor == 1 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) ||
|
||||
(c1_h_sampling_factor == 2 && c2_h_sampling_factor == 2 && c3_h_sampling_factor == 2 &&
|
||||
c1_v_sampling_factor == 2 && c2_v_sampling_factor == 2 && c3_v_sampling_factor == 2) ||
|
||||
(c1_h_sampling_factor == 4 && c2_h_sampling_factor == 4 && c3_h_sampling_factor == 4 &&
|
||||
c1_v_sampling_factor == 4 && c2_v_sampling_factor == 4 && c3_v_sampling_factor == 4)) {
|
||||
subsampling = CSS_444;
|
||||
} else if (c1_h_sampling_factor == 1 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
|
||||
c1_v_sampling_factor == 2 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) {
|
||||
subsampling = CSS_440;
|
||||
} else if ((c1_h_sampling_factor == 2 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
|
||||
c1_v_sampling_factor == 1 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) ||
|
||||
(c1_h_sampling_factor == 2 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
|
||||
c1_v_sampling_factor == 2 && c2_v_sampling_factor == 2 && c3_v_sampling_factor == 2) ||
|
||||
(c1_h_sampling_factor == 2 && c2_h_sampling_factor == 2 && c3_h_sampling_factor == 2 &&
|
||||
c1_v_sampling_factor == 2 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1)) {
|
||||
subsampling = CSS_422;
|
||||
} else if (c1_h_sampling_factor == 2 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
|
||||
c1_v_sampling_factor == 2 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) {
|
||||
subsampling = CSS_420;
|
||||
} else if (c1_h_sampling_factor == 4 && c2_h_sampling_factor == 1 && c3_h_sampling_factor == 1 &&
|
||||
c1_v_sampling_factor == 1 && c2_v_sampling_factor == 1 && c3_v_sampling_factor == 1) {
|
||||
subsampling = CSS_411;
|
||||
} else if ((c1_h_sampling_factor == 1 && c2_h_sampling_factor == 0 && c3_h_sampling_factor == 0 &&
|
||||
c1_v_sampling_factor == 1 && c2_v_sampling_factor == 0 && c3_v_sampling_factor == 0) ||
|
||||
(c1_h_sampling_factor == 4 && c2_h_sampling_factor == 0 && c3_h_sampling_factor == 0 &&
|
||||
c1_v_sampling_factor == 4 && c2_v_sampling_factor == 0 && c3_v_sampling_factor == 0)) {
|
||||
subsampling = CSS_400;
|
||||
} else {
|
||||
subsampling = CSS_UNKNOWN;
|
||||
}
|
||||
|
||||
return subsampling;
|
||||
}
|
||||
@@ -0,0 +1,177 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef ROC_JPEG_PARSER_H_
|
||||
#define ROC_JPEG_PARSER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
#include "rocjpeg_commons.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
#define NUM_COMPONENTS 4
|
||||
#define HUFFMAN_TABLES 2
|
||||
#define AC_HUFFMAN_TABLE_VALUES_SIZE 162
|
||||
#define DC_HUFFMAN_TABLE_VALUES_SIZE 12
|
||||
#define swap_bytes(x) (((x)[0] << 8) | (x)[1])
|
||||
|
||||
/***************************************************************/
|
||||
//! \enum enum JpegMarker
|
||||
//! common JPEG markers
|
||||
/***************************************************************/
|
||||
enum JpegMarkers {
|
||||
SOI = 0xD8, /* Start Of Image */
|
||||
SOF = 0xC0, /* Start Of Frame for a baseline DCT-based JPEG. */
|
||||
DHT = 0xC4, /* Define Huffman Table */
|
||||
DQT = 0xDB, /* Define Quantization Table */
|
||||
DRI = 0xDD, /* Define Restart Interval */
|
||||
SOS = 0xDA, /* Start of Scan */
|
||||
EOI = 0xD9, /* End Of Image */
|
||||
};
|
||||
|
||||
/***************************************************************/
|
||||
//! \struct Picture parameter for JPEG decoding.
|
||||
//! This structure holds information from the frame
|
||||
//! header and definitions from additional segments.
|
||||
/**************************************************************/
|
||||
typedef struct PictureParameterBufferType {
|
||||
uint16_t picture_width;
|
||||
uint16_t picture_height;
|
||||
struct {
|
||||
uint8_t component_id;
|
||||
uint8_t h_sampling_factor;
|
||||
uint8_t v_sampling_factor;
|
||||
uint8_t quantiser_table_selector;
|
||||
} components[255];
|
||||
uint8_t num_components;
|
||||
uint8_t color_space;
|
||||
uint32_t rotation;
|
||||
uint32_t reserved[7];
|
||||
} PictureParameterBuffer;
|
||||
|
||||
/***************************************************************/
|
||||
//! \struct Quantization table for JPEG decoding.
|
||||
//! This structure holds the quantization tables.
|
||||
//! The maximum number of quatization tables is four.
|
||||
//! The #load_quantization_table array can be used as a hint to notify
|
||||
//! which table(s) has valid values.
|
||||
//! The #quantiser_table values are specified in zig-zag scan order.
|
||||
/***************************************************************/
|
||||
typedef struct QuantizationMatrixBufferType {
|
||||
uint8_t load_quantiser_table[4];
|
||||
uint8_t quantiser_table[4][64];
|
||||
uint32_t reserved[4];
|
||||
} QuantizationMatrixBuffer;
|
||||
|
||||
/***************************************************************/
|
||||
//! \struct Huffman table for JPEG decoding.
|
||||
//! This structure holds the Huffman tables.
|
||||
//! The maximum number of Huffman tables is two.
|
||||
//! The #load_huffman_table array can be used as a hint to notify the
|
||||
//! which table(s) has valid values.
|
||||
/***************************************************************/
|
||||
typedef struct HuffmanTableBufferType {
|
||||
uint8_t load_huffman_table[2];
|
||||
struct {
|
||||
uint8_t num_dc_codes[16];
|
||||
uint8_t dc_values[12];
|
||||
uint8_t num_ac_codes[16];
|
||||
uint8_t ac_values[162];
|
||||
uint8_t pad[2];
|
||||
} huffman_table[2];
|
||||
uint32_t reserved[4];
|
||||
} HuffmanTableBuffer;
|
||||
|
||||
/***************************************************************/
|
||||
//! \struct Slice parameter for JPEG decoding.
|
||||
//! This structure holds information from the scan header, and
|
||||
//! definitions from additional segments.
|
||||
/***************************************************************/
|
||||
typedef struct SliceParameterBufferType {
|
||||
uint32_t slice_data_size;
|
||||
uint32_t slice_data_offset;
|
||||
uint32_t slice_data_flag;
|
||||
uint32_t slice_horizontal_position;
|
||||
uint32_t slice_vertical_position;
|
||||
struct {
|
||||
uint8_t component_selector;
|
||||
uint8_t dc_table_selector;
|
||||
uint8_t ac_table_selector;
|
||||
} components[4];
|
||||
uint8_t num_components;
|
||||
uint16_t restart_interval;
|
||||
uint32_t num_mcus;
|
||||
uint32_t reserved[4];
|
||||
} SliceParameterBuffer;
|
||||
|
||||
/***************************************************************/
|
||||
//! \struct Enum identifies image chroma subsampling values stored inside JPEG input stream
|
||||
/***************************************************************/
|
||||
typedef enum {
|
||||
CSS_444 = 0,
|
||||
CSS_440 = 1,
|
||||
CSS_422 = 2,
|
||||
CSS_420 = 3,
|
||||
CSS_411 = 4,
|
||||
CSS_400 = 5,
|
||||
CSS_UNKNOWN = -1
|
||||
} ChromaSubsampling;
|
||||
|
||||
/***************************************************************/
|
||||
//! \struct Jpeg stream parameters.
|
||||
//! This structure holds all information from a JPEG stream for decoding
|
||||
/***************************************************************/
|
||||
typedef struct JpegParameterBuffersType {
|
||||
PictureParameterBuffer picture_parameter_buffer;
|
||||
QuantizationMatrixBuffer quantization_matrix_buffer;
|
||||
HuffmanTableBuffer huffman_table_buffer;
|
||||
SliceParameterBuffer slice_parameter_buffer;
|
||||
ChromaSubsampling chroma_subsampling;
|
||||
const uint8_t* slice_data_buffer;
|
||||
} JpegStreamParameters;
|
||||
|
||||
class JpegParser {
|
||||
public:
|
||||
JpegParser();
|
||||
~JpegParser();
|
||||
bool ParseJpegStream(const uint8_t* jpeg_stream, uint32_t jpeg_stream_size);
|
||||
const JpegStreamParameters* GetJpegStreamParameters() const {return &jpeg_stream_parameters_;};
|
||||
private:
|
||||
bool ParseSOI();
|
||||
bool ParseSOF();
|
||||
bool ParseDQT();
|
||||
bool ParseSOS();
|
||||
bool ParseDHT();
|
||||
bool ParseDRI();
|
||||
bool ParseEOI();
|
||||
ChromaSubsampling GetChromaSubsampling(uint8_t c1_h_sampling_factor, uint8_t c2_h_sampling_factor, uint8_t c3_h_sampling_factor,
|
||||
uint8_t c1_v_sampling_factor, uint8_t c2_v_sampling_factor, uint8_t c3_v_sampling_factor);
|
||||
const uint8_t *stream_;
|
||||
const uint8_t *stream_end_;
|
||||
uint32_t stream_length_;
|
||||
JpegStreamParameters jpeg_stream_parameters_;
|
||||
};
|
||||
|
||||
#endif // ROC_JPEG_PARSER_H_
|
||||
@@ -0,0 +1,301 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "rocjpeg_vaapi_decoder.h"
|
||||
|
||||
RocJpegVappiDecoder::RocJpegVappiDecoder(int device_id) : device_id_{device_id}, drm_fd_{-1}, min_picture_width_{64}, min_picture_height_{64},
|
||||
max_picture_width_{4096}, max_picture_height_{4096}, va_display_{0}, va_config_attrib_{{}}, va_config_id_{0}, va_profile_{VAProfileJPEGBaseline},
|
||||
va_context_id_{0}, va_surface_ids_{}, va_picture_parameter_buf_id_{0}, va_quantization_matrix_buf_id_{0}, va_huffmantable_buf_id_{0},
|
||||
va_slice_param_buf_id_{0}, va_slice_data_buf_id_{0} {};
|
||||
|
||||
RocJpegVappiDecoder::~RocJpegVappiDecoder() {
|
||||
if (drm_fd_ != -1) {
|
||||
close(drm_fd_);
|
||||
}
|
||||
if (va_display_) {
|
||||
RocJpegStatus rocjpeg_status = DestroyDataBuffers();
|
||||
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) {
|
||||
ERR("Error: Failed to destroy VAAPI buffer");
|
||||
}
|
||||
VAStatus va_status;
|
||||
if (va_surface_ids_.size() > 0) {
|
||||
va_status = vaDestroySurfaces(va_display_, va_surface_ids_.data(), va_surface_ids_.size());
|
||||
if (va_status != VA_STATUS_SUCCESS) {
|
||||
ERR("ERROR: vaDestroySurfaces failed!");
|
||||
}
|
||||
}
|
||||
if (va_context_id_) {
|
||||
va_status = vaDestroyContext(va_display_, va_context_id_);
|
||||
if (va_status != VA_STATUS_SUCCESS) {
|
||||
ERR("ERROR: vaDestroyContext failed!");
|
||||
}
|
||||
}
|
||||
if (va_config_id_) {
|
||||
va_status = vaDestroyConfig(va_display_, va_config_id_);
|
||||
if (va_status != VA_STATUS_SUCCESS) {
|
||||
ERR("ERROR: vaDestroyConfig failed!");
|
||||
}
|
||||
}
|
||||
va_status = vaTerminate(va_display_);
|
||||
if (va_status != VA_STATUS_SUCCESS) {
|
||||
ERR("ERROR: vaTerminate failed!");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string gcn_arch_name) {
|
||||
// There are 8 renderDXXX per physical device on gfx940, gfx941, and gfx942
|
||||
int num_render_cards_per_device = ((gcn_arch_name.compare("gfx940") == 0) ||
|
||||
(gcn_arch_name.compare("gfx941") == 0) ||
|
||||
(gcn_arch_name.compare("gfx942") == 0)) ? 8 : 1;
|
||||
std::string drm_node = "/dev/dri/renderD" + std::to_string(128 + device_id_ * num_render_cards_per_device);
|
||||
CHECK_ROCJPEG(InitVAAPI(drm_node));
|
||||
CHECK_ROCJPEG(CreateDecoderConfig());
|
||||
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus RocJpegVappiDecoder::InitVAAPI(std::string drm_node) {
|
||||
drm_fd_ = open(drm_node.c_str(), O_RDWR);
|
||||
if (drm_fd_ < 0) {
|
||||
ERR("ERROR: failed to open drm node " + drm_node);
|
||||
return ROCJPEG_STATUS_NOT_INITIALIZED;
|
||||
}
|
||||
va_display_ = vaGetDisplayDRM(drm_fd_);
|
||||
if (!va_display_) {
|
||||
ERR("ERROR: failed to create va_display!");
|
||||
return ROCJPEG_STATUS_NOT_INITIALIZED;
|
||||
}
|
||||
vaSetInfoCallback(va_display_, NULL, NULL);
|
||||
int major_version = 0, minor_version = 0;
|
||||
CHECK_VAAPI(vaInitialize(va_display_, &major_version, &minor_version))
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus RocJpegVappiDecoder::CreateDecoderConfig() {
|
||||
int max_num_entrypoints = vaMaxNumEntrypoints(va_display_);
|
||||
std::vector<VAEntrypoint> jpeg_entrypoint_list;
|
||||
jpeg_entrypoint_list.resize(max_num_entrypoints);
|
||||
int num_entrypoints = 0;
|
||||
CHECK_VAAPI(vaQueryConfigEntrypoints(va_display_, va_profile_, jpeg_entrypoint_list.data(), &num_entrypoints));
|
||||
bool hw_jpeg_decoder_supported = false;
|
||||
if (num_entrypoints > 0) {
|
||||
for (auto entry_point : jpeg_entrypoint_list) {
|
||||
if (entry_point == VAEntrypointVLD) {
|
||||
hw_jpeg_decoder_supported = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
if (hw_jpeg_decoder_supported) {
|
||||
va_config_attrib_.resize(3);
|
||||
va_config_attrib_[0].type = VAConfigAttribRTFormat;
|
||||
va_config_attrib_[1].type = VAConfigAttribMaxPictureWidth;
|
||||
va_config_attrib_[2].type = VAConfigAttribMaxPictureHeight;
|
||||
CHECK_VAAPI(vaGetConfigAttributes(va_display_, va_profile_, VAEntrypointVLD, va_config_attrib_.data(), va_config_attrib_.size()));
|
||||
CHECK_VAAPI(vaCreateConfig(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib_[0], 1, &va_config_id_));
|
||||
if (va_config_attrib_[1].value != VA_ATTRIB_NOT_SUPPORTED) {
|
||||
max_picture_width_ = va_config_attrib_[1].value;
|
||||
}
|
||||
if (va_config_attrib_[2].value != VA_ATTRIB_NOT_SUPPORTED) {
|
||||
max_picture_height_ = va_config_attrib_[2].value;
|
||||
}
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
} else {
|
||||
return ROCJPEG_STATUS_HW_JPEG_DECODER_NOT_SUPPORTED;
|
||||
}
|
||||
}
|
||||
|
||||
RocJpegStatus RocJpegVappiDecoder::DestroyDataBuffers() {
|
||||
if (va_picture_parameter_buf_id_) {
|
||||
CHECK_VAAPI(vaDestroyBuffer(va_display_, va_picture_parameter_buf_id_));
|
||||
va_picture_parameter_buf_id_ = 0;
|
||||
}
|
||||
if (va_quantization_matrix_buf_id_) {
|
||||
CHECK_VAAPI(vaDestroyBuffer(va_display_, va_quantization_matrix_buf_id_));
|
||||
va_quantization_matrix_buf_id_ = 0;
|
||||
}
|
||||
if (va_huffmantable_buf_id_) {
|
||||
CHECK_VAAPI(vaDestroyBuffer(va_display_, va_huffmantable_buf_id_));
|
||||
va_huffmantable_buf_id_ = 0;
|
||||
}
|
||||
if (va_slice_param_buf_id_) {
|
||||
CHECK_VAAPI(vaDestroyBuffer(va_display_, va_slice_param_buf_id_));
|
||||
va_slice_param_buf_id_ = 0;
|
||||
}
|
||||
if (va_slice_data_buf_id_) {
|
||||
CHECK_VAAPI(vaDestroyBuffer(va_display_, va_slice_data_buf_id_));
|
||||
va_slice_data_buf_id_ = 0;
|
||||
}
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus RocJpegVappiDecoder::SubmitDecode(const JpegStreamParameters *jpeg_stream_params, uint32_t &surface_id) {
|
||||
if (jpeg_stream_params == nullptr) {
|
||||
return ROCJPEG_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
if (sizeof(jpeg_stream_params->picture_parameter_buffer) != sizeof(VAPictureParameterBufferJPEGBaseline) ||
|
||||
sizeof(jpeg_stream_params->quantization_matrix_buffer) != sizeof(VAIQMatrixBufferJPEGBaseline) ||
|
||||
sizeof(jpeg_stream_params->huffman_table_buffer) != sizeof(VAHuffmanTableBufferJPEGBaseline) ||
|
||||
sizeof(jpeg_stream_params->slice_parameter_buffer) != sizeof(VASliceParameterBufferJPEGBaseline)) {
|
||||
return ROCJPEG_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
if (jpeg_stream_params->picture_parameter_buffer.picture_width < min_picture_width_ ||
|
||||
jpeg_stream_params->picture_parameter_buffer.picture_height < min_picture_height_ ||
|
||||
jpeg_stream_params->picture_parameter_buffer.picture_width > max_picture_width_ ||
|
||||
jpeg_stream_params->picture_parameter_buffer.picture_height > max_picture_height_) {
|
||||
ERR("The JPEG image resolution is not supported!");
|
||||
return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
uint8_t surface_format;
|
||||
switch (jpeg_stream_params->chroma_subsampling) {
|
||||
case CSS_444:
|
||||
surface_format = VA_RT_FORMAT_YUV444;
|
||||
break;
|
||||
case CSS_422:
|
||||
surface_format = VA_RT_FORMAT_YUV422;
|
||||
break;
|
||||
case CSS_420:
|
||||
surface_format = VA_RT_FORMAT_YUV420;
|
||||
break;
|
||||
case CSS_400:
|
||||
surface_format = VA_RT_FORMAT_YUV400;
|
||||
break;
|
||||
default:
|
||||
ERR("ERROR: The chroma subsampling is not supported by the VCN hardware!");
|
||||
return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED;
|
||||
break;
|
||||
}
|
||||
|
||||
VASurfaceID va_surface_id;
|
||||
CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, jpeg_stream_params->picture_parameter_buffer.picture_width, jpeg_stream_params->picture_parameter_buffer.picture_height, &va_surface_id, 1, nullptr, 1));
|
||||
va_surface_ids_.push_back(va_surface_id);
|
||||
surface_id = va_surface_id;
|
||||
|
||||
if (va_context_id_) {
|
||||
vaDestroyContext(va_display_, va_context_id_);
|
||||
va_context_id_ = 0;
|
||||
}
|
||||
CHECK_VAAPI(vaCreateContext(va_display_, va_config_id_, jpeg_stream_params->picture_parameter_buffer.picture_width, jpeg_stream_params->picture_parameter_buffer.picture_height, VA_PROGRESSIVE, &va_surface_id, 1, &va_context_id_));
|
||||
|
||||
CHECK_ROCJPEG(DestroyDataBuffers());
|
||||
|
||||
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VAPictureParameterBufferType, sizeof(VAPictureParameterBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->picture_parameter_buffer, &va_picture_parameter_buf_id_));
|
||||
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VAIQMatrixBufferType, sizeof(VAIQMatrixBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->quantization_matrix_buffer, &va_quantization_matrix_buf_id_));
|
||||
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VAHuffmanTableBufferType, sizeof(VAHuffmanTableBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->huffman_table_buffer, &va_huffmantable_buf_id_));
|
||||
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VASliceParameterBufferType, sizeof(VASliceParameterBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->slice_parameter_buffer, &va_slice_param_buf_id_));
|
||||
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id_, VASliceDataBufferType, jpeg_stream_params->slice_parameter_buffer.slice_data_size, 1, (void *)jpeg_stream_params->slice_data_buffer, &va_slice_data_buf_id_));
|
||||
|
||||
CHECK_VAAPI(vaBeginPicture(va_display_, va_context_id_, va_surface_id));
|
||||
CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_picture_parameter_buf_id_, 1));
|
||||
CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_quantization_matrix_buf_id_, 1));
|
||||
CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_huffmantable_buf_id_, 1));
|
||||
CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_slice_param_buf_id_, 1));
|
||||
CHECK_VAAPI(vaRenderPicture(va_display_, va_context_id_, &va_slice_data_buf_id_, 1));
|
||||
CHECK_VAAPI(vaEndPicture(va_display_, va_context_id_));
|
||||
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus RocJpegVappiDecoder::ExportSurface(VASurfaceID surface_id, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc) {
|
||||
|
||||
bool is_surface_id_found = false;
|
||||
int idx = 0;
|
||||
for (idx = 0; idx < va_surface_ids_.size(); idx++) {
|
||||
if (va_surface_ids_[idx] == surface_id) {
|
||||
is_surface_id_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!is_surface_id_found) {
|
||||
return ROCJPEG_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
CHECK_VAAPI(vaExportSurfaceHandle(va_display_, surface_id,
|
||||
VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
|
||||
VA_EXPORT_SURFACE_READ_ONLY |
|
||||
VA_EXPORT_SURFACE_SEPARATE_LAYERS,
|
||||
&va_drm_prime_surface_desc));
|
||||
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus RocJpegVappiDecoder::SyncSurface(VASurfaceID surface_id) {
|
||||
VASurfaceStatus surface_status;
|
||||
bool is_surface_id_found = false;
|
||||
int idx = 0;
|
||||
|
||||
for (idx = 0; idx < va_surface_ids_.size(); idx++) {
|
||||
if (va_surface_ids_[idx] == surface_id) {
|
||||
is_surface_id_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_surface_id_found) {
|
||||
return ROCJPEG_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, surface_id, &surface_status));
|
||||
while (surface_status != VASurfaceReady) {
|
||||
VAStatus va_status = vaSyncSurface(va_display_, surface_id);
|
||||
if (va_status != VA_STATUS_SUCCESS) {
|
||||
if (va_status == 0x26 /*VA_STATUS_ERROR_TIMEDOUT*/) {
|
||||
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, surface_id, &surface_status));
|
||||
} else {
|
||||
std::cout << "vaSyncSurface() failed with error code: 0x" << std::hex << va_status <<
|
||||
std::dec << "', status: " << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;
|
||||
return ROCJPEG_STATUS_RUNTIME_ERROR;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
RocJpegStatus RocJpegVappiDecoder::ReleaseSurface(VASurfaceID surface_id) {
|
||||
bool is_surface_id_found = false;
|
||||
int idx = 0;
|
||||
|
||||
for (idx = 0; idx < va_surface_ids_.size(); idx++) {
|
||||
if (va_surface_ids_[idx] == surface_id) {
|
||||
is_surface_id_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_surface_id_found) {
|
||||
return ROCJPEG_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
CHECK_VAAPI(vaDestroySurfaces(va_display_, &va_surface_ids_[idx], 1));
|
||||
va_surface_ids_.erase(va_surface_ids_.begin() + idx);
|
||||
|
||||
return ROCJPEG_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ROC_JPEG_VAAPI_DECODER_H_
|
||||
#define ROC_JPEG_VAAPI_DECODER_H_
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <va/va.h>
|
||||
#include <va/va_drm.h>
|
||||
#include <va/va_drmcommon.h>
|
||||
#include "rocjpeg_commons.h"
|
||||
#include "rocjpeg_parser.h"
|
||||
#include "../api/rocjpeg.h"
|
||||
|
||||
/*Note: va.h doesn't have VA_FOURCC_YUYV defined but vaExportSurfaceHandle returns 0x56595559 for packed YUYV for YUV 4:2:2*/
|
||||
#define ROCJPEG_FOURCC_YUYV 0x56595559
|
||||
|
||||
class RocJpegVappiDecoder {
|
||||
public:
|
||||
RocJpegVappiDecoder(int device_id = 0);
|
||||
~RocJpegVappiDecoder();
|
||||
RocJpegStatus InitializeDecoder(std::string gcn_arch_name);
|
||||
RocJpegStatus SubmitDecode(const JpegStreamParameters *jpeg_stream_params, uint32_t &surface_id);
|
||||
RocJpegStatus ExportSurface(VASurfaceID surface_id, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc);
|
||||
RocJpegStatus SyncSurface(VASurfaceID surface_id);
|
||||
RocJpegStatus ReleaseSurface(VASurfaceID surface_id);
|
||||
private:
|
||||
int device_id_;
|
||||
int drm_fd_;
|
||||
uint32_t min_picture_width_;
|
||||
uint32_t min_picture_height_;
|
||||
uint32_t max_picture_width_;
|
||||
uint32_t max_picture_height_;
|
||||
VADisplay va_display_;
|
||||
std::vector<VAConfigAttrib> va_config_attrib_;
|
||||
VAConfigID va_config_id_;
|
||||
VAProfile va_profile_;
|
||||
VAContextID va_context_id_;
|
||||
std::vector<VASurfaceID> va_surface_ids_;
|
||||
VABufferID va_picture_parameter_buf_id_;
|
||||
VABufferID va_quantization_matrix_buf_id_;
|
||||
VABufferID va_huffmantable_buf_id_;
|
||||
VABufferID va_slice_param_buf_id_;
|
||||
VABufferID va_slice_data_buf_id_;
|
||||
RocJpegStatus InitVAAPI(std::string drm_node);
|
||||
RocJpegStatus CreateDecoderConfig();
|
||||
RocJpegStatus DestroyDataBuffers();
|
||||
};
|
||||
|
||||
#endif // ROC_JPEG_VAAPI_DECODER_H_
|
||||
@@ -0,0 +1,99 @@
|
||||
# ##############################################################################
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
# ##############################################################################
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
|
||||
project(ROCJPEG-test)
|
||||
|
||||
# make test with CTest
|
||||
enable_testing()
|
||||
include(CTest)
|
||||
|
||||
# ROCM Path
|
||||
if(DEFINED ENV{ROCM_PATH})
|
||||
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "${White}${PROJECT_NAME}: Default ROCm installation path${ColourReset}")
|
||||
elseif(ROCM_PATH)
|
||||
message("-- ${White}${PROJECT_NAME} :ROCM_PATH Set -- ${ROCM_PATH}${ColourReset}")
|
||||
else()
|
||||
set(ROCM_PATH /opt/rocm CACHE PATH "${White}${PROJECT_NAME}: Default ROCm installation path${ColourReset}")
|
||||
endif()
|
||||
|
||||
# find rocJPEG
|
||||
find_library(ROCJPEG_LIBRARY NAMES ROCJPEG HINTS ${ROCM_PATH}/lib)
|
||||
find_path(ROCJPEG_INCLUDE_DIR NAMES rocjpeg.h PATHS /opt/rocm/include/rocjpeg ${ROCM_PATH}/include/rocjpeg)
|
||||
|
||||
if(ROCJPEG_LIBRARY AND ROCJPEG_INCLUDE_DIR)
|
||||
set(ROCJPEG_FOUND TRUE)
|
||||
message("-- ${White}Using ROCJPEG -- \n\tLibraries:${ROCJPEG_LIBRARY} \n\tIncludes:${ROCJPEG_INCLUDE_DIR}${ColourReset}")
|
||||
endif()
|
||||
|
||||
if(NOT ROCJPEG_FOUND)
|
||||
message("-- ${Yellow}${PROJECT_NAME} requires rocJPEG. Install rocJPEG before running CTests")
|
||||
endif()
|
||||
|
||||
add_test(
|
||||
NAME
|
||||
jpeg-decode-fmt-unchanged
|
||||
COMMAND
|
||||
"${CMAKE_CTEST_COMMAND}"
|
||||
--build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
|
||||
--build-generator "${CMAKE_GENERATOR}"
|
||||
--test-command "jpegdecode"
|
||||
-i ${ROCM_PATH}/share/rocjpeg/images/
|
||||
)
|
||||
|
||||
add_test(
|
||||
NAME
|
||||
jpeg-decode-fmt-yuv
|
||||
COMMAND
|
||||
"${CMAKE_CTEST_COMMAND}"
|
||||
--build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
|
||||
--build-generator "${CMAKE_GENERATOR}"
|
||||
--test-command "jpegdecode"
|
||||
-i ${ROCM_PATH}/share/rocjpeg/images/ -fmt yuv
|
||||
)
|
||||
|
||||
add_test(
|
||||
NAME
|
||||
jpeg-decode-fmt-y
|
||||
COMMAND
|
||||
"${CMAKE_CTEST_COMMAND}"
|
||||
--build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
|
||||
--build-generator "${CMAKE_GENERATOR}"
|
||||
--test-command "jpegdecode"
|
||||
-i ${ROCM_PATH}/share/rocjpeg/images/ -fmt y
|
||||
)
|
||||
|
||||
add_test(
|
||||
NAME
|
||||
jpeg-decode-fmt-rgbi
|
||||
COMMAND
|
||||
"${CMAKE_CTEST_COMMAND}"
|
||||
--build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecode"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecode"
|
||||
--build-generator "${CMAKE_GENERATOR}"
|
||||
--test-command "jpegdecode"
|
||||
-i ${ROCM_PATH}/share/rocjpeg/images/ -fmt rgbi
|
||||
)
|
||||
Reference in New Issue
Block a user