From e307b89ca4944e9c54732b636b3f9cbfebb9d36b Mon Sep 17 00:00:00 2001 From: "Trowbridge, Ian" Date: Fri, 17 Jan 2025 16:42:25 -0600 Subject: [PATCH] rocDecode API Tracing Support (#49) * rocDecode API Tracing support * Test bin file added to rocdecode. Need to add validate python methods * Added option to not make rocDecode tests * Added rocdecode and rocprofv3 tests * Added csv test * Address PR comments. Changed tests to use built-in rocstreambit decoder to remove ffmpeg dependency. Changed cmake option to disable tests rather than not build them. Tests work locally, but will fail until rocDecode is built with tracing enabled on CI * Add option to avoid building rocdecode tests * Added option to avoid building rocdecode bin file * Merge conflict error * CMake files changed in response to review comments. Attempting to implement callbacks. * Turned off test building for rocdecode * Minor fixes for review comments * Review comments * Updated formatting * Document changes and format.hpp reversion. Need to remove iterate args support for now for later update. 
* Remove iterate args support * Remove iterate-args * enforce abi versioning in macro if * Fix doc error * removed spaces to fix indentation error --------- Co-authored-by: Madsen, Jonathan --- CHANGELOG.md | 1 + cmake/Modules/FindrocDecode.cmake | 49 + cmake/rocprofiler_config_interfaces.cmake | 20 + cmake/rocprofiler_interfaces.cmake | 3 + cmake/rocprofiler_options.cmake | 2 + source/bin/rocprofv3.py | 12 +- source/docs/data/rocdecode_api_trace.csv | 7 + source/docs/how-to/using-rocprofv3.rst | 42 +- source/docs/rocprofv3-schema.json | 60 + source/docs/rocprofv3_input_schema.json | 12 +- source/include/rocprofiler-sdk/CMakeLists.txt | 2 + .../include/rocprofiler-sdk/buffer_tracing.h | 19 + .../rocprofiler-sdk/callback_tracing.h | 11 + .../include/rocprofiler-sdk/cxx/perfetto.hpp | 4 +- .../rocprofiler-sdk/cxx/serialization.hpp | 22 + .../rocprofiler-sdk/external_correlation.h | 1 + source/include/rocprofiler-sdk/fwd.h | 29 +- source/include/rocprofiler-sdk/rocdecode.h | 27 + .../rocprofiler-sdk/rocdecode/CMakeLists.txt | 13 + .../rocprofiler-sdk/rocdecode/api_args.h | 165 ++ .../rocprofiler-sdk/rocdecode/api_id.h | 56 + .../rocdecode/details/CMakeLists.txt | 13 + .../rocdecode/details/roc_bitstream_reader.h | 110 + .../rocdecode/details/rocdecode.h | 1888 +++++++++++++++++ .../rocdecode/details/rocdecode_api_trace.h | 157 ++ .../rocdecode/details/rocdecode_version.h | 60 + .../rocdecode/details/rocparser.h | 349 +++ .../rocprofiler-sdk/rocdecode/table_id.h | 31 + source/lib/output/buffered_output.hpp | 2 + source/lib/output/domain_type.cpp | 1 + source/lib/output/domain_type.hpp | 1 + source/lib/output/generateCSV.cpp | 42 + source/lib/output/generateCSV.hpp | 6 + source/lib/output/generateJSON.cpp | 4 +- source/lib/output/generateJSON.hpp | 4 +- source/lib/output/generateOTF2.cpp | 6 +- source/lib/output/generateOTF2.hpp | 3 +- source/lib/output/generatePerfetto.cpp | 37 +- source/lib/output/generatePerfetto.hpp | 3 +- source/lib/output/generateStats.cpp | 18 + 
source/lib/output/generateStats.hpp | 5 + source/lib/rocprofiler-sdk-tool/config.hpp | 1 + source/lib/rocprofiler-sdk-tool/tool.cpp | 61 +- source/lib/rocprofiler-sdk/CMakeLists.txt | 2 + source/lib/rocprofiler-sdk/buffer_tracing.cpp | 13 + .../lib/rocprofiler-sdk/callback_tracing.cpp | 16 + .../lib/rocprofiler-sdk/intercept_table.cpp | 9 +- .../rocprofiler-sdk/internal_threading.cpp | 3 +- source/lib/rocprofiler-sdk/registration.cpp | 25 + .../rocprofiler-sdk/rocdecode/CMakeLists.txt | 6 + source/lib/rocprofiler-sdk/rocdecode/abi.cpp | 67 + .../lib/rocprofiler-sdk/rocdecode/defines.hpp | 216 ++ .../rocprofiler-sdk/rocdecode/rocdecode.cpp | 560 +++++ .../rocdecode/rocdecode.def.cpp | 90 + .../rocprofiler-sdk/rocdecode/rocdecode.hpp | 126 ++ .../runtime_initialization.cpp | 1 + tests/CMakeLists.txt | 4 + tests/bin/CMakeLists.txt | 3 + tests/bin/rocdecode/CMakeLists.txt | 43 + tests/bin/rocdecode/roc_video_dec.cpp | 1456 +++++++++++++ tests/bin/rocdecode/roc_video_dec.h | 648 ++++++ tests/bin/rocdecode/rocdecode.cpp | 109 + tests/pytest-packages/tests/rocprofv3.py | 12 +- tests/rocdecode/CMakeLists.txt | 53 + tests/rocdecode/conftest.py | 22 + tests/rocdecode/pytest.ini | 5 + tests/rocdecode/validate.py | 285 +++ tests/rocprofv3/CMakeLists.txt | 3 + .../rocprofv3/rocdecode-trace/CMakeLists.txt | 52 + tests/rocprofv3/rocdecode-trace/conftest.py | 71 + tests/rocprofv3/rocdecode-trace/pytest.ini | 5 + tests/rocprofv3/rocdecode-trace/validate.py | 138 ++ tests/tools/json-tool.cpp | 129 +- 73 files changed, 7486 insertions(+), 45 deletions(-) create mode 100644 cmake/Modules/FindrocDecode.cmake create mode 100644 source/docs/data/rocdecode_api_trace.csv create mode 100644 source/include/rocprofiler-sdk/rocdecode.h create mode 100644 source/include/rocprofiler-sdk/rocdecode/CMakeLists.txt create mode 100644 source/include/rocprofiler-sdk/rocdecode/api_args.h create mode 100644 source/include/rocprofiler-sdk/rocdecode/api_id.h create mode 100644 
source/include/rocprofiler-sdk/rocdecode/details/CMakeLists.txt create mode 100644 source/include/rocprofiler-sdk/rocdecode/details/roc_bitstream_reader.h create mode 100644 source/include/rocprofiler-sdk/rocdecode/details/rocdecode.h create mode 100644 source/include/rocprofiler-sdk/rocdecode/details/rocdecode_api_trace.h create mode 100644 source/include/rocprofiler-sdk/rocdecode/details/rocdecode_version.h create mode 100644 source/include/rocprofiler-sdk/rocdecode/details/rocparser.h create mode 100644 source/include/rocprofiler-sdk/rocdecode/table_id.h create mode 100644 source/lib/rocprofiler-sdk/rocdecode/CMakeLists.txt create mode 100644 source/lib/rocprofiler-sdk/rocdecode/abi.cpp create mode 100644 source/lib/rocprofiler-sdk/rocdecode/defines.hpp create mode 100644 source/lib/rocprofiler-sdk/rocdecode/rocdecode.cpp create mode 100644 source/lib/rocprofiler-sdk/rocdecode/rocdecode.def.cpp create mode 100644 source/lib/rocprofiler-sdk/rocdecode/rocdecode.hpp create mode 100644 tests/bin/rocdecode/CMakeLists.txt create mode 100644 tests/bin/rocdecode/roc_video_dec.cpp create mode 100644 tests/bin/rocdecode/roc_video_dec.h create mode 100644 tests/bin/rocdecode/rocdecode.cpp create mode 100644 tests/rocdecode/CMakeLists.txt create mode 100644 tests/rocdecode/conftest.py create mode 100644 tests/rocdecode/pytest.ini create mode 100644 tests/rocdecode/validate.py create mode 100644 tests/rocprofv3/rocdecode-trace/CMakeLists.txt create mode 100644 tests/rocprofv3/rocdecode-trace/conftest.py create mode 100644 tests/rocprofv3/rocdecode-trace/pytest.ini create mode 100755 tests/rocprofv3/rocdecode-trace/validate.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 5faeacdc53..7ae5ae62e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -152,6 +152,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec - `--collection-period` feature added in rocprofv3, to enable filtering using time. 
- `--collection-period-unit` feature added in rocprofv3, to allow the user to control time units used in collection period option. - Added deprecation notice for rocprofiler(v1) and rocprofiler(v2). +- Added support for rocDecode API Tracing ### Changed diff --git a/cmake/Modules/FindrocDecode.cmake b/cmake/Modules/FindrocDecode.cmake new file mode 100644 index 0000000000..fde2f102e2 --- /dev/null +++ b/cmake/Modules/FindrocDecode.cmake @@ -0,0 +1,49 @@ +################################################################################ +# Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +################################################################################ + +# find rocDecode - library and headers +find_path( + rocDecode_INCLUDE_DIR + NAMES rocdecode.h + PATHS ${ROCM_PATH}/include/rocdecode) +find_library( + rocDecode_LIBRARY + NAMES rocdecode + HINTS ${ROCM_PATH}/lib) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + rocDecode + FOUND_VAR rocDecode_FOUND + REQUIRED_VARS rocDecode_INCLUDE_DIR rocDecode_LIBRARY) + +if(rocDecode_FOUND) + if(NOT TARGET rocDecode::rocDecode) + add_library(rocDecode::rocDecode INTERFACE IMPORTED) + target_link_libraries(rocDecode::rocDecode INTERFACE ${rocDecode_LIBRARY}) + target_include_directories(rocDecode::rocDecode + INTERFACE ${rocDecode_INCLUDE_DIR}) + endif() +endif() + +mark_as_advanced(rocDecode_INCLUDE_DIR rocDecode_LIBRARY) diff --git a/cmake/rocprofiler_config_interfaces.cmake b/cmake/rocprofiler_config_interfaces.cmake index 69637b4d2d..a01a6df27d 100644 --- a/cmake/rocprofiler_config_interfaces.cmake +++ b/cmake/rocprofiler_config_interfaces.cmake @@ -325,3 +325,23 @@ else() INTERFACE ROCPROFILER_SDK_USE_SYSTEM_RCCL=0) endif() + +# ----------------------------------------------------------------------------------------# +# +# ROCDecode +# +# ----------------------------------------------------------------------------------------# + +find_package(rocDecode) + +if(rocDecode_FOUND + AND rocDecode_INCLUDE_DIR + AND EXISTS "${rocDecode_INCLUDE_DIR}/amd_detail/rocdecode_api_trace.h") + rocprofiler_config_nolink_target( + rocprofiler-sdk-rocdecode-nolink rocDecode::rocDecode INTERFACE + ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE=1) +else() + target_compile_definitions(rocprofiler-sdk-rocdecode-nolink + INTERFACE ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE=0) + +endif() diff --git a/cmake/rocprofiler_interfaces.cmake b/cmake/rocprofiler_interfaces.cmake index 0a804ac764..7e82e713e1 100644 --- a/cmake/rocprofiler_interfaces.cmake +++ b/cmake/rocprofiler_interfaces.cmake 
@@ -91,3 +91,6 @@ rocprofiler_add_interface_library( "rocprofiler-sdk-hsakmt without linking to HSAKMT library" IMPORTED) rocprofiler_add_interface_library(rocprofiler-sdk-rccl-nolink "RCCL headers without linking to RCCL library" IMPORTED) +rocprofiler_add_interface_library( + rocprofiler-sdk-rocdecode-nolink + "ROCDECODE headers without linking to ROCDECODE library" IMPORTED) diff --git a/cmake/rocprofiler_options.cmake b/cmake/rocprofiler_options.cmake index 35862acdc4..7a684b26e1 100644 --- a/cmake/rocprofiler_options.cmake +++ b/cmake/rocprofiler_options.cmake @@ -59,6 +59,8 @@ if(ROCPROFILER_BUILD_TESTS) rocprofiler_add_option( ROCPROFILER_BUILD_GTEST "Enable building gtest (Google testing) library internally" ON ADVANCED) + rocprofiler_add_option(ROCPROFILER_BUILD_ROCDECODE_TESTS + "Enable building rocDecode tests" OFF ADVANCED) endif() rocprofiler_add_option(ROCPROFILER_ENABLE_CLANG_TIDY "Enable clang-tidy checks" OFF diff --git a/source/bin/rocprofv3.py b/source/bin/rocprofv3.py index 86eb9581fa..34dcc8f1e0 100755 --- a/source/bin/rocprofv3.py +++ b/source/bin/rocprofv3.py @@ -158,13 +158,13 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins aggregate_tracing_options, "-r", "--runtime-trace", - help="Collect tracing data for HIP runtime API, Marker (ROCTx) API, RCCL API, Memory operations (copies, scratch, and allocation), and Kernel dispatches. Similar to --sys-trace but without tracing HIP compiler API and the underlying HSA API.", + help="Collect tracing data for HIP runtime API, Marker (ROCTx) API, RCCL API, ROCDecode API, Memory operations (copies, scratch, and allocation), and Kernel dispatches. 
Similar to --sys-trace but without tracing HIP compiler API and the underlying HSA API.", ) add_parser_bool_argument( aggregate_tracing_options, "-s", "--sys-trace", - help="Collect tracing data for HIP API, HSA API, Marker (ROCTx) API, RCCL API, Memory operations (copies, scratch, and allocations), and Kernel dispatches.", + help="Collect tracing data for HIP API, HSA API, Marker (ROCTx) API, RCCL API, ROCDecode API, Memory operations (copies, scratch, and allocations), and Kernel dispatches.", ) pc_sampling_options = parser.add_argument_group("PC sampling options") @@ -245,6 +245,11 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins "--kokkos-trace", help="Enable built-in Kokkos Tools support (implies --marker-trace and --kernel-rename)", ) + add_parser_bool_argument( + basic_tracing_options, + "--rocdecode-trace", + help="For collecting ROCDecode Traces", + ) extended_tracing_options = parser.add_argument_group("Granular tracing options") @@ -761,6 +766,7 @@ def run(app_args, args, **kwargs): "memory_allocation_trace", "scratch_memory_trace", "rccl_trace", + "rocdecode_trace", ): setattr(args, itr, True) @@ -773,6 +779,7 @@ def run(app_args, args, **kwargs): "memory_allocation_trace", "scratch_memory_trace", "rccl_trace", + "rocdecode_trace", ): setattr(args, itr, True) @@ -796,6 +803,7 @@ def run(app_args, args, **kwargs): ["hsa_finalizer_trace", "HSA_FINALIZER_EXT_API_TRACE"], ["marker_trace", "MARKER_API_TRACE"], ["rccl_trace", "RCCL_API_TRACE"], + ["rocdecode_trace", "ROCDECODE_API_TRACE"], ["kernel_trace", "KERNEL_TRACE"], ["memory_copy_trace", "MEMORY_COPY_TRACE"], ["memory_allocation_trace", "MEMORY_ALLOCATION_TRACE"], diff --git a/source/docs/data/rocdecode_api_trace.csv b/source/docs/data/rocdecode_api_trace.csv new file mode 100644 index 0000000000..b2c5da25ac --- /dev/null +++ b/source/docs/data/rocdecode_api_trace.csv @@ -0,0 +1,7 @@ 
+"Domain","Function","Process_Id","Thread_Id","Correlation_Id","Start_Timestamp","End_Timestamp" +"ROCDECODE_API","rocDecCreateVideoParser",41688,41688,583,615449881677279,615449882001583 +"ROCDECODE_API","rocDecGetDecoderCaps",41688,41688,584,615449882016054,615449882163756 +"ROCDECODE_API","rocDecGetDecoderCaps",41688,41688,588,615449886038750,615449886050880 +"ROCDECODE_API","rocDecCreateDecoder",41688,41688,591,615449886084210,615450756910310 +"ROCDECODE_API","rocDecDecodeFrame",41688,41688,595,615450757036042,615450767147413 +"ROCDECODE_API","rocDecGetDecodeStatus",41688,41688,812,615450836779385,615450836779575 diff --git a/source/docs/how-to/using-rocprofv3.rst b/source/docs/how-to/using-rocprofv3.rst index 34a875a34f..844d087407 100644 --- a/source/docs/how-to/using-rocprofv3.rst +++ b/source/docs/how-to/using-rocprofv3.rst @@ -55,11 +55,11 @@ Here is the sample of commonly used ``rocprofv3`` command-line options. Some opt - Output control * - ``-r`` \| ``--runtime-trace`` - - Collects HIP (runtime), memory copy, memory allocation, marker, scratch memory, and kernel dispatch traces. + - Collects HIP (runtime), memory copy, memory allocation, marker, scratch memory, rocDecode, and kernel dispatch traces. - Application Tracing * - ``-s`` \| ``--sys-trace`` - - Collects HIP, HSA, memory copy, memory allocation, marker, scratch memory, and kernel dispatch traces. + - Collects HIP, HSA, memory copy, memory allocation, marker, scratch memory, rocDecode, and kernel dispatch traces. - Application Tracing * - ``--hip-trace`` @@ -86,6 +86,10 @@ Here is the sample of commonly used ``rocprofv3`` command-line options. Some opt - Collects scratch memory operations traces. - Application tracing + * - ``--rocdecode-trace`` + - Collects rocDecode API traces. + - Application tracing + * - ``--hsa-trace`` - Collects HSA API traces. 
 - Application tracing @@ -615,6 +619,28 @@ Here are the contents of ``rccl_api_trace.csv`` file: :widths: 10,10,10,10,10,20,20 :header-rows: 1 +rocDecode trace +++++++++++++++++ + +`rocDecode <https://rocm.docs.amd.com/projects/rocDecode/en/latest/>`_ is a high-performance video decode SDK for AMD GPUs. This option traces the rocDecode API. + +.. code-block:: shell + + rocprofv3 --rocdecode-trace -- <application_path> + +The above command generates a ``rocdecode_api_trace`` file prefixed with the process ID. + +.. code-block:: shell + + $ cat 41688_rocdecode_api_trace.csv + +Here are the contents of ``rocdecode_api_trace.csv`` file: + +.. csv-table:: rocDecode trace + :file: /data/rocdecode_api_trace.csv + :widths: 10,10,10,10,10,20,20 + :header-rows: 1 + Post-processing tracing options ++++++++++++++++++++++++++++++++ @@ -1336,3 +1362,15 @@ Properties - **`handle`** *(integer, required)*: Handle of the agent. - **`address`** *(string, required)*: Starting address of allocation. - **`allocation_size`** *(integer, required)*: Size of allocation. + - **`rocDecode_api`** *(array)*: rocDecode API records. + - **Items** *(object)* + - **`size`** *(integer, required)*: Size of the rocDecode API record. + - **`kind`** *(integer, required)*: Kind of the rocDecode API. + - **`operation`** *(integer, required)*: Operation of the rocDecode API. + - **`correlation_id`** *(object, required)*: Correlation ID information. + - **`internal`** *(integer, required)*: Internal correlation ID. + - **`external`** *(integer, required)*: External correlation ID. + - **`start_timestamp`** *(integer, required)*: Start timestamp. + - **`end_timestamp`** *(integer, required)*: End timestamp. + - **`thread_id`** *(integer, required)*: Thread ID. 
+ diff --git a/source/docs/rocprofv3-schema.json b/source/docs/rocprofv3-schema.json index 20ac7ac318..012e64f2e5 100644 --- a/source/docs/rocprofv3-schema.json +++ b/source/docs/rocprofv3-schema.json @@ -1678,6 +1678,66 @@ "address", "allocation_size" ] + } + }, + "rocdecoder_api": { + "type": "array", + "description": "ROCDecode API records.", + "items": { + "type": "object", + "properties": { + "size": { + "type": "integer", + "description": "Size of the rocDecode API record." + }, + "kind": { + "type": "integer", + "description": "Kind of the rocDecode API." + }, + "operation": { + "type": "integer", + "description": "Operation of the rocDecode API." + }, + "correlation_id": { + "type": "object", + "description": "Correlation ID information.", + "properties": { + "internal": { + "type": "integer", + "description": "Internal correlation ID." + }, + "external": { + "type": "integer", + "description": "External correlation ID." + } + }, + "required": [ + "internal", + "external" + ] + }, + "start_timestamp": { + "type": "integer", + "description": "Start timestamp." + }, + "end_timestamp": { + "type": "integer", + "description": "End timestamp." + }, + "thread_id": { + "type": "integer", + "description": "Thread ID." 
+ } + }, + "required": [ + "size", + "kind", + "operation", + "correlation_id", + "start_timestamp", + "end_timestamp", + "thread_id" + ] } } } diff --git a/source/docs/rocprofv3_input_schema.json b/source/docs/rocprofv3_input_schema.json index c32c3f4f40..9009d11486 100644 --- a/source/docs/rocprofv3_input_schema.json +++ b/source/docs/rocprofv3_input_schema.json @@ -65,7 +65,10 @@ "type": "boolean", "description": "For Collecting Memory Allocation Traces" }, - + "rocdecode_trace": { + "type": "boolean", + "description": "For Collecting rocDecode Traces" + }, "scratch_memory_trace": { "type": "boolean", "description": "For Collecting Scratch Memory operations Traces" @@ -101,9 +104,14 @@ "description": "For Collecting HSA API Traces (Image-extenson API)" }, + "runtime_trace" : { + "type": "boolean", + "description": "For collecting HIP (runtime), memory copy, memory allocation, marker, scratch memory, rocDecode, and Kernel dispatch traces." + }, + "sys_trace" : { "type": "boolean", - "description": "For Collecting HIP, HSA, Marker (ROCTx), Memory copy, Memory allocation, Scratch memory, and Kernel dispatch traces" + "description": "For Collecting HIP, HSA, Marker (ROCTx), Memory copy, Memory allocation, Scratch memory, rocDecode, and Kernel dispatch traces" }, "mangled_kernels": { diff --git a/source/include/rocprofiler-sdk/CMakeLists.txt b/source/include/rocprofiler-sdk/CMakeLists.txt index 45431b8198..b720f62c9f 100644 --- a/source/include/rocprofiler-sdk/CMakeLists.txt +++ b/source/include/rocprofiler-sdk/CMakeLists.txt @@ -31,6 +31,7 @@ set(ROCPROFILER_HEADER_FILES profile_config.h registration.h rccl.h + rocdecode.h spm.h ${CMAKE_CURRENT_BINARY_DIR}/version.h) @@ -44,6 +45,7 @@ add_subdirectory(hsa) add_subdirectory(marker) add_subdirectory(ompt) add_subdirectory(rccl) +add_subdirectory(rocdecode) add_subdirectory(cxx) add_subdirectory(kfd) add_subdirectory(amd_detail) diff --git a/source/include/rocprofiler-sdk/buffer_tracing.h 
b/source/include/rocprofiler-sdk/buffer_tracing.h index b0e2214654..fb3d3d9ec2 100644 --- a/source/include/rocprofiler-sdk/buffer_tracing.h +++ b/source/include/rocprofiler-sdk/buffer_tracing.h @@ -182,6 +182,25 @@ typedef struct /// @brief Specification of the API function, e.g., ::rocprofiler_rccl_api_id_t } rocprofiler_buffer_tracing_rccl_api_record_t; +/** + * @brief ROCProfiler Buffer ROCDecode API Record. + */ +typedef struct +{ + uint64_t size; ///< size of this struct + rocprofiler_buffer_tracing_kind_t kind; + rocprofiler_tracing_operation_t operation; + rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record + rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds + rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds + rocprofiler_thread_id_t thread_id; ///< id for thread generating this record + + /// @var kind + /// @brief ::ROCPROFILER_BUFFER_TRACING_ROCDECODE_API + /// @var operation + /// @brief Specification of the API function, e.g., ::rocprofiler_rocdecode_api_id_t +} rocprofiler_buffer_tracing_rocdecode_api_record_t; + /** * @brief ROCProfiler Buffer Memory Copy Tracer Record. */ diff --git a/source/include/rocprofiler-sdk/callback_tracing.h b/source/include/rocprofiler-sdk/callback_tracing.h index 525d75d699..762feb4302 100644 --- a/source/include/rocprofiler-sdk/callback_tracing.h +++ b/source/include/rocprofiler-sdk/callback_tracing.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -108,6 +109,16 @@ typedef struct rocprofiler_rccl_api_retval_t retval; } rocprofiler_callback_tracing_rccl_api_data_t; +/** + * @brief ROCProfiler ROCDecode API Callback Data. + */ +typedef struct +{ + uint64_t size; ///< size of this struct + rocprofiler_rocdecode_api_args_t args; + rocprofiler_rocdecode_api_retval_t retval; +} rocprofiler_callback_tracing_rocdecode_api_data_t; + /** * @brief ROCProfiler Code Object Load Tracer Callback Record. 
*/ diff --git a/source/include/rocprofiler-sdk/cxx/perfetto.hpp b/source/include/rocprofiler-sdk/cxx/perfetto.hpp index 7832d62ed9..3ab3f0ce18 100644 --- a/source/include/rocprofiler-sdk/cxx/perfetto.hpp +++ b/source/include/rocprofiler-sdk/cxx/perfetto.hpp @@ -82,6 +82,7 @@ ROCPROFILER_DEFINE_CATEGORY(category, openmp, "OpenMP") ROCPROFILER_DEFINE_CATEGORY(category, kernel_dispatch, "GPU kernel dispatch") ROCPROFILER_DEFINE_CATEGORY(category, memory_copy, "Async memory copy") ROCPROFILER_DEFINE_CATEGORY(category, memory_allocation, "Memory Allocation") +ROCPROFILER_DEFINE_CATEGORY(category, rocdecode_api, "ROCDecode API function") #define ROCPROFILER_PERFETTO_CATEGORIES \ ROCPROFILER_PERFETTO_CATEGORY(category::hsa_api), \ @@ -91,7 +92,8 @@ ROCPROFILER_DEFINE_CATEGORY(category, memory_allocation, "Memory Allocation") ROCPROFILER_PERFETTO_CATEGORY(category::openmp), \ ROCPROFILER_PERFETTO_CATEGORY(category::kernel_dispatch), \ ROCPROFILER_PERFETTO_CATEGORY(category::memory_copy), \ - ROCPROFILER_PERFETTO_CATEGORY(category::memory_allocation) + ROCPROFILER_PERFETTO_CATEGORY(category::memory_allocation), \ + ROCPROFILER_PERFETTO_CATEGORY(category::rocdecode_api) #include diff --git a/source/include/rocprofiler-sdk/cxx/serialization.hpp b/source/include/rocprofiler-sdk/cxx/serialization.hpp index a28423802e..eb7444e15d 100644 --- a/source/include/rocprofiler-sdk/cxx/serialization.hpp +++ b/source/include/rocprofiler-sdk/cxx/serialization.hpp @@ -386,6 +386,21 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_rccl_api_data_t data) ROCP_SDK_SAVE_DATA_FIELD(retval); } +template +void +save(ArchiveT& ar, rocprofiler_rocdecode_api_retval_t data) +{ + ROCP_SDK_SAVE_DATA_FIELD(rocDecStatus_retval); +} + +template +void +save(ArchiveT& ar, rocprofiler_callback_tracing_rocdecode_api_data_t data) +{ + ROCP_SDK_SAVE_DATA_FIELD(size); + ROCP_SDK_SAVE_DATA_FIELD(retval); +} + template void save(ArchiveT& ar, rocprofiler_callback_tracing_ompt_data_t data) @@ -479,6 +494,13 @@ 
save(ArchiveT& ar, rocprofiler_buffer_tracing_rccl_api_record_t data) save_buffer_tracing_api_record(ar, data); } +template +void +save(ArchiveT& ar, rocprofiler_buffer_tracing_rocdecode_api_record_t data) +{ + save_buffer_tracing_api_record(ar, data); +} + template void save(ArchiveT& ar, rocprofiler_buffer_tracing_ompt_target_t data) diff --git a/source/include/rocprofiler-sdk/external_correlation.h b/source/include/rocprofiler-sdk/external_correlation.h index 14203a089e..cbf87b2870 100644 --- a/source/include/rocprofiler-sdk/external_correlation.h +++ b/source/include/rocprofiler-sdk/external_correlation.h @@ -69,6 +69,7 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_RCCL_API, ///< ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_OMPT, ///< ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API, ///< ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST, } rocprofiler_external_correlation_id_request_kind_t; diff --git a/source/include/rocprofiler-sdk/fwd.h b/source/include/rocprofiler-sdk/fwd.h index b4c632eafa..91aa551c8c 100644 --- a/source/include/rocprofiler-sdk/fwd.h +++ b/source/include/rocprofiler-sdk/fwd.h @@ -176,6 +176,7 @@ typedef enum // NOLINT(performance-enum-size) ///< ::rocprofiler_memory_allocation_operation_t ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION, ///< Callback notifying that a runtime ///< library has been initialized + ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API, ///< rocDecode API Tracing ROCPROFILER_CALLBACK_TRACING_LAST, } rocprofiler_callback_tracing_kind_t; @@ -207,6 +208,7 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_BUFFER_TRACING_RUNTIME_INITIALIZATION, ///< Record indicating a runtime library has ///< been initialized. 
@see ///< ::rocprofiler_runtime_initialization_operation_t + ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, ///< rocDecode tracing ROCPROFILER_BUFFER_TRACING_LAST, } rocprofiler_buffer_tracing_kind_t; @@ -363,12 +365,13 @@ typedef enum */ typedef enum { - ROCPROFILER_LIBRARY = (1 << 0), - ROCPROFILER_HSA_LIBRARY = (1 << 1), - ROCPROFILER_HIP_LIBRARY = (1 << 2), - ROCPROFILER_MARKER_LIBRARY = (1 << 3), - ROCPROFILER_RCCL_LIBRARY = (1 << 4), - ROCPROFILER_LIBRARY_LAST = ROCPROFILER_RCCL_LIBRARY, + ROCPROFILER_LIBRARY = (1 << 0), + ROCPROFILER_HSA_LIBRARY = (1 << 1), + ROCPROFILER_HIP_LIBRARY = (1 << 2), + ROCPROFILER_MARKER_LIBRARY = (1 << 3), + ROCPROFILER_RCCL_LIBRARY = (1 << 4), + ROCPROFILER_ROCDECODE_LIBRARY = (1 << 5), + ROCPROFILER_LIBRARY_LAST = ROCPROFILER_ROCDECODE_LIBRARY, } rocprofiler_runtime_library_t; /** @@ -384,7 +387,8 @@ typedef enum ROCPROFILER_MARKER_CONTROL_TABLE = (1 << 4), ROCPROFILER_MARKER_NAME_TABLE = (1 << 5), ROCPROFILER_RCCL_TABLE = (1 << 6), - ROCPROFILER_TABLE_LAST = ROCPROFILER_RCCL_TABLE, + ROCPROFILER_ROCDECODE_TABLE = (1 << 7), + ROCPROFILER_TABLE_LAST = ROCPROFILER_ROCDECODE_TABLE, } rocprofiler_intercept_table_t; /** @@ -392,11 +396,12 @@ typedef enum */ typedef enum // NOLINT(performance-enum-size) { - ROCPROFILER_RUNTIME_INITIALIZATION_NONE = 0, ///< Unknown runtime initialization - ROCPROFILER_RUNTIME_INITIALIZATION_HSA, ///< Application loaded HSA runtime - ROCPROFILER_RUNTIME_INITIALIZATION_HIP, ///< Application loaded HIP runtime - ROCPROFILER_RUNTIME_INITIALIZATION_MARKER, ///< Application loaded Marker (ROCTx) runtime - ROCPROFILER_RUNTIME_INITIALIZATION_RCCL, ///< Application loaded RCCL runtime + ROCPROFILER_RUNTIME_INITIALIZATION_NONE = 0, ///< Unknown runtime initialization + ROCPROFILER_RUNTIME_INITIALIZATION_HSA, ///< Application loaded HSA runtime + ROCPROFILER_RUNTIME_INITIALIZATION_HIP, ///< Application loaded HIP runtime + ROCPROFILER_RUNTIME_INITIALIZATION_MARKER, ///< Application loaded Marker (ROCTx) runtime + 
ROCPROFILER_RUNTIME_INITIALIZATION_RCCL, ///< Application loaded RCCL runtime + ROCPROFILER_RUNTIME_INITIALIZATION_ROCDECODE, ///< Application loaded rocDecode runtime ROCPROFILER_RUNTIME_INITIALIZATION_LAST, } rocprofiler_runtime_initialization_operation_t; diff --git a/source/include/rocprofiler-sdk/rocdecode.h b/source/include/rocprofiler-sdk/rocdecode.h new file mode 100644 index 0000000000..ed54739191 --- /dev/null +++ b/source/include/rocprofiler-sdk/rocdecode.h @@ -0,0 +1,27 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include +#include +#include diff --git a/source/include/rocprofiler-sdk/rocdecode/CMakeLists.txt b/source/include/rocprofiler-sdk/rocdecode/CMakeLists.txt new file mode 100644 index 0000000000..f2528fec3a --- /dev/null +++ b/source/include/rocprofiler-sdk/rocdecode/CMakeLists.txt @@ -0,0 +1,13 @@ +# +# +# Installation of public rocDecode headers +# +# +set(ROCPROFILER_ROCDECODE_HEADER_FILES api_args.h api_id.h table_id.h) + +install( + FILES ${ROCPROFILER_ROCDECODE_HEADER_FILES} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/rocdecode + COMPONENT development) + +add_subdirectory(details) diff --git a/source/include/rocprofiler-sdk/rocdecode/api_args.h b/source/include/rocprofiler-sdk/rocdecode/api_args.h new file mode 100644 index 0000000000..d5f5dfd183 --- /dev/null +++ b/source/include/rocprofiler-sdk/rocdecode/api_args.h @@ -0,0 +1,165 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include +#include + +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() && __has_include() && __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +# include +# include +#else +# include +# include +# include +#endif + +#include + +ROCPROFILER_EXTERN_C_INIT + +// Empty struct has a size of 0 in C but size of 1 in C++. +// This struct is added to the union members which represent +// functions with no arguments to ensure ABI compatibility +typedef struct rocprofiler_rocdecode_api_no_args +{ + char empty; +} rocprofiler_rocdecode_api_no_args; + +typedef union rocprofiler_rocdecode_api_retval_t +{ + int32_t rocDecStatus_retval; + const char* const_charp_retval; +} rocprofiler_rocdecode_api_retval_t; + +typedef union rocprofiler_rocdecode_api_args_t +{ + struct + { + RocdecVideoParser* parser_handle; + RocdecParserParams* params; + } rocDecCreateVideoParser; + + struct + { + RocdecVideoParser parser_handle; + RocdecSourceDataPacket* packet; + } rocDecParseVideoData; + + struct + { + RocdecVideoParser parser_handle; + } rocDecDestroyVideoParser; + + struct + { + rocDecDecoderHandle* decoder_handle; + RocDecoderCreateInfo* decoder_create_info; + } rocDecCreateDecoder; + + struct + { + rocDecDecoderHandle decoder_handle; + } rocDecDestroyDecoder; + + struct + { + RocdecDecodeCaps* decode_caps; + } rocDecGetDecoderCaps; + + struct + { + rocDecDecoderHandle decoder_handle; + RocdecPicParams* pic_params; + } rocDecDecodeFrame; + 
+ struct + { + rocDecDecoderHandle decoder_handle; + int pic_idx; + RocdecDecodeStatus* decode_status; + } rocDecGetDecodeStatus; + + struct + { + rocDecDecoderHandle decoder_handle; + RocdecReconfigureDecoderInfo* reconfig_params; + } rocDecReconfigureDecoder; + + struct + { + rocDecDecoderHandle decoder_handle; + int pic_idx; + void** dev_mem_ptr; + uint32_t* horizontal_pitch; + RocdecProcParams* vid_postproc_params; + } rocDecGetVideoFrame; + struct + { + rocDecStatus rocdec_status; + } rocDecGetErrorName; + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1 + struct + { + RocdecBitstreamReader* bs_reader_handle; + const char* input_file_path; + } rocDecCreateBitstreamReader; + struct + { + RocdecBitstreamReader bs_reader_handle; + rocDecVideoCodec* codec_type; + } rocDecGetBitstreamCodecType; + struct + { + RocdecBitstreamReader bs_reader_handle; + int* bit_depth; + } rocDecGetBitstreamBitDepth; + struct + { + RocdecBitstreamReader bs_reader_handle; + uint8_t** pic_data; + int* pic_size; + int64_t* pts; + } rocDecGetBitstreamPicData; + struct + { + RocdecBitstreamReader bs_reader_handle; + } rocDecDestroyBitstreamReader; +#endif +} rocprofiler_rocdecode_api_args_t; + +ROCPROFILER_EXTERN_C_FINI diff --git a/source/include/rocprofiler-sdk/rocdecode/api_id.h b/source/include/rocprofiler-sdk/rocdecode/api_id.h new file mode 100644 index 0000000000..8ad53182d9 --- /dev/null +++ b/source/include/rocprofiler-sdk/rocdecode/api_id.h @@ -0,0 +1,56 @@ + + +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#include + +/** + * @brief ROCProfiler enumeration of rocDecode API tracing operations + */ +typedef enum // NOLINT(performance-enum-size) +{ + ROCPROFILER_ROCDECODE_API_ID_NONE = -1, + + ROCPROFILER_ROCDECODE_API_ID_rocDecCreateVideoParser = 0, + ROCPROFILER_ROCDECODE_API_ID_rocDecParseVideoData, + ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyVideoParser, + ROCPROFILER_ROCDECODE_API_ID_rocDecCreateDecoder, + ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyDecoder, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecoderCaps, + ROCPROFILER_ROCDECODE_API_ID_rocDecDecodeFrame, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecodeStatus, + ROCPROFILER_ROCDECODE_API_ID_rocDecReconfigureDecoder, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetVideoFrame, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetErrorName, + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1 + ROCPROFILER_ROCDECODE_API_ID_rocDecCreateBitstreamReader, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamCodecType, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamBitDepth, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamPicData, + ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyBitstreamReader, +#endif + ROCPROFILER_ROCDECODE_API_ID_LAST, +} rocprofiler_rocdecode_api_id_t; diff --git a/source/include/rocprofiler-sdk/rocdecode/details/CMakeLists.txt b/source/include/rocprofiler-sdk/rocdecode/details/CMakeLists.txt new file mode 100644 index 0000000000..7869b7c825 --- /dev/null +++ b/source/include/rocprofiler-sdk/rocdecode/details/CMakeLists.txt @@ -0,0 +1,13 @@ +# +# +# Installation of public ROCDecode headers +# +# +set(ROCPROFILER_ROCDECODE_DETAILS_HEADER_FILES + rocdecode_api_trace.h rocdecode.h rocparser.h rocdecode_version.h + roc_bitstream_reader.h) + +install( + FILES ${ROCPROFILER_ROCDECODE_DETAILS_HEADER_FILES} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/rocdecode/details + COMPONENT development) diff --git a/source/include/rocprofiler-sdk/rocdecode/details/roc_bitstream_reader.h
b/source/include/rocprofiler-sdk/rocdecode/details/roc_bitstream_reader.h new file mode 100644 index 0000000000..91df84a2a6 --- /dev/null +++ b/source/include/rocprofiler-sdk/rocdecode/details/roc_bitstream_reader.h @@ -0,0 +1,110 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +#else +# include +#endif + +/*! + * \file + * \brief The AMD rocBitstreamReader Library. 
+ * + * \defgroup group_roc_bitstream_reader rocDecode Parser: AMD ROCm Video Bitstream Reader API + * \brief AMD The rocBitstreamReader is a toolkit to read picture data from bitstream files for + * decoding on AMD’s GPUs. + */ + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +/*********************************************************************************/ +//! HANDLE of rocBitstreamReader +//! Used in subsequent API calls after rocDecCreateBitstreamReader +/*********************************************************************************/ +typedef void* RocdecBitstreamReader; + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecCreateBitstreamReader(RocdecBitstreamReader *bs_reader_handle, +//! const char *input_file_path) Create video bitstream reader object and initialize +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecCreateBitstreamReader(RocdecBitstreamReader* bs_reader_handle, const char* input_file_path); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamCodecType(RocdecBitstreamReader bs_reader_handle, +//! rocDecVideoCodec *codec_type) Get the codec type of the bitstream +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetBitstreamCodecType(RocdecBitstreamReader bs_reader_handle, rocDecVideoCodec* codec_type); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamBitDepth(RocdecBitstreamReader bs_reader_handle, +//! 
int *bit_depth) Get the bit depth of the bitstream +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetBitstreamBitDepth(RocdecBitstreamReader bs_reader_handle, int* bit_depth); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamPicData(RocdecBitstreamReader bs_reader_handle, +//! uint8_t **pic_data, int *pic_size, int64_t *pts) Read one unit of picture data from the +//! bitstream. The unit can be a frame or field for AVC/HEVC, a temporal unit for AV1, or a frame +//! (including superframe) for VP9. The picture data unit is pointed by pic_data. The size of the +//! unit is specified by pic_size. The presentation time stamp, if available, is given by pts. +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetBitstreamPicData(RocdecBitstreamReader bs_reader_handle, + uint8_t** pic_data, + int* pic_size, + int64_t* pts); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecDestroyBitstreamReader(RocdecBitstreamReader bs_reader_handle) +//! 
Destroy the bitstream reader object +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecDestroyBitstreamReader(RocdecBitstreamReader bs_reader_handle); + +#if defined(__cplusplus) +} +#endif /* __cplusplus */ diff --git a/source/include/rocprofiler-sdk/rocdecode/details/rocdecode.h b/source/include/rocprofiler-sdk/rocdecode/details/rocdecode.h new file mode 100644 index 0000000000..446fad6924 --- /dev/null +++ b/source/include/rocprofiler-sdk/rocdecode/details/rocdecode.h @@ -0,0 +1,1888 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + +#pragma once + +#ifndef ROCDECAPI +# if defined(_WIN32) +# define ROCDECAPI __stdcall // for future: only linux is supported in this version +# else +# define ROCDECAPI +# endif +#endif + +#include "hip/hip_runtime.h" +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +#else +# include +#endif +/*! + * \file + * \brief The AMD rocDecode Library. + * + * \defgroup group_amd_rocdecode rocDecode: AMD ROCm Decode API + * \brief AMD The rocDecode is a toolkit to decode videos and images using a hardware-accelerated + * video decoder on AMD’s GPUs. + */ + +#if defined(__cplusplus) +extern "C" { +#endif // __cplusplus + +/*********************************************************************************/ +//! HANDLE of rocDecDecoder +//! Used in subsequent API calls after rocDecCreateDecoder +/*********************************************************************************/ + +typedef void* rocDecDecoderHandle; + +/*********************************************************************************/ +//! \enum rocDecStatus +//! \ingroup group_amd_rocdecode +//! rocDecoder return status enums +//! These enums are used in all API calls to rocDecoder +/*********************************************************************************/ +typedef enum rocDecStatus_enum +{ + ROCDEC_DEVICE_INVALID = -1, + ROCDEC_CONTEXT_INVALID = -2, + ROCDEC_RUNTIME_ERROR = -3, + ROCDEC_OUTOF_MEMORY = -4, + ROCDEC_INVALID_PARAMETER = -5, + ROCDEC_NOT_IMPLEMENTED = -6, + ROCDEC_NOT_INITIALIZED = -7, + ROCDEC_NOT_SUPPORTED = -8, + ROCDEC_SUCCESS = 0, +} rocDecStatus; + +/*********************************************************************************/ +//! \enum rocDecVideoCodec +//!
\ingroup group_amd_rocdecode +//! Video codec enums +//! These enums are used in RocDecoderCreateInfo and RocdecDecodeCaps structures +/*********************************************************************************/ +typedef enum rocDecVideoCodec_enum +{ + rocDecVideoCodec_MPEG1 = 0, /**< MPEG1 */ + rocDecVideoCodec_MPEG2, /**< MPEG2 */ + rocDecVideoCodec_MPEG4, /**< MPEG4 */ + rocDecVideoCodec_AVC, /**< AVC/H264 */ + rocDecVideoCodec_HEVC, /**< HEVC */ + rocDecVideoCodec_AV1, /**< AV1 */ + rocDecVideoCodec_VP8, /**< VP8 */ + rocDecVideoCodec_VP9, /**< VP9 */ + rocDecVideoCodec_JPEG, /**< JPEG */ + rocDecVideoCodec_NumCodecs, /**< Max codecs */ + // Uncompressed YUV + rocDecVideoCodec_YUV420 = + (('I' << 24) | ('Y' << 16) | ('U' << 8) | ('V')), /**< Y,U,V (4:2:0) */ + rocDecVideoCodec_YV12 = + (('Y' << 24) | ('V' << 16) | ('1' << 8) | ('2')), /**< Y,V,U (4:2:0) */ + rocDecVideoCodec_NV12 = + (('N' << 24) | ('V' << 16) | ('1' << 8) | ('2')), /**< Y,UV (4:2:0) */ + rocDecVideoCodec_YUYV = + (('Y' << 24) | ('U' << 16) | ('Y' << 8) | ('V')), /**< YUYV/YUY2 (4:2:2) */ + rocDecVideoCodec_UYVY = (('U' << 24) | ('Y' << 16) | ('V' << 8) | ('Y')) /**< UYVY (4:2:2) */ +} rocDecVideoCodec; + +/*********************************************************************************/ +//! \enum rocDecVideoSurfaceFormat +//! \ingroup group_amd_rocdecode +//! Video surface format enums used for output format of decoded output +//! These enums are used in RocDecoderCreateInfo structure +/*********************************************************************************/ +typedef enum rocDecVideoSurfaceFormat_enum +{ + rocDecVideoSurfaceFormat_NV12 = + 0, /**< Semi-Planar YUV [Y plane followed by interleaved UV plane] */ + rocDecVideoSurfaceFormat_P016 = + 1, /**< 16 bit Semi-Planar YUV [Y plane followed by interleaved UV plane].
+ Can be used for 10 bit(6LSB bits 0), 12 bit (4LSB bits 0) */ + rocDecVideoSurfaceFormat_YUV444 = 2, /**< Planar YUV [Y plane followed by U and V planes] */ + rocDecVideoSurfaceFormat_YUV444_16Bit = + 3, /**< 16 bit Planar YUV [Y plane followed by U and V planes]. + Can be used for 10 bit(6LSB bits 0), 12 bit (4LSB bits 0) */ + rocDecVideoSurfaceFormat_YUV420 = + 4, /**< Planar YUV [Y plane followed by U and V planes in 4:2:0 format] */ + rocDecVideoSurfaceFormat_YUV420_16Bit = + 5, /**< 16 bit Planar YUV [Y plane followed by U and V planes in 4:2:0 format]. + Can be used for 10 bit(LSB), 12 bit (LSB) */ +} rocDecVideoSurfaceFormat; + +/**************************************************************************************************************/ +//! \enum rocDecVideoChromaFormat +//! \ingroup group_amd_rocdecode +//! Chroma format enums +//! These enums are used in RocDecoderCreateInfo and RocdecDecodeCaps structures +/**************************************************************************************************************/ +typedef enum rocDecVideoChromaFormat_enum +{ + rocDecVideoChromaFormat_Monochrome = 0, /**< MonoChrome */ + rocDecVideoChromaFormat_420, /**< YUV 4:2:0 */ + rocDecVideoChromaFormat_422, /**< YUV 4:2:2 */ + rocDecVideoChromaFormat_444 /**< YUV 4:4:4 */ +} rocDecVideoChromaFormat; + +/*************************************************************************/ +//! \enum rocDecDecodeStatus +//! \ingroup group_amd_rocdecode +//! Decode status enums +//!
These enums are used in RocdecGetDecodeStatus structure +/*************************************************************************/ +typedef enum rocDecodeStatus_enum +{ + rocDecodeStatus_Invalid = 0, // Decode status is not valid + rocDecodeStatus_InProgress = 1, // Decode is in progress + rocDecodeStatus_Success = 2, // Decode is completed without any errors + // 3 to 7 enums are reserved for future use + rocDecodeStatus_Error = 8, // Decode is completed with an error (error is not concealed) + rocDecodeStatus_Error_Concealed = + 9, // Decode is completed with an error and error is concealed + rocDecodeStatus_Displaying = 10, // Decode is completed, displaying in progress +} rocDecDecodeStatus; + +/**************************************************************************************************************/ +//! \struct RocdecDecodeCaps; +//! \ingroup group_amd_rocdecode +//! This structure is used in rocDecGetDecoderCaps API +/**************************************************************************************************************/ +typedef struct _RocdecDecodeCaps +{ + uint8_t device_id; /**< IN: the device id for which query the decode capability 0 for the first + device, 1 for the second device on the system, etc.*/ + rocDecVideoCodec codec_type; /**< IN: rocDecVideoCodec_XXX */ + rocDecVideoChromaFormat chroma_format; /**< IN: rocDecVideoChromaFormat_XXX */ + uint32_t bit_depth_minus_8; /**< IN: The Value "BitDepth minus 8" */ + uint32_t reserved_1[3]; /**< Reserved for future use - set to zero */ + uint8_t is_supported; /**< OUT: 1 if codec supported, 0 if not supported */ + uint8_t num_decoders; /**< OUT: Number of Decoders that can support IN params */ + uint16_t output_format_mask; /**< OUT: each bit represents corresponding + rocDecVideoSurfaceFormat enum */ + uint32_t max_width; /**< OUT: Max supported coded width in pixels */ + uint32_t max_height; /**< OUT: Max supported coded height in pixels */ + uint16_t min_width; /**< OUT: Min supported 
coded width in pixels */ + uint16_t min_height; /**< OUT: Min supported coded height in pixels */ + uint32_t reserved_2[6]; /**< Reserved for future use - set to zero */ +} RocdecDecodeCaps; + +/**************************************************************************************************************/ +//! \struct RocDecoderCreateInfo +//! \ingroup group_amd_rocdecode +//! This structure is used in rocDecCreateDecoder API +/**************************************************************************************************************/ +typedef struct _RocDecoderCreateInfo +{ + uint8_t device_id; /**< IN: the device id for which a decoder should be created + 0 for the first device, 1 for the second device on the system, etc.*/ + uint32_t width; /**< IN: Coded sequence width in pixels */ + uint32_t height; /**< IN: Coded sequence height in pixels */ + uint32_t num_decode_surfaces; /**< IN: Maximum number of internal decode surfaces */ + rocDecVideoCodec codec_type; /**< IN: rocDecVideoCodec_XXX */ + rocDecVideoChromaFormat chroma_format; /**< IN: rocDecVideoChromaFormat_XXX */ + uint32_t bit_depth_minus_8; /**< IN: The value "BitDepth minus 8" */ + uint32_t + intra_decode_only; /**< IN: Set 1 only if video has all intra frames (default value is 0). + This will optimize video memory for Intra frames only decoding. The + support is limited to specific codecs - AVC/H264, HEVC, VP9, the flag + will be ignored for codecs which are not supported. However decoding + might fail if the flag is enabled in case of supported codecs for + regular bit streams having P and/or B frames. 
*/ + uint32_t max_width; /**< IN: Coded sequence max width in pixels used with reconfigure Decoder */ + uint32_t + max_height; /**< IN: Coded sequence max height in pixels used with reconfigure Decoder */ + struct + { + int16_t left; + int16_t top; + int16_t right; + int16_t bottom; + } display_rect; /**< IN: area of the frame that should be displayed */ + rocDecVideoSurfaceFormat output_format; /**< IN: rocDecVideoSurfaceFormat_XXX */ + uint32_t target_width; /**< IN: Post-processed output width (Should be aligned to 2) */ + uint32_t target_height; /**< IN: Post-processed output height (Should be aligned to 2) */ + uint32_t + num_output_surfaces; /**< IN: Maximum number of output surfaces simultaneously mapped */ + struct + { + int16_t left; + int16_t top; + int16_t right; + int16_t bottom; + } target_rect; /**< IN: (for future use) target rectangle in the output frame (for aspect ratio + conversion) + if a null rectangle is specified, {0,0,target_width,target_height} will + be used*/ + uint32_t reserved_2[4]; /**< Reserved for future use - set to zero */ +} RocDecoderCreateInfo; + +/*********************************************************************************************************/ +//! \struct RocdecDecodeStatus +//! \ingroup group_amd_rocdecode +//! Struct for reporting decode status. +//! This structure is used in RocdecGetDecodeStatus API. +/*********************************************************************************************************/ +typedef struct _RocdecDecodeStatus +{ + rocDecDecodeStatus decode_status; + uint32_t reserved[31]; + void* p_reserved[8]; +} RocdecDecodeStatus; + +/****************************************************/ +//! \struct RocdecReconfigureDecoderInfo +//! \ingroup group_amd_rocdecode +//! Struct for decoder reset +//! 
This structure is used in rocDecReconfigureDecoder() API +/****************************************************/ +typedef struct _RocdecReconfigureDecoderInfo +{ + uint32_t width; /**< IN: Coded sequence width in pixels, MUST be < = max_width defined at + RocDecoderCreateInfo */ + uint32_t height; /**< IN: Coded sequence height in pixels, MUST be < = max_height defined at + RocDecoderCreateInfo */ + uint32_t target_width; /**< IN: Post processed output width */ + uint32_t target_height; /**< IN: Post Processed output height */ + uint32_t num_decode_surfaces; /**< IN: Maximum number of internal decode surfaces */ + uint32_t reserved_1[12]; /**< Reserved for future use. Set to Zero */ + struct + { + int16_t left; + int16_t top; + int16_t right; + int16_t bottom; + } display_rect; /**< IN: area of the frame that should be displayed */ + struct + { + int16_t left; + int16_t top; + int16_t right; + int16_t bottom; + } target_rect; /**< IN: (for future use) target rectangle in the output frame (for aspect ratio + conversion) + if a null rectangle is specified, {0,0,target_width,target_height} will be + used */ + uint32_t reserved_2[11]; /**< Reserved for future use. Set to Zero */ +} RocdecReconfigureDecoderInfo; + +/*********************************************************/ +//! \struct RocdecAvcPicture +//! \ingroup group_amd_rocdecode +//! AVC/H.264 Picture Entry +//! 
This structure is used in RocdecAvcPicParams structure +/*********************************************************/ +typedef struct _RocdecAvcPicture +{ + int pic_idx; /**< picture index of reference frame */ + uint32_t frame_idx; /**< frame_num(short-term) or LongTermFrameIdx(long-term) */ + uint32_t flags; /**< See below for definitions */ + int32_t top_field_order_cnt; /**< field order count of top field */ + int32_t bottom_field_order_cnt; /**< field order count of bottom field */ + uint32_t reserved[4]; +} RocdecAvcPicture; + +/* flags in RocdecAvcPicture could be OR of the following */ +#define RocdecAvcPicture_FLAGS_INVALID 0x00000001 +#define RocdecAvcPicture_FLAGS_TOP_FIELD 0x00000002 +#define RocdecAvcPicture_FLAGS_BOTTOM_FIELD 0x00000004 +#define RocdecAvcPicture_FLAGS_SHORT_TERM_REFERENCE 0x00000008 +#define RocdecAvcPicture_FLAGS_LONG_TERM_REFERENCE 0x00000010 +#define RocdecAvcPicture_FLAGS_NON_EXISTING 0x00000020 + +/*********************************************************/ +//! \struct RocdecHevcPicture +//! \ingroup group_amd_rocdecode +//! HEVC Picture Entry +//! This structure is used in RocdecHevcPicParams structure +/*********************************************************/ +typedef struct _RocdecHevcPicture +{ + int pic_idx; /**< reconstructed picture surface ID */ + /** \brief picture order count. + //! \ingroup group_amd_rocdecode + * in HEVC, POCs for top and bottom fields of same picture should + * take different values. + */ + int poc; + uint32_t flags; /**< See below for definitions */ + uint32_t reserved[4]; /**< reserved for future; must be zero */ +} RocdecHevcPicture; + +/* flags in RocdecHevcPicture could be OR of the following */ +#define RocdecHevcPicture_INVALID 0x00000001 +/** \brief indication of interlace scan picture. + * should take same value for all the pictures in sequence. + */ +#define RocdecHevcPicture_FIELD_PIC 0x00000002 +/** \brief polarity of the field picture. + * top field takes even lines of buffer surface.
+ * bottom field takes odd lines of buffer surface. + */ +#define RocdecHevcPicture_BOTTOM_FIELD 0x00000004 +/** \brief Long term reference picture */ +#define RocdecHevcPicture_LONG_TERM_REFERENCE 0x00000008 +/** + * RocdecHevcPicture_RPS_ST_CURR_BEFORE, RocdecHevcPicture_RPS_ST_CURR_AFTER + * and RocdecHevcPicture_RPS_LT_CURR of any picture in ReferenceFrames[] should + * be exclusive. No more than one of them can be set for any picture. + * Sum of NumPocStCurrBefore, NumPocStCurrAfter and NumPocLtCurr + * equals NumPocTotalCurr, which should be equal to or smaller than 8. + * Application should provide valid values for both short format and long format. + * The pictures in DPB with any of these three flags turned on are referred by + * the current picture. + */ +/** \brief RefPicSetStCurrBefore of HEVC spec variable + * Number of ReferenceFrames[] entries with this bit set equals + * NumPocStCurrBefore. + */ +#define RocdecHevcPicture_RPS_ST_CURR_BEFORE 0x00000010 +/** \brief RefPicSetStCurrAfter of HEVC spec variable + * Number of ReferenceFrames[] entries with this bit set equals + * NumPocStCurrAfter. + */ +#define RocdecHevcPicture_RPS_ST_CURR_AFTER 0x00000020 +/** \brief RefPicSetLtCurr of HEVC spec variable + * Number of ReferenceFrames[] entries with this bit set equals + * NumPocLtCurr. + */ +#define RocdecHevcPicture_RPS_LT_CURR 0x00000040 + +/***********************************************************/ +//! \struct RocdecJPEGPicParams placeholder +//! \ingroup group_amd_rocdecode +//! JPEG picture parameters +//! This structure is used in RocdecPicParams structure +/***********************************************************/ +typedef struct _RocdecJPEGPicParams +{ + int reserved; +} RocdecJPEGPicParams; + +/***********************************************************/ +//! \struct RocdecMpeg2QMatrix +//! \ingroup group_amd_rocdecode +//! MPEG2 QMatrix +//! 
This structure is used in _RocdecMpeg2PicParams structure +/***********************************************************/ +typedef struct _RocdecMpeg2QMatrix +{ + int32_t load_intra_quantiser_matrix; + int32_t load_non_intra_quantiser_matrix; + int32_t load_chroma_intra_quantiser_matrix; + int32_t load_chroma_non_intra_quantiser_matrix; + uint8_t intra_quantiser_matrix[64]; + uint8_t non_intra_quantiser_matrix[64]; + uint8_t chroma_intra_quantiser_matrix[64]; + uint8_t chroma_non_intra_quantiser_matrix[64]; +} RocdecMpeg2QMatrix; + +/***********************************************************/ +//! \struct RocdecMpeg2PicParams +//! \ingroup group_amd_rocdecode +//! MPEG2 picture parameters +//! This structure is used in RocdecMpeg2PicParams structure +/***********************************************************/ +typedef struct _RocdecMpeg2PicParams +{ + uint16_t horizontal_size; + uint16_t vertical_size; + uint32_t forward_reference_pic; // surface_id for forward reference + uint32_t backward_reference_picture; // surface_id for backward reference + /* meanings of the following fields are the same as in the standard */ + int32_t picture_coding_type; + int32_t f_code; /* pack all four fcode into this */ + union + { + struct + { + uint32_t intra_dc_precision : 2; + uint32_t picture_structure : 2; + uint32_t top_field_first : 1; + uint32_t frame_pred_frame_dct : 1; + uint32_t concealment_motion_vectors : 1; + uint32_t q_scale_type : 1; + uint32_t intra_vlc_format : 1; + uint32_t alternate_scan : 1; + uint32_t repeat_first_field : 1; + uint32_t progressive_frame : 1; + uint32_t is_first_field : 1; // indicate whether the current field is the first field + // for field picture + } bits; + uint32_t value; + } picture_coding_extension; + + RocdecMpeg2QMatrix q_matrix; + uint32_t reserved[4]; +} RocdecMpeg2PicParams; + +/***********************************************************/ +//! \struct RocdecVc1PicParams placeholder +//! \ingroup group_amd_rocdecode +//! 
VC1 picture parameters +//! This structure is used in RocdecVc1PicParams structure +/***********************************************************/ +typedef struct _RocdecVc1PicParams +{ + int reserved; +} RocdecVc1PicParams; + +/***********************************************************/ +//! \struct RocdecAvcPicParams +//! \ingroup group_amd_rocdecode +//! AVC picture parameters +//! This structure is used in RocdecAvcPicParams structure +//! This structure is configured to be the same as VA-API VAPictureParameterBufferH264 structure +/***********************************************************/ +typedef struct _RocdecAvcPicParams +{ + RocdecAvcPicture curr_pic; + RocdecAvcPicture ref_frames[16]; /* in DPB */ + uint16_t picture_width_in_mbs_minus1; + uint16_t picture_height_in_mbs_minus1; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t num_ref_frames; + union + { + struct + { + uint32_t chroma_format_idc : 2; + uint32_t residual_colour_transform_flag : 1; + uint32_t gaps_in_frame_num_value_allowed_flag : 1; + uint32_t frame_mbs_only_flag : 1; + uint32_t mb_adaptive_frame_field_flag : 1; + uint32_t direct_8x8_inference_flag : 1; + uint32_t MinLumaBiPredSize8x8 : 1; /* see A.3.3.2 */ + uint32_t log2_max_frame_num_minus4 : 4; + uint32_t pic_order_cnt_type : 2; + uint32_t log2_max_pic_order_cnt_lsb_minus4 : 4; + uint32_t delta_pic_order_always_zero_flag : 1; + } bits; + uint32_t value; + } seq_fields; + + // FMO/ASO + uint8_t num_slice_groups_minus1; + uint8_t slice_group_map_type; + uint16_t slice_group_change_rate_minus1; + int8_t pic_init_qp_minus26; + int8_t pic_init_qs_minus26; + int8_t chroma_qp_index_offset; + int8_t second_chroma_qp_index_offset; + union + { + struct + { + uint32_t entropy_coding_mode_flag : 1; + uint32_t weighted_pred_flag : 1; + uint32_t weighted_bipred_idc : 2; + uint32_t transform_8x8_mode_flag : 1; + uint32_t field_pic_flag : 1; + uint32_t constrained_intra_pred_flag : 1; + uint32_t pic_order_present_flag : 
1; + uint32_t deblocking_filter_control_present_flag : 1; + uint32_t redundant_pic_cnt_present_flag : 1; + uint32_t reference_pic_flag : 1; /* nal_ref_idc != 0 */ + } bits; + uint32_t value; + } pic_fields; + uint16_t frame_num; + + uint32_t reserved[8]; +} RocdecAvcPicParams; + +/***********************************************************/ +//! \struct RocdecAvcSliceParams +//! \ingroup group_amd_rocdecode +//! AVC slice parameter buffer +//! This structure is configured to be the same as VA-API VASliceParameterBufferH264 structure +/***********************************************************/ +typedef struct _RocdecAvcSliceParams +{ + uint32_t slice_data_size; // slice size in bytes + uint32_t slice_data_offset; // byte offset of the current slice in the slice data buffer + uint32_t slice_data_flag; /* see VA_SLICE_DATA_FLAG_XXX definitions */ + /** + * \brief Bit offset from NAL Header Unit to the beginning of slice_data(). + * + * This bit offset is relative to and includes the NAL unit byte + * and represents the number of bits parsed in the slice_header() + * after the removal of any emulation prevention bytes in + * there. However, the slice data buffer passed to the hardware is + * the original bitstream, thus including any emulation prevention + * bytes. 
+ */ + uint16_t slice_data_bit_offset; + uint16_t first_mb_in_slice; + uint8_t slice_type; + uint8_t direct_spatial_mv_pred_flag; + uint8_t num_ref_idx_l0_active_minus1; + uint8_t num_ref_idx_l1_active_minus1; + uint8_t cabac_init_idc; + int8_t slice_qp_delta; + uint8_t disable_deblocking_filter_idc; + int8_t slice_alpha_c0_offset_div2; + int8_t slice_beta_offset_div2; + RocdecAvcPicture ref_pic_list_0[32]; // 8.2.4.2 + RocdecAvcPicture ref_pic_list_1[32]; // 8.2.4.2 + uint8_t luma_log2_weight_denom; + uint8_t chroma_log2_weight_denom; + uint8_t luma_weight_l0_flag; + int16_t luma_weight_l0[32]; + int16_t luma_offset_l0[32]; + uint8_t chroma_weight_l0_flag; + int16_t chroma_weight_l0[32][2]; + int16_t chroma_offset_l0[32][2]; + uint8_t luma_weight_l1_flag; + int16_t luma_weight_l1[32]; + int16_t luma_offset_l1[32]; + uint8_t chroma_weight_l1_flag; + int16_t chroma_weight_l1[32][2]; + int16_t chroma_offset_l1[32][2]; + uint32_t reserved[4]; +} RocdecAvcSliceParams; + +/***********************************************************/ +//! \struct RocdecAvcIQMatrix +//! \ingroup group_amd_rocdecode +//! AVC Inverse Quantization Matrix +//! This structure is configured to be the same as VA-API VAIQMatrixBufferH264 structure +/***********************************************************/ +typedef struct _RocdecAvcIQMatrix +{ + /** \brief 4x4 scaling list, in raster scan order. */ + uint8_t scaling_list_4x4[6][16]; + /** \brief 8x8 scaling list, in raster scan order. */ + uint8_t scaling_list_8x8[2][64]; + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved[4]; +} RocdecAvcIQMatrix; + +/***********************************************************/ +//! \struct RocdecHevcPicParams +//! \ingroup group_amd_rocdecode +//! HEVC picture parameters +//! 
This structure is used in RocdecHevcPicParams structure +/***********************************************************/ +typedef struct _RocdecHevcPicParams +{ + RocdecHevcPicture curr_pic; + RocdecHevcPicture ref_frames[15]; /* reference frame list in DPB */ + uint16_t picture_width_in_luma_samples; + uint16_t picture_height_in_luma_samples; + union + { + struct + { + /** following flags have same syntax and semantic as those in HEVC spec */ + uint32_t chroma_format_idc : 2; + uint32_t separate_colour_plane_flag : 1; + uint32_t pcm_enabled_flag : 1; + uint32_t scaling_list_enabled_flag : 1; + uint32_t transform_skip_enabled_flag : 1; + uint32_t amp_enabled_flag : 1; + uint32_t strong_intra_smoothing_enabled_flag : 1; + uint32_t sign_data_hiding_enabled_flag : 1; + uint32_t constrained_intra_pred_flag : 1; + uint32_t cu_qp_delta_enabled_flag : 1; + uint32_t weighted_pred_flag : 1; + uint32_t weighted_bipred_flag : 1; + uint32_t transquant_bypass_enabled_flag : 1; + uint32_t tiles_enabled_flag : 1; + uint32_t entropy_coding_sync_enabled_flag : 1; + uint32_t pps_loop_filter_across_slices_enabled_flag : 1; + uint32_t loop_filter_across_tiles_enabled_flag : 1; + uint32_t pcm_loop_filter_disabled_flag : 1; + /** set based on sps_max_num_reorder_pics of current temporal layer. 
*/ + uint32_t no_pic_reordering_flag : 1; + /** picture has no B slices */ + uint32_t no_bi_pred_flag : 1; + uint32_t reserved_bits : 11; + } bits; + uint32_t value; + } pic_fields; + + /** SPS fields: the following parameters have same syntax with those in HEVC spec */ + uint8_t sps_max_dec_pic_buffering_minus1; /**< IN: DPB size for current temporal layer */ + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t pcm_sample_bit_depth_luma_minus1; + uint8_t pcm_sample_bit_depth_chroma_minus1; + uint8_t log2_min_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_luma_coding_block_size; + uint8_t log2_min_transform_block_size_minus2; + uint8_t log2_diff_max_min_transform_block_size; + uint8_t log2_min_pcm_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_pcm_luma_coding_block_size; + uint8_t max_transform_hierarchy_depth_intra; + uint8_t max_transform_hierarchy_depth_inter; + int8_t init_qp_minus26; + uint8_t diff_cu_qp_delta_depth; + int8_t pps_cb_qp_offset; + int8_t pps_cr_qp_offset; + uint8_t log2_parallel_merge_level_minus2; + uint8_t num_tile_columns_minus1; + uint8_t num_tile_rows_minus1; + /** + * when uniform_spacing_flag equals 1, application should populate + * column_width_minus1[], and row_height_minus1[] with appropriate values. 
+ */ + uint16_t column_width_minus1[19]; + uint16_t row_height_minus1[21]; + + union + { + struct + { + /** following parameters have same syntax with those in HEVC spec */ + uint32_t lists_modification_present_flag : 1; + uint32_t long_term_ref_pics_present_flag : 1; + uint32_t sps_temporal_mvp_enabled_flag : 1; + uint32_t cabac_init_present_flag : 1; + uint32_t output_flag_present_flag : 1; + uint32_t dependent_slice_segments_enabled_flag : 1; + uint32_t pps_slice_chroma_qp_offsets_present_flag : 1; + uint32_t sample_adaptive_offset_enabled_flag : 1; + uint32_t deblocking_filter_override_enabled_flag : 1; + uint32_t pps_disable_deblocking_filter_flag : 1; + uint32_t slice_segment_header_extension_present_flag : 1; + + /** current picture with NUT between 16 and 21 inclusive */ + uint32_t rap_pic_flag : 1; + /** current picture with NUT between 19 and 20 inclusive */ + uint32_t idr_pic_flag : 1; + /** current picture has only intra slices */ + uint32_t intra_pic_flag : 1; + + uint32_t reserved_bits : 18; + } bits; + uint32_t value; + } slice_parsing_fields; + + /** following parameters have same syntax with those in HEVC spec */ + uint8_t log2_max_pic_order_cnt_lsb_minus4; + uint8_t num_short_term_ref_pic_sets; + uint8_t num_long_term_ref_pic_sps; + uint8_t num_ref_idx_l0_default_active_minus1; + uint8_t num_ref_idx_l1_default_active_minus1; + int8_t pps_beta_offset_div2; + int8_t pps_tc_offset_div2; + uint8_t num_extra_slice_header_bits; + /** + * \brief number of bits that structure + * short_term_ref_pic_set( num_short_term_ref_pic_sets ) takes in slice + * segment header when short_term_ref_pic_set_sps_flag equals 0. + * if short_term_ref_pic_set_sps_flag equals 1, the value should be 0. + * the bit count is calculated after emulation prevention bytes are removed + * from bit streams. + * This variable is used for accelerator to skip parsing the + * short_term_ref_pic_set( num_short_term_ref_pic_sets ) structure. 
+ */ + uint32_t st_rps_bits; + uint32_t reserved[8]; +} RocdecHevcPicParams; + +/***********************************************************/ +//! \struct RocdecHevcSliceParams +//! \ingroup group_amd_rocdecode +//! HEVC slice parameters +//! This structure is used in RocdecPicParams structure +/***********************************************************/ +typedef struct _RocdecHevcSliceParams +{ + /** \brief Number of bytes in the slice data buffer for this slice + * counting from and including NAL unit header. + */ + uint32_t slice_data_size; + /** \brief The offset to the NAL unit header for this slice */ + uint32_t slice_data_offset; + /** \brief Slice data buffer flags. See \c VA_SLICE_DATA_FLAG_XXX. */ + uint32_t slice_data_flag; + /** + * \brief Byte offset from NAL unit header to the beginning of slice_data(). + * + * This byte offset is relative to and includes the NAL unit header + * and represents the number of bytes parsed in the slice_header() + * after the removal of any emulation prevention bytes in + * there. However, the slice data buffer passed to the hardware is + * the original bitstream, thus including any emulation prevention + * bytes. + */ + uint32_t slice_data_byte_offset; + /** HEVC syntax element. */ + uint32_t slice_segment_address; + /** \brief index into ReferenceFrames[] + * ref_pic_list[0][] corresponds to RefPicList0[] of HEVC variable. + * ref_pic_list[1][] corresponds to RefPicList1[] of HEVC variable. + * value range [0..14, 0xFF], where 0xFF indicates invalid entry. + */ + uint8_t ref_pic_list[2][15]; + union + { + uint32_t value; + struct + { + /** current slice is last slice of picture. */ + uint32_t last_slice_of_pic : 1; + /** HEVC syntax element. 
*/ + uint32_t dependent_slice_segment_flag : 1; + uint32_t slice_type : 2; + uint32_t color_plane_id : 2; + uint32_t slice_sao_luma_flag : 1; + uint32_t slice_sao_chroma_flag : 1; + uint32_t mvd_l1_zero_flag : 1; + uint32_t cabac_init_flag : 1; + uint32_t slice_temporal_mvp_enabled_flag : 1; + uint32_t slice_deblocking_filter_disabled_flag : 1; + uint32_t collocated_from_l0_flag : 1; + uint32_t slice_loop_filter_across_slices_enabled_flag : 1; + uint32_t reserved : 18; + } fields; + } long_slice_flags; + + /** HEVC syntax element. */ + uint8_t collocated_ref_idx; + uint8_t num_ref_idx_l0_active_minus1; + uint8_t num_ref_idx_l1_active_minus1; + int8_t slice_qp_delta; + int8_t slice_cb_qp_offset; + int8_t slice_cr_qp_offset; + int8_t slice_beta_offset_div2; + int8_t slice_tc_offset_div2; + uint8_t luma_log2_weight_denom; + int8_t delta_chroma_log2_weight_denom; + int8_t delta_luma_weight_l0[15]; + int8_t luma_offset_l0[15]; + int8_t delta_chroma_weight_l0[15][2]; + /** corresponds to HEVC spec variable of the same name. */ + int8_t chroma_offset_l0[15][2]; + /** HEVC syntax element. */ + int8_t delta_luma_weight_l1[15]; + int8_t luma_offset_l1[15]; + int8_t delta_chroma_weight_l1[15][2]; + /** corresponds to HEVC spec variable of the same name. */ + int8_t chroma_offset_l1[15][2]; + /** HEVC syntax element. */ + uint8_t five_minus_max_num_merge_cand; + uint16_t num_entry_point_offsets; + uint16_t entry_offset_to_subset_array; + /** \brief Number of emulation prevention bytes in slice header. */ + uint16_t slice_data_num_emu_prevn_bytes; + + uint32_t reserved[2]; +} RocdecHevcSliceParams; + +/***********************************************************/ +//! \struct RocdecHevcIQMatrix +//! \ingroup group_amd_rocdecode +//! HEVC IQMatrix +//! This structure is sent once per frame, +//! and only when scaling_list_enabled_flag = 1. +//! When sps_scaling_list_data_present_flag = 0, app still +//! needs to send in this structure with default matrix values. +//! 
This structure is used in RocdecHevcQMatrix structure +/***********************************************************/ +typedef struct _RocdecHevcIQMatrix +{ + /** + * \brief 4x4 scaling, + * corresponds to i = 0, MatrixID is in the range of 0 to 5, + * inclusive. And j is in the range of 0 to 15, inclusive. + */ + uint8_t scaling_list_4x4[6][16]; + /** + * \brief 8x8 scaling, + * corresponds to i = 1, MatrixID is in the range of 0 to 5, + * inclusive. And j is in the range of 0 to 63, inclusive. + */ + uint8_t scaling_list_8x8[6][64]; + /** + * \brief 16x16 scaling, + * corresponds to i = 2, MatrixID is in the range of 0 to 5, + * inclusive. And j is in the range of 0 to 63, inclusive. + */ + uint8_t scaling_list_16x16[6][64]; + /** + * \brief 32x32 scaling, + * corresponds to i = 3, MatrixID is in the range of 0 to 1, + * inclusive. And j is in the range of 0 to 63, inclusive. + */ + uint8_t scaling_list_32x32[2][64]; + /** + * \brief DC values of the 16x16 scaling lists, + * corresponds to HEVC spec syntax + * scaling_list_dc_coef_minus8[ sizeID - 2 ][ matrixID ] + 8 + * with sizeID = 2 and matrixID in the range of 0 to 5, inclusive. + */ + uint8_t scaling_list_dc_16x16[6]; + /** + * \brief DC values of the 32x32 scaling lists, + * corresponds to HEVC spec syntax + * scaling_list_dc_coef_minus8[ sizeID - 2 ][ matrixID ] + 8 + * with sizeID = 3 and matrixID in the range of 0 to 1, inclusive. + */ + uint8_t scaling_list_dc_32x32[2]; + uint32_t reserved[4]; +} RocdecHevcIQMatrix; + +/***********************************************************/ +//! \struct RocdecVp9PicParams +//! \ingroup group_amd_rocdecode +//! VP9 picture parameters +//! This structure is configured to be the same as VA-API VADecPictureParameterBufferVP9 structure. +/***********************************************************/ +typedef struct _RocdecVp9PicParams +{ + /** \brief picture width + * Picture original resolution. The value may not be multiple of 8. 
+ */ + uint16_t frame_width; + /** \brief picture height + * Picture original resolution. The value may not be multiple of 8. + */ + uint16_t frame_height; + + /** \brief Surface indices of reference frames in DPB. + * + * Each entry of the list specifies the surface index of the picture + * that is referred by current picture or will be referred by any future + * picture. + * Application who calls this API should update this list based on the + * refreshing information from VP9 bitstream. + */ + uint32_t reference_frames[8]; + + union + { + struct + { + /** \brief flags for current picture + * same syntax and semantic as those in VP9 code + */ + uint32_t subsampling_x : 1; + uint32_t subsampling_y : 1; + uint32_t frame_type : 1; + uint32_t show_frame : 1; + uint32_t error_resilient_mode : 1; + uint32_t intra_only : 1; + uint32_t allow_high_precision_mv : 1; + uint32_t mcomp_filter_type : 3; + uint32_t frame_parallel_decoding_mode : 1; + uint32_t reset_frame_context : 2; + uint32_t refresh_frame_context : 1; + uint32_t frame_context_idx : 2; + uint32_t segmentation_enabled : 1; + + /** \brief corresponds to variable temporal_update in VP9 code. + */ + uint32_t segmentation_temporal_update : 1; + /** \brief corresponds to variable update_mb_segmentation_map + * in VP9 code. + */ + uint32_t segmentation_update_map : 1; + + /** \brief Index of reference_frames[] and points to the + * LAST reference frame. + * It corresponds to active_ref_idx[0] in VP9 code. + */ + uint32_t last_ref_frame : 3; + /** \brief Sign Bias of the LAST reference frame. + * It corresponds to ref_frame_sign_bias[LAST_FRAME] in VP9 code. + */ + uint32_t last_ref_frame_sign_bias : 1; + /** \brief Index of reference_frames[] and points to the + * GOLDEN reference frame. + * It corresponds to active_ref_idx[1] in VP9 code. + */ + uint32_t golden_ref_frame : 3; + /** \brief Sign Bias of the GOLDEN reference frame. + * Corresponds to ref_frame_sign_bias[GOLDEN_FRAME] in VP9 code. 
+ */ + uint32_t golden_ref_frame_sign_bias : 1; + /** \brief Index of reference_frames[] and points to the + * ALTERNATE reference frame. + * Corresponds to active_ref_idx[2] in VP9 code. + */ + uint32_t alt_ref_frame : 3; + /** \brief Sign Bias of the ALTERNATE reference frame. + * Corresponds to ref_frame_sign_bias[ALTREF_FRAME] in VP9 code. + */ + uint32_t alt_ref_frame_sign_bias : 1; + /** \brief Lossless Mode + * LosslessFlag = base_qindex == 0 && + * y_dc_delta_q == 0 && + * uv_dc_delta_q == 0 && + * uv_ac_delta_q == 0; + * Where base_qindex, y_dc_delta_q, uv_dc_delta_q and uv_ac_delta_q + * are all variables in VP9 code. + */ + uint32_t lossless_flag : 1; + } bits; + uint32_t value; + } pic_fields; + + /* following parameters have same syntax with those in VP9 code */ + uint8_t filter_level; + uint8_t sharpness_level; + + /** \brief number of tile rows specified by (1 << log2_tile_rows). + * It corresponds to the variable with the same name in VP9 code. + */ + uint8_t log2_tile_rows; + /** \brief number of tile columns specified by (1 << log2_tile_columns). + * It corresponds to the variable with the same name in VP9 code. + */ + uint8_t log2_tile_columns; + /** \brief Number of bytes taken up by the uncompressed frame header, + * which corresponds to byte length of function + * read_uncompressed_header() in VP9 code. + * Specifically, it is the byte count from bit stream buffer start to + * the last byte of uncompressed frame header. + * If there are other meta data in the buffer before uncompressed header, + * its size should be also included here. + */ + uint8_t frame_header_length_in_bytes; + + /** \brief The byte count of compressed header in the bitstream buffer, + * which corresponds to syntax first_partition_size in code. + */ + uint16_t first_partition_size; + + /** These values are segment probabilities with same names in VP9 + * function setup_segmentation(). They should be parsed directly from + * bitstream by application. 
+ */ + uint8_t mb_segment_tree_probs[7]; + uint8_t segment_pred_probs[3]; + + /** \brief VP9 Profile definition + * value range [0..3]. + */ + uint8_t profile; + + /** \brief VP9 bit depth per sample + * same for both luma and chroma samples. + */ + uint8_t bit_depth; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t va_reserved[8]; + +} RocdecVp9PicParams; + +/** + * \brief VP9 Segmentation Parameter Data Structure + * This structure is configured to be the same as VA-API VASegmentParameterVP9 structure. + */ +typedef struct _RocdecVp9SegmentParameter +{ + union + { + struct + { + /** \brief Indicates if per segment reference frame indicator + * is enabled. + * Corresponding to variable feature_enabled when + * j == SEG_LVL_REF_FRAME in function setup_segmentation() VP9 code. + */ + uint16_t segment_reference_enabled : 1; + /** \brief Specifies per segment reference indication. + * 0: reserved + * 1: Last ref + * 2: golden + * 3: altref + * Value can be derived from variable data when + * j == SEG_LVL_REF_FRAME in function setup_segmentation() VP9 code. + */ + uint16_t segment_reference : 2; + /** \brief Indicates if per segment skip feature is enabled. + * Corresponding to variable feature_enabled when + * j == SEG_LVL_SKIP in function setup_segmentation() VP9 code. + */ + uint16_t segment_reference_skipped : 1; + } fields; + uint16_t value; + } segment_flags; + + /** \brief Specifies the filter level information per segment. + * The value corresponds to variable lfi->lvl[seg][ref][mode] in VP9 code, + * where m is [ref], and n is [mode] in FilterLevel[m][n]. + */ + uint8_t filter_level[4][2]; + /** \brief Specifies per segment Luma AC quantization scale. + * Corresponding to y_dequant[qindex][1] in vp9_mb_init_quantizer() + * function of VP9 code. + */ + int16_t luma_ac_quant_scale; + /** \brief Specifies per segment Luma DC quantization scale. + * Corresponding to y_dequant[qindex][0] in vp9_mb_init_quantizer() + * function of VP9 code. 
+ */ + int16_t luma_dc_quant_scale; + /** \brief Specifies per segment Chroma AC quantization scale. + * Corresponding to uv_dequant[qindex][1] in vp9_mb_init_quantizer() + * function of VP9 code. + */ + int16_t chroma_ac_quant_scale; + /** \brief Specifies per segment Chroma DC quantization scale. + * Corresponding to uv_dequant[qindex][0] in vp9_mb_init_quantizer() + * function of VP9 code. + */ + int16_t chroma_dc_quant_scale; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t va_reserved[4]; + +} RocdecVp9SegmentParameter; + +/***********************************************************/ +//! \struct RocdecVp9SliceParams +//! \ingroup group_amd_rocdecode +//! VP9 slice parameter buffer +//! This structure is configured to be the same as VA-API VASliceParameterBufferVP9 structure. +/***********************************************************/ +typedef struct _RocdecVp9SliceParams +{ + /** \brief The byte count of current frame in the bitstream buffer, + * starting from first byte of the buffer. + * It uses the name slice_data_size to be consistent with other codecs, + * but actually means frame_data_size. + */ + uint32_t slice_data_size; + /** + * offset to the first byte of partition data (control partition) + */ + uint32_t slice_data_offset; + /** + * see VA_SLICE_DATA_FLAG_XXX definitions + */ + uint32_t slice_data_flag; + + /** + * \brief per segment information + */ + RocdecVp9SegmentParameter seg_param[8]; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t va_reserved[4]; + +} RocdecVp9SliceParams; + +/** \brief Segmentation Information for AV1 + */ +typedef struct _RocdecAv1SegmentationStruct +{ + union + { + struct + { + /** Indicates whether segmentation map related syntax elements + * are present or not for current frame. 
If equal to 0, + * the segmentation map related syntax elements are + * not present for the current frame and the control flags of + * segmentation map related tables feature_data[][], and + * feature_mask[] are not valid and shall be ignored by accelerator. + */ + uint32_t enabled : 1; + /** Value 1 indicates that the segmentation map are updated + * during the decoding of this frame. + * Value 0 means that the segmentation map from the previous + * frame is used. + */ + uint32_t update_map : 1; + /** Value 1 indicates that the updates to the segmentation map + * are coded relative to the existing segmentation map. + * Value 0 indicates that the new segmentation map is coded + * without reference to the existing segmentation map. + */ + uint32_t temporal_update : 1; + /** Value 1 indicates that new parameters are about to be + * specified for each segment. + * Value 0 indicates that the segmentation parameters + * should keep their existing values. + */ + uint32_t update_data : 1; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved : 28; + } bits; + uint32_t value; + } segment_info_fields; + + /** \brief Segmentation parameters for current frame. + * feature_data[segment_id][feature_id] + * where segment_id has value range [0..7] indicating the segment id. + * and feature_id is defined as + typedef enum { + SEG_LVL_ALT_Q, // Use alternate Quantizer .... + SEG_LVL_ALT_LF_Y_V, // Use alternate loop filter value on y plane vertical + SEG_LVL_ALT_LF_Y_H, // Use alternate loop filter value on y plane horizontal + SEG_LVL_ALT_LF_U, // Use alternate loop filter value on u plane + SEG_LVL_ALT_LF_V, // Use alternate loop filter value on v plane + SEG_LVL_REF_FRAME, // Optional Segment reference frame + SEG_LVL_SKIP, // Optional Segment (0,0) + skip mode + SEG_LVL_GLOBALMV, + SEG_LVL_MAX + } SEG_LVL_FEATURES; + * feature_data[][] is equivalent to variable FeatureData[][] in spec, + * which is after clip3() operation. 
+ * Clip3(x, y, z) = (z < x)? x : ((z > y)? y : z); + * The limit is defined in Segmentation_Feature_Max[ SEG_LVL_MAX ] = { + * 255, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, 7, 0, 0 } + */ + int16_t feature_data[8][8]; + + /** \brief indicates if a feature is enabled or not. + * Each bit field itself is the feature_id. Index is segment_id. + * feature_mask[segment_id] & (1 << feature_id) equal to 1 specify that the feature of + * feature_id for segment of segment_id is enabled, otherwise disabled. + */ + uint8_t feature_mask[8]; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved[4]; +} RocdecAv1SegmentationStruct; + +/** \brief Film Grain Information for AV1 + */ +typedef struct _RocdecAv1FilmGrainStruct +{ + union + { + struct + { + /** \brief Specify whether or not film grain is applied on current frame. + * If set to 0, all the rest parameters should be set to zero + * and ignored. + */ + uint32_t apply_grain : 1; + uint32_t chroma_scaling_from_luma : 1; + uint32_t grain_scaling_minus_8 : 2; + uint32_t ar_coeff_lag : 2; + uint32_t ar_coeff_shift_minus_6 : 2; + uint32_t grain_scale_shift : 2; + uint32_t overlap_flag : 1; + uint32_t clip_to_restricted_range : 1; + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved : 20; + } bits; + uint32_t value; + } film_grain_info_fields; + + uint16_t grain_seed; + /* value range [0..14] */ + uint8_t num_y_points; + uint8_t point_y_value[14]; + uint8_t point_y_scaling[14]; + /* value range [0..10] */ + uint8_t num_cb_points; + uint8_t point_cb_value[10]; + uint8_t point_cb_scaling[10]; + /* value range [0..10] */ + uint8_t num_cr_points; + uint8_t point_cr_value[10]; + uint8_t point_cr_scaling[10]; + /* value range [-128..127] */ + int8_t ar_coeffs_y[24]; + int8_t ar_coeffs_cb[25]; + int8_t ar_coeffs_cr[25]; + uint8_t cb_mult; + uint8_t cb_luma_mult; + uint16_t cb_offset; + uint8_t cr_mult; + uint8_t cr_luma_mult; + uint16_t cr_offset; + + /** 
\brief Reserved bytes for future use, must be zero */ + uint32_t reserved[4]; +} RocdecAv1FilmGrainStruct; + +typedef enum +{ + /** identity transformation, 0-parameter */ + RocdecAv1TransformationIdentity = 0, + /** translational motion, 2-parameter */ + RocdecAv1TransformationTranslation = 1, + /** simplified affine with rotation + zoom only, 4-parameter */ + RocdecAv1TransformationRotzoom = 2, + /** affine, 6-parameter */ + RocdecAv1TransformationAffine = 3, + /** transformation count */ + RocdecAv1TransformationCount +} RocdecAv1TransformationType; + +typedef struct _RocdecAv1WarpedMotionParams +{ + /** \brief Specify the type of warped motion */ + RocdecAv1TransformationType wmtype; + + /** \brief Specify warp motion parameters + * wm.wmmat[] corresponds to gm_params[][] in spec. + * Details in AV1 spec section 5.9.24 or refer to libaom code + * https://aomedia.googlesource.com/aom/+/refs/heads/master/av1/decoder/decodeframe.c + */ + int32_t wmmat[8]; + + /* valid or invalid on affine set */ + uint8_t invalid; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved[4]; +} RocdecAv1WarpedMotionParams; + +/***********************************************************/ +//! \struct RocdecAv1PicParams +//! \ingroup group_amd_rocdecode +//! AV1 picture parameters +//! This structure is used in RocdecPicParams structure +/***********************************************************/ +typedef struct _RocdecAV1PicParams +{ + /** \brief sequence level information + */ + + /** \brief AV1 bit stream profile + */ + uint8_t profile; + + uint8_t order_hint_bits_minus_1; + + /** \brief bit depth index + * value range [0..2] + * 0 - bit depth 8; + * 1 - bit depth 10; + * 2 - bit depth 12; + */ + uint8_t bit_depth_idx; + + /** \brief corresponds to AV1 spec variable of the same name. 
*/ + uint8_t matrix_coefficients; + + union + { + struct + { + uint32_t still_picture : 1; + uint32_t use_128x128_superblock : 1; + uint32_t enable_filter_intra : 1; + uint32_t enable_intra_edge_filter : 1; + + /** read_compound_tools */ + uint32_t enable_interintra_compound : 1; + uint32_t enable_masked_compound : 1; + + uint32_t enable_dual_filter : 1; + uint32_t enable_order_hint : 1; + uint32_t enable_jnt_comp : 1; + uint32_t enable_cdef : 1; + uint32_t mono_chrome : 1; + uint32_t color_range : 1; + uint32_t subsampling_x : 1; + uint32_t subsampling_y : 1; + uint32_t chroma_sample_position : 1; + uint32_t film_grain_params_present : 1; + /** \brief Reserved bits for future use, must be zero */ + uint32_t reserved : 16; + } fields; + uint32_t value; + } seq_info_fields; + + /** \brief Picture level information + */ + + /** \brief buffer description of decoded current picture + */ + int current_frame; + + /** \brief display buffer of current picture + * Used for film grain applied decoded picture. + * Valid only when apply_grain equals 1. + */ + int current_display_picture; + + /** \brief number of anchor frames for large scale tile + * This parameter gives the number of entries of anchor_frames_list[]. + * Value range [0..128]. + */ + uint8_t anchor_frames_num; + + /** \brief anchor frame list for large scale tile + * For large scale tile applications, the anchor frames could come from + * previously decoded frames in current sequence (aka. internal), or + * from external sources. + * For external anchor frames, application should call API + * vaCreateBuffer() to generate frame buffers and populate them with + * pixel frames. And this process may happen multiple times. + * The array anchor_frames_list[] is used to register all the available + * anchor frames from both external and internal, up to the current + * frame instance. If a previously registered anchor frame is no longer + * needed, it should be removed from the list. 
But it does not prevent + * applications from replacing the frame buffer with new anchor frames. + * Please note that the internal anchor frames may not still be present + * in the current DPB buffer. But if it is in the anchor_frames_list[], + * it should not be replaced with other frames or removed from memory + * until it is not shown in the list. + * The number of entries of the list is given by parameter anchor_frames_num. + */ + int* anchor_frames_list; + + /** \brief Picture resolution minus 1 + * Picture original resolution. If SuperRes is enabled, + * this is the upscaled resolution. + * value range [0..65535] + */ + uint16_t frame_width_minus1; + uint16_t frame_height_minus1; + + /** \brief Output frame buffer size in unit of tiles + * Valid only when large_scale_tile equals 1. + * value range [0..65535] + */ + uint16_t output_frame_width_in_tiles_minus_1; + uint16_t output_frame_height_in_tiles_minus_1; + + /** \brief Surface indices of reference frames in DPB. + * + * Contains a list of uncompressed frame buffer surface indices as references. + * Application needs to make sure all the entries point to valid frames + * except for intra frames by checking ref_frame_id[]. If missing frame + * is identified, application may choose to perform error recovery by + * pointing problematic index to an alternative frame buffer. + * Driver is not responsible to validate reference frames' id. + */ + int ref_frame_map[8]; + + /** \brief Reference frame indices. + * + * Contains a list of indices into ref_frame_map[8]. + * It specifies the reference frame correspondence. 
+ * The indices of the array are defined as [LAST_FRAME - LAST_FRAME, + * LAST2_FRAME - LAST_FRAME, ..., ALTREF_FRAME - LAST_FRAME], where each + * symbol is defined as: + * enum{INTRA_FRAME = 0, LAST_FRAME, LAST2_FRAME, LAST3_FRAME, GOLDEN_FRAME, + * BWDREF_FRAME, ALTREF2_FRAME, ALTREF_FRAME}; + */ + uint8_t ref_frame_idx[7]; + + /** \brief primary reference frame index + * Index into ref_frame_idx[], specifying which reference frame contains + * propagated info that should be loaded at the start of the frame. + * When value equals PRIMARY_REF_NONE (7), it indicates there is + * no primary reference frame. + * value range [0..7] + */ + uint8_t primary_ref_frame; + uint8_t order_hint; + + RocdecAv1SegmentationStruct seg_info; + RocdecAv1FilmGrainStruct film_grain_info; + + /** \brief tile structure + * When uniform_tile_spacing_flag == 1, width_in_sbs_minus_1[] and + * height_in_sbs_minus_1[] should be ignored, which will be generated + * by driver based on tile_cols and tile_rows. + */ + uint8_t tile_cols; + uint8_t tile_rows; + + /* The width/height of a tile minus 1 in units of superblocks. Though the + * maximum number of tiles is 64, since the values for the last tile are computed + * from those of the other tiles and frame_width/height, they are not + * necessarily specified. + */ + uint16_t width_in_sbs_minus_1[63]; + uint16_t height_in_sbs_minus_1[63]; + + /** \brief number of tiles minus 1 in large scale tile list + * Same as AV1 semantic element. + * Valid only when large_scale_tile == 1. 
+ */ + uint16_t tile_count_minus_1; + + /* specify the tile index for context updating */ + uint16_t context_update_tile_id; + + union + { + /** \brief flags for current picture + * same syntax and semantic as those in AV1 code + */ + struct + { + /** \brief Frame Type: + * 0: KEY_FRAME; + * 1: INTER_FRAME; + * 2: INTRA_ONLY_FRAME; + * 3: SWITCH_FRAME + * For SWITCH_FRAME, application shall set error_resilient_mode = 1, + * refresh_frame_flags, etc. appropriately. 
And driver will convert it + * to INTER_FRAME. + */ + uint32_t frame_type : 2; + uint32_t show_frame : 1; + uint32_t showable_frame : 1; + uint32_t error_resilient_mode : 1; + uint32_t disable_cdf_update : 1; + uint32_t allow_screen_content_tools : 1; + uint32_t force_integer_mv : 1; + uint32_t allow_intrabc : 1; + uint32_t use_superres : 1; + uint32_t allow_high_precision_mv : 1; + uint32_t is_motion_mode_switchable : 1; + uint32_t use_ref_frame_mvs : 1; + /* disable_frame_end_update_cdf is coded as refresh_frame_context. */ + uint32_t disable_frame_end_update_cdf : 1; + uint32_t uniform_tile_spacing_flag : 1; + uint32_t allow_warped_motion : 1; + /** \brief indicates if current frame is in large scale tile mode */ + uint32_t large_scale_tile : 1; + + /** \brief Reserved bits for future use, must be zero */ + uint32_t reserved : 15; + } bits; + uint32_t value; + } pic_info_fields; + + /** \brief Super resolution scale denominator. + * When use_superres=1, superres_scale_denominator must be in the range [9..16]. + * When use_superres=0, superres_scale_denominator must be 8. + */ + uint8_t superres_scale_denominator; + + /** \brief Interpolation filter. + * value range [0..4] + */ + uint8_t interp_filter; + + /** \brief luma loop filter levels. + * value range [0..63]. + */ + uint8_t filter_level[2]; + + /** \brief chroma loop filter levels. + * value range [0..63]. + */ + uint8_t filter_level_u; + uint8_t filter_level_v; + + union + { + struct + { + /** \brief flags for reference pictures + * same syntax and semantic as those in AV1 code + */ + uint8_t sharpness_level : 3; + uint8_t mode_ref_delta_enabled : 1; + uint8_t mode_ref_delta_update : 1; + + /** \brief Reserved bits for future use, must be zero */ + uint8_t reserved : 3; + } bits; + uint8_t value; + } loop_filter_info_fields; + + /** \brief The adjustment needed for the filter level based on + * the chosen reference frame. + * value range [-64..63]. 
+ */ + int8_t ref_deltas[8]; + + /** \brief The adjustment needed for the filter level based on + * the chosen mode. + * value range [-64..63]. + */ + int8_t mode_deltas[2]; + + /** \brief quantization + */ + /** \brief Y AC index + * value range [0..255] + */ + uint8_t base_qindex; + /** \brief Y DC delta from Y AC + * value range [-64..63] + */ + int8_t y_dc_delta_q; + /** \brief U DC delta from Y AC + * value range [-64..63] + */ + int8_t u_dc_delta_q; + /** \brief U AC delta from Y AC + * value range [-64..63] + */ + int8_t u_ac_delta_q; + /** \brief V DC delta from Y AC + * value range [-64..63] + */ + int8_t v_dc_delta_q; + /** \brief V AC delta from Y AC + * value range [-64..63] + */ + int8_t v_ac_delta_q; + + /** \brief quantization_matrix + */ + union + { + struct + { + uint16_t using_qmatrix : 1; + /** \brief qm level + * value range [0..15] + * Invalid if using_qmatrix equals 0. + */ + uint16_t qm_y : 4; + uint16_t qm_u : 4; + uint16_t qm_v : 4; + + /** \brief Reserved bits for future use, must be zero */ + uint16_t reserved : 3; + } bits; + uint16_t value; + } qmatrix_fields; + + union + { + struct + { + /** \brief delta_q parameters + */ + uint32_t delta_q_present_flag : 1; + uint32_t log2_delta_q_res : 2; + + /** \brief delta_lf parameters + */ + uint32_t delta_lf_present_flag : 1; + uint32_t log2_delta_lf_res : 2; + + /** \brief CONFIG_LOOPFILTER_LEVEL + */ + uint32_t delta_lf_multi : 1; + + /** \brief read_tx_mode + * value range [0..2] + */ + uint32_t tx_mode : 2; + + /* AV1 frame reference mode semantic */ + uint32_t reference_select : 1; + + uint32_t reduced_tx_set_used : 1; + + uint32_t skip_mode_present : 1; + + /** \brief Reserved bits for future use, must be zero */ + uint32_t reserved : 20; + } bits; + uint32_t value; + } mode_control_fields; + + /** \brief CDEF parameters + */ + /* value range [0..3] */ + uint8_t cdef_damping_minus_3; + /* value range [0..3] */ + uint8_t cdef_bits; + + /** Encode cdef strength: + * + * The 
cdef_y_strengths[] and cdef_uv_strengths[] are expected to be packed + * with both primary and secondary strength. The secondary strength is + * given in the lower two bits and the primary strength is given in the next + * four bits. + * + * cdef_y_strengths[] & cdef_uv_strengths[] should be derived as: + * (cdef_y_strengths[]) = (cdef_y_pri_strength[] << 2) | (cdef_y_sec_strength[] & 0x03) + * (cdef_uv_strengths[]) = (cdef_uv_pri_strength[] << 2) | (cdef_uv_sec_strength[] & 0x03) + * In which, + * cdef_y_pri_strength[]/cdef_y_sec_strength[]/cdef_uv_pri_strength[]/cdef_uv_sec_strength[] are + * variables defined in AV1 Spec 5.9.19. The cdef_y_strengths[] & cdef_uv_strengths[] + * correspond to LIBAOM variables cm->cdef_strengths[] & cm->cdef_uv_strengths[] + * respectively. + */ + /* value range [0..63] */ + uint8_t cdef_y_strengths[8]; + /* value range [0..63] */ + uint8_t cdef_uv_strengths[8]; + + /** \brief loop restoration parameters + */ + union + { + struct + { + uint16_t yframe_restoration_type : 2; + uint16_t cbframe_restoration_type : 2; + uint16_t crframe_restoration_type : 2; + uint16_t lr_unit_shift : 2; + uint16_t lr_uv_shift : 1; + + /** \brief Reserved bits for future use, must be zero */ + uint16_t reserved : 7; + } bits; + uint16_t value; + } loop_restoration_fields; + + /** \brief global motion + */ + RocdecAv1WarpedMotionParams wm[7]; + + /**@}*/ + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved[8]; +} RocdecAv1PicParams; + +/***********************************************************/ +//! \struct RocdecAv1SliceParams +//! \ingroup group_amd_rocdecode +//! AV1 slice parameter buffer +//! This structure is configured to be the same as VA-API VASliceParameterBufferAV1 structure. +//! This structure conveys parameters related to bit stream data and should be sent once per tile. +//! It uses the name RocdecAv1SliceParams to be consistent with other codecs, but actually means +//! RocdecTileParameterAV1. 
Slice data buffer of VASliceDataBufferType is used to send the +//! bitstream. +/***********************************************************/ +typedef struct _RocdecAv1SliceParams +{ + /** \brief The byte count of current tile in the bitstream buffer, + * starting from first byte of the buffer. + * It uses the name slice_data_size to be consistent with other codecs, + * but actually means tile_data_size. + */ + uint32_t slice_data_size; + /** + * offset to the first byte of the data buffer. + */ + uint32_t slice_data_offset; + /** + * see VA_SLICE_DATA_FLAG_XXX definitions + */ + uint32_t slice_data_flag; + + uint16_t tile_row; + uint16_t tile_column; + + uint16_t tg_start; + uint16_t tg_end; + + /** \brief anchor frame index for large scale tile. + * index into an array AnchorFrames of the frames that the tile uses + * for prediction. + * valid only when large_scale_tile equals 1. + */ + uint8_t anchor_frame_idx; + + /** \brief tile index in the tile list. + * Valid only when large_scale_tile is enabled. + * Driver uses this field to decide the tile output location. + */ + uint16_t tile_idx_in_tile_list; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved[4]; +} RocdecAv1SliceParams; + +/******************************************************************************************/ +//! \struct _RocdecPicParams +//! \ingroup group_amd_rocdecode +//! Picture parameters for decoding +//! This structure is used in rocDecDecodeFrame API +//! 
IN for rocDecDecodeFrame +/******************************************************************************************/ +typedef struct _RocdecPicParams +{ + int pic_width; /**< IN: Coded frame width */ + int pic_height; /**< IN: Coded frame height */ + int curr_pic_idx; /**< IN: Output index of the current picture */ + int field_pic_flag; /**< IN: 0=frame picture, 1=field picture */ + int bottom_field_flag; /**< IN: 0=top field, 1=bottom field (ignored if field_pic_flag=0) */ + int second_field; /**< IN: Second field of a complementary field pair */ + // Bitstream data + uint32_t bitstream_data_len; /**< IN: Number of bytes in bitstream data buffer */ + const uint8_t* bitstream_data; /**< IN: Ptr to bitstream data for this picture (slice-layer) */ + uint32_t num_slices; /**< IN: Number of slices in this picture */ + + int ref_pic_flag; /**< IN: This picture is a reference picture */ + int intra_pic_flag; /**< IN: This picture is entirely intra coded */ + uint32_t reserved[30]; /**< Reserved for future use */ + + // IN: Codec-specific data + union + { + RocdecMpeg2PicParams mpeg2; /**< Also used for MPEG-1 */ + RocdecAvcPicParams avc; + RocdecHevcPicParams hevc; + RocdecVc1PicParams vc1; + RocdecJPEGPicParams jpeg; + RocdecVp9PicParams vp9; + RocdecAv1PicParams av1; + uint32_t codec_reserved[256]; + } pic_params; + + /*! \brief Variable size array. The user should allocate one slice param struct for each slice. + */ + union + { + // Todo: Add slice params defines for other codecs. + RocdecAvcSliceParams* avc; + RocdecHevcSliceParams* hevc; + RocdecVp9SliceParams* vp9; + RocdecAv1SliceParams* av1; + } slice_params; + + union + { + // Todo: Add IQ matrix defines for other codecs. + RocdecAvcIQMatrix avc; + RocdecHevcIQMatrix hevc; + } iq_matrix; +} RocdecPicParams; + +/******************************************************/ +//! \struct RocdecProcParams +//! \ingroup group_amd_rocdecode +//! Picture parameters for postprocessing +//! 
This structure is used in rocDecGetVideoFrame API +/******************************************************/ +typedef struct _RocdecProcParams +{ + int progressive_frame; /**< IN: Input is progressive (deinterlace_mode will be ignored) */ + int top_field_first; /**< IN: Input frame is top field first (1st field is top, 2nd field is + bottom) */ + uint32_t reserved_flags[2]; /**< Reserved for future use (set to zero) */ + + // The fields below are used for raw YUV input + uint64_t raw_input_dptr; /**< IN: Input HIP device ptr for raw YUV extensions */ + uint32_t raw_input_pitch; /**< IN: pitch in bytes of raw YUV input (should be aligned + appropriately) */ + uint32_t raw_input_format; /**< IN: Input YUV format (rocDecVideoCodec_enum) */ + uint64_t raw_output_dptr; /**< IN: Output HIP device mem ptr for raw YUV extensions */ + uint32_t raw_output_pitch; /**< IN: pitch in bytes of raw YUV output (should be aligned + appropriately) */ + uint32_t raw_output_format; /**< IN: Output YUV format (rocDecVideoCodec_enum) */ + uint32_t reserved[16]; /**< Reserved for future use (set to zero) */ +} RocdecProcParams; + +/*****************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecCreateDecoder(rocDecDecoderHandle *decoder_handle, RocDecoderCreateInfo *decoder_create_info) +//! \ingroup group_amd_rocdecode +//! Create the decoder object based on decoder_create_info. A handle to the created decoder is returned. +/*****************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle, RocDecoderCreateInfo* decoder_create_info); + +/*****************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecDestroyDecoder(rocDecDecoderHandle decoder_handle) +//! \ingroup group_amd_rocdecode +//! 
Destroy the decoder object +/*****************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecDestroyDecoder(rocDecDecoderHandle decoder_handle); + +/**********************************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecGetDecoderCaps(RocdecDecodeCaps *decode_caps) +//! \ingroup group_amd_rocdecode +//! Queries decode capabilities of AMD's VCN decoder based on codec type, chroma_format and +//! BitDepthMinus8 parameters. +//! 1. Application fills IN parameters codec_type, chroma_format and BitDepthMinus8 of +//! RocdecDecodeCaps structure +//! 2. On calling rocDecGetDecoderCaps, driver fills OUT parameters (for GPU device) if the IN +//! parameters are supported +//! If IN parameters passed to the driver are not supported by AMD-VCN-HW, then all OUT params +//! are set to 0. +/**********************************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetDecoderCaps(RocdecDecodeCaps* decode_caps); + +/*****************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecDecodeFrame(rocDecDecoderHandle decoder_handle, RocdecPicParams *pic_params) +//! \ingroup group_amd_rocdecode +//! Decodes a single picture. Submits the frame for HW decoding. +/*****************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecDecodeFrame(rocDecDecoderHandle decoder_handle, RocdecPicParams* pic_params); + +/************************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecGetDecodeStatus(rocDecDecoderHandle decoder_handle, int pic_idx, RocdecDecodeStatus* decode_status) +//! 
\ingroup group_amd_rocdecode +//! Get the decode status for frame corresponding to pic_idx. API is currently supported for HEVC, AVC/H264 and JPEG +//! codecs. API returns ROCDEC_NOT_SUPPORTED error code for unsupported GPU or codec. +/************************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetDecodeStatus(rocDecDecoderHandle decoder_handle, + int pic_idx, + RocdecDecodeStatus* decode_status); + +/*********************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecReconfigureDecoder(rocDecDecoderHandle decoder_handle, RocdecReconfigureDecoderInfo *reconfig_params) +//! \ingroup group_amd_rocdecode +//! Used to reuse single decoder for multiple clips. Currently supports resolution change, +//! resize params, target area params change for same codec. +//! Must be called during RocdecParserParams::pfn_sequence_callback +/*********************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecReconfigureDecoder(rocDecDecoderHandle decoder_handle, + RocdecReconfigureDecoderInfo* reconfig_params); + +/************************************************************************************************************************/ +//! \fn extern rocDecStatus ROCDECAPI rocDecGetVideoFrame(rocDecDecoderHandle decoder_handle, int +//! pic_idx, +//! void *dev_mem_ptr[3], uint32_t *horizontal_pitch, +//! RocdecProcParams *vid_postproc_params); +//! \ingroup group_amd_rocdecode +//! Post-process and map video frame corresponding to pic_idx for use in HIP. Returns HIP device +//! pointer and associated pitch (horizontal stride) of the video frame. Returns device memory +//! pointers and pitch for each plane (Y, U and V) separately. horizontal_pitch is a pointer to an +//! 
unsigned 32-bit integer array of size 3. +/************************************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetVideoFrame(rocDecDecoderHandle decoder_handle, + int pic_idx, + void* dev_mem_ptr[3], + uint32_t* horizontal_pitch, + RocdecProcParams* vid_postproc_params); + +/*****************************************************************************************************/ +//! \fn const char* ROCDECAPI rocDecGetErrorName(rocDecStatus rocdec_status) +//! \ingroup group_amd_rocdecode +//! Returns the name of the specified error code in text form. +/*****************************************************************************************************/ +extern const char* ROCDECAPI +rocDecGetErrorName(rocDecStatus rocdec_status); + +#ifdef __cplusplus +} +#endif diff --git a/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_api_trace.h b/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_api_trace.h new file mode 100644 index 0000000000..a71a57cd43 --- /dev/null +++ b/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_api_trace.h @@ -0,0 +1,157 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once + +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() && __has_include() && __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +# include +# include +#else +# include +# include +# include +#endif + +// Define version macros for the rocDecode API dispatch table, specifying the MAJOR and STEP +// versions. +// +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! IMPORTANT !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// +// 1. When adding new functions to the rocDecode API dispatch table, always append the new function +// pointer +// to the end of the table and increment the dispatch table's version number. Never rearrange the +// order of the member variables in the dispatch table, as doing so will break the Application +// Binary Interface (ABI). +// 2. In critical situations where the type of an existing member variable in a dispatch table has +// been changed +// or removed due to a data type modification, it is important to increment the major version +// number of the rocDecode API dispatch table. If the function pointer type can no longer be +// declared, do not remove it. Instead, change the function pointer type to `void*` and ensure it +// is always initialized to `nullptr`. 
+// +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// + +// The major version number should ideally remain unchanged. Increment the +// ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION only for fundamental changes to the +// RocDecodeDispatchTable struct, such as altering the type or name of an existing member variable. +// Please DO NOT REMOVE it. +#define ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION 0 + +// Increment the ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION when new runtime API functions are added. +// If the corresponding ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION increases, reset the +// ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION to zero. +#define ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION 1 + +// rocDecode API interface +typedef rocDecStatus(ROCDECAPI* PfnRocDecCreateVideoParser)(RocdecVideoParser* parser_handle, + RocdecParserParams* params); +typedef rocDecStatus(ROCDECAPI* PfnRocDecParseVideoData)(RocdecVideoParser parser_handle, + RocdecSourceDataPacket* packet); +typedef rocDecStatus(ROCDECAPI* PfnRocDecDestroyVideoParser)(RocdecVideoParser parser_handle); +typedef rocDecStatus(ROCDECAPI* PfnRocDecCreateDecoder)(rocDecDecoderHandle* decoder_handle, + RocDecoderCreateInfo* decoder_create_info); +typedef rocDecStatus(ROCDECAPI* PfnRocDecDestroyDecoder)(rocDecDecoderHandle decoder_handle); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetDecoderCaps)(RocdecDecodeCaps* decode_caps); +typedef rocDecStatus(ROCDECAPI* PfnRocDecDecodeFrame)(rocDecDecoderHandle decoder_handle, + RocdecPicParams* pic_params); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetDecodeStatus)(rocDecDecoderHandle decoder_handle, + int pic_idx, + RocdecDecodeStatus* decode_status); +typedef rocDecStatus(ROCDECAPI* PfnRocDecReconfigureDecoder)( + rocDecDecoderHandle decoder_handle, + RocdecReconfigureDecoderInfo* reconfig_params); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetVideoFrame)(rocDecDecoderHandle decoder_handle, + int pic_idx, + void* 
dev_mem_ptr[3], + uint32_t* horizontal_pitch, + RocdecProcParams* vid_postproc_params); +typedef const char*(ROCDECAPI* PfnRocDecGetErrorName)(rocDecStatus rocdec_status); +typedef rocDecStatus(ROCDECAPI* PfnRocDecCreateBitstreamReader)( + RocdecBitstreamReader* bs_reader_handle, + const char* input_file_path); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetBitstreamCodecType)( + RocdecBitstreamReader bs_reader_handle, + rocDecVideoCodec* codec_type); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetBitstreamBitDepth)( + RocdecBitstreamReader bs_reader_handle, + int* bit_depth); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetBitstreamPicData)( + RocdecBitstreamReader bs_reader_handle, + uint8_t** pic_data, + int* pic_size, + int64_t* pts); +typedef rocDecStatus(ROCDECAPI* PfnRocDecDestroyBitstreamReader)( + RocdecBitstreamReader bs_reader_handle); + +// rocDecode API dispatch table +struct RocDecodeDispatchTable +{ + // ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 0 + size_t size; + PfnRocDecCreateVideoParser pfn_rocdec_create_video_parser; + PfnRocDecParseVideoData pfn_rocdec_parse_video_data; + PfnRocDecDestroyVideoParser pfn_rocdec_destroy_video_parser; + PfnRocDecCreateDecoder pfn_rocdec_create_decoder; + PfnRocDecDestroyDecoder pfn_rocdec_destroy_decoder; + PfnRocDecGetDecoderCaps pfn_rocdec_get_gecoder_caps; /* NOTE(review): "gecoder" looks like a typo of "decoder"; renaming would break users of this member - confirm before changing */ + PfnRocDecDecodeFrame pfn_rocdec_decode_frame; + PfnRocDecGetDecodeStatus pfn_rocdec_get_decode_status; + PfnRocDecReconfigureDecoder pfn_rocdec_reconfigure_decoder; + PfnRocDecGetVideoFrame pfn_rocdec_get_video_frame; + PfnRocDecGetErrorName pfn_rocdec_get_error_name; + // PLEASE DO NOT EDIT ABOVE! 
+ // ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 1 + PfnRocDecCreateBitstreamReader pfn_rocdec_create_bitstream_reader; + PfnRocDecGetBitstreamCodecType pfn_rocdec_get_bitstream_codec_type; + PfnRocDecGetBitstreamBitDepth pfn_rocdec_get_bitstream_bit_depth; + PfnRocDecGetBitstreamPicData pfn_rocdec_get_bitstream_pic_data; + PfnRocDecDestroyBitstreamReader pfn_rocdec_destroy_bitstream_reader; + // PLEASE DO NOT EDIT ABOVE! + // ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 2 + + // ******************************************************************************************* + // // + // READ BELOW + // ******************************************************************************************* + // // Please keep this text at the end of the structure: + + // 1. Do not reorder any existing members. + // 2. Increase the step version definition before adding new members. + // 3. Insert new members under the appropriate step version comment. + // 4. Generate a comment for the next step version. + // 5. Add a "PLEASE DO NOT EDIT ABOVE!" comment. + // ******************************************************************************************* + // // +}; diff --git a/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_version.h b/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_version.h new file mode 100644 index 0000000000..7a2a78fc88 --- /dev/null +++ b/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_version.h @@ -0,0 +1,60 @@ +/* +Copyright (c) 2024 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef ROCDECODE_VERSION_H +#define ROCDECODE_VERSION_H + +/*! 
+ * \file + * \brief rocDecode version + * \defgroup group_rocdecode_version rocDecode Version + * \brief rocDecode version + */ + +#ifdef __cplusplus +extern "C" { +#endif +/* NOTE: Match version with CMakeLists.txt */ +#define ROCDECODE_MAJOR_VERSION 0 +#define ROCDECODE_MINOR_VERSION 10 +#define ROCDECODE_MICRO_VERSION 0 + +/** + * ROCDECODE_CHECK_VERSION: + * @major: major version, like 1 in 1.2.3 + * @minor: minor version, like 2 in 1.2.3 + * @micro: micro version, like 3 in 1.2.3 + * + * Evaluates to %TRUE if the version of rocDecode is greater than or equal to + * @major.@minor.@micro (components compared in order: major, then minor, then micro) + */ +#define ROCDECODE_CHECK_VERSION(major, minor, micro) \ + (ROCDECODE_MAJOR_VERSION > (major) || \ + (ROCDECODE_MAJOR_VERSION == (major) && ROCDECODE_MINOR_VERSION > (minor)) || \ + (ROCDECODE_MAJOR_VERSION == (major) && ROCDECODE_MINOR_VERSION == (minor) && \ + ROCDECODE_MICRO_VERSION >= (micro))) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/source/include/rocprofiler-sdk/rocdecode/details/rocparser.h b/source/include/rocprofiler-sdk/rocdecode/details/rocparser.h new file mode 100644 index 0000000000..b2960393b9 --- /dev/null +++ b/source/include/rocprofiler-sdk/rocdecode/details/rocparser.h @@ -0,0 +1,349 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +#else +# include +#endif + +/*! + * \file + * \brief The AMD rocParser Library. + * + * \defgroup group_rocparser rocDecode Parser: AMD ROCm Video Parser API + * \brief The AMD rocDecode video parser for AMD’s GPUs. + * \defgroup group_rocdec_struct rocDecode Parser Structs: AMD ROCm Video Parser Structs + * \brief The AMD rocDecode video parser structs + */ + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +/*********************************************************************************/ +//! HANDLE of the rocDecode video parser +//! 
Used in subsequent API calls after rocDecCreateVideoParser +/*********************************************************************************/ + +typedef void* RocdecVideoParser; +typedef uint64_t RocdecTimeStamp; + +/** + * @brief ROCDEC_VIDEO_FORMAT struct + * @ingroup group_rocdec_struct + * Used in Parser callback API + */ +typedef struct +{ + rocDecVideoCodec codec; /**< OUT: Compression format */ + /** + * OUT: frame rate = numerator / denominator (for example: 30000/1001) + */ + struct + { + /**< OUT: frame rate numerator (0 = unspecified or variable frame rate) */ + uint32_t numerator; + /**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */ + uint32_t denominator; + } frame_rate; + uint8_t progressive_sequence; /**< OUT: 0=interlaced, 1=progressive */ + uint8_t bit_depth_luma_minus8; /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for + 12-bitdepth */ + uint8_t bit_depth_chroma_minus8; /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for + 12-bitdepth */ + uint8_t min_num_decode_surfaces; /**< OUT: Minimum number of decode surfaces to be allocated for + correct decoding. The client can send this value in + num_decode_surfaces. This guarantees correct functionality + and optimal video memory usage but not necessarily the best + performance, which depends on the design of the overall + application. The optimal number of decode surfaces (in terms + of performance and memory utilization) should be decided by + experimentation for each application, but it cannot go below + min_num_decode_surfaces. If this value is used for + num_decode_surfaces then it must be returned to parser + during sequence callback. 
*/ + uint32_t coded_width; /**< OUT: coded frame width in pixels */ + uint32_t coded_height; /**< OUT: coded frame height in pixels */ + /** + * area of the frame that should be displayed + * typical example: + * coded_width = 1920, coded_height = 1088 + * display_area = { 0,0,1920,1080 } + */ + struct + { + int left; /**< OUT: left position of display rect */ + int top; /**< OUT: top position of display rect */ + int right; /**< OUT: right position of display rect */ + int bottom; /**< OUT: bottom position of display rect */ + } display_area; + + rocDecVideoChromaFormat chroma_format; /**< OUT: Chroma format */ + uint32_t bitrate; /**< OUT: video bitrate (bps, 0=unknown) */ + /** + * OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc) + */ + struct + { + int x; + int y; + } display_aspect_ratio; + /** + * Video Signal Description + * Refer to section E.2.1 (VUI parameters semantics) of H264 spec file + */ + struct + { + uint8_t video_format : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, + 5-Unspecified */ + uint8_t video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma + range */ + uint8_t reserved_zero_bits : 4; /**< Reserved bits */ + uint8_t color_primaries; /**< OUT: chromaticity coordinates of source primaries */ + uint8_t transfer_characteristics; /**< OUT: opto-electronic transfer characteristic of the + source picture */ + uint8_t matrix_coefficients; /**< OUT: used in deriving luma and chroma signals from RGB + primaries */ + } video_signal_description; + uint32_t seqhdr_data_length; /**< OUT: Additional bytes following (RocdecVideoFormatEx) */ +} RocdecVideoFormat; + +/****************************************************************/ +//! \ingroup group_rocdec_struct +//! \struct RocdecVideoFormatEx +//! Video format including raw sequence header information +//! 
Used in rocDecCreateVideoParser API +/****************************************************************/ +typedef struct +{ + RocdecVideoFormat format; /**< OUT: RocdecVideoFormat structure */ + uint32_t max_width; + uint32_t max_height; + uint8_t raw_seqhdr_data[1024]; /**< OUT: Sequence header data */ +} RocdecVideoFormatEx; + +/***************************************************************/ +//! \enum RocdecVideoPacketFlags +//! Data packet flags +//! Used in RocdecSourceDataPacket structure +/***************************************************************/ +typedef enum +{ + ROCDEC_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */ + ROCDEC_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */ + ROCDEC_PKT_DISCONTINUITY = 0x04, /**< Set when a discontinuity has to be signalled */ + ROCDEC_PKT_ENDOFPICTURE = + 0x08, /**< Set when the packet contains exactly one frame or one field */ + ROCDEC_PKT_NOTIFY_EOS = + 0x10, /**< If this flag is set along with ROCDEC_PKT_ENDOFSTREAM, an additional (dummy) + display callback will be invoked with null value of ROCDECPARSERDISPINFO which + should be interpreted as end of the stream. */ +} RocdecVideoPacketFlags; + +/*****************************************************************************/ +//! \ingroup group_rocdec_struct +//! \struct RocdecSourceDataPacket +//! Data Packet +//! Used in rocDecParseVideoData API +//! 
IN for rocDecParseVideoData +/*****************************************************************************/ +typedef struct _RocdecSourceDataPacket +{ + uint32_t flags; /**< IN: Combination of ROCDEC_PKT_XXX flags */ + uint32_t + payload_size; /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */ + const uint8_t* + payload; /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */ + RocdecTimeStamp pts; /**< IN: Presentation time stamp (10MHz clock), only valid if + ROCDEC_PKT_TIMESTAMP flag is set */ +} RocdecSourceDataPacket; + +/**********************************************************************************/ +/*! \brief Timing Info struct + * \ingroup group_rocdec_struct + * \struct RocdecParserDispInfo + * Used in rocDecParseVideoData API with PFNVIDDISPLAYCALLBACK pfn_display_picture + */ +/**********************************************************************************/ +typedef struct _RocdecParserDispInfo +{ + int picture_index; /**< OUT: Index of the current picture */ + int progressive_frame; /**< OUT: 1 if progressive frame; 0 otherwise */ + int top_field_first; /**< OUT: 1 if top field is displayed first; 0 otherwise */ + int repeat_first_field; /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame + tripling, -1=unpaired field) */ + RocdecTimeStamp pts; /**< OUT: Presentation time stamp */ +} RocdecParserDispInfo; + +/** + * @brief RocdecOperatingPointInfo struct + * @ingroup group_rocdec_struct + * Operating point information of scalable bitstream + */ +typedef struct _RocdecOperatingPointInfo +{ + rocDecVideoCodec codec; + union + { + struct + { + uint8_t operating_points_cnt; + uint8_t reserved24_bits[3]; + uint16_t operating_points_idc[32]; + } av1; + uint8_t codec_reserved[1024]; + }; +} RocdecOperatingPointInfo; + +/**********************************************************************************/ +//! \ingroup group_rocdec_struct +//! \struct RocdecSeiMessage +//! 
Used in RocdecSeiMessageInfo structure +/**********************************************************************************/ +typedef struct _RocdecSeiMessage +{ + uint8_t sei_message_type; /**< OUT: SEI Message Type */ + uint8_t reserved[3]; + uint32_t sei_message_size; /**< OUT: SEI Message Size */ +} RocdecSeiMessage; + +/**********************************************************************************/ +//! \ingroup group_rocdec_struct +//! \struct RocdecSeiMessageInfo +//! Used in rocDecParseVideoData API with PFNVIDSEIMSGCALLBACK pfn_get_sei_msg +/**********************************************************************************/ +typedef struct _RocdecSeiMessageInfo +{ + void* sei_data; /**< OUT: SEI Message Data */ + RocdecSeiMessage* sei_message; /**< OUT: SEI Message Info */ + uint32_t sei_message_count; /**< OUT: SEI Message Count */ + uint32_t picIdx; /**< OUT: SEI Message Pic Index */ +} RocdecSeiMessageInfo; + +/** + * @brief Parser callbacks + * \ The parser will call these synchronously from within rocDecParseVideoData(), whenever there is + * sequence change or a picture \ is ready to be decoded and/or displayed. \ Return values from + * these callbacks are interpreted as below. If the callbacks return failure, it will be propagated + * by \ rocDecParseVideoData() to the application. \ Parser picks default operating point as 0 and + * outputAllLayers flag as 0 if PFNVIDOPPOINTCALLBACK is not set or return value is \ -1 or invalid + * operating point. 
\ PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of + * parser (set by RocdecParserParams::max_num_decode_surfaces \ while creating parser) \ + * PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded \ PFNVIDDISPLAYCALLBACK : 0: fail, >=1: + * succeeded \ PFNVIDOPPOINTCALLBACK : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit + * 10-10: outputAllLayers, bit 11-30: reserved) \ PFNVIDSEIMSGCALLBACK : 0: fail, >=1: succeeded + */ +typedef int(ROCDECAPI* PFNVIDSEQUENCECALLBACK)(void*, RocdecVideoFormat*); +typedef int(ROCDECAPI* PFNVIDDECODECALLBACK)(void*, RocdecPicParams*); +typedef int(ROCDECAPI* PFNVIDDISPLAYCALLBACK)(void*, RocdecParserDispInfo*); +// typedef int (ROCDECAPI *PFNVIDOPPOINTCALLBACK)(void *, RocdecOperatingPointInfo*); // +// reserved for future (AV1 specific) +typedef int(ROCDECAPI* PFNVIDSEIMSGCALLBACK)(void*, RocdecSeiMessageInfo*); + +/** + * \brief The AMD rocDecode library. + * \ingroup group_rocdec_struct + * \Used in rocDecCreateVideoParser API + */ +typedef struct _RocdecParserParams +{ + rocDecVideoCodec codec_type; /**< IN: rocDecVideoCodec_XXX */ + uint32_t max_num_decode_surfaces; /**< IN: Max # of decode surfaces (parser will cycle through + these) */ + uint32_t clock_rate; /**< IN: Timestamp units in Hz (0=default=10000000Hz) */ + uint32_t error_threshold; /**< IN: % Error threshold (0-100) for calling pfn_decode_picture + (100=always IN: call pfn_decode_picture even if picture bitstream + is fully corrupted) */ + uint32_t max_display_delay; /**< IN: Max display queue delay (improves pipelining of decode with + display) 0 = no delay (recommended values: 2..4) */ + uint32_t annex_b : 1; /**< IN: AV1 annexB stream */ + uint32_t reserved : 31; /**< Reserved for future use - set to zero */ + uint32_t reserved_1[4]; /**< IN: Reserved for future use - set to 0 */ + void* user_data; /**< IN: User data for callbacks */ + PFNVIDSEQUENCECALLBACK pfn_sequence_callback; /**< IN: Called before decoding frames and/or + 
whenever there is a fmt change */ + PFNVIDDECODECALLBACK pfn_decode_picture; /**< IN: Called when a picture is ready to be decoded + (decode order) */ + PFNVIDDISPLAYCALLBACK pfn_display_picture; /**< IN: Called whenever a picture is ready to be + displayed (display order) */ + PFNVIDSEIMSGCALLBACK + pfn_get_sei_msg; /**< IN: Called when all SEI messages are parsed for particular frame */ + void* reserved_2[5]; /**< Reserved for future use - set to NULL */ + RocdecVideoFormatEx* + ext_video_info; /**< IN: [Optional] sequence header data from system layer */ +} RocdecParserParams; + +/************************************************************************************************/ +//! \ingroup group_rocparser +//! \fn rocDecStatus ROCDECAPI rocDecCreateVideoParser(RocdecVideoParser *parser_handle, +//! RocdecParserParams *params) Create video parser object and initialize +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecCreateVideoParser(RocdecVideoParser* parser_handle, RocdecParserParams* params); + +/************************************************************************************************/ +//! \ingroup group_rocparser +//! \fn rocDecStatus ROCDECAPI rocDecParseVideoData(RocdecVideoParser parser_handle, +//! RocdecSourceDataPacket *packet) Parse the video data from the source data packet. Extracts +//! parameter sets like SPS, PPS, bitstream etc. from packet and calls back pfn_decode_picture with +//! RocdecPicParams data for kicking off HW decoding; calls back pfn_sequence_callback with +//! RocdecVideoFormat data for initial sequence header or when the decoder encounters a video format +//! 
change calls back pfn_display_picture with RocdecParserDispInfo data to display a video frame +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecParseVideoData(RocdecVideoParser parser_handle, RocdecSourceDataPacket* packet); + +/************************************************************************************************/ +//! \ingroup group_rocparser +//! \fn rocDecStatus ROCDECAPI rocDecParserMarkFrameForReuse(RocdecVideoParser parser_handle, int +//! pic_idx) Mark frame with index pic_idx in parser's buffer pool for reuse (means the frame has +//! been consumed) +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecParserMarkFrameForReuse(RocdecVideoParser parser_handle, int pic_idx); + +/************************************************************************************************/ +//! \ingroup group_rocparser +//! \fn rocDecStatus ROCDECAPI rocDecDestroyVideoParser(RocdecVideoParser parser_handle) +//! Destroy the video parser object +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecDestroyVideoParser(RocdecVideoParser parser_handle); + +#if defined(__cplusplus) +} +#endif /* __cplusplus */ diff --git a/source/include/rocprofiler-sdk/rocdecode/table_id.h b/source/include/rocprofiler-sdk/rocdecode/table_id.h new file mode 100644 index 0000000000..185d04e34b --- /dev/null +++ b/source/include/rocprofiler-sdk/rocdecode/table_id.h @@ -0,0 +1,31 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +// NOLINTNEXTLINE(performance-enum-size) +typedef enum +{ + ROCPROFILER_ROCDECODE_TABLE_ID_NONE = -1, + ROCPROFILER_ROCDECODE_TABLE_ID = 0, + ROCPROFILER_ROCDECODE_TABLE_ID_LAST, +} rocprofiler_rocdecode_table_id_t; diff --git a/source/lib/output/buffered_output.hpp b/source/lib/output/buffered_output.hpp index 27f943c774..1c3efbdd90 100644 --- a/source/lib/output/buffered_output.hpp +++ b/source/lib/output/buffered_output.hpp @@ -163,5 +163,7 @@ using counter_records_buffered_output_t = using pc_sampling_host_trap_buffered_output_t = buffered_output; +using rocdecode_buffered_output_t = + buffered_output; } // namespace tool } // namespace rocprofiler diff --git a/source/lib/output/domain_type.cpp b/source/lib/output/domain_type.cpp index f476189150..65934ea6b2 100644 --- a/source/lib/output/domain_type.cpp +++ b/source/lib/output/domain_type.cpp @@ -61,6 +61,7 @@ DEFINE_BUFFER_TYPE_NAME(PC_SAMPLING_HOST_TRAP, "PC_SAMPLING_HOST_TRAP", "pc_sampling_host_trap", "pc_sampling_host_trap_stats") +DEFINE_BUFFER_TYPE_NAME(ROCDECODE, "ROCDECODE_API", "rocdecode_api_trace", "rocdecode_api_stats") #undef DEFINE_BUFFER_TYPE_NAME diff --git a/source/lib/output/domain_type.hpp b/source/lib/output/domain_type.hpp index 28b41b376a..c8a8c08b81 100644 --- a/source/lib/output/domain_type.hpp +++ b/source/lib/output/domain_type.hpp @@ -37,6 +37,7 @@ enum class domain_type MEMORY_ALLOCATION, COUNTER_VALUES, PC_SAMPLING_HOST_TRAP, + ROCDECODE, LAST, }; diff --git a/source/lib/output/generateCSV.cpp b/source/lib/output/generateCSV.cpp index ad7080083a..2aa6ce91ab 100644 --- a/source/lib/output/generateCSV.cpp +++ b/source/lib/output/generateCSV.cpp @@ -722,6 +722,48 @@ generate_csv(const output_config& cfg, } } +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) +{ + if(data.empty()) return; + + if(cfg.stats && stats) + write_stats(get_stats_output_file(cfg, domain_type::ROCDECODE), 
stats.entries); + + auto ofs = tool::csv_output_file{cfg, + domain_type::ROCDECODE, + tool::csv::api_csv_encoder{}, + {"Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + for(auto ditr : data) + { + for(auto record : data.get(ditr)) + { + auto row_ss = std::stringstream{}; + auto api_name = tool_metadata.get_operation_name(record.kind, record.operation); + rocprofiler::tool::csv::api_csv_encoder::write_row( + row_ss, + tool_metadata.get_kind_name(record.kind), + api_name, + tool_metadata.process_id, + record.thread_id, + record.correlation_id.internal, + record.start_timestamp, + record.end_timestamp); + + ofs << row_ss.str(); + } + } +} + void generate_csv(const output_config& cfg, const metadata& tool_metadata, diff --git a/source/lib/output/generateCSV.hpp b/source/lib/output/generateCSV.hpp index 9c03a6821a..ac57f9951e 100644 --- a/source/lib/output/generateCSV.hpp +++ b/source/lib/output/generateCSV.hpp @@ -87,6 +87,12 @@ generate_csv(const output_config& cfg, const generator& data, const stats_entry_t& stats); +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); + void generate_csv(const output_config& cfg, const metadata& tool_metadata, diff --git a/source/lib/output/generateJSON.cpp b/source/lib/output/generateJSON.cpp index b2846f88ca..a6c7aa504e 100644 --- a/source/lib/output/generateJSON.cpp +++ b/source/lib/output/generateJSON.cpp @@ -187,7 +187,8 @@ write_json(json_output& json_ar, generator scratch_memory_gen, generator rccl_api_gen, generator memory_allocation_gen, - generator pc_sampling_gen) + generator pc_sampling_gen, + generator rocdecode_api_gen) { // summary @@ -229,6 +230,7 @@ write_json(json_output& json_ar, json_ar(cereal::make_nvp("memory_allocation", memory_allocation_gen)); json_ar(cereal::make_nvp("scratch_memory", scratch_memory_gen)); json_ar(cereal::make_nvp("pc_sample_host_trap", 
pc_sampling_gen)); + json_ar(cereal::make_nvp("rocdecode_api", rocdecode_api_gen)); json_ar.finishNode(); } } diff --git a/source/lib/output/generateJSON.hpp b/source/lib/output/generateJSON.hpp index 93baa8c3fc..f253254763 100644 --- a/source/lib/output/generateJSON.hpp +++ b/source/lib/output/generateJSON.hpp @@ -94,6 +94,8 @@ write_json(json_output& json generator scratch_memory_gen, generator rccl_api_gen, generator memory_allocation_gen, - generator pc_sampling_gen); + generator pc_sampling_gen, + generator rocdecode_api_gen); + } // namespace tool } // namespace rocprofiler diff --git a/source/lib/output/generateOTF2.cpp b/source/lib/output/generateOTF2.cpp index 555bf15041..d926354fc0 100644 --- a/source/lib/output/generateOTF2.cpp +++ b/source/lib/output/generateOTF2.cpp @@ -367,7 +367,8 @@ write_otf2( std::deque* marker_api_data, std::deque* /*scratch_memory_data*/, std::deque* rccl_api_data, - std::deque* memory_allocation_data) + std::deque* memory_allocation_data, + std::deque* rocdecode_api_data) { namespace sdk = ::rocprofiler::sdk; @@ -418,6 +419,8 @@ write_otf2( tids.emplace(itr.thread_id); for(auto itr : *rccl_api_data) tids.emplace(itr.thread_id); + for(auto itr : *rocdecode_api_data) + tids.emplace(itr.thread_id); for(auto itr : *memory_copy_data) { @@ -614,6 +617,7 @@ write_otf2( add_event_data(hip_api_data, sdk::category::hip_api{}); add_event_data(marker_api_data, sdk::category::marker_api{}); add_event_data(rccl_api_data, sdk::category::rccl_api{}); + add_event_data(rocdecode_api_data, sdk::category::rocdecode_api{}); } for(auto itr : *memory_copy_data) diff --git a/source/lib/output/generateOTF2.hpp b/source/lib/output/generateOTF2.hpp index 1b47b4c73b..dba7db1889 100644 --- a/source/lib/output/generateOTF2.hpp +++ b/source/lib/output/generateOTF2.hpp @@ -46,6 +46,7 @@ write_otf2( std::deque* marker_api_data, std::deque* scratch_memory_data, std::deque* rccl_api_data, - std::deque* memory_allocation_data); + std::deque* 
memory_allocation_data, + std::deque* rocdecode_api_data); } // namespace tool } // namespace rocprofiler diff --git a/source/lib/output/generatePerfetto.cpp b/source/lib/output/generatePerfetto.cpp index 0cbfb01068..72a40b0319 100644 --- a/source/lib/output/generatePerfetto.cpp +++ b/source/lib/output/generatePerfetto.cpp @@ -72,7 +72,8 @@ write_perfetto( const generator& marker_api_gen, const generator& /*scratch_memory_gen*/, const generator& rccl_api_gen, - const generator& memory_allocation_gen) + const generator& memory_allocation_gen, + const generator& rocdecode_api_gen) { namespace sdk = ::rocprofiler::sdk; @@ -168,6 +169,9 @@ write_perfetto( for(auto ditr : rccl_api_gen) for(auto itr : rccl_api_gen.get(ditr)) tids.emplace(itr.thread_id); + for(auto ditr : rocdecode_api_gen) + for(auto itr : rocdecode_api_gen.get(ditr)) + tids.emplace(itr.thread_id); for(auto ditr : memory_copy_gen) for(auto itr : memory_copy_gen.get(ditr)) @@ -399,6 +403,37 @@ write_perfetto( tracing_session->FlushBlocking(); } + for(auto ditr : rocdecode_api_gen) + for(auto itr : rocdecode_api_gen.get(ditr)) + { + auto name = buffer_names.at(itr.kind, itr.operation); + auto& track = thread_tracks.at(itr.thread_id); + + TRACE_EVENT_BEGIN(sdk::perfetto_category::name, + ::perfetto::StaticString(name.data()), + track, + itr.start_timestamp, + ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), + "begin_ns", + itr.start_timestamp, + "end_ns", + itr.end_timestamp, + "delta_ns", + (itr.end_timestamp - itr.start_timestamp), + "tid", + itr.thread_id, + "kind", + itr.kind, + "operation", + itr.operation, + "corr_id", + itr.correlation_id.internal); + TRACE_EVENT_END(sdk::perfetto_category::name, + track, + itr.end_timestamp); + tracing_session->FlushBlocking(); + } + for(auto ditr : memory_copy_gen) for(auto itr : memory_copy_gen.get(ditr)) { diff --git a/source/lib/output/generatePerfetto.hpp b/source/lib/output/generatePerfetto.hpp index 9074dd6b3b..d6ede1cd2d 100644 --- 
a/source/lib/output/generatePerfetto.hpp +++ b/source/lib/output/generatePerfetto.hpp @@ -46,6 +46,7 @@ write_perfetto( const generator& marker_api_gen, const generator& scratch_memory_gen, const generator& rccl_api_gen, - const generator& memory_allocation_gen); + const generator& memory_allocation_gen, + const generator& rocdecode_api_gen); } // namespace tool } // namespace rocprofiler diff --git a/source/lib/output/generateStats.cpp b/source/lib/output/generateStats.cpp index a8ab73cb3f..f01bed60b6 100644 --- a/source/lib/output/generateStats.cpp +++ b/source/lib/output/generateStats.cpp @@ -228,6 +228,24 @@ generate_stats(const output_config& /*cfg*/, return get_stats(rccl_stats); } +stats_entry_t +generate_stats(const output_config& /*cfg*/, + const metadata& tool_metadata, + const generator& data) +{ + auto rocdecode_stats = stats_map_t{}; + for(auto ditr : data) + { + for(auto record : data.get(ditr)) + { + auto api_name = tool_metadata.get_operation_name(record.kind, record.operation); + rocdecode_stats[api_name] += (record.end_timestamp - record.start_timestamp); + } + } + + return get_stats(rocdecode_stats); +} + namespace { void diff --git a/source/lib/output/generateStats.hpp b/source/lib/output/generateStats.hpp index 7a025a9192..1df9855777 100644 --- a/source/lib/output/generateStats.hpp +++ b/source/lib/output/generateStats.hpp @@ -75,6 +75,11 @@ generate_stats(const output_config& cfg, const metadata& tool_metadata, const generator& data); +stats_entry_t +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); + stats_entry_t generate_stats(const output_config& cfg, const metadata& tool_metadata, diff --git a/source/lib/rocprofiler-sdk-tool/config.hpp b/source/lib/rocprofiler-sdk-tool/config.hpp index 6006343924..79e2b04f80 100644 --- a/source/lib/rocprofiler-sdk-tool/config.hpp +++ b/source/lib/rocprofiler-sdk-tool/config.hpp @@ -97,6 +97,7 @@ struct config : output_config bool hip_runtime_api_trace = 
get_env("ROCPROF_HIP_RUNTIME_API_TRACE", false); bool hip_compiler_api_trace = get_env("ROCPROF_HIP_COMPILER_API_TRACE", false); bool rccl_api_trace = get_env("ROCPROF_RCCL_API_TRACE", false); + bool rocdecode_api_trace = get_env("ROCPROF_ROCDECODE_API_TRACE", false); bool list_metrics = get_env("ROCPROF_LIST_METRICS", false); bool list_metrics_output_file = get_env("ROCPROF_OUTPUT_LIST_METRICS_FILE", false); bool pc_sampling_host_trap = false; diff --git a/source/lib/rocprofiler-sdk-tool/tool.cpp b/source/lib/rocprofiler-sdk-tool/tool.cpp index 35cc4c4f33..f953039e02 100644 --- a/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -139,18 +139,20 @@ struct buffer_ids rocprofiler_buffer_id_t scratch_memory = {}; rocprofiler_buffer_id_t rccl_api_trace = {}; rocprofiler_buffer_id_t pc_sampling_host_trap = {}; + rocprofiler_buffer_id_t rocdecode_api_trace = {}; auto as_array() const { - return std::array{hsa_api_trace, - hip_api_trace, - kernel_trace, - memory_copy_trace, - memory_allocation_trace, - counter_collection, - scratch_memory, - rccl_api_trace, - pc_sampling_host_trap}; + return std::array{hsa_api_trace, + hip_api_trace, + kernel_trace, + memory_copy_trace, + memory_allocation_trace, + counter_collection, + scratch_memory, + rccl_api_trace, + pc_sampling_host_trap, + rocdecode_api_trace}; } }; @@ -742,6 +744,13 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, tool::write_ring_buffer(*record, domain_type::RCCL); } + else if(header->kind == ROCPROFILER_BUFFER_TRACING_ROCDECODE_API) + { + auto* record = static_cast( + header->payload); + + tool::write_ring_buffer(*record, domain_type::ROCDECODE); + } else { ROCP_FATAL << fmt::format( @@ -1267,6 +1276,26 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) "Could not setup counting service"); } + if(tool::get_config().rocdecode_api_trace) + { + ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), + buffer_size, + 
buffer_watermark, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + buffered_tracing_callback, + tool_data, + &get_buffers().rocdecode_api_trace), + "buffer creation"); + + ROCPROFILER_CALL( + rocprofiler_configure_buffer_tracing_service(get_client_ctx(), + ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, + nullptr, + 0, + get_buffers().rocdecode_api_trace), + "buffer tracing service for ROCDecode api configure"); + } + if(tool::get_config().kernel_rename) { auto rename_ctx = rocprofiler_context_id_t{0}; @@ -1441,6 +1470,8 @@ tool_fini(void* /*tool_data*/) tool::memory_allocation_buffered_output_t{tool::get_config().memory_allocation_trace}; auto counters_records_output = tool::counter_records_buffered_output_t{tool::get_config().counter_collection}; + auto rocdecode_output = + tool::rocdecode_buffered_output_t{tool::get_config().rocdecode_api_trace}; auto pc_sampling_host_trap_output = tool::pc_sampling_host_trap_buffered_output_t{tool::get_config().pc_sampling_host_trap}; @@ -1465,6 +1496,7 @@ tool_fini(void* /*tool_data*/) generate_output(rccl_output, contributions); generate_output(counters_output, contributions); generate_output(scratch_memory_output, contributions); + generate_output(rocdecode_output, contributions); generate_output(pc_sampling_host_trap_output, contributions); if(tool::get_config().stats && tool::get_config().csv_output) @@ -1491,7 +1523,8 @@ tool_fini(void* /*tool_data*/) scratch_memory_output.get_generator(), rccl_output.get_generator(), memory_allocation_output.get_generator(), - pc_sampling_host_trap_output.get_generator()); + pc_sampling_host_trap_output.get_generator(), + rocdecode_output.get_generator()); json_ar.finish_process(); tool::close_json(json_ar); @@ -1509,7 +1542,8 @@ tool_fini(void* /*tool_data*/) marker_output.get_generator(), scratch_memory_output.get_generator(), rccl_output.get_generator(), - memory_allocation_output.get_generator()); + memory_allocation_output.get_generator(), + rocdecode_output.get_generator()); } 
if(tool::get_config().otf2_output) @@ -1522,6 +1556,7 @@ tool_fini(void* /*tool_data*/) auto scratch_memory_elem_data = scratch_memory_output.load_all(); auto rccl_elem_data = rccl_output.load_all(); auto memory_allocation_elem_data = memory_allocation_output.load_all(); + auto rocdecode_elem_data = rocdecode_output.load_all(); tool::write_otf2(tool::get_config(), *tool_metadata, @@ -1534,7 +1569,8 @@ tool_fini(void* /*tool_data*/) &marker_elem_data, &scratch_memory_elem_data, &rccl_elem_data, - &memory_allocation_elem_data); + &memory_allocation_elem_data, + &rocdecode_elem_data); } if(tool::get_config().summary_output) @@ -1554,6 +1590,7 @@ tool_fini(void* /*tool_data*/) destroy_output(scratch_memory_output); destroy_output(rccl_output); destroy_output(counters_records_output); + destroy_output(rocdecode_output); destroy_output(pc_sampling_host_trap_output); if(destructors) diff --git a/source/lib/rocprofiler-sdk/CMakeLists.txt b/source/lib/rocprofiler-sdk/CMakeLists.txt index af6e69e157..880e8f45fc 100644 --- a/source/lib/rocprofiler-sdk/CMakeLists.txt +++ b/source/lib/rocprofiler-sdk/CMakeLists.txt @@ -52,6 +52,7 @@ add_subdirectory(tracing) add_subdirectory(kernel_dispatch) add_subdirectory(page_migration) add_subdirectory(rccl) +add_subdirectory(rocdecode) add_subdirectory(details) add_subdirectory(ompt) @@ -61,6 +62,7 @@ target_link_libraries( rocprofiler-sdk::rocprofiler-sdk-hip-nolink rocprofiler-sdk::rocprofiler-sdk-hsa-runtime-nolink rocprofiler-sdk::rocprofiler-sdk-rccl-nolink + rocprofiler-sdk::rocprofiler-sdk-rocdecode-nolink PRIVATE rocprofiler-sdk::rocprofiler-sdk-build-flags rocprofiler-sdk::rocprofiler-sdk-memcheck rocprofiler-sdk::rocprofiler-sdk-common-library diff --git a/source/lib/rocprofiler-sdk/buffer_tracing.cpp b/source/lib/rocprofiler-sdk/buffer_tracing.cpp index 1dad1a567a..01ad9b885f 100644 --- a/source/lib/rocprofiler-sdk/buffer_tracing.cpp +++ b/source/lib/rocprofiler-sdk/buffer_tracing.cpp @@ -34,6 +34,7 @@ #include 
"lib/rocprofiler-sdk/page_migration/page_migration.hpp" #include "lib/rocprofiler-sdk/rccl/rccl.hpp" #include "lib/rocprofiler-sdk/registration.hpp" +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" #include "lib/rocprofiler-sdk/runtime_initialization.hpp" #include @@ -41,6 +42,7 @@ #include #include #include +#include #include #include @@ -91,6 +93,7 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(CORRELATION_ID_RETIREMENT) ROCPROFILER_BUFFER_TRACING_KIND_STRING(RCCL_API) ROCPROFILER_BUFFER_TRACING_KIND_STRING(OMPT) ROCPROFILER_BUFFER_TRACING_KIND_STRING(RUNTIME_INITIALIZATION) +ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCDECODE_API) template std::pair @@ -288,6 +291,11 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_ { return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; } + case ROCPROFILER_BUFFER_TRACING_ROCDECODE_API: + { + val = rocprofiler::rocdecode::name_by_id(operation); + break; + } }; if(!val) @@ -419,6 +427,11 @@ rocprofiler_iterate_buffer_tracing_kind_operations( { return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; } + case ROCPROFILER_BUFFER_TRACING_ROCDECODE_API: + { + ops = rocprofiler::rocdecode::get_ids(); + break; + } } for(const auto& itr : ops) diff --git a/source/lib/rocprofiler-sdk/callback_tracing.cpp b/source/lib/rocprofiler-sdk/callback_tracing.cpp index f3f65a99f4..713535216a 100644 --- a/source/lib/rocprofiler-sdk/callback_tracing.cpp +++ b/source/lib/rocprofiler-sdk/callback_tracing.cpp @@ -33,6 +33,7 @@ #include "lib/rocprofiler-sdk/ompt/ompt.hpp" #include "lib/rocprofiler-sdk/rccl/rccl.hpp" #include "lib/rocprofiler-sdk/registration.hpp" +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" #include "lib/rocprofiler-sdk/runtime_initialization.hpp" #include @@ -41,6 +42,7 @@ #include #include #include +#include #include #include @@ -88,6 +90,7 @@ ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MEMORY_ALLOCATION) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(RCCL_API) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(OMPT) 
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(RUNTIME_INITIALIZATION) +ROCPROFILER_CALLBACK_TRACING_KIND_STRING(ROCDECODE_API) template std::pair @@ -269,6 +272,12 @@ rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_trac case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION: { val = rocprofiler::runtime_init::name_by_id(operation); + break; + } + case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API: + { + val = rocprofiler::rocdecode::name_by_id(operation); + break; } }; @@ -397,6 +406,12 @@ rocprofiler_iterate_callback_tracing_kind_operations( case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION: { ops = rocprofiler::runtime_init::get_ids(); + break; + } + case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API: + { + ops = rocprofiler::rocdecode::get_ids(); + break; } }; @@ -539,6 +554,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args( case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY: case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION: case ROCPROFILER_CALLBACK_TRACING_RCCL_API: + case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API: case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION: { return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; diff --git a/source/lib/rocprofiler-sdk/intercept_table.cpp b/source/lib/rocprofiler-sdk/intercept_table.cpp index b3507e1519..8bee97c169 100644 --- a/source/lib/rocprofiler-sdk/intercept_table.cpp +++ b/source/lib/rocprofiler-sdk/intercept_table.cpp @@ -34,6 +34,7 @@ #include #include #include "lib/rocprofiler-sdk/rccl/rccl.hpp" +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" #include #include @@ -57,7 +58,8 @@ constexpr auto intercept_library_seq = library_sequence_t{}; + ROCPROFILER_RCCL_TABLE, + ROCPROFILER_ROCDECODE_TABLE>{}; // check that intercept_library_seq is up to date static_assert((1 << (intercept_library_seq.size() - 1)) == ROCPROFILER_TABLE_LAST, @@ -192,6 +194,11 @@ template void notify_intercept_table_registration(rocprofiler_intercept_table_t, uint64_t, uint64_t, std::tuple); + 
+template void notify_intercept_table_registration(rocprofiler_intercept_table_t, + uint64_t, + uint64_t, + std::tuple); } // namespace intercept_table } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/internal_threading.cpp b/source/lib/rocprofiler-sdk/internal_threading.cpp index af9c389c38..a60c3a947b 100644 --- a/source/lib/rocprofiler-sdk/internal_threading.cpp +++ b/source/lib/rocprofiler-sdk/internal_threading.cpp @@ -122,7 +122,8 @@ constexpr auto creation_notifier_library_seq = library_sequence_t{}; + ROCPROFILER_RCCL_LIBRARY, + ROCPROFILER_ROCDECODE_LIBRARY>{}; // check that creation_notifier_library_seq is up to date static_assert((1 << (creation_notifier_library_seq.size() - 1)) == ROCPROFILER_LIBRARY_LAST, diff --git a/source/lib/rocprofiler-sdk/registration.cpp b/source/lib/rocprofiler-sdk/registration.cpp index 76988c68ee..fb99e63c8e 100644 --- a/source/lib/rocprofiler-sdk/registration.cpp +++ b/source/lib/rocprofiler-sdk/registration.cpp @@ -46,6 +46,7 @@ #include "lib/rocprofiler-sdk/pc_sampling/code_object.hpp" #include "lib/rocprofiler-sdk/pc_sampling/service.hpp" #include "lib/rocprofiler-sdk/rccl/rccl.hpp" +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" #include "lib/rocprofiler-sdk/runtime_initialization.hpp" #include @@ -908,6 +909,30 @@ rocprofiler_set_api_table(const char* name, rocprofiler::intercept_table::notify_intercept_table_registration( ROCPROFILER_RCCL_TABLE, lib_version, lib_instance, std::make_tuple(rccl_api)); } + else if(std::string_view{name} == "rocdecode") + { + // pass to rocdecode init + ROCP_ERROR_IF(num_tables > 1) + << "rocprofiler expected ROCDecode library to pass 1 API table, not " << num_tables; + + auto* rocdecode_api = static_cast(tables[0]); + + // any internal modifications to the rocdecodeApiFuncTable need to be done before we make + // the copy or else those modifications will be lost when ROCDecode API tracing is enabled + // because the ROCDecode API tracing invokes the function 
pointers from the copy below + rocprofiler::rocdecode::copy_table(rocdecode_api, lib_instance); + + // install rocprofiler API wrappers + rocprofiler::rocdecode::update_table(rocdecode_api); + + // Tracing notifications the runtime has initialized + rocprofiler::runtime_init::initialize( + ROCPROFILER_RUNTIME_INITIALIZATION_ROCDECODE, lib_version, lib_instance); + + // allow tools to install API wrappers + rocprofiler::intercept_table::notify_intercept_table_registration( + ROCPROFILER_ROCDECODE_TABLE, lib_version, lib_instance, std::make_tuple(rocdecode_api)); + } else { ROCP_ERROR << "rocprofiler does not accept API tables from " << name; diff --git a/source/lib/rocprofiler-sdk/rocdecode/CMakeLists.txt b/source/lib/rocprofiler-sdk/rocdecode/CMakeLists.txt new file mode 100644 index 0000000000..3eeba36b33 --- /dev/null +++ b/source/lib/rocprofiler-sdk/rocdecode/CMakeLists.txt @@ -0,0 +1,6 @@ +set(ROCPROFILER_LIB_ROCDECODE_SOURCES abi.cpp rocdecode.cpp) +set(ROCPROFILER_LIB_ROCDECODE_HEADERS defines.hpp rocdecode.hpp) + +target_sources( + rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_ROCDECODE_SOURCES} + ${ROCPROFILER_LIB_ROCDECODE_HEADERS}) diff --git a/source/lib/rocprofiler-sdk/rocdecode/abi.cpp b/source/lib/rocprofiler-sdk/rocdecode/abi.cpp new file mode 100644 index 0000000000..8579c2b9b2 --- /dev/null +++ b/source/lib/rocprofiler-sdk/rocdecode/abi.cpp @@ -0,0 +1,67 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" + +#include "lib/common/abi.hpp" +#include "lib/common/defines.hpp" + +#include +#include + +namespace rocprofiler +{ +namespace rocdecode +{ +static_assert(ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION == 0, + "Major version updated for ROCDecode dispatch table"); + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 0 +ROCP_SDK_ENFORCE_ABI_VERSIONING(::RocDecodeDispatchTable, 11); +#endif + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 1 +ROCP_SDK_ENFORCE_ABI_VERSIONING(::RocDecodeDispatchTable, 16); +#endif + +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_create_video_parser, 0) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_parse_video_data, 1) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_destroy_video_parser, 2) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_create_decoder, 3) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_destroy_decoder, 4) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_gecoder_caps, 5) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_decode_frame, 6) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_decode_status, 7) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_reconfigure_decoder, 8) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_video_frame, 9) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_error_name, 10) + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1 +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_create_bitstream_reader, 11); +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_bitstream_codec_type, 12); +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_bitstream_bit_depth, 13); +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_bitstream_pic_data, 14); +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_destroy_bitstream_reader, 15); +#endif + +} // namespace rocdecode +} // 
namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/rocdecode/defines.hpp b/source/lib/rocprofiler-sdk/rocdecode/defines.hpp new file mode 100644 index 0000000000..f45885d51e --- /dev/null +++ b/source/lib/rocprofiler-sdk/rocdecode/defines.hpp @@ -0,0 +1,216 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#include "lib/common/defines.hpp" + +#define ROCDECODE_API_INFO_DEFINITION_0( \ + ROCDECODE_TABLE, ROCDECODE_API_ID, ROCDECODE_FUNC, ROCDECODE_FUNC_PTR) \ + namespace rocprofiler \ + { \ + namespace rocdecode \ + { \ + template <> \ + struct rocdecode_api_info \ + : rocdecode_domain_info \ + { \ + static constexpr auto table_idx = ROCDECODE_TABLE; \ + static constexpr auto operation_idx = ROCDECODE_API_ID; \ + static constexpr auto name = #ROCDECODE_FUNC; \ + \ + using domain_type = rocdecode_domain_info; \ + using this_type = rocdecode_api_info; \ + using base_type = rocdecode_api_impl; \ + \ + using domain_type::callback_domain_idx; \ + using domain_type::buffered_domain_idx; \ + using domain_type::args_type; \ + using domain_type::retval_type; \ + using domain_type::callback_data_type; \ + \ + static constexpr auto offset() \ + { \ + return offsetof(rocdecode_table_lookup::type, ROCDECODE_FUNC_PTR); \ + } \ + \ + static_assert(offsetof(rocdecode_table_lookup::type, ROCDECODE_FUNC_PTR) == \ + (sizeof(size_t) + (operation_idx * sizeof(void*))), \ + "ABI error for " #ROCDECODE_FUNC); \ + \ + static auto& get_table() { return rocdecode_table_lookup{}(); } \ + \ + template \ + static auto& get_table(TableT& _v) \ + { \ + return rocdecode_table_lookup{}(_v); \ + } \ + \ + template \ + static auto& get_table_func(TableT& _table) \ + { \ + if constexpr(std::is_pointer::value) \ + { \ + assert(_table != nullptr && "nullptr to MARKER table for " #ROCDECODE_FUNC \ + " function"); \ + return _table->ROCDECODE_FUNC_PTR; \ + } \ + else \ + { \ + return _table.ROCDECODE_FUNC_PTR; \ + } \ + } \ + \ + static auto& get_table_func() { return get_table_func(get_table()); } \ + \ + template \ + static auto& get_api_data_args(DataT& _data) \ + { \ + return _data.ROCDECODE_FUNC; \ + } \ + \ + template \ + static auto get_functor(RetT (*)(Args...)) \ + { \ + return &base_type::functor; \ + } \ + \ + static std::vector as_arg_addr(callback_data_type) { return 
std::vector{}; } \ + \ + static std::vector as_arg_list(callback_data_type, int32_t) \ + { \ + return {}; \ + } \ + }; \ + } \ + } + +#define ROCDECODE_API_INFO_DEFINITION_V( \ + ROCDECODE_TABLE, ROCDECODE_API_ID, ROCDECODE_FUNC, ROCDECODE_FUNC_PTR, ...) \ + namespace rocprofiler \ + { \ + namespace rocdecode \ + { \ + template <> \ + struct rocdecode_api_info \ + : rocdecode_domain_info \ + { \ + static constexpr auto table_idx = ROCDECODE_TABLE; \ + static constexpr auto operation_idx = ROCDECODE_API_ID; \ + static constexpr auto name = #ROCDECODE_FUNC; \ + \ + using domain_type = rocdecode_domain_info; \ + using this_type = rocdecode_api_info; \ + using base_type = rocdecode_api_impl; \ + \ + static constexpr auto callback_domain_idx = domain_type::callback_domain_idx; \ + static constexpr auto buffered_domain_idx = domain_type::buffered_domain_idx; \ + \ + using domain_type::args_type; \ + using domain_type::retval_type; \ + using domain_type::callback_data_type; \ + \ + static constexpr auto offset() \ + { \ + return offsetof(rocdecode_table_lookup::type, ROCDECODE_FUNC_PTR); \ + } \ + \ + static_assert(offsetof(rocdecode_table_lookup::type, ROCDECODE_FUNC_PTR) == \ + (sizeof(size_t) + (operation_idx * sizeof(void*))), \ + "ABI error for " #ROCDECODE_FUNC); \ + \ + static auto& get_table() { return rocdecode_table_lookup{}(); } \ + \ + template \ + static auto& get_table(TableT& _v) \ + { \ + return rocdecode_table_lookup{}(_v); \ + } \ + \ + template \ + static auto& get_table_func(TableT& _table) \ + { \ + if constexpr(std::is_pointer::value) \ + { \ + assert(_table != nullptr && "nullptr to MARKER table for " #ROCDECODE_FUNC \ + " function"); \ + return _table->ROCDECODE_FUNC_PTR; \ + } \ + else \ + { \ + return _table.ROCDECODE_FUNC_PTR; \ + } \ + } \ + \ + static auto& get_table_func() { return get_table_func(get_table()); } \ + \ + template \ + static auto& get_api_data_args(DataT& _data) \ + { \ + return _data.ROCDECODE_FUNC; \ + } \ + \ + template \ + 
static auto get_functor(RetT (*)(Args...)) \ + { \ + return &base_type::functor; \ + } \ + \ + static std::vector as_arg_addr(callback_data_type trace_data) \ + { \ + return std::vector{ \ + GET_ADDR_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)}; \ + } \ + }; \ + } \ + } + +#define ROCDECODE_API_TABLE_LOOKUP_DEFINITION(TABLE_ID, TYPE) \ + namespace rocprofiler \ + { \ + namespace rocdecode \ + { \ + namespace \ + { \ + template <> \ + auto* get_table() \ + { \ + return get_table_impl(); \ + } \ + } \ + \ + template <> \ + struct rocdecode_table_lookup \ + { \ + using type = TYPE; \ + auto& operator()(type& _v) const { return _v; } \ + auto& operator()(type* _v) const { return *_v; } \ + auto& operator()() const { return (*this)(get_table()); } \ + }; \ + \ + template <> \ + struct rocdecode_table_id_lookup \ + { \ + static constexpr auto value = TABLE_ID; \ + }; \ + } \ + } diff --git a/source/lib/rocprofiler-sdk/rocdecode/rocdecode.cpp b/source/lib/rocprofiler-sdk/rocdecode/rocdecode.cpp new file mode 100644 index 0000000000..38b1e9bfec --- /dev/null +++ b/source/lib/rocprofiler-sdk/rocdecode/rocdecode.cpp @@ -0,0 +1,560 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" +#include "lib/common/defines.hpp" +#include "lib/common/static_object.hpp" +#include "lib/rocprofiler-sdk/buffer.hpp" +#include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/hip/hip.hpp" +#include "lib/rocprofiler-sdk/hip/utils.hpp" +#include "lib/rocprofiler-sdk/registration.hpp" +#include "lib/rocprofiler-sdk/tracing/tracing.hpp" + +#include +#include +#include +#include + +#include +#include +// must be included after runtime api +#include + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace rocdecode +{ +namespace +{ +struct null_type +{}; + +template +auto +get_default_retval() +{ + if constexpr(std::is_pointer::value) + { + Tp v = nullptr; + return v; + } + else if constexpr(std::is_same::value) + return ROCDEC_RUNTIME_ERROR; + else if constexpr(std::is_same::value) + return "UnknownString"; + else + static_assert(std::is_empty::value, "Error! unsupported return type"); +} + +template +void +set_data_retval(DataT& _data, Tp _val) +{ + if constexpr(std::is_same::value) + { + _data.rocDecStatus_retval = _val; + } + else if constexpr(std::is_same::value) + { + _data.const_charp_retval = _val; + } + else + { + static_assert(std::is_empty::value, "Error! 
unsupported return type"); + } +} + +template +Tp* +get_table_impl() +{ + static auto*& _v = common::static_object::construct(common::init_public_api_struct(Tp{})); + return _v; +} + +template +auto* +get_table(); + +} // namespace + +template +template +auto +rocdecode_api_impl::set_data_args(DataArgsT& _data_args, Args... args) +{ + if constexpr(sizeof...(Args) == 0) + _data_args.no_args.empty = '\0'; + else + _data_args = DataArgsT{args...}; +} + +template +template +auto +rocdecode_api_impl::exec(FuncT&& _func, Args&&... args) +{ + using return_type = std::decay_t>; + + if(_func) + { + if constexpr(std::is_void::value) + { + _func(std::forward(args)...); + return null_type{}; + } + else + { + return _func(std::forward(args)...); + } + } + + using info_type = rocdecode_api_info; + ROCP_ERROR << "nullptr to next rocdecode function for " << info_type::name << " (" + << info_type::operation_idx << ")"; + + return get_default_retval(); +} + +template +template +RetT +rocdecode_api_impl::functor(Args... 
args) +{ + using info_type = rocdecode_api_info; + using callback_api_data_t = typename rocdecode_domain_info::callback_data_type; + using buffered_api_data_t = typename rocdecode_domain_info::buffer_data_type; + + constexpr auto external_corr_id_domain_idx = + rocdecode_domain_info::external_correlation_id_domain_idx; + + if(registration::get_fini_status() != 0) + { + [[maybe_unused]] auto _ret = exec(info_type::get_table_func(), std::forward(args)...); + if constexpr(!std::is_void::value) + return _ret; + else + return; + } + + constexpr auto ref_count = 2; + auto thr_id = common::get_tid(); + auto callback_contexts = tracing::callback_context_data_vec_t{}; + auto buffered_contexts = tracing::buffered_context_data_vec_t{}; + auto external_corr_ids = tracing::external_correlation_id_map_t{}; + + tracing::populate_contexts(info_type::callback_domain_idx, + info_type::buffered_domain_idx, + info_type::operation_idx, + callback_contexts, + buffered_contexts, + external_corr_ids); + + if(callback_contexts.empty() && buffered_contexts.empty()) + { + [[maybe_unused]] auto _ret = exec(info_type::get_table_func(), std::forward(args)...); + if constexpr(!std::is_void::value) + return _ret; + else + return; + } + + auto buffer_record = common::init_public_api_struct(buffered_api_data_t{}); + auto tracer_data = common::init_public_api_struct(callback_api_data_t{}); + auto* corr_id = tracing::correlation_service::construct(ref_count); + auto internal_corr_id = corr_id->internal; + + tracing::populate_external_correlation_ids(external_corr_ids, + thr_id, + external_corr_id_domain_idx, + info_type::operation_idx, + internal_corr_id); + + // invoke the callbacks + if(!callback_contexts.empty()) + { + set_data_args(info_type::get_api_data_args(tracer_data.args), std::forward(args)...); + + tracing::execute_phase_enter_callbacks(callback_contexts, + thr_id, + internal_corr_id, + external_corr_ids, + info_type::callback_domain_idx, + info_type::operation_idx, + tracer_data); + } + 
+ // enter callback may update the external correlation id field + tracing::update_external_correlation_ids( + external_corr_ids, thr_id, external_corr_id_domain_idx); + + // record the start timestamp as close to the function call as possible + if(!buffered_contexts.empty()) + { + buffer_record.start_timestamp = common::timestamp_ns(); + } + + // decrement the reference count before invoking + corr_id->sub_ref_count(); + + auto _ret = exec(info_type::get_table_func(), std::forward(args)...); + + // record the end timestamp as close to the function call as possible + if(!buffered_contexts.empty()) + { + buffer_record.end_timestamp = common::timestamp_ns(); + } + + if(!callback_contexts.empty()) + { + set_data_retval(tracer_data.retval, _ret); + + tracing::execute_phase_exit_callbacks(callback_contexts, + external_corr_ids, + info_type::callback_domain_idx, + info_type::operation_idx, + tracer_data); + } + + if(!buffered_contexts.empty()) + { + tracing::execute_buffer_record_emplace(buffered_contexts, + thr_id, + internal_corr_id, + external_corr_ids, + info_type::buffered_domain_idx, + info_type::operation_idx, + buffer_record); + } + + // decrement the reference count after usage in the callback/buffers + corr_id->sub_ref_count(); + + context::pop_latest_correlation_id(corr_id); + + if constexpr(!std::is_void::value) return _ret; +} +} // namespace rocdecode +} // namespace rocprofiler + +#define ROCPROFILER_LIB_ROCPROFILER_SDK_ROCDECODE_ROCDECODE_CPP_IMPL 1 + +// template specializations +#include "rocdecode.def.cpp" + +namespace rocprofiler +{ +namespace rocdecode +{ +namespace +{ +template +const char* +name_by_id(const uint32_t id, std::index_sequence) +{ + if(OpIdx == id) return rocdecode_api_info::name; + + if constexpr(sizeof...(OpIdxTail) > 0) + return name_by_id(id, std::index_sequence{}); + else + return nullptr; +} + +template +uint32_t +id_by_name(const char* name, std::index_sequence) +{ + if(std::string_view{rocdecode_api_info::name} == 
std::string_view{name}) + return rocdecode_api_info::operation_idx; + + if constexpr(sizeof...(OpIdxTail) > 0) + return id_by_name(name, std::index_sequence{}); + else + return rocdecode_domain_info::none; +} + +template +void +get_ids(std::vector& _id_list, std::index_sequence) +{ + auto _idx = rocdecode_api_info::operation_idx; + if(_idx < rocdecode_domain_info::last) _id_list.emplace_back(_idx); + + if constexpr(sizeof...(OpIdxTail) > 0) + get_ids(_id_list, std::index_sequence{}); +} + +template +void +get_names(std::vector& _name_list, std::index_sequence) +{ + auto&& _name = rocdecode_api_info::name; + if(_name != nullptr && strnlen(_name, 1) > 0) _name_list.emplace_back(_name); + + if constexpr(sizeof...(OpIdxTail) > 0) + get_names(_name_list, std::index_sequence{}); +} + +template +void +iterate_args(const uint32_t id, + const DataT& data, + rocprofiler_callback_tracing_operation_args_cb_t func, + int32_t max_deref, + void* user_data, + std::index_sequence) +{ + if(OpIdx == id) + { + using info_type = rocdecode_api_info; + auto&& arg_list = info_type::as_arg_list(data, max_deref); + auto&& arg_addr = info_type::as_arg_addr(data); + for(size_t i = 0; i < std::min(arg_list.size(), arg_addr.size()); ++i) + { + auto ret = func(info_type::callback_domain_idx, // kind + id, // operation + i, // arg_number + arg_addr.at(i), // arg_value_addr + arg_list.at(i).indirection_level, // indirection + arg_list.at(i).type, // arg_type + arg_list.at(i).name, // arg_name + arg_list.at(i).value.c_str(), // arg_value_str + arg_list.at(i).dereference_count, // num deref in str + user_data); + if(ret != 0) break; + } + return; + } + if constexpr(sizeof...(OpIdxTail) > 0) + iterate_args( + id, data, func, max_deref, user_data, std::index_sequence{}); +} + +bool +should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain, + rocprofiler_buffer_tracing_kind_t _buffered_domain, + int _operation) +{ + // we loop over all the *registered* contexts and see if any of them, 
at any point in time, + // might require callback or buffered API tracing + for(const auto& itr : context::get_registered_contexts()) + { + if(!itr) continue; + + // if there is a callback tracer enabled for the given domain and op, we need to wrap + if(itr->callback_tracer && itr->callback_tracer->domains(_callback_domain) && + itr->callback_tracer->domains(_callback_domain, _operation)) + return true; + + // if there is a buffered tracer enabled for the given domain and op, we need to wrap + if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_domain) && + itr->buffered_tracer->domains(_buffered_domain, _operation)) + return true; + } + return false; +} + +template +void +copy_table(Tp* _orig, uint64_t _tbl_instance, std::integral_constant) +{ + using table_type = typename rocdecode_table_lookup::type; + + if constexpr(std::is_same::value) + { + auto _info = rocdecode_api_info{}; + + // make sure we don't access a field that doesn't exist in input table + if(_info.offset() >= _orig->size) return; + + // 1. get the sub-table containing the function pointer in original table + // 2. get reference to function pointer in sub-table in original table + auto& _orig_table = _info.get_table(_orig); + auto& _orig_func = _info.get_table_func(_orig_table); + // 3. get the sub-table containing the function pointer in saved table + // 4. get reference to function pointer in sub-table in saved table + // 5. 
save the original function in the saved table + auto& _copy_table = _info.get_table(*get_table()); + auto& _copy_func = _info.get_table_func(_copy_table); + + ROCP_FATAL_IF(_copy_func && _tbl_instance == 0) + << _info.name << " has non-null function pointer " << _copy_func + << " despite this being the first instance of the library being copied"; + + if(!_copy_func) + { + ROCP_TRACE << "copying table entry for " << _info.name; + _copy_func = _orig_func; + } + else + { + ROCP_TRACE << "skipping copying table entry for " << _info.name + << " from table instance " << _tbl_instance; + } + } +} + +template +void +update_table(Tp* _orig, std::integral_constant) +{ + using table_type = typename rocdecode_table_lookup::type; + + if constexpr(std::is_same::value) + { + auto _info = rocdecode_api_info{}; + + // make sure we don't access a field that doesn't exist in input table + if(_info.offset() >= _orig->size) return; + + // check to see if there are any contexts which enable this operation in the ROCDecode API domain + if(!should_wrap_functor( + _info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx)) + return; + + ROCP_TRACE << "updating table entry for " << _info.name; + + // 1. get the sub-table containing the function pointer in original table + // 2. get reference to function pointer in sub-table in original table + // 3.
update function pointer with wrapper + auto& _table = _info.get_table(_orig); + auto& _func = _info.get_table_func(_table); + _func = _info.get_functor(_func); + } +} + +template +void +copy_table(Tp* _orig, uint64_t _tbl_instance, std::index_sequence) +{ + copy_table(_orig, _tbl_instance, std::integral_constant{}); + if constexpr(sizeof...(OpIdxTail) > 0) + copy_table(_orig, _tbl_instance, std::index_sequence{}); +} + +template +void +update_table(Tp* _orig, std::index_sequence) +{ + update_table(_orig, std::integral_constant{}); + if constexpr(sizeof...(OpIdxTail) > 0) + update_table(_orig, std::index_sequence{}); +} +} // namespace + +// check out the assembly here... this compiles to a switch statement +template +const char* +name_by_id(uint32_t id) +{ + return name_by_id(id, + std::make_index_sequence::last>{}); +} + +template +uint32_t +id_by_name(const char* name) +{ + return id_by_name(name, + std::make_index_sequence::last>{}); +} + +template +std::vector +get_ids() +{ + constexpr auto last_api_id = rocdecode_domain_info::last; + auto _data = std::vector{}; + _data.reserve(last_api_id); + get_ids(_data, std::make_index_sequence{}); + return _data; +} + +template +std::vector +get_names() +{ + constexpr auto last_api_id = rocdecode_domain_info::last; + auto _data = std::vector{}; + _data.reserve(last_api_id); + get_names(_data, std::make_index_sequence{}); + return _data; +} + +template +void +iterate_args(uint32_t id, + const rocprofiler_callback_tracing_rocdecode_api_data_t& data, + rocprofiler_callback_tracing_operation_args_cb_t callback, + int32_t max_deref, + void* user_data) +{ + if(callback) + iterate_args(id, + data, + callback, + max_deref, + user_data, + std::make_index_sequence::last>{}); +} + +template +void +copy_table(TableT* _orig, uint64_t _tbl_instance) +{ + constexpr auto TableIdx = rocdecode_table_id_lookup::value; + if(_orig) + copy_table(_orig, + _tbl_instance, + std::make_index_sequence::last>{}); +} + +template +void 
+update_table(TableT* _orig) +{ + constexpr auto TableIdx = rocdecode_table_id_lookup::value; + if(_orig) + update_table(_orig, + std::make_index_sequence::last>{}); +} + +using rocdecode_api_data_t = rocprofiler_callback_tracing_rocdecode_api_data_t; +using rocdecode_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t; + +#define INSTANTIATE_ROCDECODE_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \ + template void copy_table(TABLE_TYPE * _tbl, uint64_t _instv); \ + template void update_table(TABLE_TYPE * _tbl); \ + template const char* name_by_id(uint32_t); \ + template uint32_t id_by_name(const char*); \ + template std::vector get_ids(); \ + template std::vector get_names(); + +INSTANTIATE_ROCDECODE_TABLE_FUNC(rocdecode_api_func_table_t, ROCPROFILER_ROCDECODE_TABLE_ID) +} // namespace rocdecode +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/rocdecode/rocdecode.def.cpp b/source/lib/rocprofiler-sdk/rocdecode/rocdecode.def.cpp new file mode 100644 index 0000000000..51c81f3f22 --- /dev/null +++ b/source/lib/rocprofiler-sdk/rocdecode/rocdecode.def.cpp @@ -0,0 +1,90 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "lib/rocprofiler-sdk/rocdecode/defines.hpp" +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace rocdecode +{ +template <> +struct rocdecode_domain_info +{ + using args_type = rocprofiler_rocdecode_api_args_t; + using retval_type = rocprofiler_rocdecode_api_retval_t; + using callback_data_type = rocprofiler_callback_tracing_rocdecode_api_data_t; + using buffer_data_type = rocprofiler_buffer_tracing_rocdecode_api_record_t; +}; + +template <> +struct rocdecode_domain_info +: rocdecode_domain_info +{ + using enum_type = rocprofiler_marker_core_api_id_t; + static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API; + static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_ROCDECODE_API; + static constexpr auto none = ROCPROFILER_ROCDECODE_API_ID_NONE; + static constexpr auto last = ROCPROFILER_ROCDECODE_API_ID_LAST; + static constexpr auto external_correlation_id_domain_idx = + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API; +}; + +} // namespace rocdecode +} // namespace rocprofiler + +#if defined(ROCPROFILER_LIB_ROCPROFILER_SDK_ROCDECODE_ROCDECODE_CPP_IMPL) && \ + ROCPROFILER_LIB_ROCPROFILER_SDK_ROCDECODE_ROCDECODE_CPP_IMPL == 1 + +// clang-format off +ROCDECODE_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_ROCDECODE_TABLE_ID, rocdecode_api_func_table_t) + +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, 
ROCPROFILER_ROCDECODE_API_ID_rocDecCreateVideoParser, rocDecCreateVideoParser, pfn_rocdec_create_video_parser, parser_handle, params) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecParseVideoData, rocDecParseVideoData, pfn_rocdec_parse_video_data, parser_handle, packet) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyVideoParser, rocDecDestroyVideoParser, pfn_rocdec_destroy_video_parser, parser_handle) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecCreateDecoder, rocDecCreateDecoder, pfn_rocdec_create_decoder, decoder_handle, decoder_create_info) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyDecoder, rocDecDestroyDecoder, pfn_rocdec_destroy_decoder, decoder_handle) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecoderCaps, rocDecGetDecoderCaps, pfn_rocdec_get_gecoder_caps, decode_caps) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDecodeFrame, rocDecDecodeFrame, pfn_rocdec_decode_frame, decoder_handle, pic_params) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecodeStatus, rocDecGetDecodeStatus, pfn_rocdec_get_decode_status, decoder_handle, pic_idx, decode_status) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecReconfigureDecoder, rocDecReconfigureDecoder, pfn_rocdec_reconfigure_decoder, decoder_handle, reconfig_params) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetVideoFrame, rocDecGetVideoFrame, pfn_rocdec_get_video_frame, decoder_handle, pic_idx, dev_mem_ptr, horizontal_pitch, vid_postproc_params) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, 
ROCPROFILER_ROCDECODE_API_ID_rocDecGetErrorName, rocDecGetErrorName, pfn_rocdec_get_error_name, rocdec_status) + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1 +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecCreateBitstreamReader, rocDecCreateBitstreamReader, pfn_rocdec_create_bitstream_reader, bs_reader_handle, input_file_path); +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamCodecType, rocDecGetBitstreamCodecType, pfn_rocdec_get_bitstream_codec_type, bs_reader_handle, codec_type); +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamBitDepth, rocDecGetBitstreamBitDepth, pfn_rocdec_get_bitstream_bit_depth, bs_reader_handle, bit_depth); +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamPicData, rocDecGetBitstreamPicData, pfn_rocdec_get_bitstream_pic_data, bs_reader_handle, pic_data, pic_size, pts); +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyBitstreamReader, rocDecDestroyBitstreamReader, pfn_rocdec_destroy_bitstream_reader, bs_reader_handle); +#endif +#else +# error \ + "Do not compile this file directly. It is included by lib/rocprofiler-sdk/rocdecode/rocdecode.cpp" +#endif + + diff --git a/source/lib/rocprofiler-sdk/rocdecode/rocdecode.hpp b/source/lib/rocprofiler-sdk/rocdecode/rocdecode.hpp new file mode 100644 index 0000000000..af4b96ca66 --- /dev/null +++ b/source/lib/rocprofiler-sdk/rocdecode/rocdecode.hpp @@ -0,0 +1,126 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +# include +# include +#else +# include +# include +# include +#endif + +#include + +#include +#include + +namespace rocprofiler +{ +namespace rocdecode +{ +using rocdecode_api_func_table_t = ::RocDecodeDispatchTable; + +struct ROCDecodeAPITable +{ + rocdecode_api_func_table_t* rocdecode_api_table = nullptr; +}; + +using rocdecode_api_table_t = ROCDecodeAPITable; + +rocdecode_api_table_t& +get_table(); + +template +struct rocdecode_table_lookup; + +template +struct rocdecode_table_id_lookup; + +template +struct rocdecode_domain_info; + +template +struct rocdecode_api_info; + +template +struct rocdecode_api_impl : rocdecode_domain_info +{ + template + static auto set_data_args(DataArgsT&, Args... args); + + template + static auto exec(FuncT&&, Args&&... args); + + template + static RetT functor(Args... 
args); +}; + +template +const char* +name_by_id(uint32_t id); + +template +uint32_t +id_by_name(const char* name); + +template +std::vector +get_names(); + +template +std::vector +get_ids(); + +template +void +iterate_args(uint32_t id, + const rocprofiler_callback_tracing_rocdecode_api_data_t& data, + rocprofiler_callback_tracing_operation_args_cb_t callback, + int32_t max_deref, + void* user_data); + +template +void +copy_table(TableT* _orig, uint64_t _tbl_instance); + +template +void +update_table(TableT* _orig); + +} // namespace rocdecode +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/runtime_initialization.cpp b/source/lib/rocprofiler-sdk/runtime_initialization.cpp index 46d7f233a4..1e23621685 100644 --- a/source/lib/rocprofiler-sdk/runtime_initialization.cpp +++ b/source/lib/rocprofiler-sdk/runtime_initialization.cpp @@ -57,6 +57,7 @@ SPECIALIZE_RUNTIME_INIT_INFO(HSA, "HSA runtime") SPECIALIZE_RUNTIME_INIT_INFO(HIP, "HIP runtime") SPECIALIZE_RUNTIME_INIT_INFO(MARKER, "Marker (ROCTx) runtime") SPECIALIZE_RUNTIME_INIT_INFO(RCCL, "RCCL runtime") +SPECIALIZE_RUNTIME_INIT_INFO(ROCDECODE, "ROCDecode runtime") #undef SPECIALIZE_RUNTIME_INIT_INFO diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 292e5e8933..ef26a2975e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -62,6 +62,10 @@ add_subdirectory(thread-trace) add_subdirectory(pc_sampling) add_subdirectory(hip-graph-tracing) add_subdirectory(counter-collection) +if(ROCPROFILER_BUILD_ROCDECODE_TESTS) + add_subdirectory(rocdecode) +endif() + if(ROCPROFILER_BUILD_OPENMP_TESTS) add_subdirectory(openmp-tools) endif() diff --git a/tests/bin/CMakeLists.txt b/tests/bin/CMakeLists.txt index 8ae854798c..c065303ce6 100644 --- a/tests/bin/CMakeLists.txt +++ b/tests/bin/CMakeLists.txt @@ -29,3 +29,6 @@ add_subdirectory(hsa-queue-dependency) add_subdirectory(hip-graph) add_subdirectory(hsa-memory-allocation) add_subdirectory(pc-sampling) +if(ROCPROFILER_BUILD_ROCDECODE_TESTS) + 
add_subdirectory(rocdecode) +endif() diff --git a/tests/bin/rocdecode/CMakeLists.txt b/tests/bin/rocdecode/CMakeLists.txt new file mode 100644 index 0000000000..99ec32635b --- /dev/null +++ b/tests/bin/rocdecode/CMakeLists.txt @@ -0,0 +1,43 @@ +# +# +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +if(NOT CMAKE_HIP_COMPILER) + find_program( + amdclangpp_EXECUTABLE + NAMES amdclang++ + HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATH_SUFFIXES bin llvm/bin NO_CACHE) + mark_as_advanced(amdclangpp_EXECUTABLE) + + if(amdclangpp_EXECUTABLE) + set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}") + endif() +endif() + +project(rocprofiler-tool-test-app-rocdecode LANGUAGES CXX HIP) + +foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) + if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "") + set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}") + endif() +endforeach() + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_HIP_STANDARD 17) +set(CMAKE_HIP_EXTENSIONS OFF) +set(CMAKE_HIP_STANDARD_REQUIRED ON) + +set_source_files_properties(rocdecode.cpp roc_video_dec.cpp PROPERTIES LANGUAGE HIP) +add_executable(rocdecode) +target_sources(rocdecode PRIVATE rocdecode.cpp roc_video_dec.cpp) + +find_package(Threads REQUIRED) +find_package(rocDecode REQUIRED) +target_link_libraries( + rocdecode PRIVATE rocprofiler-sdk::tests-build-flags Threads::Threads hsa-runtime64 + rocprofiler-sdk::tests-common-library rocDecode::rocDecode) diff --git a/tests/bin/rocdecode/roc_video_dec.cpp b/tests/bin/rocdecode/roc_video_dec.cpp new file mode 100644 index 0000000000..7921b4a5c7 --- /dev/null +++ b/tests/bin/rocdecode/roc_video_dec.cpp @@ -0,0 +1,1456 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "roc_video_dec.h" + +RocVideoDecoder::RocVideoDecoder(int device_id, + OutputSurfaceMemoryType out_mem_type, + rocDecVideoCodec codec, + bool force_zero_latency, + const Rect* p_crop_rect, + bool extract_user_sei_Message, + uint32_t disp_delay, + int max_width, + int max_height, + uint32_t clk_rate) +: device_id_{device_id} +, out_mem_type_(out_mem_type) +, b_extract_sei_message_(extract_user_sei_Message) +, b_force_zero_latency_(force_zero_latency) +, disp_delay_(disp_delay) +, codec_id_(codec) +, max_width_(max_width) +, max_height_(max_height) +{ + if(!InitHIP(device_id_)) + { + THROW("Failed to initilize the HIP"); + } + if(p_crop_rect) crop_rect_ = *p_crop_rect; + if(b_extract_sei_message_) + { + fp_sei_ = fopen("rocdec_sei_message.txt", "wb"); + curr_sei_message_ptr_ = new RocdecSeiMessageInfo; + memset(&sei_message_display_q_, 0, sizeof(sei_message_display_q_)); + } + // create rocdec videoparser + RocdecParserParams parser_params = {}; + parser_params.codec_type = codec_id_; + parser_params.max_num_decode_surfaces = + 1; // let the parser to determine the decode buffer pool size + parser_params.clock_rate = clk_rate; + parser_params.max_display_delay = disp_delay_; + parser_params.user_data = this; + parser_params.pfn_sequence_callback = HandleVideoSequenceProc; + parser_params.pfn_decode_picture = HandlePictureDecodeProc; + parser_params.pfn_display_picture = b_force_zero_latency_ ? NULL : HandlePictureDisplayProc; + parser_params.pfn_get_sei_msg = b_extract_sei_message_ ? 
HandleSEIMessagesProc : NULL; + ROCDEC_API_CALL(rocDecCreateVideoParser(&rocdec_parser_, &parser_params)); +} + +RocVideoDecoder::~RocVideoDecoder() +{ + auto start_time = StartTimer(); + if(curr_sei_message_ptr_) + { + delete curr_sei_message_ptr_; + curr_sei_message_ptr_ = nullptr; + } + + if(fp_sei_) + { + fclose(fp_sei_); + fp_sei_ = nullptr; + } + + if(rocdec_parser_) + { + rocDecDestroyVideoParser(rocdec_parser_); + rocdec_parser_ = nullptr; + } + + if(roc_decoder_) + { + rocDecDestroyDecoder(roc_decoder_); + roc_decoder_ = nullptr; + } + + if(curr_video_format_ptr_) + { + delete curr_video_format_ptr_; + curr_video_format_ptr_ = nullptr; + } + + std::lock_guard lock(mtx_vp_frame_); + if(out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL) + { + for(auto& p_frame : vp_frames_) + { + if(p_frame.frame_ptr) + { + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + hipError_t hip_status = hipFree(p_frame.frame_ptr); + if(hip_status != hipSuccess) + { + std::cerr << "ERROR: hipFree failed! (" << hip_status << ")" << std::endl; + } + } + else + delete[](p_frame.frame_ptr); + p_frame.frame_ptr = nullptr; + } + } + } + if(hip_stream_) + { + hipError_t hip_status = hipSuccess; + hip_status = hipStreamDestroy(hip_stream_); + if(hip_status != hipSuccess) + { + std::cerr << "ERROR: hipStream_Destroy failed! 
(" << hip_status << ")" << std::endl; + } + } + if(fp_out_) + { + fclose(fp_out_); + fp_out_ = nullptr; + } + + double elapsed_time = StopTimer(start_time); + AddDecoderSessionOverHead(std::this_thread::get_id(), elapsed_time); +} + +static const char* +GetVideoCodecString(rocDecVideoCodec e_codec) +{ + static struct + { + rocDecVideoCodec e_codec; + const char* name; + } aCodecName[] = { + {rocDecVideoCodec_MPEG1, "MPEG-1"}, + {rocDecVideoCodec_MPEG2, "MPEG-2"}, + {rocDecVideoCodec_MPEG4, "MPEG-4 (ASP)"}, + {rocDecVideoCodec_AVC, "AVC/H.264"}, + {rocDecVideoCodec_HEVC, "H.265/HEVC"}, + {rocDecVideoCodec_AV1, "AV1"}, + {rocDecVideoCodec_VP8, "VP8"}, + {rocDecVideoCodec_VP9, "VP9"}, + {rocDecVideoCodec_JPEG, "M-JPEG"}, + {rocDecVideoCodec_NumCodecs, "Invalid"}, + }; + + if(e_codec >= 0 && e_codec <= rocDecVideoCodec_NumCodecs) + { + return aCodecName[e_codec].name; + } + for(size_t i = rocDecVideoCodec_NumCodecs + 1; i < sizeof(aCodecName) / sizeof(aCodecName[0]); + i++) + { + if(e_codec == aCodecName[i].e_codec) + { + return aCodecName[e_codec].name; + } + } + return "Unknown"; +} + +/** + * @brief function to return the name from codec_id + * + * @param codec_id + * @return const char* + */ +const char* +RocVideoDecoder::GetCodecFmtName(rocDecVideoCodec codec_id) +{ + return GetVideoCodecString(codec_id); +} + +static const char* +GetSurfaceFormatString(rocDecVideoSurfaceFormat surface_format_id) +{ + static struct + { + rocDecVideoSurfaceFormat surf_fmt; + const char* name; + } SurfName[] = { + {rocDecVideoSurfaceFormat_NV12, "NV12"}, + {rocDecVideoSurfaceFormat_P016, "P016"}, + {rocDecVideoSurfaceFormat_YUV444, "YUV444"}, + {rocDecVideoSurfaceFormat_YUV444_16Bit, "YUV444_16Bit"}, + }; + + if(surface_format_id >= rocDecVideoSurfaceFormat_NV12 && + surface_format_id <= rocDecVideoSurfaceFormat_YUV444_16Bit) + return SurfName[surface_format_id].name; + else + return "Unknown"; +} + +/** + * @brief function to return the name from surface_format_id + * + * @param 
surface_format_id - enum for surface format + * @return const char* + */ +const char* +RocVideoDecoder::GetSurfaceFmtName(rocDecVideoSurfaceFormat surface_format_id) +{ + return GetSurfaceFormatString(surface_format_id); +} + +static const char* +GetVideoChromaFormatName(rocDecVideoChromaFormat e_chroma_format) +{ + static struct + { + rocDecVideoChromaFormat chroma_fmt; + const char* name; + } ChromaFormatName[] = { + {rocDecVideoChromaFormat_Monochrome, "YUV 400 (Monochrome)"}, + {rocDecVideoChromaFormat_420, "YUV 420"}, + {rocDecVideoChromaFormat_422, "YUV 422"}, + {rocDecVideoChromaFormat_444, "YUV 444"}, + }; + + if(e_chroma_format >= 0 && e_chroma_format <= rocDecVideoChromaFormat_444) + { + return ChromaFormatName[e_chroma_format].name; + } + return "Unknown"; +} + +static void +GetSurfaceStrideInternal(rocDecVideoSurfaceFormat surface_format, + uint32_t width, + uint32_t height, + uint32_t* pitch, + uint32_t* vstride) +{ + switch(surface_format) + { + case rocDecVideoSurfaceFormat_NV12: + *pitch = align(width, 256); + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_P016: + *pitch = align(width, 128) * 2; + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_YUV444: + *pitch = align(width, 256); + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_YUV444_16Bit: + *pitch = align(width, 128) * 2; + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_YUV420: + *pitch = align(width, 256); + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_YUV420_16Bit: + *pitch = align(width, 128) * 2; + *vstride = align(height, 16); + break; + } + return; +} + +/* Return value from HandleVideoSequence() are interpreted as : + * 0: fail, 1: succeeded, > 1: override dpb size of parser (set by + * CUVIDPARSERPARAMS::max_num_decode_surfaces while creating parser) + */ +int +RocVideoDecoder::HandleVideoSequence(RocdecVideoFormat* p_video_format) +{ + if(p_video_format == nullptr) + 
{ + ROCDEC_THROW("Rocdec:: Invalid video format in HandleVideoSequence: ", + ROCDEC_INVALID_PARAMETER); + return 0; + } + auto start_time = StartTimer(); + input_video_info_str_.str(""); + input_video_info_str_.clear(); + input_video_info_str_ << "Input Video Information" << std::endl + << "\tCodec : " << GetCodecFmtName(p_video_format->codec) + << std::endl; + if(p_video_format->frame_rate.numerator && p_video_format->frame_rate.denominator) + { + input_video_info_str_ << "\tFrame rate : " << p_video_format->frame_rate.numerator << "/" + << p_video_format->frame_rate.denominator << " = " + << 1.0 * p_video_format->frame_rate.numerator / + p_video_format->frame_rate.denominator + << " fps" << std::endl; + } + input_video_info_str_ << "\tSequence : " + << (p_video_format->progressive_sequence ? "Progressive" : "Interlaced") + << std::endl + << "\tCoded size : [" << p_video_format->coded_width << ", " + << p_video_format->coded_height << "]" << std::endl + << "\tDisplay area : [" << p_video_format->display_area.left << ", " + << p_video_format->display_area.top << ", " + << p_video_format->display_area.right << ", " + << p_video_format->display_area.bottom << "]" << std::endl + << "\tChroma : " + << GetVideoChromaFormatName(p_video_format->chroma_format) << std::endl + << "\tBit depth : " << p_video_format->bit_depth_luma_minus8 + 8; + input_video_info_str_ << std::endl; + + int num_decode_surfaces = p_video_format->min_num_decode_surfaces; + + RocdecDecodeCaps decode_caps; + memset(&decode_caps, 0, sizeof(decode_caps)); + decode_caps.codec_type = p_video_format->codec; + decode_caps.chroma_format = p_video_format->chroma_format; + decode_caps.bit_depth_minus_8 = p_video_format->bit_depth_luma_minus8; + + rocDecGetDecoderCaps(&decode_caps); + if(!decode_caps.is_supported) + { + ROCDEC_THROW("rocDecode:: Codec not supported on this GPU ", ROCDEC_NOT_SUPPORTED); + return 0; + } + if((p_video_format->coded_width > decode_caps.max_width) || + 
(p_video_format->coded_height > decode_caps.max_height))
+    {
+        std::ostringstream errorString;
+        errorString << std::endl
+                    << "Resolution : " << p_video_format->coded_width << "x"
+                    << p_video_format->coded_height << std::endl
+                    << "Max Supported (wxh) : " << decode_caps.max_width << "x"
+                    << decode_caps.max_height << std::endl
+                    << "Resolution not supported on this GPU ";
+        const std::string cErr = errorString.str();
+        ROCDEC_THROW(cErr, ROCDEC_NOT_SUPPORTED);
+        return 0;
+    }
+    if(curr_video_format_ptr_ == nullptr)
+    {
+        curr_video_format_ptr_ = new RocdecVideoFormat();
+    }
+    // store current video format: this is required to call reconfigure from application in case of
+    // random seek
+    if(curr_video_format_ptr_)
+        memcpy(curr_video_format_ptr_, p_video_format, sizeof(RocdecVideoFormat));
+
+    if(coded_width_ && coded_height_)
+    {
+        // rocdecCreateDecoder() has been called before, and now there's possible config change
+        return ReconfigureDecoder(p_video_format);
+    }
+    // e_codec has been set in the constructor (for parser). Here it's set again for potential
+    // correction
+    codec_id_ = p_video_format->codec;
+    video_chroma_format_ = p_video_format->chroma_format;
+    bitdepth_minus_8_ = p_video_format->bit_depth_luma_minus8;
+    byte_per_pixel_ = bitdepth_minus_8_ > 0 ? 2 : 1;
+
+    // Set the output surface format same as chroma format.
+    // Both enumerators are compared against the member explicitly; writing
+    // `x == A || B` would only test the constant B itself, not the chroma format.
+    if(video_chroma_format_ == rocDecVideoChromaFormat_420 ||
+       video_chroma_format_ == rocDecVideoChromaFormat_Monochrome)
+        video_surface_format_ =
+            bitdepth_minus_8_ ? rocDecVideoSurfaceFormat_P016 : rocDecVideoSurfaceFormat_NV12;
+    else if(video_chroma_format_ == rocDecVideoChromaFormat_444)
+        video_surface_format_ = bitdepth_minus_8_ ? rocDecVideoSurfaceFormat_YUV444_16Bit
+                                                  : rocDecVideoSurfaceFormat_YUV444;
+    else if(video_chroma_format_ == rocDecVideoChromaFormat_422)
+        video_surface_format_ = rocDecVideoSurfaceFormat_NV12;
+
+    // Check if output format supported.
If not, check falback options + if(!(decode_caps.output_format_mask & (1 << video_surface_format_))) + { + if(decode_caps.output_format_mask & (1 << rocDecVideoSurfaceFormat_NV12)) + video_surface_format_ = rocDecVideoSurfaceFormat_NV12; + else if(decode_caps.output_format_mask & (1 << rocDecVideoSurfaceFormat_P016)) + video_surface_format_ = rocDecVideoSurfaceFormat_P016; + else if(decode_caps.output_format_mask & (1 << rocDecVideoSurfaceFormat_YUV444)) + video_surface_format_ = rocDecVideoSurfaceFormat_YUV444; + else if(decode_caps.output_format_mask & (1 << rocDecVideoSurfaceFormat_YUV444_16Bit)) + video_surface_format_ = rocDecVideoSurfaceFormat_YUV444_16Bit; + else + ROCDEC_THROW("No supported output format found", ROCDEC_NOT_SUPPORTED); + } + + coded_width_ = p_video_format->coded_width; + coded_height_ = p_video_format->coded_height; + disp_rect_.top = p_video_format->display_area.top; + disp_rect_.bottom = p_video_format->display_area.bottom; + disp_rect_.left = p_video_format->display_area.left; + disp_rect_.right = p_video_format->display_area.right; + disp_width_ = p_video_format->display_area.right - p_video_format->display_area.left; + disp_height_ = p_video_format->display_area.bottom - p_video_format->display_area.top; + + // AV1 has max width/height of sequence in sequence header + if(codec_id_ == rocDecVideoCodec_AV1 && p_video_format->seqhdr_data_length > 0) + { + // dont overwrite if it is already set from cmdline or reconfig.txt + if(!(static_cast(max_width_) > p_video_format->coded_width || + static_cast(max_height_) > p_video_format->coded_height)) + { + RocdecVideoFormatEx* vidFormatEx = (RocdecVideoFormatEx*) p_video_format; + max_width_ = vidFormatEx->max_width; + max_height_ = vidFormatEx->max_height; + } + } + if(max_width_ < (int) p_video_format->coded_width) max_width_ = p_video_format->coded_width; + if(max_height_ < (int) p_video_format->coded_height) max_height_ = p_video_format->coded_height; + + RocDecoderCreateInfo 
videoDecodeCreateInfo = {}; + videoDecodeCreateInfo.device_id = device_id_; + videoDecodeCreateInfo.codec_type = codec_id_; + videoDecodeCreateInfo.chroma_format = video_chroma_format_; + videoDecodeCreateInfo.output_format = video_surface_format_; + videoDecodeCreateInfo.bit_depth_minus_8 = bitdepth_minus_8_; + videoDecodeCreateInfo.num_decode_surfaces = num_decode_surfaces; + videoDecodeCreateInfo.width = coded_width_; + videoDecodeCreateInfo.height = coded_height_; + videoDecodeCreateInfo.max_width = max_width_; + videoDecodeCreateInfo.max_height = max_height_; + if(!(crop_rect_.right && crop_rect_.bottom)) + { + videoDecodeCreateInfo.display_rect.top = disp_rect_.top; + videoDecodeCreateInfo.display_rect.bottom = disp_rect_.bottom; + videoDecodeCreateInfo.display_rect.left = disp_rect_.left; + videoDecodeCreateInfo.display_rect.right = disp_rect_.right; + target_width_ = (disp_width_ + 1) & ~1; + target_height_ = (disp_height_ + 1) & ~1; + } + else + { + videoDecodeCreateInfo.display_rect.top = crop_rect_.top; + videoDecodeCreateInfo.display_rect.bottom = crop_rect_.bottom; + videoDecodeCreateInfo.display_rect.left = crop_rect_.left; + videoDecodeCreateInfo.display_rect.right = crop_rect_.right; + target_width_ = (crop_rect_.right - crop_rect_.left + 1) & ~1; + target_height_ = (crop_rect_.bottom - crop_rect_.top + 1) & ~1; + } + videoDecodeCreateInfo.target_width = target_width_; + videoDecodeCreateInfo.target_height = target_height_; + + chroma_height_ = (int) (ceil(target_height_ * GetChromaHeightFactor(video_surface_format_))); + num_chroma_planes_ = GetChromaPlaneCount(video_surface_format_); + if(video_chroma_format_ == rocDecVideoChromaFormat_Monochrome) num_chroma_planes_ = 0; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL || out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) + GetSurfaceStrideInternal(video_surface_format_, + p_video_format->coded_width, + p_video_format->coded_height, + &surface_stride_, + &surface_vstride_); + else + { + 
surface_stride_ = + videoDecodeCreateInfo.target_width * + byte_per_pixel_; // todo:: check if we need pitched memory for faster copy + } + chroma_vstride_ = (int) (ceil(surface_vstride_ * GetChromaHeightFactor(video_surface_format_))); + // fill output_surface_info_ + output_surface_info_.output_width = target_width_; + output_surface_info_.output_height = target_height_; + output_surface_info_.output_pitch = surface_stride_; + output_surface_info_.output_vstride = (out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + ? surface_vstride_ + : videoDecodeCreateInfo.target_height; + output_surface_info_.disp_rect = disp_rect_; + output_surface_info_.chroma_height = chroma_height_; + output_surface_info_.bit_depth = bitdepth_minus_8_ + 8; + output_surface_info_.bytes_per_pixel = byte_per_pixel_; + output_surface_info_.surface_format = video_surface_format_; + output_surface_info_.num_chroma_planes = num_chroma_planes_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_COPIED; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_HOST_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_HOST_COPIED; + } + else + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_NOT_MAPPED; + } + + input_video_info_str_ << "Video Decoding Params:" << std::endl + << "\tNum Surfaces : " << videoDecodeCreateInfo.num_decode_surfaces + << std::endl + << "\tCrop : [" << 
videoDecodeCreateInfo.display_rect.left << ", " + << videoDecodeCreateInfo.display_rect.top << ", " + << videoDecodeCreateInfo.display_rect.right << ", " + << videoDecodeCreateInfo.display_rect.bottom << "]" << std::endl + << "\tResize : " << videoDecodeCreateInfo.target_width << "x" + << videoDecodeCreateInfo.target_height << std::endl; + input_video_info_str_ << std::endl; + std::cout << input_video_info_str_.str(); + + ROCDEC_API_CALL(rocDecCreateDecoder(&roc_decoder_, &videoDecodeCreateInfo)); + double elapsed_time = StopTimer(start_time); + AddDecoderSessionOverHead(std::this_thread::get_id(), elapsed_time); + return num_decode_surfaces; +} + +/** + * @brief Function to set the Reconfig Params object + * + * @param p_reconfig_params: pointer to reconfig params struct + * @return true : success + * @return false : fail + */ +bool +RocVideoDecoder::SetReconfigParams(ReconfigParams* p_reconfig_params, bool b_force_reconfig_flush) +{ + if(!p_reconfig_params) + { + std::cerr << "ERROR: Invalid reconfig struct passed! " << std::endl; + return false; + } + // save it + p_reconfig_params_ = p_reconfig_params; + b_force_recofig_flush_ = b_force_reconfig_flush; + return true; +} + +/** + * @brief Function to force Reconfigure Flush: needed for random seeking to key frames + * + * @return int 1: Success 0: Fail + */ +int +RocVideoDecoder::FlushAndReconfigure() +{ + if(!p_reconfig_params_) + { + std::cerr << "ERROR: Reconfig params is not set! " << std::endl; + return 0; + } + if(!curr_video_format_ptr_) + { + std::cerr << "ERROR: video format is not initialized! " << std::endl; + return 0; + } + // call reconfigure + b_force_recofig_flush_ = true; // if not already set to force reconfigure + ReconfigureDecoder(curr_video_format_ptr_); + return true; +} + +/** + * @brief function to reconfigure decoder if there is a change in sequence params. 
+ * + * @param p_video_format + * @return int 1: success 0: fail + */ +int +RocVideoDecoder::ReconfigureDecoder(RocdecVideoFormat* p_video_format) +{ + if(p_video_format->codec != codec_id_) + { + ROCDEC_THROW("Reconfigure Not supported for codec change", ROCDEC_NOT_SUPPORTED); + return 0; + } + if(p_video_format->chroma_format != video_chroma_format_) + { + ROCDEC_THROW("Reconfigure Not supported for chroma format change", ROCDEC_NOT_SUPPORTED); + return 0; + } + if(p_video_format->bit_depth_luma_minus8 != bitdepth_minus_8_) + { + ROCDEC_THROW("Reconfigure Not supported for bit depth change", ROCDEC_NOT_SUPPORTED); + return 0; + } + bool is_decode_res_changed = !(p_video_format->coded_width == coded_width_ && + p_video_format->coded_height == coded_height_); + bool is_display_rect_changed = !(p_video_format->display_area.bottom == disp_rect_.bottom && + p_video_format->display_area.top == disp_rect_.top && + p_video_format->display_area.left == disp_rect_.left && + p_video_format->display_area.right == disp_rect_.right); + + if(!is_decode_res_changed && !is_display_rect_changed && !b_force_recofig_flush_) + { + return 1; + } + + // Flush and clear internal frame store to reconfigure when either coded size or display size + // has changed. 
+ if(p_reconfig_params_ && p_reconfig_params_->p_fn_reconfigure_flush) + num_frames_flushed_during_reconfig_ += p_reconfig_params_->p_fn_reconfigure_flush( + this, + p_reconfig_params_->reconfig_flush_mode, + static_cast(p_reconfig_params_->p_reconfig_user_struct)); + // clear the existing output buffers of different size + // note that app lose the remaining frames in the vp_frames/vp_frames_q in case application + // didn't set p_fn_reconfigure_flush_ callback + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + ReleaseInternalFrames(); + } + else + { + std::lock_guard lock(mtx_vp_frame_); + while(!vp_frames_.empty()) + { + DecFrameBuffer* p_frame = &vp_frames_.back(); + // pop decoded frame + vp_frames_.pop_back(); + if(p_frame->frame_ptr) + { + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + hipError_t hip_status = hipFree(p_frame->frame_ptr); + if(hip_status != hipSuccess) + std::cerr << "ERROR: hipFree failed! (" << hip_status << ")" << std::endl; + } + else + delete[](p_frame->frame_ptr); + } + } + } + output_frame_cnt_ = 0; // reset frame_count + if(is_decode_res_changed) + { + coded_width_ = p_video_format->coded_width; + coded_height_ = p_video_format->coded_height; + } + if(is_display_rect_changed) + { + disp_rect_.left = p_video_format->display_area.left; + disp_rect_.right = p_video_format->display_area.right; + disp_rect_.top = p_video_format->display_area.top; + disp_rect_.bottom = p_video_format->display_area.bottom; + disp_width_ = p_video_format->display_area.right - p_video_format->display_area.left; + disp_height_ = p_video_format->display_area.bottom - p_video_format->display_area.top; + chroma_height_ = static_cast( + std::ceil(target_height_ * GetChromaHeightFactor(video_surface_format_))); + if(!(crop_rect_.right && crop_rect_.bottom)) + { + target_width_ = (disp_width_ + 1) & ~1; + target_height_ = (disp_height_ + 1) & ~1; + } + else + { + target_width_ = (crop_rect_.right - crop_rect_.left + 1) & ~1; + target_height_ = 
(crop_rect_.bottom - crop_rect_.top + 1) & ~1; + } + } + + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL || out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) + { + GetSurfaceStrideInternal(video_surface_format_, + coded_width_, + coded_height_, + &surface_stride_, + &surface_vstride_); + } + else + { + surface_stride_ = target_width_ * byte_per_pixel_; + } + chroma_height_ = + static_cast(ceil(target_height_ * GetChromaHeightFactor(video_surface_format_))); + num_chroma_planes_ = GetChromaPlaneCount(video_surface_format_); + if(p_video_format->chroma_format == rocDecVideoChromaFormat_Monochrome) num_chroma_planes_ = 0; + chroma_vstride_ = static_cast( + std::ceil(surface_vstride_ * GetChromaHeightFactor(video_surface_format_))); + // Fill output_surface_info_ + output_surface_info_.output_width = target_width_; + output_surface_info_.output_height = target_height_; + output_surface_info_.output_pitch = surface_stride_; + output_surface_info_.output_vstride = + (out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) ? 
surface_vstride_ : target_height_; + output_surface_info_.disp_rect = disp_rect_; + output_surface_info_.chroma_height = chroma_height_; + output_surface_info_.bit_depth = bitdepth_minus_8_ + 8; + output_surface_info_.bytes_per_pixel = byte_per_pixel_; + output_surface_info_.surface_format = video_surface_format_; + output_surface_info_.num_chroma_planes = num_chroma_planes_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_COPIED; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_HOST_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_HOST_COPIED; + } + else + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_NOT_MAPPED; + } + + // If the coded_width or coded_height hasn't changed but display resolution has changed, then + // need to update width and height for correct output with cropping. There is no need to + // reconfigure the decoder. 
+ if(!is_decode_res_changed && is_display_rect_changed) + { + return 1; + } + + RocdecReconfigureDecoderInfo reconfig_params = {}; + reconfig_params.width = coded_width_; + reconfig_params.height = coded_height_; + reconfig_params.target_width = target_width_; + reconfig_params.target_height = target_height_; + reconfig_params.num_decode_surfaces = p_video_format->min_num_decode_surfaces; + if(!(crop_rect_.right && crop_rect_.bottom)) + { + reconfig_params.display_rect.top = disp_rect_.top; + reconfig_params.display_rect.bottom = disp_rect_.bottom; + reconfig_params.display_rect.left = disp_rect_.left; + reconfig_params.display_rect.right = disp_rect_.right; + } + else + { + reconfig_params.display_rect.top = crop_rect_.top; + reconfig_params.display_rect.bottom = crop_rect_.bottom; + reconfig_params.display_rect.left = crop_rect_.left; + reconfig_params.display_rect.right = crop_rect_.right; + } + + if(roc_decoder_ == nullptr) + { + ROCDEC_THROW("Reconfigurition of the decoder detected but the decoder was not initialized " + "previoulsy!", + ROCDEC_NOT_SUPPORTED); + return 0; + } + ROCDEC_API_CALL(rocDecReconfigureDecoder(roc_decoder_, &reconfig_params)); + + input_video_info_str_.str(""); + input_video_info_str_.clear(); + input_video_info_str_ << "Input Video Resolution Changed:" << std::endl + << "\tCoded size : [" << p_video_format->coded_width << ", " + << p_video_format->coded_height << "]" << std::endl + << "\tDisplay area : [" << p_video_format->display_area.left << ", " + << p_video_format->display_area.top << ", " + << p_video_format->display_area.right << ", " + << p_video_format->display_area.bottom << "]" << std::endl; + input_video_info_str_ << std::endl; + input_video_info_str_ << "Video Decoding Params:" << std::endl + << "\tNum Surfaces : " << reconfig_params.num_decode_surfaces << std::endl + << "\tResize : " << reconfig_params.target_width << "x" + << reconfig_params.target_height << std::endl; + input_video_info_str_ << std::endl; + std::cout 
<< input_video_info_str_.str(); + + is_decoder_reconfigured_ = true; + return 1; +} + +/** + * @brief + * + * @param pPicParams + * @return int 1: success 0: fail + */ +int +RocVideoDecoder::HandlePictureDecode(RocdecPicParams* pPicParams) +{ + if(!roc_decoder_) + { + THROW("RocDecoder not initialized: failed with ErrCode: " + TOSTR(ROCDEC_NOT_INITIALIZED)); + } + pic_num_in_dec_order_[pPicParams->curr_pic_idx] = decode_poc_++; + ROCDEC_API_CALL(rocDecDecodeFrame(roc_decoder_, pPicParams)); + last_decode_surf_idx_ = pPicParams->curr_pic_idx; + decoded_pic_cnt_++; + if(b_force_zero_latency_ && ((!pPicParams->field_pic_flag) || (pPicParams->second_field))) + { + RocdecParserDispInfo disp_info; + memset(&disp_info, 0, sizeof(disp_info)); + disp_info.picture_index = pPicParams->curr_pic_idx; + disp_info.progressive_frame = !pPicParams->field_pic_flag; + disp_info.top_field_first = pPicParams->bottom_field_flag ^ 1; + HandlePictureDisplay(&disp_info); + } + return 1; +} + +/** + * @brief function to handle display picture + * + * @param pDispInfo + * @return int 0:fail 1: success + */ +int +RocVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo* pDispInfo) +{ + RocdecProcParams video_proc_params = {}; + video_proc_params.progressive_frame = pDispInfo->progressive_frame; + video_proc_params.top_field_first = pDispInfo->top_field_first; + + if(b_extract_sei_message_) + { + if(sei_message_display_q_[pDispInfo->picture_index].sei_data) + { + // Write SEI Message + uint8_t* sei_buffer = + (uint8_t*) (sei_message_display_q_[pDispInfo->picture_index].sei_data); + uint32_t sei_num_messages = + sei_message_display_q_[pDispInfo->picture_index].sei_message_count; + RocdecSeiMessage* sei_message = + sei_message_display_q_[pDispInfo->picture_index].sei_message; + if(fp_sei_) + { + for(uint32_t i = 0; i < sei_num_messages; i++) + { + if(codec_id_ == rocDecVideoCodec_AVC || codec_id_ == rocDecVideoCodec_HEVC) + { + switch(sei_message[i].sei_message_type) + { + case 
SEI_TYPE_TIME_CODE: + { + // todo:: check if we need to write timecode + } + break; + case SEI_TYPE_USER_DATA_UNREGISTERED: + { + fwrite(sei_buffer, sei_message[i].sei_message_size, 1, fp_sei_); + } + break; + } + } + if(codec_id_ == rocDecVideoCodec_AV1) + { + fwrite(sei_buffer, sei_message[i].sei_message_size, 1, fp_sei_); + } + sei_buffer += sei_message[i].sei_message_size; + } + } + free(sei_message_display_q_[pDispInfo->picture_index].sei_data); + sei_message_display_q_[pDispInfo->picture_index].sei_data = + NULL; // to avoid double free + free(sei_message_display_q_[pDispInfo->picture_index].sei_message); + sei_message_display_q_[pDispInfo->picture_index].sei_message = + NULL; // to avoid double free + } + } + if(out_mem_type_ != OUT_SURFACE_MEM_NOT_MAPPED) + { + void* src_dev_ptr[3] = {0}; + uint32_t src_pitch[3] = {0}; + ROCDEC_API_CALL(rocDecGetVideoFrame( + roc_decoder_, pDispInfo->picture_index, src_dev_ptr, src_pitch, &video_proc_params)); + RocdecDecodeStatus dec_status; + memset(&dec_status, 0, sizeof(dec_status)); + rocDecStatus result = + rocDecGetDecodeStatus(roc_decoder_, pDispInfo->picture_index, &dec_status); + if(result == ROCDEC_SUCCESS && + (dec_status.decode_status == rocDecodeStatus_Error || + dec_status.decode_status == rocDecodeStatus_Error_Concealed)) + { + std::cerr << "Decode Error occurred for picture: " + << pic_num_in_dec_order_[pDispInfo->picture_index] << std::endl; + } + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + DecFrameBuffer dec_frame = {}; + dec_frame.frame_ptr = (uint8_t*) (src_dev_ptr[0]); + dec_frame.pts = pDispInfo->pts; + dec_frame.picture_index = pDispInfo->picture_index; + std::lock_guard lock(mtx_vp_frame_); + vp_frames_q_.push(dec_frame); + output_frame_cnt_++; + } + else + { + // copy the decoded surface info device or host + uint8_t* p_dec_frame = nullptr; + { + std::lock_guard lock(mtx_vp_frame_); + // if not enough frames in stock, allocate + if((unsigned) ++output_frame_cnt_ > vp_frames_.size()) + { 
+ num_alloced_frames_++; + DecFrameBuffer dec_frame = {}; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + // allocate device memory + HIP_API_CALL(hipMalloc((void**) &dec_frame.frame_ptr, GetFrameSize())); + } + else + { + dec_frame.frame_ptr = new uint8_t[GetFrameSize()]; + } + dec_frame.pts = pDispInfo->pts; + dec_frame.picture_index = pDispInfo->picture_index; + vp_frames_.push_back(dec_frame); + } + p_dec_frame = vp_frames_[output_frame_cnt_ - 1].frame_ptr; + } + // Copy luma data + uint32_t dst_pitch = disp_width_ * byte_per_pixel_; + uint8_t* p_src_ptr_y = static_cast(src_dev_ptr[0]) + + (disp_rect_.top + crop_rect_.top) * src_pitch[0] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + if(src_pitch[0] == dst_pitch) + { + int luma_size = src_pitch[0] * coded_height_; + HIP_API_CALL( + hipMemcpyDtoDAsync(p_dec_frame, p_src_ptr_y, luma_size, hip_stream_)); + } + else + { + // use 2d copy to copy an ROI + HIP_API_CALL(hipMemcpy2DAsync(p_dec_frame, + dst_pitch, + p_src_ptr_y, + src_pitch[0], + dst_pitch, + disp_height_, + hipMemcpyDeviceToDevice, + hip_stream_)); + } + } + else + HIP_API_CALL(hipMemcpy2DAsync(p_dec_frame, + dst_pitch, + p_src_ptr_y, + src_pitch[0], + dst_pitch, + disp_height_, + hipMemcpyDeviceToHost, + hip_stream_)); + + // Copy chroma plane ( ) + // rocDec output gives pointer to luma and chroma pointers seperated for the decoded + // frame + uint8_t* p_frame_uv = p_dec_frame + dst_pitch * disp_height_; + uint8_t* p_src_ptr_uv = + (num_chroma_planes_ == 1) + ? 
static_cast(src_dev_ptr[1]) + + ((disp_rect_.top + crop_rect_.top) >> 1) * src_pitch[1] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_ + : static_cast(src_dev_ptr[1]) + + (disp_rect_.top + crop_rect_.top) * src_pitch[1] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + if(src_pitch[1] == dst_pitch) + { + int chroma_size = chroma_height_ * dst_pitch; + HIP_API_CALL( + hipMemcpyDtoDAsync(p_frame_uv, p_src_ptr_uv, chroma_size, hip_stream_)); + } + else + { + // use 2d copy to copy an ROI + HIP_API_CALL(hipMemcpy2DAsync(p_frame_uv, + dst_pitch, + p_src_ptr_uv, + src_pitch[1], + dst_pitch, + chroma_height_, + hipMemcpyDeviceToDevice, + hip_stream_)); + } + } + else + HIP_API_CALL(hipMemcpy2DAsync(p_frame_uv, + dst_pitch, + p_src_ptr_uv, + src_pitch[1], + dst_pitch, + chroma_height_, + hipMemcpyDeviceToHost, + hip_stream_)); + + if(num_chroma_planes_ == 2) + { + uint8_t* p_frame_v = p_dec_frame + dst_pitch * (disp_height_ + chroma_height_); + uint8_t* p_src_ptr_v = static_cast(src_dev_ptr[2]) + + (disp_rect_.top + crop_rect_.top) * src_pitch[2] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + if(src_pitch[2] == dst_pitch) + { + int chroma_size = chroma_height_ * dst_pitch; + HIP_API_CALL( + hipMemcpyDtoDAsync(p_frame_v, p_src_ptr_v, chroma_size, hip_stream_)); + } + else + { + // use 2d copy to copy an ROI + HIP_API_CALL(hipMemcpy2DAsync(p_frame_v, + dst_pitch, + p_src_ptr_v, + src_pitch[2], + dst_pitch, + chroma_height_, + hipMemcpyDeviceToDevice, + hip_stream_)); + } + } + else + HIP_API_CALL(hipMemcpy2DAsync(p_frame_v, + dst_pitch, + p_src_ptr_v, + src_pitch[2], + dst_pitch, + chroma_height_, + hipMemcpyDeviceToHost, + hip_stream_)); + } + + HIP_API_CALL(hipStreamSynchronize(hip_stream_)); + } + } + else + { + RocdecDecodeStatus dec_status; + memset(&dec_status, 0, sizeof(dec_status)); + rocDecStatus result = + 
rocDecGetDecodeStatus(roc_decoder_, pDispInfo->picture_index, &dec_status); + if(result == ROCDEC_SUCCESS && + (dec_status.decode_status == rocDecodeStatus_Error || + dec_status.decode_status == rocDecodeStatus_Error_Concealed)) + { + std::cerr << "Decode Error occurred for picture: " + << pic_num_in_dec_order_[pDispInfo->picture_index] << std::endl; + } + output_frame_cnt_++; + } + + return 1; +} + +int +RocVideoDecoder::GetSEIMessage(RocdecSeiMessageInfo* pSEIMessageInfo) +{ + uint32_t sei_num_mesages = pSEIMessageInfo->sei_message_count; + if(sei_num_mesages) + { + RocdecSeiMessage* p_sei_msg_info = pSEIMessageInfo->sei_message; + size_t total_SEI_buff_size = 0; + if((pSEIMessageInfo->picIdx < 0) || (pSEIMessageInfo->picIdx >= MAX_FRAME_NUM)) + { + ERR("Invalid picture index for SEI message: " + TOSTR(pSEIMessageInfo->picIdx)); + return 0; + } + for(uint32_t i = 0; i < sei_num_mesages; i++) + { + total_SEI_buff_size += p_sei_msg_info[i].sei_message_size; + } + if(!curr_sei_message_ptr_) + { + ERR("Out of Memory, Allocation failed for m_pCurrSEIMessage"); + return 0; + } + curr_sei_message_ptr_->sei_data = malloc(total_SEI_buff_size); + if(!curr_sei_message_ptr_->sei_data) + { + ERR("Out of Memory, Allocation failed for SEI Buffer"); + return 0; + } + memcpy(curr_sei_message_ptr_->sei_data, pSEIMessageInfo->sei_data, total_SEI_buff_size); + curr_sei_message_ptr_->sei_message = + (RocdecSeiMessage*) malloc(sizeof(RocdecSeiMessage) * sei_num_mesages); + if(!curr_sei_message_ptr_->sei_message) + { + free(curr_sei_message_ptr_->sei_data); + curr_sei_message_ptr_->sei_data = NULL; + return 0; + } + memcpy(curr_sei_message_ptr_->sei_message, + pSEIMessageInfo->sei_message, + sizeof(RocdecSeiMessage) * sei_num_mesages); + curr_sei_message_ptr_->sei_message_count = pSEIMessageInfo->sei_message_count; + sei_message_display_q_[pSEIMessageInfo->picIdx] = *curr_sei_message_ptr_; + } + return 1; +} + +int +RocVideoDecoder::DecodeFrame(const uint8_t* data, + size_t size, + int 
pkt_flags, + int64_t pts, + int* num_decoded_pics) +{ + output_frame_cnt_ = 0, output_frame_cnt_ret_ = 0; + decoded_pic_cnt_ = 0; + RocdecSourceDataPacket packet = {}; + packet.payload = data; + packet.payload_size = size; + packet.flags = pkt_flags | ROCDEC_PKT_TIMESTAMP; + packet.pts = pts; + if(!data || size == 0) + { + packet.flags |= ROCDEC_PKT_ENDOFSTREAM; + } + ROCDEC_API_CALL(rocDecParseVideoData(rocdec_parser_, &packet)); + if(num_decoded_pics) + { + *num_decoded_pics = decoded_pic_cnt_; + } + return output_frame_cnt_; +} + +uint8_t* +RocVideoDecoder::GetFrame(int64_t* pts) +{ + if(output_frame_cnt_ > 0) + { + std::lock_guard lock(mtx_vp_frame_); + output_frame_cnt_--; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL && !vp_frames_q_.empty()) + { + DecFrameBuffer* fb = &vp_frames_q_.front(); + if(pts) *pts = fb->pts; + return fb->frame_ptr; + } + else if(vp_frames_.size() > 0) + { + if(pts) *pts = vp_frames_[output_frame_cnt_ret_].pts; + return vp_frames_[output_frame_cnt_ret_++].frame_ptr; + } + } + return nullptr; +} + +/** + * @brief function to release frame after use by the application: Only used with + * "OUT_SURFACE_MEM_DEV_INTERNAL" + * + * @param pTimestamp - timestamp of the frame to be released (unmapped) + * @return true - success + * @return false - falied + */ + +bool +RocVideoDecoder::ReleaseFrame(int64_t pTimestamp, bool b_flushing) +{ + if(out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) return true; // nothing to do + if(out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL) + { + if(!b_flushing) // if not flushing the buffers are re-used, so keep them + return true; // nothing to do + else + { + DecFrameBuffer* fb = &vp_frames_[0]; + if(pTimestamp != fb->pts) + { + std::cerr << "Decoded Frame is released out of order" << std::endl; + return false; + } + vp_frames_.erase(vp_frames_.begin()); // get rid of the frames from the framestore + } + } + // only needed when using internal mapped buffer + if(!vp_frames_q_.empty()) + { + std::lock_guard 
lock(mtx_vp_frame_); + DecFrameBuffer* fb = &vp_frames_q_.front(); + + if(pTimestamp != fb->pts) + { + std::cerr << "Decoded Frame is released out of order" << std::endl; + return false; + } + // pop decoded frame + vp_frames_q_.pop(); + } + return true; +} + +/** + * @brief function to release all internal frames and clear the q (used with reconfigure): Only used + * with "OUT_SURFACE_MEM_DEV_INTERNAL" + * + * @return true - success + * @return false - falied + */ +bool +RocVideoDecoder::ReleaseInternalFrames() +{ + if(out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL || out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) + return true; // nothing to do + // only needed when using internal mapped buffer + while(!vp_frames_q_.empty()) + { + std::lock_guard lock(mtx_vp_frame_); + // pop decoded frame + vp_frames_q_.pop(); + } + return true; +} + +void +RocVideoDecoder::SaveFrameToFile(std::string output_file_name, + void* surf_mem, + OutputSurfaceInfo* surf_info, + size_t rgb_image_size) +{ + uint8_t* hst_ptr = nullptr; + bool is_rgb = (rgb_image_size != 0); + uint64_t output_image_size = is_rgb ? rgb_image_size : surf_info->output_surface_size_in_bytes; + if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL || + surf_info->mem_type == OUT_SURFACE_MEM_DEV_COPIED) + { + if(hst_ptr == nullptr) + { + hst_ptr = new uint8_t[output_image_size]; + } + hipError_t hip_status = hipSuccess; + hip_status = hipMemcpyDtoH((void*) hst_ptr, surf_mem, output_image_size); + if(hip_status != hipSuccess) + { + std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hipGetErrorName(hip_status) << ")" + << std::endl; + delete[] hst_ptr; + return; + } + } + else + hst_ptr = static_cast(surf_mem); + + if(current_output_filename.empty()) + { + current_output_filename = output_file_name; + } + + // don't overwrite to the same file if reconfigure is detected for a resolution changes. 
+ if(is_decoder_reconfigured_) + { + if(fp_out_) + { + fclose(fp_out_); + fp_out_ = nullptr; + } + // Append the width and height of the new stream to the old file name to create a file name + // to save the new frames do this only if resolution changes within a stream (e.g., decoding + // a multi-resolution stream using the videoDecode app) don't append to the output_file_name + // if multiple output file name is provided (e.g., decoding multi-files using the + // videDecodeMultiFiles) + if(!current_output_filename.compare(output_file_name)) + { + std::string::size_type const pos(output_file_name.find_last_of('.')); + extra_output_file_count_++; + std::string to_append = "_" + std::to_string(surf_info->output_width) + "_" + + std::to_string(surf_info->output_height) + "_" + + std::to_string(extra_output_file_count_); + if(pos != std::string::npos) + { + output_file_name.insert(pos, to_append); + } + else + { + output_file_name += to_append; + } + } + is_decoder_reconfigured_ = false; + } + + if(fp_out_ == nullptr) + { + fp_out_ = fopen(output_file_name.c_str(), "wb"); + } + if(fp_out_) + { + if(!is_rgb) + { + uint8_t* tmp_hst_ptr = hst_ptr; + if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + tmp_hst_ptr += ((disp_rect_.top + crop_rect_.top) * surf_info->output_pitch) + + (disp_rect_.left + crop_rect_.left) * surf_info->bytes_per_pixel; + } + auto img_width = surf_info->output_width; + auto img_height = surf_info->output_height; + auto output_stride = surf_info->output_pitch; + if(img_width * surf_info->bytes_per_pixel == output_stride && + img_height == surf_info->output_vstride) + { + fwrite(hst_ptr, 1, output_image_size, fp_out_); + } + else + { + uint32_t width = surf_info->output_width * surf_info->bytes_per_pixel; + if(surf_info->bit_depth <= 16) + { + for(uint32_t i = 0; i < surf_info->output_height; i++) + { + fwrite(tmp_hst_ptr, 1, width, fp_out_); + tmp_hst_ptr += output_stride; + } + // dump chroma + uint8_t* uv_hst_ptr = hst_ptr + 
output_stride * surf_info->output_vstride; + if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + uv_hst_ptr += + (num_chroma_planes_ == 1) + ? (((disp_rect_.top + crop_rect_.top) >> 1) * + surf_info->output_pitch) + + ((disp_rect_.left + crop_rect_.left) * + surf_info->bytes_per_pixel) + : ((disp_rect_.top + crop_rect_.top) * surf_info->output_pitch) + + ((disp_rect_.left + crop_rect_.left) * + surf_info->bytes_per_pixel); + } + for(uint32_t i = 0; i < chroma_height_; i++) + { + fwrite(uv_hst_ptr, 1, width, fp_out_); + uv_hst_ptr += output_stride; + } + if(num_chroma_planes_ == 2) + { + uv_hst_ptr = + hst_ptr + output_stride * (surf_info->output_vstride + chroma_vstride_); + if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) + { + uv_hst_ptr += + ((disp_rect_.top + crop_rect_.top) * surf_info->output_pitch) + + ((disp_rect_.left + crop_rect_.left) * surf_info->bytes_per_pixel); + } + for(uint32_t i = 0; i < chroma_height_; i++) + { + fwrite(uv_hst_ptr, 1, width, fp_out_); + uv_hst_ptr += output_stride; + } + } + } + } + } + else + { + fwrite(hst_ptr, 1, rgb_image_size, fp_out_); + } + } + + if(hst_ptr && (surf_info->mem_type != OUT_SURFACE_MEM_HOST_COPIED)) + { + delete[] hst_ptr; + } +} + +void +RocVideoDecoder::ResetSaveFrameToFile() +{ + if(fp_out_) + { + fclose(fp_out_); + fp_out_ = nullptr; + } +} + +void +RocVideoDecoder::GetDeviceinfo(std::string& device_name, + std::string& gcn_arch_name, + int& pci_bus_id, + int& pci_domain_id, + int& pci_device_id) +{ + device_name = hip_dev_prop_.name; + gcn_arch_name = hip_dev_prop_.gcnArchName; + pci_bus_id = hip_dev_prop_.pciBusID; + pci_domain_id = hip_dev_prop_.pciDomainID; + pci_device_id = hip_dev_prop_.pciDeviceID; +} + +bool +RocVideoDecoder::GetOutputSurfaceInfo(OutputSurfaceInfo** surface_info) +{ + if(!disp_width_ || !disp_height_) + { + std::cerr << "ERROR: RocVideoDecoder is not intialized" << std::endl; + return false; + } + *surface_info = &output_surface_info_; + return true; +} + +bool 
+RocVideoDecoder::InitHIP(int device_id) +{ + HIP_API_CALL(hipGetDeviceCount(&num_devices_)); + if(num_devices_ < 1) + { + std::cerr << "ERROR: didn't find any GPU!" << std::endl; + return false; + } + HIP_API_CALL(hipSetDevice(device_id)); + HIP_API_CALL(hipGetDeviceProperties(&hip_dev_prop_, device_id)); + HIP_API_CALL(hipStreamCreate(&hip_stream_)); + return true; +} + +std::chrono::_V2::system_clock::time_point +RocVideoDecoder::StartTimer() +{ + return std::chrono::_V2::system_clock::now(); +} + +double +RocVideoDecoder::StopTimer(const std::chrono::_V2::system_clock::time_point& start_time) +{ + return std::chrono::duration(std::chrono::_V2::system_clock::now() - + start_time) + .count(); +} + +bool +RocVideoDecoder::CodecSupported(int device_id, rocDecVideoCodec codec_id, uint32_t bit_depth) +{ + RocdecDecodeCaps decode_caps; + decode_caps.device_id = device_id; + decode_caps.codec_type = codec_id; + decode_caps.chroma_format = rocDecVideoChromaFormat_420; + decode_caps.bit_depth_minus_8 = bit_depth - 8; + if(rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) + { + return false; + } + return true; +} + +void +RocVideoDecoder::WaitForDecodeCompletion() +{ + RocdecDecodeStatus dec_status; + memset(&dec_status, 0, sizeof(dec_status)); + do + { + rocDecGetDecodeStatus(roc_decoder_, last_decode_surf_idx_, &dec_status); + } while(dec_status.decode_status == rocDecodeStatus_InProgress); +} diff --git a/tests/bin/rocdecode/roc_video_dec.h b/tests/bin/rocdecode/roc_video_dec.h new file mode 100644 index 0000000000..90b7ae4b2d --- /dev/null +++ b/tests/bin/rocdecode/roc_video_dec.h @@ -0,0 +1,648 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/*! + * \file + * \brief The AMD Video Decode Library. + * + * \defgroup group_amd_roc_video_dec rocDecode Video Decode: AMD Video Decode API + * \brief AMD The rocDecode video decoder for AMD’s GPUs. 
+ */ + +#define MAX_FRAME_NUM 16 + +typedef int(ROCDECAPI* PFNRECONFIGUEFLUSHCALLBACK)(void*, uint32_t, void*); + +typedef enum SeiAvcHevcPayloadType_enum +{ + SEI_TYPE_TIME_CODE = 136, + SEI_TYPE_USER_DATA_UNREGISTERED = 5 +} SeiAvcHevcPayloadType; + +typedef enum OutputSurfaceMemoryType_enum +{ + OUT_SURFACE_MEM_DEV_INTERNAL = + 0, /**< Internal interopped decoded surface memory(original mapped decoded surface) */ + OUT_SURFACE_MEM_DEV_COPIED = 1, /**< decoded output will be copied to a separate device memory + (the user doesn't need to call release) **/ + OUT_SURFACE_MEM_HOST_COPIED = 2, /**< decoded output will be copied to a separate host memory + (the user doesn't need to call release) **/ + OUT_SURFACE_MEM_NOT_MAPPED = 3 /**< < decoded output is not available (interop won't be used): + useful for decode only performance app*/ +} OutputSurfaceMemoryType; + +#define TOSTR(X) std::to_string(static_cast(X)) +#define STR(X) std::string(X) + +#if DBGINFO +# define INFO(X) \ + std::clog << "[INF] " \ + << " {" << __func__ << "} " \ + << " " << X << std::endl; +#else +# define INFO(X) ; +#endif +#define ERR(X) \ + std::cerr << "[ERR] " \ + << " {" << __func__ << "} " \ + << " " << X << std::endl; + +inline int +GetChromaPlaneCount(rocDecVideoSurfaceFormat surface_format) +{ + int num_planes = 1; + switch(surface_format) + { + case rocDecVideoSurfaceFormat_NV12: + case rocDecVideoSurfaceFormat_P016: num_planes = 1; break; + case rocDecVideoSurfaceFormat_YUV444: + case rocDecVideoSurfaceFormat_YUV444_16Bit: num_planes = 2; break; + case rocDecVideoSurfaceFormat_YUV420: + case rocDecVideoSurfaceFormat_YUV420_16Bit: num_planes = 2; break; + } + + return num_planes; +}; + +inline float +GetChromaHeightFactor(rocDecVideoSurfaceFormat surface_format) +{ + float factor = 0.5; + switch(surface_format) + { + case rocDecVideoSurfaceFormat_NV12: + case rocDecVideoSurfaceFormat_P016: + case rocDecVideoSurfaceFormat_YUV420: + case rocDecVideoSurfaceFormat_YUV420_16Bit: factor = 
/**
 * @brief Exception carrying a message and an integer error code.
 *
 * Thrown by the ROCDEC_THROW and THROW macros. When no code is supplied the error code is -1.
 */
class RocVideoDecodeException : public std::exception
{
public:
    /**
     * @param message  text returned by what()
     * @param err_code value returned by Geterror_code()
     */
    explicit RocVideoDecodeException(const std::string& message, const int err_code)
    : _message(message)
    , _err_code(err_code)
    {}

    /// Construct with the default error code (-1).
    explicit RocVideoDecodeException(const std::string& message)
    : RocVideoDecodeException(message, -1)
    {}

    /// @return the message; the pointer stays valid for the lifetime of this exception object
    // `noexcept` replaces the dynamic exception specification `throw()`, which was removed in
    // C++20.
    const char* what() const noexcept override { return _message.c_str(); }

    /// @return the error code supplied at construction, or -1 when none was given
    int Geterror_code() const { return _err_code; }

private:
    std::string _message;
    int         _err_code;
};
/**
 * @brief Description of the decoder's output surface, handed out by
 * RocVideoDecoder::GetOutputSurfaceInfo().
 */
typedef struct OutputSurfaceInfoType
{
    uint32_t output_width;   /**< Output width of the decoded surface */
    uint32_t output_height;  /**< Output height of the decoded surface */
    uint32_t output_pitch;   /**< Output pitch in bytes of the luma plane; the chroma pitch can be
                                inferred from the chroma format */
    uint32_t output_vstride; /**< Output vertical stride: the internal surface vstride for
                                OUT_SURFACE_MEM_DEV_INTERNAL, otherwise the output height */
    uint32_t chroma_height;  /**< Chroma plane height */
    Rect     disp_rect;      /**< Display area */
    uint32_t bytes_per_pixel;   /**< Bytes per pixel of the decoded image */
    uint32_t bit_depth;         /**< Bit depth of the decoded image */
    uint32_t num_chroma_planes; /**< Number of chroma planes */
    uint64_t output_surface_size_in_bytes; /**< Output image size in bytes, including both luma and
                                              chroma planes */
    rocDecVideoSurfaceFormat surface_format; /**< Surface format of the decoded image */
    OutputSurfaceMemoryType  mem_type;       /**< Memory type of the output surface */
} OutputSurfaceInfo;
height for the output surface + * @param clk_rate : FPS clock-rate + */ + RocVideoDecoder(int device_id, + OutputSurfaceMemoryType out_mem_type, + rocDecVideoCodec codec, + bool force_zero_latency = false, + const Rect* p_crop_rect = nullptr, + bool extract_user_SEI_Message = false, + uint32_t disp_delay = 0, + int max_width = 0, + int max_height = 0, + uint32_t clk_rate = 1000); + ~RocVideoDecoder(); + + rocDecVideoCodec GetCodecId() { return codec_id_; } + + hipStream_t GetStream() { return hip_stream_; } + + /** + * @brief Get the output frame width + */ + uint32_t GetWidth() + { + assert(disp_width_); + return disp_width_; + } + + /** + * @brief This function is used to get the actual decode width + */ + int GetDecodeWidth() + { + assert(coded_width_); + return coded_width_; + } + + /** + * @brief Get the output frame height + */ + uint32_t GetHeight() + { + assert(disp_height_); + return disp_height_; + } + + /** + * @brief This function is used to get the current chroma height. + */ + int GetChromaHeight() + { + assert(chroma_height_); + return chroma_height_; + } + + /** + * @brief This function is used to get the number of chroma planes. + */ + int GetNumChromaPlanes() + { + assert(num_chroma_planes_); + return num_chroma_planes_; + } + + /** + * @brief This function is used to get the current frame size based on pixel format. 
+ */ + virtual int GetFrameSize() + { + assert(disp_width_); + return disp_width_ * (disp_height_ + (chroma_height_ * num_chroma_planes_)) * + byte_per_pixel_; + } + + /** + * @brief Get the Bit Depth and BytesPerPixel associated with the pixel format + * + * @return uint32_t + */ + uint32_t GetBitDepth() + { + assert(bitdepth_minus_8_); + return (bitdepth_minus_8_ + 8); + } + uint32_t GetBytePerPixel() + { + assert(byte_per_pixel_); + return byte_per_pixel_; + } + /** + * @brief Functions to get the output surface attributes + */ + size_t GetSurfaceSize() + { + assert(surface_size_); + return surface_size_; + } + uint32_t GetSurfaceStride() + { + assert(surface_stride_); + return surface_stride_; + } + // RocDecImageFormat GetSubsampling() { return subsampling_; } + /** + * @brief Get the name of the output format + * + * @param codec_id + * @return std::string + */ + const char* GetCodecFmtName(rocDecVideoCodec codec_id); + + /** + * @brief function to return the name from surface_format_id + * + * @param surface_format_id - enum for surface format + * @return const char* + */ + const char* GetSurfaceFmtName(rocDecVideoSurfaceFormat surface_format_id); + + /** + * @brief Get the pointer to the Output Image Info + * + * @param surface_info ptr to output surface info + * @return true + * @return false + */ + bool GetOutputSurfaceInfo(OutputSurfaceInfo** surface_info); + + /** + * @brief Function to set the Reconfig Params object + * + * @param p_reconfig_params: pointer to reconfig params struct + * @return true : success + * @return false : fail + */ + bool SetReconfigParams(ReconfigParams* p_reconfig_params, bool b_force_reconfig_flush = false); + + /** + * @brief Function to force Reconfigure Flush: needed for random seeking to key frames + * + * @return int 1: Success 0: Fail + */ + int FlushAndReconfigure(); + /** + * @brief this function decodes a frame and returns the number of frames avalable for display + * + * @param data - pointer to the data buffer that 
is to be decode + * @param size - size of the data buffer in bytes + * @param pts - presentation timestamp + * @param flags - video packet flags + * @param num_decoded_pics - nummber of pictures decoded in this call + * @return int - num of frames to display + */ + virtual int DecodeFrame(const uint8_t* data, + size_t size, + int pkt_flags, + int64_t pts = 0, + int* num_decoded_pics = nullptr); + /** + * @brief This function returns a decoded frame and timestamp. This should be called in a loop + * fetching all the available frames + * + */ + virtual uint8_t* GetFrame(int64_t* pts); + + /** + * @brief function to release frame after use by the application: Only used with + * "OUT_SURFACE_MEM_DEV_INTERNAL" + * + * @param pTimestamp - timestamp of the frame to be released (unmapped) + * @param b_flushing - true when flushing + * @return true - success + * @return false - falied + */ + virtual bool ReleaseFrame(int64_t pTimestamp, bool b_flushing = false); + + /** + * @brief utility function to save image to a file + * + * @param output_file_name - file to write + * @param dev_mem - dev_memory pointer of the frame + * @param image_info - output image info + * @param is_output_RGB - to write in RGB + */ + // void SaveImage(std::string output_file_name, void* dev_mem, OutputImageInfo* image_info, bool + // is_output_RGB = 0); + + /** + * @brief Get the Device info for the current device + * + * @param device_name + * @param gcn_arch_name + * @param pci_bus_id + * @param pci_domain_id + * @param pci_device_id + */ + void GetDeviceinfo(std::string& device_name, + std::string& gcn_arch_name, + int& pci_bus_id, + int& pci_domain_id, + int& pci_device_id); + + /** + * @brief Helper function to dump decoded output surface to file + * + * @param output_file_name - Output file name + * @param dev_mem - pointer to surface memory + * @param surf_info - surface info + * @param rgb_image_size - image size for rgb (optional). 
A non_zero value indicates the + * surf_mem holds an rgb interleaved image and the entire size will be dumped to file + */ + virtual void SaveFrameToFile(std::string output_file_name, + void* surf_mem, + OutputSurfaceInfo* surf_info, + size_t rgb_image_size = 0); + + /** + * @brief Helper funtion to close a existing file and dump to new file in case of multiple files + * using same decoder + */ + virtual void ResetSaveFrameToFile(); + + /** + * @brief Get the Num Of Flushed Frames from video decoder object + * + * @return int32_t + */ + int32_t GetNumOfFlushedFrames() { return num_frames_flushed_during_reconfig_; } + + /*! \brief Function to wait for the decode completion of the last submitted picture + */ + void WaitForDecodeCompletion(); + + // Session overhead refers to decoder initialization and deinitialization time + void AddDecoderSessionOverHead(std::thread::id session_id, double duration) + { + session_overhead_[session_id] += duration; + } + double GetDecoderSessionOverHead(std::thread::id session_id) + { + if(session_overhead_.find(session_id) != session_overhead_.end()) + { + return session_overhead_[session_id]; + } + else + { + return 0; + } + } + + /** + * @brief Check if the given Video Codec is supported on the given GPU + * + * @return rocDecStatus + */ + bool CodecSupported(int device_id, rocDecVideoCodec codec_id, uint32_t bit_depth); + + /** + * @brief This function reconfigure decoder if there is a change in sequence params. 
+ */ + virtual int ReconfigureDecoder(RocdecVideoFormat* p_video_format); + +protected: + /** + * @brief Callback function to be registered for getting a callback when decoding of sequence + * starts + */ + static int ROCDECAPI HandleVideoSequenceProc(void* p_user_data, + RocdecVideoFormat* p_video_format) + { + return ((RocVideoDecoder*) p_user_data)->HandleVideoSequence(p_video_format); + } + + /** + * @brief Callback function to be registered for getting a callback when a decoded frame is + * ready to be decoded + */ + static int ROCDECAPI HandlePictureDecodeProc(void* p_user_data, RocdecPicParams* p_pic_params) + { + return ((RocVideoDecoder*) p_user_data)->HandlePictureDecode(p_pic_params); + } + + /** + * @brief Callback function to be registered for getting a callback when a decoded frame is + * available for display + */ + static int ROCDECAPI HandlePictureDisplayProc(void* p_user_data, + RocdecParserDispInfo* p_disp_info) + { + return ((RocVideoDecoder*) p_user_data)->HandlePictureDisplay(p_disp_info); + } + + /** + * @brief Callback function to be registered for getting a callback when all the unregistered + * user SEI Messages are parsed for a frame. + */ + static int ROCDECAPI HandleSEIMessagesProc(void* p_user_data, + RocdecSeiMessageInfo* p_sei_message_info) + { + return ((RocVideoDecoder*) p_user_data)->GetSEIMessage(p_sei_message_info); + } + + /** + * @brief This function gets called when a sequence is ready to be decoded. The function also + gets called when there is format change + */ + int HandleVideoSequence(RocdecVideoFormat* p_video_format); + + /** + * @brief This function gets called when a picture is ready to be decoded. cuvidDecodePicture + * is called from this function to decode the picture + */ + int HandlePictureDecode(RocdecPicParams* p_pic_params); + + /** + * @brief This function gets called after a picture is decoded and available for display. 
+ Frames are fetched and stored in internal buffer + */ + int HandlePictureDisplay(RocdecParserDispInfo* p_disp_info); + /** + * @brief This function gets called when all unregistered user SEI messages are parsed for a + * frame + */ + int GetSEIMessage(RocdecSeiMessageInfo* p_sei_message_info); + + /** + * @brief function to release all internal frames and clear the vp_frames_q_ (used with + * reconfigure): Only used with "OUT_SURFACE_MEM_DEV_INTERNAL" + * + * @return true - success + * @return false - falied + */ + bool ReleaseInternalFrames(); + + /** + * @brief Function to Initialize GPU-HIP + * + */ + bool InitHIP(int device_id); + + /** + * @brief Function to get start time + * + */ + std::chrono::_V2::system_clock::time_point StartTimer(); + + /** + * @brief Function to get elapsed time + * + */ + double StopTimer(const std::chrono::_V2::system_clock::time_point& start_time); + + int num_devices_; + int device_id_; + RocdecVideoParser rocdec_parser_ = nullptr; + rocDecDecoderHandle roc_decoder_ = nullptr; + OutputSurfaceMemoryType out_mem_type_ = OUT_SURFACE_MEM_DEV_INTERNAL; + bool b_extract_sei_message_ = false; + bool b_force_zero_latency_ = false; + uint32_t disp_delay_; + ReconfigParams* p_reconfig_params_ = nullptr; + bool b_force_recofig_flush_ = false; + int32_t num_frames_flushed_during_reconfig_ = 0; + hipDeviceProp_t hip_dev_prop_; + hipStream_t hip_stream_; + rocDecVideoCodec codec_id_ = rocDecVideoCodec_NumCodecs; + rocDecVideoChromaFormat video_chroma_format_ = rocDecVideoChromaFormat_420; + rocDecVideoSurfaceFormat video_surface_format_ = rocDecVideoSurfaceFormat_NV12; + RocdecSeiMessageInfo* curr_sei_message_ptr_ = nullptr; + RocdecSeiMessageInfo sei_message_display_q_[MAX_FRAME_NUM]; + RocdecVideoFormat* curr_video_format_ptr_ = nullptr; + int output_frame_cnt_ = 0, output_frame_cnt_ret_ = 0; + int decoded_pic_cnt_ = 0; + int decode_poc_ = 0, pic_num_in_dec_order_[MAX_FRAME_NUM]; + int num_alloced_frames_ = 0; + int last_decode_surf_idx_ = 
0; + std::ostringstream input_video_info_str_; + int bitdepth_minus_8_ = 0; + uint32_t byte_per_pixel_ = 1; + uint32_t coded_width_ = 0; + uint32_t disp_width_ = 0; + uint32_t coded_height_ = 0; + uint32_t disp_height_ = 0; + uint32_t target_width_ = 0; + uint32_t target_height_ = 0; + int max_width_ = 0, max_height_ = 0; + uint32_t chroma_height_ = 0, chroma_width_ = 0; + uint32_t num_chroma_planes_ = 0; + uint32_t num_components_ = 0; + uint32_t surface_stride_ = 0; + uint32_t surface_vstride_ = 0, + chroma_vstride_ = + 0; // vertical stride between planes: used when using internal dev memory + size_t surface_size_ = 0; + OutputSurfaceInfo output_surface_info_ = {}; + std::mutex mtx_vp_frame_; + std::vector vp_frames_; // vector of decoded frames + std::queue vp_frames_q_; + Rect disp_rect_ = {}; // displayable area specified in the bitstream + Rect crop_rect_ = {}; // user specified region of interest within diplayable area disp_rect_ + FILE* fp_sei_ = NULL; + FILE* fp_out_ = NULL; + bool is_decoder_reconfigured_ = false; + std::string current_output_filename = ""; + uint32_t extra_output_file_count_ = 0; + std::thread::id + decoder_session_id_; // Decoder session identifier. Used to gather session level stats. + std::unordered_map + session_overhead_; // Records session overhead of initialization+deinitialization time. + // Format is (thread id, duration) +}; diff --git a/tests/bin/rocdecode/rocdecode.cpp b/tests/bin/rocdecode/rocdecode.cpp new file mode 100644 index 0000000000..06cdf9defa --- /dev/null +++ b/tests/bin/rocdecode/rocdecode.cpp @@ -0,0 +1,109 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. 
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include "roc_video_dec.h"
+
+/**
+ * @brief Small rocDecode driver program.
+ *
+ * Reads the bitstream passed as "-i <file>", queries its codec type and bit depth, creates a
+ * RocVideoDecoder, fetches one chunk of picture data from the bitstream reader and submits it
+ * to DecodeFrame three times.
+ *
+ * @return 0 on success; 1 if "-i" has no value or any bitstream-reader call fails.
+ */
+int
+main(int argc, char** argv)
+{
+    // Get input file
+    std::string input_file_path{};
+    for(int i = 1; i < argc; i++)
+    {
+        if(!std::strcmp(argv[i], "-i"))
+        {
+            if(++i == argc)
+            {
+                // "-i" was the last argument. argv[argc] is a null pointer and assigning a
+                // null char* to std::string is undefined behavior, so stop here instead of
+                // falling through to the assignment below.
+                std::cerr << "Provide path to input file" << std::endl;
+                return 1;
+            }
+            input_file_path = argv[i];
+            continue;
+        }
+    }
+    // Set up bitstreamreader
+    RocdecBitstreamReader bs_reader = nullptr;
+    // Destroys the reader (if one was created) on every return path, including the early
+    // error returns. Declared before the decoder, so the decoder is torn down first.
+    struct BitstreamReaderGuard
+    {
+        explicit BitstreamReaderGuard(RocdecBitstreamReader& reader_ref)
+        : reader(reader_ref)
+        {}
+        ~BitstreamReaderGuard()
+        {
+            if(reader) rocDecDestroyBitstreamReader(reader);
+        }
+        BitstreamReaderGuard(const BitstreamReaderGuard&) = delete;
+        BitstreamReaderGuard& operator=(const BitstreamReaderGuard&) = delete;
+
+        RocdecBitstreamReader& reader;
+    } bs_reader_guard{bs_reader};
+
+    rocDecVideoCodec rocdec_codec_id{};
+    int              bit_depth{};
+    if(rocDecCreateBitstreamReader(&bs_reader, input_file_path.c_str()) != ROCDEC_SUCCESS)
+    {
+        std::cerr << "Failed to create the bitstream reader." << std::endl;
+        return 1;
+    }
+    if(rocDecGetBitstreamCodecType(bs_reader, &rocdec_codec_id) != ROCDEC_SUCCESS)
+    {
+        std::cerr << "Failed to get stream codec type." << std::endl;
+        return 1;
+    }
+    if(rocdec_codec_id >= rocDecVideoCodec_NumCodecs)
+    {
+        std::cerr << "Unsupported stream file type or codec type by the bitstream reader. Exiting."
+                  << std::endl;
+        return 1;
+    }
+    if(rocDecGetBitstreamBitDepth(bs_reader, &bit_depth) != ROCDEC_SUCCESS)
+    {
+        std::cerr << "Failed to get stream bit depth." << std::endl;
+        return 1;
+    }
+
+    // Set up video decoder
+    int                     device_id              = 0;
+    OutputSurfaceMemoryType mem_type               = OUT_SURFACE_MEM_DEV_INTERNAL;
+    bool                    b_force_zero_latency   = false;
+    Rect*                   p_crop_rect            = nullptr;
+    int                     disp_delay             = 1;
+    bool                    b_extract_sei_messages = false;
+    // Automatic storage instead of a raw `new` that was never deleted: the decoder is now
+    // destroyed when main returns.
+    RocVideoDecoder viddec(device_id,
+                           mem_type,
+                           rocdec_codec_id,
+                           b_force_zero_latency,
+                           p_crop_rect,
+                           b_extract_sei_messages,
+                           disp_delay);
+
+    uint8_t* pvideo        = nullptr;
+    int      n_video_bytes = 0;
+    int64_t  pts           = 0;
+    int      pkg_flags     = 0;
+    int      decoded_pics  = 0;
+    if(rocDecGetBitstreamPicData(bs_reader, &pvideo, &n_video_bytes, &pts) != ROCDEC_SUCCESS)
+    {
+        std::cerr << "Failed to get picture data." << std::endl;
+        return 1;
+    }
+    // Treat 0 bitstream size as end of stream indicator
+    if(n_video_bytes == 0)
+    {
+        pkg_flags |= ROCDEC_PKT_ENDOFSTREAM;
+    }
+    // NOTE(review): the same packet is submitted three times; presumably this is to drive
+    // enough decode/display API calls for the trace validation - confirm.
+    viddec.DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics);
+    viddec.DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics);
+    viddec.DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics);
+    return 0;
+}
diff --git a/tests/pytest-packages/tests/rocprofv3.py b/tests/pytest-packages/tests/rocprofv3.py
index ed08eb7f08..85f86f818b 100644
--- a/tests/pytest-packages/tests/rocprofv3.py
+++ b/tests/pytest-packages/tests/rocprofv3.py
@@ -26,7 +26,15 @@ from __future__ import absolute_import
 def test_perfetto_data(
     pftrace_data,
     json_data,
-    categories=("hip", "hsa", "marker", "kernel", "memory_copy", "memory_allocation"),
+    categories=(
+        "hip",
+        "hsa",
+        "marker",
+        "kernel",
+        "memory_copy",
+        "memory_allocation",
+        "rocdecode_api",
+    ),
 ):
 
     mapping = {
@@ -36,6 +44,7 @@ def test_perfetto_data(
         "kernel": ("kernel_dispatch", "kernel_dispatch"),
         "memory_copy": ("memory_copy", "memory_copy"),
         "memory_allocation": ("memory_allocation", "memory_allocation"),
+        "rocdecode_api": ("rocdecode_api", "rocdecode_api"),
     }
 
     # make sure they specified valid categories
@@ -73,6 +82,7 @@ def test_otf2_data(
         "kernel": ("kernel_dispatch", "kernel_dispatch"),
         "memory_copy": ("memory_copy", "memory_copy"),
         "memory_allocation": ("memory_allocation", "memory_allocation"),
+        "rocdecode_api": ("rocdecode_api", "rocdecode_api"),
     }
 
     # make sure they specified valid categories
diff --git a/tests/rocdecode/CMakeLists.txt b/tests/rocdecode/CMakeLists.txt
new file mode 100644
index 0000000000..85d9f6a5e8
--- /dev/null
+++ b/tests/rocdecode/CMakeLists.txt
@@ -0,0 +1,53 @@
+#
+#
+#
+cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
+
+project(
+    rocprofiler-tests-rocdecode-tracing
+    LANGUAGES CXX
+    VERSION 0.0.0)
+
+find_package(rocprofiler-sdk REQUIRED)
+ +if(ROCPROFILER_MEMCHECK_PRELOAD_ENV) + set(PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}:$") +else() + set(PRELOAD_ENV "LD_PRELOAD=$") +endif() + +set(ROCDECODE_VIDEO_FILE + "${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H265.265") +if(NOT EXISTS "${ROCDECODE_VIDEO_FILE}") + message( + FATAL_ERROR + "Unable to find video file for rocdecode tests: ${ROCDECODE_VIDEO_FILE}") +endif() +add_test(NAME test-rocdecode-tracing-execute COMMAND $ -i + ${ROCDECODE_VIDEO_FILE}) + +set(rocdecode-tracing-env + "${PRELOAD_ENV}" + "ROCPROFILER_TOOL_OUTPUT_FILE=rocdecode-tracing-test.json" + "LD_LIBRARY_PATH=$:$ENV{LD_LIBRARY_PATH}" + ) + +set_tests_properties( + test-rocdecode-tracing-execute + PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT + "${rocdecode-tracing-env}" FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}") + +# copy to binary directory +rocprofiler_configure_pytest_files(COPY validate.py conftest.py CONFIG pytest.ini) + +add_test(NAME test-rocdecode-tracing-validate + COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input + ${CMAKE_CURRENT_BINARY_DIR}/rocdecode-tracing-test.json) + +set_tests_properties( + test-rocdecode-tracing-validate + PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS + test-rocdecode-tracing-execute FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}") diff --git a/tests/rocdecode/conftest.py b/tests/rocdecode/conftest.py new file mode 100644 index 0000000000..6924b2699e --- /dev/null +++ b/tests/rocdecode/conftest.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 + +import json +import pytest + +from rocprofiler_sdk.pytest_utils.dotdict import dotdict + + +def pytest_addoption(parser): + parser.addoption( + "--input", + action="store", + default="rocdecode-tracing-test.json", + help="Input JSON", + ) + + +@pytest.fixture +def input_data(request): + filename = request.config.getoption("--input") + with open(filename, "r") as inp: + return dotdict(json.load(inp)) diff 
--git a/tests/rocdecode/pytest.ini b/tests/rocdecode/pytest.ini new file mode 100644 index 0000000000..5e1e1c14a0 --- /dev/null +++ b/tests/rocdecode/pytest.ini @@ -0,0 +1,5 @@ + +[pytest] +addopts = --durations=20 -rA -s -vv +testpaths = validate.py +pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages diff --git a/tests/rocdecode/validate.py b/tests/rocdecode/validate.py new file mode 100644 index 0000000000..e1619349ca --- /dev/null +++ b/tests/rocdecode/validate.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 + +import sys +import pytest + + +# helper function +def node_exists(name, data, min_len=1): + assert name in data + assert data[name] is not None + if isinstance(data[name], (list, tuple, dict, set)): + assert len(data[name]) >= min_len, f"{name}:\n{data}" + + +def test_data_structure(input_data): + """verify minimum amount of expected data is present""" + data = input_data + + node_exists("rocprofiler-sdk-json-tool", data) + + sdk_data = data["rocprofiler-sdk-json-tool"] + + node_exists("metadata", sdk_data) + node_exists("pid", sdk_data["metadata"]) + node_exists("main_tid", sdk_data["metadata"]) + node_exists("init_time", sdk_data["metadata"]) + node_exists("fini_time", sdk_data["metadata"]) + + node_exists("agents", sdk_data) + node_exists("call_stack", sdk_data) + node_exists("callback_records", sdk_data) + node_exists("buffer_records", sdk_data) + + node_exists("names", sdk_data["callback_records"]) + node_exists("hsa_api_traces", sdk_data["callback_records"]) + node_exists("hip_api_traces", sdk_data["callback_records"]) + node_exists("memory_allocations", sdk_data["callback_records"]) + node_exists("rocdecode_api_traces", sdk_data["callback_records"]) + + node_exists("names", sdk_data["buffer_records"]) + node_exists("hsa_api_traces", sdk_data["buffer_records"]) + node_exists("hip_api_traces", sdk_data["buffer_records"]) + node_exists("memory_allocations", sdk_data["buffer_records"]) + node_exists("rocdecode_api_traces", 
sdk_data["buffer_records"]) + + +def test_size_entries(input_data): + # check that size fields are > 0 but account for function arguments + # which are named "size" + def check_size(data, bt): + if "size" in data.keys(): + if isinstance(data["size"], str) and bt.endswith('["args"]'): + pass + else: + assert data["size"] > 0, f"origin: {bt}" + + # recursively check the entire data structure + def iterate_data(data, bt): + if isinstance(data, (list, tuple)): + for i, itr in enumerate(data): + if isinstance(itr, dict): + check_size(itr, f"{bt}[{i}]") + iterate_data(itr, f"{bt}[{i}]") + elif isinstance(data, dict): + check_size(data, f"{bt}") + for key, itr in data.items(): + iterate_data(itr, f'{bt}["{key}"]') + + # start recursive check over entire JSON dict + iterate_data(input_data, "input_data") + + +def test_timestamps(input_data): + """Verify starting timestamps are less than ending timestamps""" + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + cb_start = {} + cb_end = {} + for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]: + for itr in sdk_data["callback_records"][titr]: + cid = itr["correlation_id"]["internal"] + phase = itr["phase"] + if phase == 1: + cb_start[cid] = itr["timestamp"] + elif phase == 2: + cb_end[cid] = itr["timestamp"] + assert cb_start[cid] <= itr["timestamp"] + else: + assert phase == 1 or phase == 2 + + for itr in sdk_data["buffer_records"][titr]: + assert itr["start_timestamp"] <= itr["end_timestamp"] + + for titr in ["memory_allocations"]: + for itr in sdk_data["buffer_records"][titr]: + assert itr["start_timestamp"] < itr["end_timestamp"], f"[{titr}] {itr}" + assert itr["correlation_id"]["internal"] > 0, f"[{titr}] {itr}" + assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}" + assert ( + sdk_data["metadata"]["init_time"] < itr["start_timestamp"] + ), f"[{titr}] {itr}" + assert ( + sdk_data["metadata"]["init_time"] < itr["end_timestamp"] + ), f"[{titr}] {itr}" + assert ( + 
sdk_data["metadata"]["fini_time"] > itr["start_timestamp"] + ), f"[{titr}] {itr}" + assert ( + sdk_data["metadata"]["fini_time"] > itr["end_timestamp"] + ), f"[{titr}] {itr}" + + api_start = cb_start[itr["correlation_id"]["internal"]] + # api_end = cb_end[itr["correlation_id"]["internal"]] + assert api_start < itr["start_timestamp"], f"[{titr}] {itr}" + # assert api_end <= itr["end_timestamp"], f"[{titr}] {itr}" + + +def test_internal_correlation_ids(input_data): + """Assure correlation ids are unique""" + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + api_corr_ids = [] + for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]: + for itr in sdk_data["callback_records"][titr]: + api_corr_ids.append(itr["correlation_id"]["internal"]) + + for itr in sdk_data["buffer_records"][titr]: + api_corr_ids.append(itr["correlation_id"]["internal"]) + + api_corr_ids_sorted = sorted(api_corr_ids) + api_corr_ids_unique = list(set(api_corr_ids)) + + for itr in sdk_data["buffer_records"]["memory_allocations"]: + assert itr["correlation_id"]["internal"] in api_corr_ids_unique + + len_corr_id_unq = len(api_corr_ids_unique) + assert len(api_corr_ids) != len_corr_id_unq + assert max(api_corr_ids_sorted) == len_corr_id_unq + + +def test_external_correlation_ids(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + extern_corr_ids = [] + for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]: + for itr in sdk_data["callback_records"][titr]: + assert itr["correlation_id"]["external"] > 0 + assert itr["thread_id"] == itr["correlation_id"]["external"] + extern_corr_ids.append(itr["correlation_id"]["external"]) + + extern_corr_ids = list(set(sorted(extern_corr_ids))) + for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]: + for itr in sdk_data["buffer_records"][titr]: + assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}" + assert ( + itr["thread_id"] == 
itr["correlation_id"]["external"] + ), f"[{titr}] {itr}" + assert itr["thread_id"] in extern_corr_ids, f"[{titr}] {itr}" + assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}" + + for titr in ["memory_allocations"]: + for itr in sdk_data["buffer_records"][titr]: + assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}" + assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}" + + for itr in sdk_data["callback_records"][titr]: + assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}" + assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}" + + +def get_operation(record, kind_name, op_name=None): + for idx, itr in enumerate(record["names"]): + if kind_name == itr["kind"]: + if op_name is None: + return idx, itr["operations"] + else: + for oidx, oname in enumerate(itr["operations"]): + if op_name == oname: + return oidx + return None + + +def test_rocdecode_traces(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + callback_records = sdk_data["callback_records"] + buffer_records = sdk_data["buffer_records"] + + rocdecode_bf_traces = sdk_data["buffer_records"]["rocdecode_api_traces"] + rocdecode_api_bf_ops = get_operation(buffer_records, "ROCDECODE_API") + assert len(rocdecode_api_bf_ops[1]) == 16 + + rocdecode_cb_traces = sdk_data["callback_records"]["rocdecode_api_traces"] + rocdecode_api_cb_ops = get_operation(callback_records, "ROCDECODE_API") + + assert ( + rocdecode_api_bf_ops[1] == rocdecode_api_cb_ops[1] + and len(rocdecode_api_cb_ops[1]) == 16 + ) + + # check that buffer and callback records agree + phase_enter_count = 0 + phase_end_count = 0 + + api_calls = [] + + for api_call in rocdecode_cb_traces: + if api_call["phase"] == 1: + phase_enter_count += 1 + api_calls.append(rocdecode_api_cb_ops[1][api_call["operation"]]) + if api_call["phase"] == 2: + phase_end_count += 1 + + assert phase_enter_count == phase_end_count == 
len(rocdecode_bf_traces) + + for call in [ + "rocDecCreateBitstreamReader", + "rocDecGetBitstreamCodecType", + "rocDecGetBitstreamBitDepth", + "rocDecCreateVideoParser", + "rocDecGetBitstreamPicData", + "rocDecGetDecoderCaps", + "rocDecCreateDecoder", + "rocDecDecodeFrame", + "rocDecParseVideoData", + "rocDecGetVideoFrame", + "rocDecGetDecodeStatus", + "rocDecDestroyBitstreamReader", + ]: + assert call in api_calls + + +def test_retired_correlation_ids(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + def _sort_dict(inp): + return dict(sorted(inp.items())) + + api_corr_ids = {} + for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]: + for itr in sdk_data["buffer_records"][titr]: + corr_id = itr["correlation_id"]["internal"] + assert corr_id not in api_corr_ids.keys() + api_corr_ids[corr_id] = itr + + alloc_corr_ids = {} + for titr in ["memory_allocations"]: + for itr in sdk_data["buffer_records"][titr]: + corr_id = itr["correlation_id"]["internal"] + assert corr_id not in alloc_corr_ids.keys() + alloc_corr_ids[corr_id] = itr + + retired_corr_ids = {} + for itr in sdk_data["buffer_records"]["retired_correlation_ids"]: + corr_id = itr["internal_correlation_id"] + assert corr_id not in retired_corr_ids.keys() + retired_corr_ids[corr_id] = itr + + api_corr_ids = _sort_dict(api_corr_ids) + alloc_corr_ids = _sort_dict(alloc_corr_ids) + retired_corr_ids = _sort_dict(retired_corr_ids) + + for cid, itr in alloc_corr_ids.items(): + assert cid in retired_corr_ids.keys() + retired_ts = retired_corr_ids[cid]["timestamp"] + end_ts = itr["end_timestamp"] + assert (retired_ts - end_ts) > 0, f"correlation-id: {cid}, data: {itr}" + + for cid, itr in api_corr_ids.items(): + assert cid in retired_corr_ids.keys() + retired_ts = retired_corr_ids[cid]["timestamp"] + end_ts = itr["end_timestamp"] + assert (retired_ts - end_ts) > 0, f"correlation-id: {cid}, data: {itr}" + + assert len(api_corr_ids.keys()) == 
(len(retired_corr_ids.keys())) + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code) diff --git a/tests/rocprofv3/CMakeLists.txt b/tests/rocprofv3/CMakeLists.txt index c5ae31e705..70b8fe84ea 100644 --- a/tests/rocprofv3/CMakeLists.txt +++ b/tests/rocprofv3/CMakeLists.txt @@ -36,3 +36,6 @@ add_subdirectory(roctracer-roctx) add_subdirectory(scratch-memory) add_subdirectory(pc-sampling) add_subdirectory(collection-period) +if(ROCPROFILER_BUILD_ROCDECODE_TESTS) + add_subdirectory(rocdecode-trace) +endif() diff --git a/tests/rocprofv3/rocdecode-trace/CMakeLists.txt b/tests/rocprofv3/rocdecode-trace/CMakeLists.txt new file mode 100644 index 0000000000..e2537b3d9d --- /dev/null +++ b/tests/rocprofv3/rocdecode-trace/CMakeLists.txt @@ -0,0 +1,52 @@ +# +# +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-tests-rocprofv3-rocdecode-tracing + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) + +rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py) + +string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}") + +set(rocdecode-tracing-env "${PRELOAD_ENV}") + +set(ROCDECODE_VIDEO_FILE + "${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H265.265") +if(NOT EXISTS "${ROCDECODE_VIDEO_FILE}") + message( + FATAL_ERROR + "Unable to find video file for rocdecode tests: ${ROCDECODE_VIDEO_FILE}") +endif() +add_test( + NAME rocprofv3-test-rocdecode-tracing-execute + COMMAND + $ --rocdecode-trace -d + ${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json otf2 pftrace + csv --log-level env -- $ -i ${ROCDECODE_VIDEO_FILE}) + +set_tests_properties( + rocprofv3-test-rocdecode-tracing-execute + PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT + "${rocdecode-tracing-env}" FAIL_REGULAR_EXPRESSION "threw an exception") + +add_test( + NAME rocprofv3-test-rocdecode-tracing-validate + 
COMMAND + ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input + ${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_results.json --otf2-input + ${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_results.otf2 --pftrace-input + ${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_results.pftrace --csv-input + ${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_rocdecode_api_trace.csv) + +set_tests_properties( + rocprofv3-test-rocdecode-tracing-validate + PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS + rocprofv3-test-rocdecode-tracing-execute FAIL_REGULAR_EXPRESSION + "AssertionError") diff --git a/tests/rocprofv3/rocdecode-trace/conftest.py b/tests/rocprofv3/rocdecode-trace/conftest.py new file mode 100644 index 0000000000..6a5ef0dd0a --- /dev/null +++ b/tests/rocprofv3/rocdecode-trace/conftest.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 + +import csv +import json +import os +import pytest + +from rocprofiler_sdk.pytest_utils.dotdict import dotdict +from rocprofiler_sdk.pytest_utils import collapse_dict_list +from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader +from rocprofiler_sdk.pytest_utils.otf2_reader import OTF2Reader + + +def pytest_addoption(parser): + parser.addoption( + "--json-input", + action="store", + default="rocdecode-tracing/out_results.json", + help="Input JSON", + ) + parser.addoption( + "--otf2-input", + action="store", + default="rocdecode-tracing/out_results.otf2", + help="Input OTF2", + ) + parser.addoption( + "--pftrace-input", + action="store", + default="rocdecode-tracing/out_results.pftrace", + help="Input pftrace file", + ) + parser.addoption( + "--csv-input", + action="store", + default="rocdecode-tracing/out_rocdecode_api_trace.csv", + help="Input CSV", + ) + + +@pytest.fixture +def json_data(request): + filename = request.config.getoption("--json-input") + with open(filename, "r") as inp: + return dotdict(collapse_dict_list(json.load(inp))) + + +@pytest.fixture +def csv_data(request): + filename = 
request.config.getoption("--csv-input") + data = [] + with open(filename, "r") as inp: + reader = csv.DictReader(inp) + for row in reader: + data.append(row) + + return data + + +@pytest.fixture +def otf2_data(request): + filename = request.config.getoption("--otf2-input") + if not os.path.exists(filename): + raise FileNotFoundError(f"{filename} does not exist") + return OTF2Reader(filename).read()[0] + + +@pytest.fixture +def pftrace_data(request): + filename = request.config.getoption("--pftrace-input") + return PerfettoReader(filename).read()[0] diff --git a/tests/rocprofv3/rocdecode-trace/pytest.ini b/tests/rocprofv3/rocdecode-trace/pytest.ini new file mode 100644 index 0000000000..5e1e1c14a0 --- /dev/null +++ b/tests/rocprofv3/rocdecode-trace/pytest.ini @@ -0,0 +1,5 @@ + +[pytest] +addopts = --durations=20 -rA -s -vv +testpaths = validate.py +pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages diff --git a/tests/rocprofv3/rocdecode-trace/validate.py b/tests/rocprofv3/rocdecode-trace/validate.py new file mode 100755 index 0000000000..8c2ac1a0f4 --- /dev/null +++ b/tests/rocprofv3/rocdecode-trace/validate.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 + +import sys +import pytest +import json + +from collections import defaultdict + + +# helper function +def node_exists(name, data, min_len=1): + assert name in data + assert data[name] is not None + if isinstance(data[name], (list, tuple, dict, set)): + assert len(data[name]) >= min_len + + +def get_operation(record, kind_name, op_name=None): + for idx, itr in enumerate(record["strings"]["buffer_records"]): + if kind_name == itr["kind"]: + if op_name is None: + return idx, itr["operations"] + else: + for oidx, oname in enumerate(itr["operations"]): + if op_name == oname: + return oidx + return None + + +def test_rocdecode(json_data): + data = json_data["rocprofiler-sdk-tool"] + buffer_records = data["buffer_records"] + + rocdecode_data = buffer_records["rocdecode_api"] + + _, bf_op_names = 
get_operation(data, "ROCDECODE_API") + + assert len(bf_op_names) == 16 + + rocdecode_reported_agent_ids = set() + # check buffering data + for node in rocdecode_data: + assert "size" in node + assert "kind" in node + assert "operation" in node + assert "correlation_id" in node + assert "end_timestamp" in node + assert "start_timestamp" in node + assert "thread_id" in node + + assert node.size > 0 + assert node.thread_id > 0 + assert node.start_timestamp > 0 + assert node.end_timestamp > 0 + assert node.start_timestamp < node.end_timestamp + + assert data.strings.buffer_records[node.kind].kind == "ROCDECODE_API" + assert ( + data.strings.buffer_records[node.kind].operations[node.operation] + in bf_op_names + ) + + +def test_csv_data(csv_data): + assert len(csv_data) > 0, "Expected non-empty csv data" + + api_calls = [] + + for row in csv_data: + assert "Domain" in row, "'Domain' was not present in csv data for rocdecode-trace" + assert ( + "Function" in row + ), "'Function' was not present in csv data for rocdecode-trace" + assert ( + "Process_Id" in row + ), "'Process_Id' was not present in csv data for rocdecode-trace" + assert ( + "Thread_Id" in row + ), "'Thread_Id' was not present in csv data for rocdecode-trace" + assert ( + "Correlation_Id" in row + ), "'Correlation_Id' was not present in csv data for rocdecode-trace" + assert ( + "Start_Timestamp" in row + ), "'Start_Timestamp' was not present in csv data for rocdecode-trace" + assert ( + "End_Timestamp" in row + ), "'End_Timestamp' was not present in csv data for rocdecode-trace" + + api_calls.append(row["Function"]) + + assert row["Domain"] == "ROCDECODE_API" + assert int(row["Process_Id"]) > 0 + assert int(row["Thread_Id"]) > 0 + assert int(row["Start_Timestamp"]) > 0 + assert int(row["End_Timestamp"]) > 0 + assert int(row["Start_Timestamp"]) < int(row["End_Timestamp"]) + + for call in [ + "rocDecCreateBitstreamReader", + "rocDecGetBitstreamCodecType", + "rocDecGetBitstreamBitDepth", + 
"rocDecCreateVideoParser", + "rocDecGetBitstreamPicData", + "rocDecGetDecoderCaps", + "rocDecCreateDecoder", + "rocDecDecodeFrame", + "rocDecParseVideoData", + "rocDecGetVideoFrame", + "rocDecGetDecodeStatus", + "rocDecDestroyBitstreamReader", + ]: + assert call in api_calls + + +def test_perfetto_data(pftrace_data, json_data): + import rocprofiler_sdk.tests.rocprofv3 as rocprofv3 + + rocprofv3.test_perfetto_data( + pftrace_data, + json_data, + ("hip", "hsa", "memory_allocation", "rocdecode_api"), + ) + + +def test_otf2_data(otf2_data, json_data): + import rocprofiler_sdk.tests.rocprofv3 as rocprofv3 + + rocprofv3.test_otf2_data( + otf2_data, + json_data, + ("hip", "hsa", "memory_allocation", "rocdecode_api"), + ) + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code) diff --git a/tests/tools/json-tool.cpp b/tests/tools/json-tool.cpp index d43fe1e979..662eef19d7 100644 --- a/tests/tools/json-tool.cpp +++ b/tests/tools/json-tool.cpp @@ -397,6 +397,23 @@ struct rccl_api_callback_record_t } }; +struct rocdecode_api_callback_record_t +{ + uint64_t timestamp = 0; + rocprofiler_callback_tracing_record_t record = {}; + rocprofiler_callback_tracing_rocdecode_api_data_t payload = {}; + callback_arg_array_t args = {}; + + template + void save(ArchiveT& ar) const + { + ar(cereal::make_nvp("timestamp", timestamp)); + cereal::save(ar, record); + ar(cereal::make_nvp("payload", payload)); + serialize_args(ar, args); + } +}; + struct ompt_callback_record_t { uint64_t timestamp = 0; @@ -555,6 +572,7 @@ auto kernel_dispatch_cb_records = std::deque{}; auto memory_allocation_cb_records = std::deque{}; auto rccl_api_cb_records = std::deque{}; +auto rocdecode_api_cb_records = std::deque{}; auto ompt_cb_records = std::deque{}; int @@ -824,6 +842,20 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, runtime_init_cb_records.emplace_back( runtime_init_callback_record_t{ts, record, *data, std::move(args)}); } + 
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API) + { + auto* data = + static_cast(record.payload); + auto args = callback_arg_array_t{}; + if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) + rocprofiler_iterate_callback_tracing_kind_operation_args( + record, save_args, record.phase, &args); + + static auto _mutex = std::mutex{}; + auto _lk = std::unique_lock{_mutex}; + rocdecode_api_cb_records.emplace_back( + rocdecode_api_callback_record_t{ts, record, *data, std::move(args)}); + } else { throw std::runtime_error{"unsupported callback kind"}; @@ -843,8 +875,9 @@ auto scratch_memory_records = std::deque{}; auto corr_id_retire_records = std::deque{}; -auto rccl_api_bf_records = std::deque{}; -auto ompt_bf_records = std::deque{}; +auto rccl_api_bf_records = std::deque{}; +auto rocdecode_api_bf_records = std::deque{}; +auto ompt_bf_records = std::deque{}; void tool_tracing_buffered(rocprofiler_context_id_t /*context*/, @@ -971,6 +1004,13 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/, runtime_init_bf_records.emplace_back(*record); } + else if(header->kind == ROCPROFILER_BUFFER_TRACING_ROCDECODE_API) + { + auto* record = static_cast( + header->payload); + + rocdecode_api_bf_records.emplace_back(*record); + } else { throw std::runtime_error{ @@ -1069,6 +1109,9 @@ rocprofiler_context_id_t kernel_dispatch_buffered_ctx = {0}; rocprofiler_context_id_t page_migration_ctx = {0}; rocprofiler_context_id_t runtime_init_callback_ctx = {}; rocprofiler_context_id_t runtime_init_buffered_ctx = {}; +rocprofiler_context_id_t rocdecode_api_callback_ctx = {0}; +rocprofiler_context_id_t rocdecode_api_buffered_ctx = {0}; + // buffers rocprofiler_buffer_id_t runtime_init_buffered_buffer = {}; rocprofiler_buffer_id_t hsa_api_buffered_buffer = {}; @@ -1082,6 +1125,7 @@ rocprofiler_buffer_id_t counter_collection_buffer = {}; rocprofiler_buffer_id_t scratch_memory_buffer = {}; rocprofiler_buffer_id_t corr_id_retire_buffer = {}; rocprofiler_buffer_id_t 
rccl_api_buffered_buffer = {}; +rocprofiler_buffer_id_t rocdecode_api_buffer = {}; rocprofiler_buffer_id_t ompt_buffered_buffer = {}; auto contexts = std::unordered_map{ @@ -1107,10 +1151,12 @@ auto contexts = std::unordered_map{ {"SCRATCH_MEMORY", &scratch_memory_ctx}, {"CORRELATION_ID_RETIREMENT", &corr_id_retire_ctx}, {"RCCL_API_BUFFERED", &rccl_api_buffered_ctx}, + {"ROCDECODE_API_CALLBACK", &rocdecode_api_callback_ctx}, + {"ROCDECODE_API_BUFFERED", &rocdecode_api_buffered_ctx}, {"OMPT_BUFFERED", &ompt_buffered_ctx}, }; -auto buffers = std::array{&runtime_init_buffered_buffer, +auto buffers = std::array{&runtime_init_buffered_buffer, &hsa_api_buffered_buffer, &hip_api_buffered_buffer, &marker_api_buffered_buffer, @@ -1122,7 +1168,8 @@ auto buffers = std::array{&runtime_init_buffered_b &counter_collection_buffer, &corr_id_retire_buffer, &rccl_api_buffered_buffer, - &ompt_buffered_buffer}; + &ompt_buffered_buffer, + &rocdecode_api_buffer}; auto agents = std::vector{}; auto agents_map = std::unordered_map{}; @@ -1288,6 +1335,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) nullptr), "rccl api callback tracing service configure"); + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(rocdecode_api_callback_ctx, + ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API, + nullptr, + 0, + tool_tracing_callback, + nullptr), + "rocdecode api callback tracing service configure"); + ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service(ompt_callback_ctx, ROCPROFILER_CALLBACK_TRACING_OMPT, @@ -1408,6 +1464,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) &rccl_api_buffered_buffer), "buffer creation"); + ROCPROFILER_CALL(rocprofiler_create_buffer(rocdecode_api_buffered_ctx, + buffer_size, + watermark, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + tool_tracing_buffered, + tool_data, + &rocdecode_api_buffer), + "buffer creation"); + ROCPROFILER_CALL(rocprofiler_create_buffer(ompt_buffered_ctx, buffer_size, watermark, 
@@ -1532,6 +1597,14 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) rccl_api_buffered_buffer), "buffer tracing service for rccl api configure"); + ROCPROFILER_CALL( + rocprofiler_configure_buffer_tracing_service(rocdecode_api_buffered_ctx, + ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, + nullptr, + 0, + rocdecode_api_buffer), + "buffer tracing service for rocdecode api configure"); + ROCPROFILER_CALL( rocprofiler_configure_buffer_tracing_service( ompt_buffered_ctx, ROCPROFILER_BUFFER_TRACING_OMPT, nullptr, 0, ompt_buffered_buffer), @@ -1701,7 +1774,8 @@ tool_fini(void* tool_data) << ", rccl_api_bf_records=" << rccl_api_bf_records.size() << ", ompt_bf_records=" << ompt_bf_records.size() << ", counter_collection_value_records=" << counter_collection_bf_records.size() - << "...\n" + << ", rocdecode_api_callback_records=" << rocdecode_api_cb_records.size() + << ", rocdecode_api_bf_records=" << rocdecode_api_bf_records.size() << "...\n" << std::flush; auto* _call_stack = static_cast(tool_data); @@ -1797,6 +1871,7 @@ write_json(call_stack_t* _call_stack) json_ar(cereal::make_nvp("kernel_dispatch", kernel_dispatch_cb_records)); json_ar(cereal::make_nvp("memory_copies", memory_copy_cb_records)); json_ar(cereal::make_nvp("memory_allocations", memory_allocation_cb_records)); + json_ar(cereal::make_nvp("rocdecode_api_traces", rocdecode_api_cb_records)); } catch(std::exception& e) { std::cerr << "[" << getpid() << "][" << __FUNCTION__ @@ -1823,6 +1898,7 @@ write_json(call_stack_t* _call_stack) json_ar(cereal::make_nvp("ompt_traces", ompt_bf_records)); json_ar(cereal::make_nvp("retired_correlation_ids", corr_id_retire_records)); json_ar(cereal::make_nvp("counter_collection", counter_collection_bf_records)); + json_ar(cereal::make_nvp("rocdecode_api_traces", rocdecode_api_bf_records)); } catch(std::exception& e) { std::cerr << "[" << getpid() << "][" << __FUNCTION__ @@ -1894,6 +1970,8 @@ write_perfetto() tids.emplace(itr.thread_id); for(auto itr : 
ompt_bf_records) tids.emplace(itr.thread_id); + for(auto itr : rocdecode_api_bf_records) + tids.emplace(itr.thread_id); for(auto itr : memory_copy_bf_records) { @@ -2147,6 +2225,47 @@ write_perfetto() itr.end_timestamp); } + for(auto itr : rocdecode_api_bf_records) + { + auto name = buffer_names.at(itr.kind, itr.operation); + auto& track = thread_tracks.at(itr.thread_id); + + auto _args = callback_arg_array_t{}; + auto ritr = std::find_if( + rocdecode_api_cb_records.begin(), + rocdecode_api_cb_records.end(), + [&itr](const auto& citr) { + return (citr.record.correlation_id.internal == itr.correlation_id.internal && + !citr.args.empty()); + }); + if(ritr != rocdecode_api_cb_records.end()) _args = ritr->args; + + TRACE_EVENT_BEGIN(sdk::perfetto_category::name, + ::perfetto::StaticString(name.data()), + track, + itr.start_timestamp, + ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), + "begin_ns", + itr.start_timestamp, + "tid", + itr.thread_id, + "kind", + itr.kind, + "operation", + itr.operation, + "corr_id", + itr.correlation_id.internal, + [&](::perfetto::EventContext ctx) { + for(const auto& aitr : _args) + sdk::add_perfetto_annotation(ctx, aitr.first, aitr.second); + }); + TRACE_EVENT_END(sdk::perfetto_category::name, + track, + itr.end_timestamp, + "end_ns", + itr.end_timestamp); + } + for(auto itr : ompt_bf_records) { auto name = buffer_names.at(itr.kind, itr.operation);