diff --git a/projects/rocprofiler-sdk/CHANGELOG.md b/projects/rocprofiler-sdk/CHANGELOG.md index 5faeacdc53..7ae5ae62e2 100644 --- a/projects/rocprofiler-sdk/CHANGELOG.md +++ b/projects/rocprofiler-sdk/CHANGELOG.md @@ -152,6 +152,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec - `--collection-period` feature added in rocprofv3, to enable filtering using time. - `--collection-period-unit` feature added in rocprofv3, to allow the user to control time units used in collection period option. - Added deprecation notice for rocprofiler(v1) and rocprofiler(v2). +- Added support for rocDecode API Tracing ### Changed diff --git a/projects/rocprofiler-sdk/cmake/Modules/FindrocDecode.cmake b/projects/rocprofiler-sdk/cmake/Modules/FindrocDecode.cmake new file mode 100644 index 0000000000..fde2f102e2 --- /dev/null +++ b/projects/rocprofiler-sdk/cmake/Modules/FindrocDecode.cmake @@ -0,0 +1,49 @@ +################################################################################ +# Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +################################################################################ + +# find rocDecode - library and headers +find_path( + rocDecode_INCLUDE_DIR + NAMES rocdecode.h + PATHS ${ROCM_PATH}/include/rocdecode) +find_library( + rocDecode_LIBRARY + NAMES rocdecode + HINTS ${ROCM_PATH}/lib) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + rocDecode + FOUND_VAR rocDecode_FOUND + REQUIRED_VARS rocDecode_INCLUDE_DIR rocDecode_LIBRARY) + +if(rocDecode_FOUND) + if(NOT TARGET rocDecode::rocDecode) + add_library(rocDecode::rocDecode INTERFACE IMPORTED) + target_link_libraries(rocDecode::rocDecode INTERFACE ${rocDecode_LIBRARY}) + target_include_directories(rocDecode::rocDecode + INTERFACE ${rocDecode_INCLUDE_DIR}) + endif() +endif() + +mark_as_advanced(rocDecode_INCLUDE_DIR rocDecode_LIBRARY) diff --git a/projects/rocprofiler-sdk/cmake/rocprofiler_config_interfaces.cmake b/projects/rocprofiler-sdk/cmake/rocprofiler_config_interfaces.cmake index 69637b4d2d..a01a6df27d 100644 --- a/projects/rocprofiler-sdk/cmake/rocprofiler_config_interfaces.cmake +++ b/projects/rocprofiler-sdk/cmake/rocprofiler_config_interfaces.cmake @@ -325,3 +325,23 @@ else() INTERFACE ROCPROFILER_SDK_USE_SYSTEM_RCCL=0) endif() + +# ----------------------------------------------------------------------------------------# +# +# ROCDecode +# +# ----------------------------------------------------------------------------------------# + +find_package(rocDecode) +# NOTE: CMake names are case-sensitive. FindrocDecode.cmake defines rocDecode_INCLUDE_DIR (the directory holding rocdecode.h) and the rocDecode::rocDecode target; accept the API-trace header in either include layout. +if(rocDecode_FOUND + AND rocDecode_INCLUDE_DIR + AND (EXISTS "${rocDecode_INCLUDE_DIR}/amd_detail/rocdecode_api_trace.h" OR EXISTS "${rocDecode_INCLUDE_DIR}/rocdecode/amd_detail/rocdecode_api_trace.h")) + rocprofiler_config_nolink_target( + rocprofiler-sdk-rocdecode-nolink rocDecode::rocDecode INTERFACE 
ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE=1) +else() + target_compile_definitions(rocprofiler-sdk-rocdecode-nolink + INTERFACE ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE=0) + +endif() diff --git a/projects/rocprofiler-sdk/cmake/rocprofiler_interfaces.cmake b/projects/rocprofiler-sdk/cmake/rocprofiler_interfaces.cmake index 0a804ac764..7e82e713e1 100644 --- a/projects/rocprofiler-sdk/cmake/rocprofiler_interfaces.cmake +++ b/projects/rocprofiler-sdk/cmake/rocprofiler_interfaces.cmake @@ -91,3 +91,6 @@ rocprofiler_add_interface_library( "rocprofiler-sdk-hsakmt without linking to HSAKMT library" IMPORTED) rocprofiler_add_interface_library(rocprofiler-sdk-rccl-nolink "RCCL headers without linking to RCCL library" IMPORTED) +rocprofiler_add_interface_library( + rocprofiler-sdk-rocdecode-nolink + "ROCDECODE headers without linking to ROCDECODE library" IMPORTED) diff --git a/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake b/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake index 35862acdc4..7a684b26e1 100644 --- a/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake +++ b/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake @@ -59,6 +59,8 @@ if(ROCPROFILER_BUILD_TESTS) rocprofiler_add_option( ROCPROFILER_BUILD_GTEST "Enable building gtest (Google testing) library internally" ON ADVANCED) + rocprofiler_add_option(ROCPROFILER_BUILD_ROCDECODE_TESTS + "Enable building rocDecode tests" OFF ADVANCED) endif() rocprofiler_add_option(ROCPROFILER_ENABLE_CLANG_TIDY "Enable clang-tidy checks" OFF diff --git a/projects/rocprofiler-sdk/source/bin/rocprofv3.py b/projects/rocprofiler-sdk/source/bin/rocprofv3.py index 86eb9581fa..34dcc8f1e0 100755 --- a/projects/rocprofiler-sdk/source/bin/rocprofv3.py +++ b/projects/rocprofiler-sdk/source/bin/rocprofv3.py @@ -158,13 +158,13 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins aggregate_tracing_options, "-r", "--runtime-trace", - help="Collect tracing data for HIP runtime API, Marker (ROCTx) API, 
RCCL API, Memory operations (copies, scratch, and allocation), and Kernel dispatches. Similar to --sys-trace but without tracing HIP compiler API and the underlying HSA API.", + help="Collect tracing data for HIP runtime API, Marker (ROCTx) API, RCCL API, ROCDecode API, Memory operations (copies, scratch, and allocation), and Kernel dispatches. Similar to --sys-trace but without tracing HIP compiler API and the underlying HSA API.", ) add_parser_bool_argument( aggregate_tracing_options, "-s", "--sys-trace", - help="Collect tracing data for HIP API, HSA API, Marker (ROCTx) API, RCCL API, Memory operations (copies, scratch, and allocations), and Kernel dispatches.", + help="Collect tracing data for HIP API, HSA API, Marker (ROCTx) API, RCCL API, ROCDecode API, Memory operations (copies, scratch, and allocations), and Kernel dispatches.", ) pc_sampling_options = parser.add_argument_group("PC sampling options") @@ -245,6 +245,11 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins "--kokkos-trace", help="Enable built-in Kokkos Tools support (implies --marker-trace and --kernel-rename)", ) + add_parser_bool_argument( + basic_tracing_options, + "--rocdecode-trace", + help="For collecting ROCDecode Traces", + ) extended_tracing_options = parser.add_argument_group("Granular tracing options") @@ -761,6 +766,7 @@ def run(app_args, args, **kwargs): "memory_allocation_trace", "scratch_memory_trace", "rccl_trace", + "rocdecode_trace", ): setattr(args, itr, True) @@ -773,6 +779,7 @@ def run(app_args, args, **kwargs): "memory_allocation_trace", "scratch_memory_trace", "rccl_trace", + "rocdecode_trace", ): setattr(args, itr, True) @@ -796,6 +803,7 @@ def run(app_args, args, **kwargs): ["hsa_finalizer_trace", "HSA_FINALIZER_EXT_API_TRACE"], ["marker_trace", "MARKER_API_TRACE"], ["rccl_trace", "RCCL_API_TRACE"], + ["rocdecode_trace", "ROCDECODE_API_TRACE"], ["kernel_trace", "KERNEL_TRACE"], ["memory_copy_trace", "MEMORY_COPY_TRACE"], 
["memory_allocation_trace", "MEMORY_ALLOCATION_TRACE"], diff --git a/projects/rocprofiler-sdk/source/docs/data/rocdecode_api_trace.csv b/projects/rocprofiler-sdk/source/docs/data/rocdecode_api_trace.csv new file mode 100644 index 0000000000..b2c5da25ac --- /dev/null +++ b/projects/rocprofiler-sdk/source/docs/data/rocdecode_api_trace.csv @@ -0,0 +1,7 @@ +"Domain","Function","Process_Id","Thread_Id","Correlation_Id","Start_Timestamp","End_Timestamp" +"ROCDECODE_API","rocDecCreateVideoParser",41688,41688,583,615449881677279,615449882001583 +"ROCDECODE_API","rocDecGetDecoderCaps",41688,41688,584,615449882016054,615449882163756 +"ROCDECODE_API","rocDecGetDecoderCaps",41688,41688,588,615449886038750,615449886050880 +"ROCDECODE_API","rocDecCreateDecoder",41688,41688,591,615449886084210,615450756910310 +"ROCDECODE_API","rocDecDecodeFrame",41688,41688,595,615450757036042,615450767147413 +"ROCDECODE_API","rocDecGetDecodeStatus",41688,41688,812,615450836779385,615450836779575 diff --git a/projects/rocprofiler-sdk/source/docs/how-to/using-rocprofv3.rst b/projects/rocprofiler-sdk/source/docs/how-to/using-rocprofv3.rst index 34a875a34f..844d087407 100644 --- a/projects/rocprofiler-sdk/source/docs/how-to/using-rocprofv3.rst +++ b/projects/rocprofiler-sdk/source/docs/how-to/using-rocprofv3.rst @@ -55,11 +55,11 @@ Here is the sample of commonly used ``rocprofv3`` command-line options. Some opt - Output control * - ``-r`` \| ``--runtime-trace`` - - Collects HIP (runtime), memory copy, memory allocation, marker, scratch memory, and kernel dispatch traces. + - Collects HIP (runtime), memory copy, memory allocation, marker, scratch memory, rocDecode, and kernel dispatch traces. - Application Tracing * - ``-s`` \| ``--sys-trace`` - - Collects HIP, HSA, memory copy, memory allocation, marker, scratch memory, and kernel dispatch traces. + - Collects HIP, HSA, memory copy, memory allocation, marker, scratch memory, rocDecode, and kernel dispatch traces. 
- Application Tracing * - ``--hip-trace`` @@ -86,6 +86,10 @@ Here is the sample of commonly used ``rocprofv3`` command-line options. Some opt - Collects scratch memory operations traces. - Application tracing + * - ``--rocdecode-trace`` + - Collects rocDecode API traces. + - Application tracing + * - ``--hsa-trace`` - Collects HSA API traces. - Application tracing @@ -615,6 +619,28 @@ Here are the contents of ``rccl_api_trace.csv`` file: :widths: 10,10,10,10,10,20,20 :header-rows: 1 +rocDecode trace +++++++++++++++++ + +`rocDecode <https://rocm.docs.amd.com/projects/rocDecode/en/latest/>`_ is a high-performance video decode SDK for AMD GPUs. This option traces the rocDecode API. + +.. code-block:: shell + + rocprofv3 --rocdecode-trace -- <application_path> + +The above command generates a ``rocdecode_api_trace.csv`` file prefixed with the process ID. + +.. code-block:: shell + + $ cat 41688_rocdecode_api_trace.csv + +Here are the contents of the ``rocdecode_api_trace.csv`` file: + +.. csv-table:: rocDecode trace + :file: /data/rocdecode_api_trace.csv + :widths: 10,10,10,10,10,20,20 + :header-rows: 1 + Post-processing tracing options ++++++++++++++++++++++++++++++++ @@ -1336,3 +1362,15 @@ Properties - **`handle`** *(integer, required)*: Handle of the agent. - **`address`** *(string, required)*: Starting address of allocation. - **`allocation_size`** *(integer, required)*: Size of allocation. + - **`rocDecode_api`** *(array)*: rocDecode API records. + - **Items** *(object)* + - **`size`** *(integer, required)*: Size of the rocDecode API record. + - **`kind`** *(integer, required)*: Kind of the rocDecode API. + - **`operation`** *(integer, required)*: Operation of the rocDecode API. + - **`correlation_id`** *(object, required)*: Correlation ID information. + - **`internal`** *(integer, required)*: Internal correlation ID. + - **`external`** *(integer, required)*: External correlation ID. + - **`start_timestamp`** *(integer, required)*: Start timestamp. + - **`end_timestamp`** *(integer, required)*: End timestamp. 
+ - **`thread_id`** *(integer, required)*: Thread ID. + diff --git a/projects/rocprofiler-sdk/source/docs/rocprofv3-schema.json b/projects/rocprofiler-sdk/source/docs/rocprofv3-schema.json index 20ac7ac318..012e64f2e5 100644 --- a/projects/rocprofiler-sdk/source/docs/rocprofv3-schema.json +++ b/projects/rocprofiler-sdk/source/docs/rocprofv3-schema.json @@ -1678,6 +1678,66 @@ "address", "allocation_size" ] + } + }, + "rocdecoder_api": { + "type": "array", + "description": "ROCDecode API records.", + "items": { + "type": "object", + "properties": { + "size": { + "type": "integer", + "description": "Size of the rocDecode API record." + }, + "kind": { + "type": "integer", + "description": "Kind of the rocDecode API." + }, + "operation": { + "type": "integer", + "description": "Operation of the rocDecode API." + }, + "correlation_id": { + "type": "object", + "description": "Correlation ID information.", + "properties": { + "internal": { + "type": "integer", + "description": "Internal correlation ID." + }, + "external": { + "type": "integer", + "description": "External correlation ID." + } + }, + "required": [ + "internal", + "external" + ] + }, + "start_timestamp": { + "type": "integer", + "description": "Start timestamp." + }, + "end_timestamp": { + "type": "integer", + "description": "End timestamp." + }, + "thread_id": { + "type": "integer", + "description": "Thread ID." 
+ } + }, + "required": [ + "size", + "kind", + "operation", + "correlation_id", + "start_timestamp", + "end_timestamp", + "thread_id" + ] } } } diff --git a/projects/rocprofiler-sdk/source/docs/rocprofv3_input_schema.json b/projects/rocprofiler-sdk/source/docs/rocprofv3_input_schema.json index c32c3f4f40..9009d11486 100644 --- a/projects/rocprofiler-sdk/source/docs/rocprofv3_input_schema.json +++ b/projects/rocprofiler-sdk/source/docs/rocprofv3_input_schema.json @@ -65,7 +65,10 @@ "type": "boolean", "description": "For Collecting Memory Allocation Traces" }, - + "rocdecode_trace": { + "type": "boolean", + "description": "For Collecting rocDecode Traces" + }, "scratch_memory_trace": { "type": "boolean", "description": "For Collecting Scratch Memory operations Traces" @@ -101,9 +104,14 @@ "description": "For Collecting HSA API Traces (Image-extenson API)" }, + "runtime_trace" : { + "type": "boolean", + "description": "For collecting HIP (runtime), memory copy, memory allocation, marker, scratch memory, rocDecode, and Kernel dispatch traces." 
+ }, + "sys_trace" : { "type": "boolean", - "description": "For Collecting HIP, HSA, Marker (ROCTx), Memory copy, Memory allocation, Scratch memory, and Kernel dispatch traces" + "description": "For Collecting HIP, HSA, Marker (ROCTx), Memory copy, Memory allocation, Scratch memory, rocDecode, and Kernel dispatch traces" }, "mangled_kernels": { diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/CMakeLists.txt index 45431b8198..b720f62c9f 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/CMakeLists.txt @@ -31,6 +31,7 @@ set(ROCPROFILER_HEADER_FILES profile_config.h registration.h rccl.h + rocdecode.h spm.h ${CMAKE_CURRENT_BINARY_DIR}/version.h) @@ -44,6 +45,7 @@ add_subdirectory(hsa) add_subdirectory(marker) add_subdirectory(ompt) add_subdirectory(rccl) +add_subdirectory(rocdecode) add_subdirectory(cxx) add_subdirectory(kfd) add_subdirectory(amd_detail) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h index b0e2214654..fb3d3d9ec2 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h @@ -182,6 +182,25 @@ typedef struct /// @brief Specification of the API function, e.g., ::rocprofiler_rccl_api_id_t } rocprofiler_buffer_tracing_rccl_api_record_t; +/** + * @brief ROCProfiler Buffer ROCDecode API Record. 
+ */ +typedef struct +{ + uint64_t size; ///< size of this struct + rocprofiler_buffer_tracing_kind_t kind; + rocprofiler_tracing_operation_t operation; + rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record + rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds + rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds + rocprofiler_thread_id_t thread_id; ///< id for thread generating this record + + /// @var kind + /// @brief ::ROCPROFILER_BUFFER_TRACING_ROCDECODE_API + /// @var operation + /// @brief Specification of the API function, e.g., ::rocprofiler_rocdecode_api_id_t +} rocprofiler_buffer_tracing_rocdecode_api_record_t; + /** * @brief ROCProfiler Buffer Memory Copy Tracer Record. */ diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h index 525d75d699..762feb4302 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -108,6 +109,16 @@ typedef struct rocprofiler_rccl_api_retval_t retval; } rocprofiler_callback_tracing_rccl_api_data_t; +/** + * @brief ROCProfiler ROCDecode API Callback Data. + */ +typedef struct +{ + uint64_t size; ///< size of this struct + rocprofiler_rocdecode_api_args_t args; ///< per-function argument union, see ::rocprofiler_rocdecode_api_args_t + rocprofiler_rocdecode_api_retval_t retval; ///< return value union, see ::rocprofiler_rocdecode_api_retval_t +} rocprofiler_callback_tracing_rocdecode_api_data_t; + /** * @brief ROCProfiler Code Object Load Tracer Callback Record. 
*/ diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp index 7832d62ed9..3ab3f0ce18 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp @@ -82,6 +82,7 @@ ROCPROFILER_DEFINE_CATEGORY(category, openmp, "OpenMP") ROCPROFILER_DEFINE_CATEGORY(category, kernel_dispatch, "GPU kernel dispatch") ROCPROFILER_DEFINE_CATEGORY(category, memory_copy, "Async memory copy") ROCPROFILER_DEFINE_CATEGORY(category, memory_allocation, "Memory Allocation") +ROCPROFILER_DEFINE_CATEGORY(category, rocdecode_api, "ROCDecode API function") #define ROCPROFILER_PERFETTO_CATEGORIES \ ROCPROFILER_PERFETTO_CATEGORY(category::hsa_api), \ @@ -91,7 +92,8 @@ ROCPROFILER_DEFINE_CATEGORY(category, memory_allocation, "Memory Allocation") ROCPROFILER_PERFETTO_CATEGORY(category::openmp), \ ROCPROFILER_PERFETTO_CATEGORY(category::kernel_dispatch), \ ROCPROFILER_PERFETTO_CATEGORY(category::memory_copy), \ - ROCPROFILER_PERFETTO_CATEGORY(category::memory_allocation) + ROCPROFILER_PERFETTO_CATEGORY(category::memory_allocation), \ + ROCPROFILER_PERFETTO_CATEGORY(category::rocdecode_api) #include diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization.hpp index a28423802e..eb7444e15d 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization.hpp @@ -386,6 +386,21 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_rccl_api_data_t data) ROCP_SDK_SAVE_DATA_FIELD(retval); } +template +void +save(ArchiveT& ar, rocprofiler_rocdecode_api_retval_t data) +{ + ROCP_SDK_SAVE_DATA_FIELD(rocDecStatus_retval); +} + +template +void +save(ArchiveT& ar, 
rocprofiler_callback_tracing_rocdecode_api_data_t data) +{ + ROCP_SDK_SAVE_DATA_FIELD(size); + ROCP_SDK_SAVE_DATA_FIELD(retval); +} + template void save(ArchiveT& ar, rocprofiler_callback_tracing_ompt_data_t data) @@ -479,6 +494,13 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_rccl_api_record_t data) save_buffer_tracing_api_record(ar, data); } +template +void +save(ArchiveT& ar, rocprofiler_buffer_tracing_rocdecode_api_record_t data) +{ + save_buffer_tracing_api_record(ar, data); +} + template void save(ArchiveT& ar, rocprofiler_buffer_tracing_ompt_target_t data) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/external_correlation.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/external_correlation.h index 14203a089e..cbf87b2870 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/external_correlation.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/external_correlation.h @@ -69,6 +69,7 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_RCCL_API, ///< ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_OMPT, ///< ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API, ///< ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST, } rocprofiler_external_correlation_id_request_kind_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h index b4c632eafa..91aa551c8c 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h @@ -176,6 +176,7 @@ typedef enum // NOLINT(performance-enum-size) ///< ::rocprofiler_memory_allocation_operation_t ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION, ///< Callback notifying that a runtime ///< library has been initialized + ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API, ///< rocDecode API Tracing 
ROCPROFILER_CALLBACK_TRACING_LAST, } rocprofiler_callback_tracing_kind_t; @@ -207,6 +208,7 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_BUFFER_TRACING_RUNTIME_INITIALIZATION, ///< Record indicating a runtime library has ///< been initialized. @see ///< ::rocprofiler_runtime_initialization_operation_t + ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, ///< rocDecode tracing ROCPROFILER_BUFFER_TRACING_LAST, } rocprofiler_buffer_tracing_kind_t; @@ -363,12 +365,13 @@ typedef enum */ typedef enum { - ROCPROFILER_LIBRARY = (1 << 0), - ROCPROFILER_HSA_LIBRARY = (1 << 1), - ROCPROFILER_HIP_LIBRARY = (1 << 2), - ROCPROFILER_MARKER_LIBRARY = (1 << 3), - ROCPROFILER_RCCL_LIBRARY = (1 << 4), - ROCPROFILER_LIBRARY_LAST = ROCPROFILER_RCCL_LIBRARY, + ROCPROFILER_LIBRARY = (1 << 0), + ROCPROFILER_HSA_LIBRARY = (1 << 1), + ROCPROFILER_HIP_LIBRARY = (1 << 2), + ROCPROFILER_MARKER_LIBRARY = (1 << 3), + ROCPROFILER_RCCL_LIBRARY = (1 << 4), + ROCPROFILER_ROCDECODE_LIBRARY = (1 << 5), + ROCPROFILER_LIBRARY_LAST = ROCPROFILER_ROCDECODE_LIBRARY, } rocprofiler_runtime_library_t; /** @@ -384,7 +387,8 @@ typedef enum ROCPROFILER_MARKER_CONTROL_TABLE = (1 << 4), ROCPROFILER_MARKER_NAME_TABLE = (1 << 5), ROCPROFILER_RCCL_TABLE = (1 << 6), - ROCPROFILER_TABLE_LAST = ROCPROFILER_RCCL_TABLE, + ROCPROFILER_ROCDECODE_TABLE = (1 << 7), + ROCPROFILER_TABLE_LAST = ROCPROFILER_ROCDECODE_TABLE, } rocprofiler_intercept_table_t; /** @@ -392,11 +396,12 @@ typedef enum */ typedef enum // NOLINT(performance-enum-size) { - ROCPROFILER_RUNTIME_INITIALIZATION_NONE = 0, ///< Unknown runtime initialization - ROCPROFILER_RUNTIME_INITIALIZATION_HSA, ///< Application loaded HSA runtime - ROCPROFILER_RUNTIME_INITIALIZATION_HIP, ///< Application loaded HIP runtime - ROCPROFILER_RUNTIME_INITIALIZATION_MARKER, ///< Application loaded Marker (ROCTx) runtime - ROCPROFILER_RUNTIME_INITIALIZATION_RCCL, ///< Application loaded RCCL runtime + ROCPROFILER_RUNTIME_INITIALIZATION_NONE = 0, ///< Unknown runtime 
initialization + ROCPROFILER_RUNTIME_INITIALIZATION_HSA, ///< Application loaded HSA runtime + ROCPROFILER_RUNTIME_INITIALIZATION_HIP, ///< Application loaded HIP runtime + ROCPROFILER_RUNTIME_INITIALIZATION_MARKER, ///< Application loaded Marker (ROCTx) runtime + ROCPROFILER_RUNTIME_INITIALIZATION_RCCL, ///< Application loaded RCCL runtime + ROCPROFILER_RUNTIME_INITIALIZATION_ROCDECODE, ///< Application loaded rocDecode runtime ROCPROFILER_RUNTIME_INITIALIZATION_LAST, } rocprofiler_runtime_initialization_operation_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode.h new file mode 100644 index 0000000000..ed54739191 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode.h @@ -0,0 +1,27 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/CMakeLists.txt new file mode 100644 index 0000000000..f2528fec3a --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/CMakeLists.txt @@ -0,0 +1,13 @@ +# +# +# Installation of public rocDecode headers +# +# +set(ROCPROFILER_ROCDECODE_HEADER_FILES api_args.h api_id.h table_id.h) + +install( + FILES ${ROCPROFILER_ROCDECODE_HEADER_FILES} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/rocdecode + COMPONENT development) + +add_subdirectory(details) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/api_args.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/api_args.h new file mode 100644 index 0000000000..d5f5dfd183 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/api_args.h @@ -0,0 +1,165 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include +#include + +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() && __has_include() && __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +# include +# include +#else +# include +# include +# include +#endif + +#include + +ROCPROFILER_EXTERN_C_INIT + +// Empty struct has a size of 0 in C but size of 1 in C++. 
+// This struct is added to the union members which represent +// functions with no arguments to ensure ABI compatibility +typedef struct rocprofiler_rocdecode_api_no_args +{ + char empty; +} rocprofiler_rocdecode_api_no_args; + +typedef union rocprofiler_rocdecode_api_retval_t +{ + int32_t rocDecStatus_retval; + const char* const_charp_retval; +} rocprofiler_rocdecode_api_retval_t; + +typedef union rocprofiler_rocdecode_api_args_t +{ + struct + { + RocdecVideoParser* parser_handle; + RocdecParserParams* params; + } rocDecCreateVideoParser; + + struct + { + RocdecVideoParser parser_handle; + RocdecSourceDataPacket* packet; + } rocDecParseVideoData; + + struct + { + RocdecVideoParser parser_handle; + } rocDecDestroyVideoParser; + + struct + { + rocDecDecoderHandle* decoder_handle; + RocDecoderCreateInfo* decoder_create_info; + } rocDecCreateDecoder; + + struct + { + rocDecDecoderHandle decoder_handle; + } rocDecDestroyDecoder; + + struct + { + RocdecDecodeCaps* decode_caps; + } rocDecGetDecoderCaps; + + struct + { + rocDecDecoderHandle decoder_handle; + RocdecPicParams* pic_params; + } rocDecDecodeFrame; + + struct + { + rocDecDecoderHandle decoder_handle; + int pic_idx; + RocdecDecodeStatus* decode_status; + } rocDecGetDecodeStatus; + + struct + { + rocDecDecoderHandle decoder_handle; + RocdecReconfigureDecoderInfo* reconfig_params; + } rocDecReconfigureDecoder; + + struct + { + rocDecDecoderHandle decoder_handle; + int pic_idx; + void** dev_mem_ptr; + uint32_t* horizontal_pitch; + RocdecProcParams* vid_postproc_params; + } rocDecGetVideoFrame; + struct + { + rocDecStatus rocdec_status; + } rocDecGetErrorName; + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1 + struct + { + RocdecBitstreamReader* bs_reader_handle; + const char* input_file_path; + } rocDecCreateBitstreamReader; + struct + { + RocdecBitstreamReader bs_reader_handle; + rocDecVideoCodec* codec_type; + } rocDecGetBitstreamCodecType; + struct + { + RocdecBitstreamReader bs_reader_handle; + int* 
bit_depth; + } rocDecGetBitstreamBitDepth; + struct + { + RocdecBitstreamReader bs_reader_handle; + uint8_t** pic_data; + int* pic_size; + int64_t* pts; + } rocDecGetBitstreamPicData; + struct + { + RocdecBitstreamReader bs_reader_handle; + } rocDecDestroyBitstreamReader; +#endif +} rocprofiler_rocdecode_api_args_t; + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/api_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/api_id.h new file mode 100644 index 0000000000..8ad53182d9 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/api_id.h @@ -0,0 +1,56 @@ + + +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#include + +/** + * @brief ROCProfiler enumeration of rocDecode API tracing operations + */ +typedef enum // NOLINT(performance-enum-size) +{ + ROCPROFILER_ROCDECODE_API_ID_NONE = -1, + + ROCPROFILER_ROCDECODE_API_ID_rocDecCreateVideoParser = 0, + ROCPROFILER_ROCDECODE_API_ID_rocDecParseVideoData, + ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyVideoParser, + ROCPROFILER_ROCDECODE_API_ID_rocDecCreateDecoder, + ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyDecoder, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecoderCaps, + ROCPROFILER_ROCDECODE_API_ID_rocDecDecodeFrame, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecodeStatus, + ROCPROFILER_ROCDECODE_API_ID_rocDecReconfigureDecoder, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetVideoFrame, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetErrorName, + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1 + ROCPROFILER_ROCDECODE_API_ID_rocDecCreateBitstreamReader, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamCodecType, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamBitDepth, + ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamPicData, + ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyBitstreamReader, +#endif + ROCPROFILER_ROCDECODE_API_ID_LAST, +} rocprofiler_rocdecode_api_id_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/CMakeLists.txt new file mode 100644 index 0000000000..7869b7c825 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/CMakeLists.txt @@ -0,0 +1,13 @@ +# +# +# Installation of public ROCDecode headers +# +# +set(ROCPROFILER_ROCDECODE_DETAILS_HEADER_FILES + rocdecode_api_trace.h rocdecode.h rocparser.h rocdecode_version.h + roc_bitstream_reader.h) + +install( + FILES ${ROCPROFILER_ROCDECODE_DETAILS_HEADER_FILES} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/rocdecode/details + COMPONENT development) diff --git
a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/roc_bitstream_reader.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/roc_bitstream_reader.h new file mode 100644 index 0000000000..91df84a2a6 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/roc_bitstream_reader.h @@ -0,0 +1,110 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +#else +# include +#endif + +/*! + * \file + * \brief The AMD rocBitstreamReader Library. 
+ * + * \defgroup group_roc_bitstream_reader rocDecode Parser: AMD ROCm Video Bitstream Reader API + * \brief AMD The rocBitstreamReader is a toolkit to read picture data from bitstream files for + * decoding on AMD’s GPUs. + */ + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +/*********************************************************************************/ +//! HANDLE of rocBitstreamReader +//! Used in subsequent API calls after rocDecCreateBitstreamReader +/*********************************************************************************/ +typedef void* RocdecBitstreamReader; + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecCreateBitstreamReader(RocdecBitstreamReader *bs_reader_handle, +//! const char *input_file_path) Create video bitstream reader object and initialize +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecCreateBitstreamReader(RocdecBitstreamReader* bs_reader_handle, const char* input_file_path); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamCodecType(RocdecBitstreamReader bs_reader_handle, +//! rocDecVideoCodec *codec_type) Get the codec type of the bitstream +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetBitstreamCodecType(RocdecBitstreamReader bs_reader_handle, rocDecVideoCodec* codec_type); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamBitDepth(RocdecBitstreamReader bs_reader_handle, +//! 
int *bit_depth) Get the bit depth of the bitstream +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetBitstreamBitDepth(RocdecBitstreamReader bs_reader_handle, int* bit_depth); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamPicData(RocdecBitstreamReader bs_reader_handle, +//! uint8_t **pic_data, int *pic_size, int64_t *pts) Read one unit of picture data from the +//! bitstream. The unit can be a frame or field for AVC/HEVC, a temporal unit for AV1, or a frame +//! (including superframe) for VP9. The picture data unit is pointed by pic_data. The size of the +//! unit is specified by pic_size. The presentation time stamp, if available, is given by pts. +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetBitstreamPicData(RocdecBitstreamReader bs_reader_handle, + uint8_t** pic_data, + int* pic_size, + int64_t* pts); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecDestroyBitstreamReader(RocdecBitstreamReader bs_reader_handle) +//! 
Destroy the video bitstream reader object +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecDestroyBitstreamReader(RocdecBitstreamReader bs_reader_handle); + +#if defined(__cplusplus) +} +#endif /* __cplusplus */ diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocdecode.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocdecode.h new file mode 100644 index 0000000000..446fad6924 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocdecode.h @@ -0,0 +1,1888 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + +#pragma once + +#ifndef ROCDECAPI +# if defined(_WIN32) +# define ROCDECAPI __stdcall // for future: only linux is supported in this version +# else +# define ROCDECAPI +# endif +#endif + +#include "hip/hip_runtime.h" +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +#else +# include +#endif +/*! + * \file + * \brief The AMD rocDecode Library. + * + * \defgroup group_amd_rocdecode rocDecode: AMD ROCm Decode API + * \brief AMD rocDecode is a toolkit to decode videos and images using a hardware-accelerated + * video decoder on AMD’s GPUs. + */ + +#if defined(__cplusplus) +extern "C" { +#endif // __cplusplus + +/*********************************************************************************/ +//! HANDLE of rocDecDecoder +//! Used in subsequent API calls after rocDecCreateDecoder +/*********************************************************************************/ + +typedef void* rocDecDecoderHandle; + +/*********************************************************************************/ +//! \enum rocDecStatus +//! \ingroup group_amd_rocdecode +//! rocDecoder return status enums +//! These enums are used in all API calls to rocDecoder +/*********************************************************************************/ +typedef enum rocDecStatus_enum +{ + ROCDEC_DEVICE_INVALID = -1, + ROCDEC_CONTEXT_INVALID = -2, + ROCDEC_RUNTIME_ERROR = -3, + ROCDEC_OUTOF_MEMORY = -4, + ROCDEC_INVALID_PARAMETER = -5, + ROCDEC_NOT_IMPLEMENTED = -6, + ROCDEC_NOT_INITIALIZED = -7, + ROCDEC_NOT_SUPPORTED = -8, + ROCDEC_SUCCESS = 0, +} rocDecStatus; + +/*********************************************************************************/ +//! \enum rocDecVideoCodec +//!
\ingroup group_amd_rocdecode +//! Video codec enums +//! These enums are used in ROCDECODECREATEINFO and ROCDECODEVIDDECODECAPS structures +/*********************************************************************************/ +typedef enum rocDecVideoCodec_enum +{ + rocDecVideoCodec_MPEG1 = 0, /**< MPEG1 */ + rocDecVideoCodec_MPEG2, /**< MPEG2 */ + rocDecVideoCodec_MPEG4, /**< MPEG4 */ + rocDecVideoCodec_AVC, /**< AVC/H264 */ + rocDecVideoCodec_HEVC, /**< HEVC */ + rocDecVideoCodec_AV1, /**< AV1 */ + rocDecVideoCodec_VP8, /**< VP8 */ + rocDecVideoCodec_VP9, /**< VP9 */ + rocDecVideoCodec_JPEG, /**< JPEG */ + rocDecVideoCodec_NumCodecs, /**< Max codecs */ + // Uncompressed YUV + rocDecVideoCodec_YUV420 = + (('I' << 24) | ('Y' << 16) | ('U' << 8) | ('V')), /**< Y,U,V (4:2:0) */ + rocDecVideoCodec_YV12 = + (('Y' << 24) | ('V' << 16) | ('1' << 8) | ('2')), /**< Y,V,U (4:2:0) */ + rocDecVideoCodec_NV12 = + (('N' << 24) | ('V' << 16) | ('1' << 8) | ('2')), /**< Y,UV (4:2:0) */ + rocDecVideoCodec_YUYV = + (('Y' << 24) | ('U' << 16) | ('Y' << 8) | ('V')), /**< YUYV/YUY2 (4:2:2) */ + rocDecVideoCodec_UYVY = (('U' << 24) | ('Y' << 16) | ('V' << 8) | ('Y')) /**< UYVY (4:2:2) */ +} rocDecVideoCodec; + +/*********************************************************************************/ +//! \enum rocDecVideoSurfaceFormat +//! \ingroup group_amd_rocdecode +//! Video surface format enums used for output format of decoded output +//! These enums are used in RocDecoderCreateInfo structure +/*********************************************************************************/ +typedef enum rocDecVideoSurfaceFormat_enum +{ + rocDecVideoSurfaceFormat_NV12 = + 0, /**< Semi-Planar YUV [Y plane followed by interleaved UV plane] */ + rocDecVideoSurfaceFormat_P016 = + 1, /**< 16 bit Semi-Planar YUV [Y plane followed by interleaved UV plane]. 
+ Can be used for 10 bit(6LSB bits 0), 12 bit (4LSB bits 0) */ + rocDecVideoSurfaceFormat_YUV444 = 2, /**< Planar YUV [Y plane followed by U and V planes] */ + rocDecVideoSurfaceFormat_YUV444_16Bit = + 3, /**< 16 bit Planar YUV [Y plane followed by U and V planes]. + Can be used for 10 bit(6LSB bits 0), 12 bit (4LSB bits 0) */ + rocDecVideoSurfaceFormat_YUV420 = + 4, /**< Planar YUV [Y plane followed by U and V planes in 4:2:0 format] */ + rocDecVideoSurfaceFormat_YUV420_16Bit = + 5, /**< 16 bit Planar YUV [Y plane followed by U and V planes in ]. + Can be used for 10 bit(LSB), 12 bit (LSB) */ +} rocDecVideoSurfaceFormat; + +/**************************************************************************************************************/ +//! \enum rocDecVideoChromaFormat +//! \ingroup group_amd_rocdecode +//! Chroma format enums +//! These enums are used in ROCDCODECREATEINFO and RocdecDecodeCaps structures +/**************************************************************************************************************/ +typedef enum rocDecVideoChromaFormat_enum +{ + rocDecVideoChromaFormat_Monochrome = 0, /**< MonoChrome */ + rocDecVideoChromaFormat_420, /**< YUV 4:2:0 */ + rocDecVideoChromaFormat_422, /**< YUV 4:2:2 */ + rocDecVideoChromaFormat_444 /**< YUV 4:4:4 */ +} rocDecVideoChromaFormat; + +/*************************************************************************/ +//! \enum rocDecDecodeStatus +//! \ingroup group_amd_rocdecode +//! Decode status enums +//! 
These enums are used in RocdecGetDecodeStatus structure +/*************************************************************************/ +typedef enum rocDecodeStatus_enum +{ + rocDecodeStatus_Invalid = 0, // Decode status is not valid + rocDecodeStatus_InProgress = 1, // Decode is in progress + rocDecodeStatus_Success = 2, // Decode is completed without any errors + // 3 to 7 enums are reserved for future use + rocDecodeStatus_Error = 8, // Decode is completed with an error (error is not concealed) + rocDecodeStatus_Error_Concealed = + 9, // Decode is completed with an error and error is concealed + rocDecodeStatus_Displaying = 10, // Decode is completed, displaying in progress +} rocDecDecodeStatus; + +/**************************************************************************************************************/ +//! \struct RocdecDecodeCaps; +//! \ingroup group_amd_rocdecode +//! This structure is used in rocDecGetDecoderCaps API +/**************************************************************************************************************/ +typedef struct _RocdecDecodeCaps +{ + uint8_t device_id; /**< IN: the device id for which query the decode capability 0 for the first + device, 1 for the second device on the system, etc.*/ + rocDecVideoCodec codec_type; /**< IN: rocDecVideoCodec_XXX */ + rocDecVideoChromaFormat chroma_format; /**< IN: rocDecVideoChromaFormat_XXX */ + uint32_t bit_depth_minus_8; /**< IN: The Value "BitDepth minus 8" */ + uint32_t reserved_1[3]; /**< Reserved for future use - set to zero */ + uint8_t is_supported; /**< OUT: 1 if codec supported, 0 if not supported */ + uint8_t num_decoders; /**< OUT: Number of Decoders that can support IN params */ + uint16_t output_format_mask; /**< OUT: each bit represents corresponding + rocDecVideoSurfaceFormat enum */ + uint32_t max_width; /**< OUT: Max supported coded width in pixels */ + uint32_t max_height; /**< OUT: Max supported coded height in pixels */ + uint16_t min_width; /**< OUT: Min supported 
coded width in pixels */ + uint16_t min_height; /**< OUT: Min supported coded height in pixels */ + uint32_t reserved_2[6]; /**< Reserved for future use - set to zero */ +} RocdecDecodeCaps; + +/**************************************************************************************************************/ +//! \struct RocDecoderCreateInfo +//! \ingroup group_amd_rocdecode +//! This structure is used in rocDecCreateDecoder API +/**************************************************************************************************************/ +typedef struct _RocDecoderCreateInfo +{ + uint8_t device_id; /**< IN: the device id for which a decoder should be created + 0 for the first device, 1 for the second device on the system, etc.*/ + uint32_t width; /**< IN: Coded sequence width in pixels */ + uint32_t height; /**< IN: Coded sequence height in pixels */ + uint32_t num_decode_surfaces; /**< IN: Maximum number of internal decode surfaces */ + rocDecVideoCodec codec_type; /**< IN: rocDecVideoCodec_XXX */ + rocDecVideoChromaFormat chroma_format; /**< IN: rocDecVideoChromaFormat_XXX */ + uint32_t bit_depth_minus_8; /**< IN: The value "BitDepth minus 8" */ + uint32_t + intra_decode_only; /**< IN: Set 1 only if video has all intra frames (default value is 0). + This will optimize video memory for Intra frames only decoding. The + support is limited to specific codecs - AVC/H264, HEVC, VP9, the flag + will be ignored for codecs which are not supported. However decoding + might fail if the flag is enabled in case of supported codecs for + regular bit streams having P and/or B frames. 
*/ + uint32_t max_width; /**< IN: Coded sequence max width in pixels used with reconfigure Decoder */ + uint32_t + max_height; /**< IN: Coded sequence max height in pixels used with reconfigure Decoder */ + struct + { + int16_t left; + int16_t top; + int16_t right; + int16_t bottom; + } display_rect; /**< IN: area of the frame that should be displayed */ + rocDecVideoSurfaceFormat output_format; /**< IN: rocDecVideoSurfaceFormat_XXX */ + uint32_t target_width; /**< IN: Post-processed output width (Should be aligned to 2) */ + uint32_t target_height; /**< IN: Post-processed output height (Should be aligned to 2) */ + uint32_t + num_output_surfaces; /**< IN: Maximum number of output surfaces simultaneously mapped */ + struct + { + int16_t left; + int16_t top; + int16_t right; + int16_t bottom; + } target_rect; /**< IN: (for future use) target rectangle in the output frame (for aspect ratio + conversion) + if a null rectangle is specified, {0,0,target_width,target_height} will + be used*/ + uint32_t reserved_2[4]; /**< Reserved for future use - set to zero */ +} RocDecoderCreateInfo; + +/*********************************************************************************************************/ +//! \struct RocdecDecodeStatus +//! \ingroup group_amd_rocdecode +//! Struct for reporting decode status. +//! This structure is used in RocdecGetDecodeStatus API. +/*********************************************************************************************************/ +typedef struct _RocdecDecodeStatus +{ + rocDecDecodeStatus decode_status; + uint32_t reserved[31]; + void* p_reserved[8]; +} RocdecDecodeStatus; + +/****************************************************/ +//! \struct RocdecReconfigureDecoderInfo +//! \ingroup group_amd_rocdecode +//! Struct for decoder reset +//! 
This structure is used in rocDecReconfigureDecoder() API +/****************************************************/ +typedef struct _RocdecReconfigureDecoderInfo +{ + uint32_t width; /**< IN: Coded sequence width in pixels, MUST be < = max_width defined at + RocDecoderCreateInfo */ + uint32_t height; /**< IN: Coded sequence height in pixels, MUST be < = max_height defined at + RocDecoderCreateInfo */ + uint32_t target_width; /**< IN: Post processed output width */ + uint32_t target_height; /**< IN: Post Processed output height */ + uint32_t num_decode_surfaces; /**< IN: Maximum number of internal decode surfaces */ + uint32_t reserved_1[12]; /**< Reserved for future use. Set to Zero */ + struct + { + int16_t left; + int16_t top; + int16_t right; + int16_t bottom; + } display_rect; /**< IN: area of the frame that should be displayed */ + struct + { + int16_t left; + int16_t top; + int16_t right; + int16_t bottom; + } target_rect; /**< IN: (for future use) target rectangle in the output frame (for aspect ratio + conversion) + if a null rectangle is specified, {0,0,target_width,target_height} will be + used */ + uint32_t reserved_2[11]; /**< Reserved for future use. Set to Zero */ +} RocdecReconfigureDecoderInfo; + +/*********************************************************/ +//! \struct RocdecAvcPicture +//! \ingroup group_amd_rocdecode +//! AVC/H.264 Picture Entry +//! 
This structure is used in RocdecAvcPicParams structure +/*********************************************************/ +typedef struct _RocdecAvcPicture +{ + int pic_idx; /**< picture index of reference frame */ + uint32_t frame_idx; /**< frame_num(short-term) or LongTermFrameIdx(long-term) */ + uint32_t flags; /**< See below for definitions */ + int32_t top_field_order_cnt; /**< field order count of top field */ + int32_t bottom_field_order_cnt; /**< field order count of bottom field */ + uint32_t reserved[4]; +} RocdecAvcPicture; + +/* flags in RocdecAvcPicture could be OR of the following */ +#define RocdecAvcPicture_FLAGS_INVALID 0x00000001 +#define RocdecAvcPicture_FLAGS_TOP_FIELD 0x00000002 +#define RocdecAvcPicture_FLAGS_BOTTOM_FIELD 0x00000004 +#define RocdecAvcPicture_FLAGS_SHORT_TERM_REFERENCE 0x00000008 +#define RocdecAvcPicture_FLAGS_LONG_TERM_REFERENCE 0x00000010 +#define RocdecAvcPicture_FLAGS_NON_EXISTING 0x00000020 + +/*********************************************************/ +//! \struct RocdecHevcPicture +//! \ingroup group_amd_rocdecode +//! HEVC Picture Entry +//! This structure is used in RocdecHevcPicParams structure +/*********************************************************/ +typedef struct _RocdecHevcPicture +{ + int pic_idx; /**< reconstructed picture surface ID */ + /** \brief picture order count. + //! \ingroup group_amd_rocdecode + * in HEVC, POCs for top and bottom fields of same picture should + * take different values. + */ + int poc; + uint32_t flags; /**< See below for definitions */ + uint32_t reserved[4]; /**< reserved for future; must be zero */ +} RocdecHevcPicture; + +/* flags in RocdecHevcPicture could be OR of the following */ +#define RocdecHevcPicture_INVALID 0x00000001 +/** \brief indication of interlace scan picture. + * should take same value for all the pictures in sequence. + */ +#define RocdecHevcPicture_FIELD_PIC 0x00000002 +/** \brief polarity of the field picture. + * top field takes even lines of buffer surface.
+ * bottom field takes odd lines of buffer surface. + */ +#define RocdecHevcPicture_BOTTOM_FIELD 0x00000004 +/** \brief Long term reference picture */ +#define RocdecHevcPicture_LONG_TERM_REFERENCE 0x00000008 +/** + * RocdecHevcPicture_RPS_ST_CURR_BEFORE, RocdecHevcPicture_RPS_ST_CURR_AFTER + * and RocdecHevcPicture_RPS_LT_CURR of any picture in ReferenceFrames[] should + * be exclusive. No more than one of them can be set for any picture. + * Sum of NumPocStCurrBefore, NumPocStCurrAfter and NumPocLtCurr + * equals NumPocTotalCurr, which should be equal to or smaller than 8. + * Application should provide valid values for both short format and long format. + * The pictures in DPB with any of these three flags turned on are referred by + * the current picture. + */ +/** \brief RefPicSetStCurrBefore of HEVC spec variable + * Number of ReferenceFrames[] entries with this bit set equals + * NumPocStCurrBefore. + */ +#define RocdecHevcPicture_RPS_ST_CURR_BEFORE 0x00000010 +/** \brief RefPicSetStCurrAfter of HEVC spec variable + * Number of ReferenceFrames[] entries with this bit set equals + * NumPocStCurrAfter. + */ +#define RocdecHevcPicture_RPS_ST_CURR_AFTER 0x00000020 +/** \brief RefPicSetLtCurr of HEVC spec variable + * Number of ReferenceFrames[] entries with this bit set equals + * NumPocLtCurr. + */ +#define RocdecHevcPicture_RPS_LT_CURR 0x00000040 + +/***********************************************************/ +//! \struct RocdecJPEGPicParams placeholder +//! \ingroup group_amd_rocdecode +//! JPEG picture parameters +//! This structure is used in RocdecPicParams structure +/***********************************************************/ +typedef struct _RocdecJPEGPicParams +{ + int reserved; +} RocdecJPEGPicParams; + +/***********************************************************/ +//! \struct RocdecMpeg2QMatrix +//! \ingroup group_amd_rocdecode +//! MPEG2 QMatrix +//!
This structure is used in _RocdecMpeg2PicParams structure +/***********************************************************/ +typedef struct _RocdecMpeg2QMatrix +{ + int32_t load_intra_quantiser_matrix; + int32_t load_non_intra_quantiser_matrix; + int32_t load_chroma_intra_quantiser_matrix; + int32_t load_chroma_non_intra_quantiser_matrix; + uint8_t intra_quantiser_matrix[64]; + uint8_t non_intra_quantiser_matrix[64]; + uint8_t chroma_intra_quantiser_matrix[64]; + uint8_t chroma_non_intra_quantiser_matrix[64]; +} RocdecMpeg2QMatrix; + +/***********************************************************/ +//! \struct RocdecMpeg2PicParams +//! \ingroup group_amd_rocdecode +//! MPEG2 picture parameters +//! This structure is used in RocdecMpeg2PicParams structure +/***********************************************************/ +typedef struct _RocdecMpeg2PicParams +{ + uint16_t horizontal_size; + uint16_t vertical_size; + uint32_t forward_reference_pic; // surface_id for forward reference + uint32_t backward_reference_picture; // surface_id for backward reference + /* meanings of the following fields are the same as in the standard */ + int32_t picture_coding_type; + int32_t f_code; /* pack all four fcode into this */ + union + { + struct + { + uint32_t intra_dc_precision : 2; + uint32_t picture_structure : 2; + uint32_t top_field_first : 1; + uint32_t frame_pred_frame_dct : 1; + uint32_t concealment_motion_vectors : 1; + uint32_t q_scale_type : 1; + uint32_t intra_vlc_format : 1; + uint32_t alternate_scan : 1; + uint32_t repeat_first_field : 1; + uint32_t progressive_frame : 1; + uint32_t is_first_field : 1; // indicate whether the current field is the first field + // for field picture + } bits; + uint32_t value; + } picture_coding_extension; + + RocdecMpeg2QMatrix q_matrix; + uint32_t reserved[4]; +} RocdecMpeg2PicParams; + +/***********************************************************/ +//! \struct RocdecVc1PicParams placeholder +//! \ingroup group_amd_rocdecode +//! 
VC1 picture parameters +//! This structure is used in RocdecVc1PicParams structure +/***********************************************************/ +typedef struct _RocdecVc1PicParams +{ + int reserved; +} RocdecVc1PicParams; + +/***********************************************************/ +//! \struct RocdecAvcPicParams +//! \ingroup group_amd_rocdecode +//! AVC picture parameters +//! This structure is used in RocdecAvcPicParams structure +//! This structure is configured to be the same as VA-API VAPictureParameterBufferH264 structure +/***********************************************************/ +typedef struct _RocdecAvcPicParams +{ + RocdecAvcPicture curr_pic; + RocdecAvcPicture ref_frames[16]; /* in DPB */ + uint16_t picture_width_in_mbs_minus1; + uint16_t picture_height_in_mbs_minus1; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t num_ref_frames; + union + { + struct + { + uint32_t chroma_format_idc : 2; + uint32_t residual_colour_transform_flag : 1; + uint32_t gaps_in_frame_num_value_allowed_flag : 1; + uint32_t frame_mbs_only_flag : 1; + uint32_t mb_adaptive_frame_field_flag : 1; + uint32_t direct_8x8_inference_flag : 1; + uint32_t MinLumaBiPredSize8x8 : 1; /* see A.3.3.2 */ + uint32_t log2_max_frame_num_minus4 : 4; + uint32_t pic_order_cnt_type : 2; + uint32_t log2_max_pic_order_cnt_lsb_minus4 : 4; + uint32_t delta_pic_order_always_zero_flag : 1; + } bits; + uint32_t value; + } seq_fields; + + // FMO/ASO + uint8_t num_slice_groups_minus1; + uint8_t slice_group_map_type; + uint16_t slice_group_change_rate_minus1; + int8_t pic_init_qp_minus26; + int8_t pic_init_qs_minus26; + int8_t chroma_qp_index_offset; + int8_t second_chroma_qp_index_offset; + union + { + struct + { + uint32_t entropy_coding_mode_flag : 1; + uint32_t weighted_pred_flag : 1; + uint32_t weighted_bipred_idc : 2; + uint32_t transform_8x8_mode_flag : 1; + uint32_t field_pic_flag : 1; + uint32_t constrained_intra_pred_flag : 1; + uint32_t pic_order_present_flag :
1; + uint32_t deblocking_filter_control_present_flag : 1; + uint32_t redundant_pic_cnt_present_flag : 1; + uint32_t reference_pic_flag : 1; /* nal_ref_idc != 0 */ + } bits; + uint32_t value; + } pic_fields; + uint16_t frame_num; + + uint32_t reserved[8]; +} RocdecAvcPicParams; + +/***********************************************************/ +//! \struct RocdecAvcSliceParams +//! \ingroup group_amd_rocdecode +//! AVC slice parameter buffer +//! This structure is configured to be the same as VA-API VASliceParameterBufferH264 structure +/***********************************************************/ +typedef struct _RocdecAvcSliceParams +{ + uint32_t slice_data_size; // slice size in bytes + uint32_t slice_data_offset; // byte offset of the current slice in the slice data buffer + uint32_t slice_data_flag; /* see VA_SLICE_DATA_FLAG_XXX definitions */ + /** + * \brief Bit offset from NAL Header Unit to the beginning of slice_data(). + * + * This bit offset is relative to and includes the NAL unit byte + * and represents the number of bits parsed in the slice_header() + * after the removal of any emulation prevention bytes in + * there. However, the slice data buffer passed to the hardware is + * the original bitstream, thus including any emulation prevention + * bytes.
+ */ + uint16_t slice_data_bit_offset; + uint16_t first_mb_in_slice; + uint8_t slice_type; + uint8_t direct_spatial_mv_pred_flag; + uint8_t num_ref_idx_l0_active_minus1; + uint8_t num_ref_idx_l1_active_minus1; + uint8_t cabac_init_idc; + int8_t slice_qp_delta; + uint8_t disable_deblocking_filter_idc; + int8_t slice_alpha_c0_offset_div2; + int8_t slice_beta_offset_div2; + RocdecAvcPicture ref_pic_list_0[32]; // 8.2.4.2 + RocdecAvcPicture ref_pic_list_1[32]; // 8.2.4.2 + uint8_t luma_log2_weight_denom; + uint8_t chroma_log2_weight_denom; + uint8_t luma_weight_l0_flag; + int16_t luma_weight_l0[32]; + int16_t luma_offset_l0[32]; + uint8_t chroma_weight_l0_flag; + int16_t chroma_weight_l0[32][2]; + int16_t chroma_offset_l0[32][2]; + uint8_t luma_weight_l1_flag; + int16_t luma_weight_l1[32]; + int16_t luma_offset_l1[32]; + uint8_t chroma_weight_l1_flag; + int16_t chroma_weight_l1[32][2]; + int16_t chroma_offset_l1[32][2]; + uint32_t reserved[4]; +} RocdecAvcSliceParams; + +/***********************************************************/ +//! \struct RocdecAvcIQMatrix +//! \ingroup group_amd_rocdecode +//! AVC Inverse Quantization Matrix +//! This structure is configured to be the same as VA-API VAIQMatrixBufferH264 structure +/***********************************************************/ +typedef struct _RocdecAvcIQMatrix +{ + /** \brief 4x4 scaling list, in raster scan order. */ + uint8_t scaling_list_4x4[6][16]; + /** \brief 8x8 scaling list, in raster scan order. */ + uint8_t scaling_list_8x8[2][64]; + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved[4]; +} RocdecAvcIQMatrix; + +/***********************************************************/ +//! \struct RocdecHevcPicParams +//! \ingroup group_amd_rocdecode +//! HEVC picture parameters +//! 
This structure is used in RocdecHevcPicParams structure +/***********************************************************/ +typedef struct _RocdecHevcPicParams +{ + RocdecHevcPicture curr_pic; + RocdecHevcPicture ref_frames[15]; /* reference frame list in DPB */ + uint16_t picture_width_in_luma_samples; + uint16_t picture_height_in_luma_samples; + union + { + struct + { + /** following flags have same syntax and semantic as those in HEVC spec */ + uint32_t chroma_format_idc : 2; + uint32_t separate_colour_plane_flag : 1; + uint32_t pcm_enabled_flag : 1; + uint32_t scaling_list_enabled_flag : 1; + uint32_t transform_skip_enabled_flag : 1; + uint32_t amp_enabled_flag : 1; + uint32_t strong_intra_smoothing_enabled_flag : 1; + uint32_t sign_data_hiding_enabled_flag : 1; + uint32_t constrained_intra_pred_flag : 1; + uint32_t cu_qp_delta_enabled_flag : 1; + uint32_t weighted_pred_flag : 1; + uint32_t weighted_bipred_flag : 1; + uint32_t transquant_bypass_enabled_flag : 1; + uint32_t tiles_enabled_flag : 1; + uint32_t entropy_coding_sync_enabled_flag : 1; + uint32_t pps_loop_filter_across_slices_enabled_flag : 1; + uint32_t loop_filter_across_tiles_enabled_flag : 1; + uint32_t pcm_loop_filter_disabled_flag : 1; + /** set based on sps_max_num_reorder_pics of current temporal layer. 
*/ + uint32_t no_pic_reordering_flag : 1; + /** picture has no B slices */ + uint32_t no_bi_pred_flag : 1; + uint32_t reserved_bits : 11; + } bits; + uint32_t value; + } pic_fields; + + /** SPS fields: the following parameters have same syntax with those in HEVC spec */ + uint8_t sps_max_dec_pic_buffering_minus1; /**< IN: DPB size for current temporal layer */ + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t pcm_sample_bit_depth_luma_minus1; + uint8_t pcm_sample_bit_depth_chroma_minus1; + uint8_t log2_min_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_luma_coding_block_size; + uint8_t log2_min_transform_block_size_minus2; + uint8_t log2_diff_max_min_transform_block_size; + uint8_t log2_min_pcm_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_pcm_luma_coding_block_size; + uint8_t max_transform_hierarchy_depth_intra; + uint8_t max_transform_hierarchy_depth_inter; + int8_t init_qp_minus26; + uint8_t diff_cu_qp_delta_depth; + int8_t pps_cb_qp_offset; + int8_t pps_cr_qp_offset; + uint8_t log2_parallel_merge_level_minus2; + uint8_t num_tile_columns_minus1; + uint8_t num_tile_rows_minus1; + /** + * when uniform_spacing_flag equals 1, application should populate + * column_width_minus1[], and row_height_minus1[] with appropriate values. 
+ */ + uint16_t column_width_minus1[19]; + uint16_t row_height_minus1[21]; + + union + { + struct + { + /** following parameters have same syntax with those in HEVC spec */ + uint32_t lists_modification_present_flag : 1; + uint32_t long_term_ref_pics_present_flag : 1; + uint32_t sps_temporal_mvp_enabled_flag : 1; + uint32_t cabac_init_present_flag : 1; + uint32_t output_flag_present_flag : 1; + uint32_t dependent_slice_segments_enabled_flag : 1; + uint32_t pps_slice_chroma_qp_offsets_present_flag : 1; + uint32_t sample_adaptive_offset_enabled_flag : 1; + uint32_t deblocking_filter_override_enabled_flag : 1; + uint32_t pps_disable_deblocking_filter_flag : 1; + uint32_t slice_segment_header_extension_present_flag : 1; + + /** current picture with NUT between 16 and 21 inclusive */ + uint32_t rap_pic_flag : 1; + /** current picture with NUT between 19 and 20 inclusive */ + uint32_t idr_pic_flag : 1; + /** current picture has only intra slices */ + uint32_t intra_pic_flag : 1; + + uint32_t reserved_bits : 18; + } bits; + uint32_t value; + } slice_parsing_fields; + + /** following parameters have same syntax with those in HEVC spec */ + uint8_t log2_max_pic_order_cnt_lsb_minus4; + uint8_t num_short_term_ref_pic_sets; + uint8_t num_long_term_ref_pic_sps; + uint8_t num_ref_idx_l0_default_active_minus1; + uint8_t num_ref_idx_l1_default_active_minus1; + int8_t pps_beta_offset_div2; + int8_t pps_tc_offset_div2; + uint8_t num_extra_slice_header_bits; + /** + * \brief number of bits that structure + * short_term_ref_pic_set( num_short_term_ref_pic_sets ) takes in slice + * segment header when short_term_ref_pic_set_sps_flag equals 0. + * if short_term_ref_pic_set_sps_flag equals 1, the value should be 0. + * the bit count is calculated after emulation prevention bytes are removed + * from bit streams. + * This variable is used for accelerator to skip parsing the + * short_term_ref_pic_set( num_short_term_ref_pic_sets ) structure. 
+ */ + uint32_t st_rps_bits; + uint32_t reserved[8]; +} RocdecHevcPicParams; + +/***********************************************************/ +//! \struct RocdecHevcSliceParams +//! \ingroup group_amd_rocdecode +//! HEVC slice parameters +//! This structure is used in RocdecPicParams structure +/***********************************************************/ +typedef struct _RocdecHevcSliceParams +{ + /** \brief Number of bytes in the slice data buffer for this slice + * counting from and including NAL unit header. + */ + uint32_t slice_data_size; + /** \brief The offset to the NAL unit header for this slice */ + uint32_t slice_data_offset; + /** \brief Slice data buffer flags. See \c VA_SLICE_DATA_FLAG_XXX. */ + uint32_t slice_data_flag; + /** + * \brief Byte offset from NAL unit header to the beginning of slice_data(). + * + * This byte offset is relative to and includes the NAL unit header + * and represents the number of bytes parsed in the slice_header() + * after the removal of any emulation prevention bytes in + * there. However, the slice data buffer passed to the hardware is + * the original bitstream, thus including any emulation prevention + * bytes. + */ + uint32_t slice_data_byte_offset; + /** HEVC syntax element. */ + uint32_t slice_segment_address; + /** \brief index into ReferenceFrames[] + * ref_pic_list[0][] corresponds to RefPicList0[] of HEVC variable. + * ref_pic_list[1][] corresponds to RefPicList1[] of HEVC variable. + * value range [0..14, 0xFF], where 0xFF indicates invalid entry. + */ + uint8_t ref_pic_list[2][15]; + union + { + uint32_t value; + struct + { + /** current slice is last slice of picture. */ + uint32_t last_slice_of_pic : 1; + /** HEVC syntax element. 
*/ + uint32_t dependent_slice_segment_flag : 1; + uint32_t slice_type : 2; + uint32_t color_plane_id : 2; + uint32_t slice_sao_luma_flag : 1; + uint32_t slice_sao_chroma_flag : 1; + uint32_t mvd_l1_zero_flag : 1; + uint32_t cabac_init_flag : 1; + uint32_t slice_temporal_mvp_enabled_flag : 1; + uint32_t slice_deblocking_filter_disabled_flag : 1; + uint32_t collocated_from_l0_flag : 1; + uint32_t slice_loop_filter_across_slices_enabled_flag : 1; + uint32_t reserved : 18; + } fields; + } long_slice_flags; + + /** HEVC syntax element. */ + uint8_t collocated_ref_idx; + uint8_t num_ref_idx_l0_active_minus1; + uint8_t num_ref_idx_l1_active_minus1; + int8_t slice_qp_delta; + int8_t slice_cb_qp_offset; + int8_t slice_cr_qp_offset; + int8_t slice_beta_offset_div2; + int8_t slice_tc_offset_div2; + uint8_t luma_log2_weight_denom; + int8_t delta_chroma_log2_weight_denom; + int8_t delta_luma_weight_l0[15]; + int8_t luma_offset_l0[15]; + int8_t delta_chroma_weight_l0[15][2]; + /** corresponds to HEVC spec variable of the same name. */ + int8_t chroma_offset_l0[15][2]; + /** HEVC syntax element. */ + int8_t delta_luma_weight_l1[15]; + int8_t luma_offset_l1[15]; + int8_t delta_chroma_weight_l1[15][2]; + /** corresponds to HEVC spec variable of the same name. */ + int8_t chroma_offset_l1[15][2]; + /** HEVC syntax element. */ + uint8_t five_minus_max_num_merge_cand; + uint16_t num_entry_point_offsets; + uint16_t entry_offset_to_subset_array; + /** \brief Number of emulation prevention bytes in slice header. */ + uint16_t slice_data_num_emu_prevn_bytes; + + uint32_t reserved[2]; +} RocdecHevcSliceParams; + +/***********************************************************/ +//! \struct RocdecHevcIQMatrix +//! \ingroup group_amd_rocdecode +//! HEVC IQMatrix +//! This structure is sent once per frame, +//! and only when scaling_list_enabled_flag = 1. +//! When sps_scaling_list_data_present_flag = 0, app still +//! needs to send in this structure with default matrix values. +//! 
This structure is used in RocdecHevcQMatrix structure +/***********************************************************/ +typedef struct _RocdecHevcIQMatrix +{ + /** + * \brief 4x4 scaling, + * corresponds to i = 0, MatrixID is in the range of 0 to 5, + * inclusive. And j is in the range of 0 to 15, inclusive. + */ + uint8_t scaling_list_4x4[6][16]; + /** + * \brief 8x8 scaling, + * corresponds to i = 1, MatrixID is in the range of 0 to 5, + * inclusive. And j is in the range of 0 to 63, inclusive. + */ + uint8_t scaling_list_8x8[6][64]; + /** + * \brief 16x16 scaling, + * corresponds to i = 2, MatrixID is in the range of 0 to 5, + * inclusive. And j is in the range of 0 to 63, inclusive. + */ + uint8_t scaling_list_16x16[6][64]; + /** + * \brief 32x32 scaling, + * corresponds to i = 3, MatrixID is in the range of 0 to 1, + * inclusive. And j is in the range of 0 to 63, inclusive. + */ + uint8_t scaling_list_32x32[2][64]; + /** + * \brief DC values of the 16x16 scaling lists, + * corresponds to HEVC spec syntax + * scaling_list_dc_coef_minus8[ sizeID - 2 ][ matrixID ] + 8 + * with sizeID = 2 and matrixID in the range of 0 to 5, inclusive. + */ + uint8_t scaling_list_dc_16x16[6]; + /** + * \brief DC values of the 32x32 scaling lists, + * corresponds to HEVC spec syntax + * scaling_list_dc_coef_minus8[ sizeID - 2 ][ matrixID ] + 8 + * with sizeID = 3 and matrixID in the range of 0 to 1, inclusive. + */ + uint8_t scaling_list_dc_32x32[2]; + uint32_t reserved[4]; +} RocdecHevcIQMatrix; + +/***********************************************************/ +//! \struct RocdecVp9PicParams +//! \ingroup group_amd_rocdecode +//! VP9 picture parameters +//! This structure is configured to be the same as VA-API VADecPictureParameterBufferVP9 structure. +/***********************************************************/ +typedef struct _RocdecVp9PicParams +{ + /** \brief picture width + * Picture original resolution. The value may not be multiple of 8. 
+ */ + uint16_t frame_width; + /** \brief picture height + * Picture original resolution. The value may not be multiple of 8. + */ + uint16_t frame_height; + + /** \brief Surface indices of reference frames in DPB. + * + * Each entry of the list specifies the surface index of the picture + * that is referred by current picture or will be referred by any future + * picture. + * Application who calls this API should update this list based on the + * refreshing information from VP9 bitstream. + */ + uint32_t reference_frames[8]; + + union + { + struct + { + /** \brief flags for current picture + * same syntax and semantic as those in VP9 code + */ + uint32_t subsampling_x : 1; + uint32_t subsampling_y : 1; + uint32_t frame_type : 1; + uint32_t show_frame : 1; + uint32_t error_resilient_mode : 1; + uint32_t intra_only : 1; + uint32_t allow_high_precision_mv : 1; + uint32_t mcomp_filter_type : 3; + uint32_t frame_parallel_decoding_mode : 1; + uint32_t reset_frame_context : 2; + uint32_t refresh_frame_context : 1; + uint32_t frame_context_idx : 2; + uint32_t segmentation_enabled : 1; + + /** \brief corresponds to variable temporal_update in VP9 code. + */ + uint32_t segmentation_temporal_update : 1; + /** \brief corresponds to variable update_mb_segmentation_map + * in VP9 code. + */ + uint32_t segmentation_update_map : 1; + + /** \brief Index of reference_frames[] and points to the + * LAST reference frame. + * It corresponds to active_ref_idx[0] in VP9 code. + */ + uint32_t last_ref_frame : 3; + /** \brief Sign Bias of the LAST reference frame. + * It corresponds to ref_frame_sign_bias[LAST_FRAME] in VP9 code. + */ + uint32_t last_ref_frame_sign_bias : 1; + /** \brief Index of reference_frames[] and points to the + * GOLDEN reference frame. + * It corresponds to active_ref_idx[1] in VP9 code. + */ + uint32_t golden_ref_frame : 3; + /** \brief Sign Bias of the GOLDEN reference frame. + * Corresponds to ref_frame_sign_bias[GOLDEN_FRAME] in VP9 code. 
+ */ + uint32_t golden_ref_frame_sign_bias : 1; + /** \brief Index of reference_frames[] and points to the + * ALTERNATE reference frame. + * Corresponds to active_ref_idx[2] in VP9 code. + */ + uint32_t alt_ref_frame : 3; + /** \brief Sign Bias of the ALTERNATE reference frame. + * Corresponds to ref_frame_sign_bias[ALTREF_FRAME] in VP9 code. + */ + uint32_t alt_ref_frame_sign_bias : 1; + /** \brief Lossless Mode + * LosslessFlag = base_qindex == 0 && + * y_dc_delta_q == 0 && + * uv_dc_delta_q == 0 && + * uv_ac_delta_q == 0; + * Where base_qindex, y_dc_delta_q, uv_dc_delta_q and uv_ac_delta_q + * are all variables in VP9 code. + */ + uint32_t lossless_flag : 1; + } bits; + uint32_t value; + } pic_fields; + + /* following parameters have same syntax with those in VP9 code */ + uint8_t filter_level; + uint8_t sharpness_level; + + /** \brief number of tile rows specified by (1 << log2_tile_rows). + * It corresponds to the variable with same name in VP9 code. + */ + uint8_t log2_tile_rows; + /** \brief number of tile columns specified by (1 << log2_tile_columns). + * It corresponds to the variable with same name in VP9 code. + */ + uint8_t log2_tile_columns; + /** \brief Number of bytes taken up by the uncompressed frame header, + * which corresponds to byte length of function + * read_uncompressed_header() in VP9 code. + * Specifically, it is the byte count from bit stream buffer start to + * the last byte of uncompressed frame header. + * If there are other meta data in the buffer before uncompressed header, + * its size should be also included here. + */ + uint8_t frame_header_length_in_bytes; + + /** \brief The byte count of compressed header in the bitstream buffer, + * which corresponds to syntax first_partition_size in code. + */ + uint16_t first_partition_size; + + /** These values are segment probabilities with same names in VP9 + * function setup_segmentation(). They should be parsed directly from + * bitstream by application. 
+ */ + uint8_t mb_segment_tree_probs[7]; + uint8_t segment_pred_probs[3]; + + /** \brief VP9 Profile definition + * value range [0..3]. + */ + uint8_t profile; + + /** \brief VP9 bit depth per sample + * same for both luma and chroma samples. + */ + uint8_t bit_depth; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t va_reserved[8]; + +} RocdecVp9PicParams; + +/** + * \brief VP9 Segmentation Parameter Data Structure + * This structure is configured to be the same as VA-API VASegmentParameterVP9 structure. + */ +typedef struct _RocdecVp9SegmentParameter +{ + union + { + struct + { + /** \brief Indicates if per segment reference frame indicator + * is enabled. + * Corresponding to variable feature_enabled when + * j == SEG_LVL_REF_FRAME in function setup_segmentation() VP9 code. + */ + uint16_t segment_reference_enabled : 1; + /** \brief Specifies per segment reference indication. + * 0: reserved + * 1: Last ref + * 2: golden + * 3: altref + * Value can be derived from variable data when + * j == SEG_LVL_REF_FRAME in function setup_segmentation() VP9 code. + */ + uint16_t segment_reference : 2; + /** \brief Indicates if per segment skip feature is enabled. + * Corresponding to variable feature_enabled when + * j == SEG_LVL_SKIP in function setup_segmentation() VP9 code. + */ + uint16_t segment_reference_skipped : 1; + } fields; + uint16_t value; + } segment_flags; + + /** \brief Specifies the filter level information per segment. + * The value corresponds to variable lfi->lvl[seg][ref][mode] in VP9 code, + * where m is [ref], and n is [mode] in FilterLevel[m][n]. + */ + uint8_t filter_level[4][2]; + /** \brief Specifies per segment Luma AC quantization scale. + * Corresponding to y_dequant[qindex][1] in vp9_mb_init_quantizer() + * function of VP9 code. + */ + int16_t luma_ac_quant_scale; + /** \brief Specifies per segment Luma DC quantization scale. + * Corresponding to y_dequant[qindex][0] in vp9_mb_init_quantizer() + * function of VP9 code. 
+ */ + int16_t luma_dc_quant_scale; + /** \brief Specifies per segment Chroma AC quantization scale. + * Corresponding to uv_dequant[qindex][1] in vp9_mb_init_quantizer() + * function of VP9 code. + */ + int16_t chroma_ac_quant_scale; + /** \brief Specifies per segment Chroma DC quantization scale. + * Corresponding to uv_dequant[qindex][0] in vp9_mb_init_quantizer() + * function of VP9 code. + */ + int16_t chroma_dc_quant_scale; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t va_reserved[4]; + +} RocdecVp9SegmentParameter; + +/***********************************************************/ +//! \struct RocdecVp9SliceParams +//! \ingroup group_amd_rocdecode +//! VP9 slice parameter buffer +//! This structure is configured to be the same as VA-API VASliceParameterBufferVP9 structure. +/***********************************************************/ +typedef struct _RocdecVp9SliceParams +{ + /** \brief The byte count of current frame in the bitstream buffer, + * starting from first byte of the buffer. + * It uses the name slice_data_size to be consistent with other codec, + * but actually means frame_data_size. + */ + uint32_t slice_data_size; + /** + * offset to the first byte of partition data (control partition) + */ + uint32_t slice_data_offset; + /** + * see VA_SLICE_DATA_FLAG_XXX definitions + */ + uint32_t slice_data_flag; + + /** + * \brief per segment information + */ + RocdecVp9SegmentParameter seg_param[8]; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t va_reserved[4]; + +} RocdecVp9SliceParams; + +/** \brief Segmentation Information for AV1 + */ +typedef struct _RocdecAv1SegmentationStruct +{ + union + { + struct + { + /** Indicates whether segmentation map related syntax elements + * are present or not for current frame. 
If equal to 0, + * the segmentation map related syntax elements are + * not present for the current frame and the control flags of + * segmentation map related tables feature_data[][], and + * feature_mask[] are not valid and shall be ignored by accelerator. + */ + uint32_t enabled : 1; + /** Value 1 indicates that the segmentation map are updated + * during the decoding of this frame. + * Value 0 means that the segmentation map from the previous + * frame is used. + */ + uint32_t update_map : 1; + /** Value 1 indicates that the updates to the segmentation map + * are coded relative to the existing segmentation map. + * Value 0 indicates that the new segmentation map is coded + * without reference to the existing segmentation map. + */ + uint32_t temporal_update : 1; + /** Value 1 indicates that new parameters are about to be + * specified for each segment. + * Value 0 indicates that the segmentation parameters + * should keep their existing values. + */ + uint32_t update_data : 1; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved : 28; + } bits; + uint32_t value; + } segment_info_fields; + + /** \brief Segmentation parameters for current frame. + * feature_data[segment_id][feature_id] + * where segment_id has value range [0..7] indicating the segment id. + * and feature_id is defined as + typedef enum { + SEG_LVL_ALT_Q, // Use alternate Quantizer .... + SEG_LVL_ALT_LF_Y_V, // Use alternate loop filter value on y plane vertical + SEG_LVL_ALT_LF_Y_H, // Use alternate loop filter value on y plane horizontal + SEG_LVL_ALT_LF_U, // Use alternate loop filter value on u plane + SEG_LVL_ALT_LF_V, // Use alternate loop filter value on v plane + SEG_LVL_REF_FRAME, // Optional Segment reference frame + SEG_LVL_SKIP, // Optional Segment (0,0) + skip mode + SEG_LVL_GLOBALMV, + SEG_LVL_MAX + } SEG_LVL_FEATURES; + * feature_data[][] is equivalent to variable FeatureData[][] in spec, + * which is after clip3() operation. 
+ * Clip3(x, y, z) = (z < x)? x : ((z > y)? y : z); + * The limit is defined in Segmentation_Feature_Max[ SEG_LVL_MAX ] = { + * 255, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, 7, 0, 0 } + */ + int16_t feature_data[8][8]; + + /** \brief indicates if a feature is enabled or not. + * Each bit field itself is the feature_id. Index is segment_id. + * feature_mask[segment_id] & (1 << feature_id) equal to 1 specify that the feature of + * feature_id for segment of segment_id is enabled, otherwise disabled. + */ + uint8_t feature_mask[8]; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved[4]; +} RocdecAv1SegmentationStruct; + +/** \brief Film Grain Information for AV1 + */ +typedef struct _RocdecAv1FilmGrainStruct +{ + union + { + struct + { + /** \brief Specify whether or not film grain is applied on current frame. + * If set to 0, all the rest parameters should be set to zero + * and ignored. + */ + uint32_t apply_grain : 1; + uint32_t chroma_scaling_from_luma : 1; + uint32_t grain_scaling_minus_8 : 2; + uint32_t ar_coeff_lag : 2; + uint32_t ar_coeff_shift_minus_6 : 2; + uint32_t grain_scale_shift : 2; + uint32_t overlap_flag : 1; + uint32_t clip_to_restricted_range : 1; + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved : 20; + } bits; + uint32_t value; + } film_grain_info_fields; + + uint16_t grain_seed; + /* value range [0..14] */ + uint8_t num_y_points; + uint8_t point_y_value[14]; + uint8_t point_y_scaling[14]; + /* value range [0..10] */ + uint8_t num_cb_points; + uint8_t point_cb_value[10]; + uint8_t point_cb_scaling[10]; + /* value range [0..10] */ + uint8_t num_cr_points; + uint8_t point_cr_value[10]; + uint8_t point_cr_scaling[10]; + /* value range [-128..127] */ + int8_t ar_coeffs_y[24]; + int8_t ar_coeffs_cb[25]; + int8_t ar_coeffs_cr[25]; + uint8_t cb_mult; + uint8_t cb_luma_mult; + uint16_t cb_offset; + uint8_t cr_mult; + uint8_t cr_luma_mult; + uint16_t cr_offset; + + /** 
\brief Reserved bytes for future use, must be zero */ + uint32_t reserved[4]; +} RocdecAv1FilmGrainStruct; + +typedef enum +{ + /** identity transformation, 0-parameter */ + RocdecAv1TransformationIdentity = 0, + /** translational motion, 2-parameter */ + RocdecAv1TransformationTranslation = 1, + /** simplified affine with rotation + zoom only, 4-parameter */ + RocdecAv1TransformationRotzoom = 2, + /** affine, 6-parameter */ + RocdecAv1TransformationAffine = 3, + /** transformation count */ + RocdecAv1TransformationCount +} RocdecAv1TransformationType; + +typedef struct _RocdecAv1WarpedMotionParams +{ + /** \brief Specify the type of warped motion */ + RocdecAv1TransformationType wmtype; + + /** \brief Specify warp motion parameters + * wm.wmmat[] corresponds to gm_params[][] in spec. + * Details in AV1 spec section 5.9.24 or refer to libaom code + * https://aomedia.googlesource.com/aom/+/refs/heads/master/av1/decoder/decodeframe.c + */ + int32_t wmmat[8]; + + /* valid or invalid on affine set */ + uint8_t invalid; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved[4]; +} RocdecAv1WarpedMotionParams; + +/***********************************************************/ +//! \struct RocdecAv1PicParams +//! \ingroup group_amd_rocdecode +//! AV1 picture parameters +//! This structure is used in RocdecAv1PicParams structure +/***********************************************************/ +typedef struct _RocdecAV1PicParams +{ + /** \brief sequence level information + */ + + /** \brief AV1 bit stream profile + */ + uint8_t profile; + + uint8_t order_hint_bits_minus_1; + + /** \brief bit depth index + * value range [0..2] + * 0 - bit depth 8; + * 1 - bit depth 10; + * 2 - bit depth 12; + */ + uint8_t bit_depth_idx; + + /** \brief corresponds to AV1 spec variable of the same name. 
*/ + uint8_t matrix_coefficients; + + union + { + struct + { + uint32_t still_picture : 1; + uint32_t use_128x128_superblock : 1; + uint32_t enable_filter_intra : 1; + uint32_t enable_intra_edge_filter : 1; + + /** read_compound_tools */ + uint32_t enable_interintra_compound : 1; + uint32_t enable_masked_compound : 1; + + uint32_t enable_dual_filter : 1; + uint32_t enable_order_hint : 1; + uint32_t enable_jnt_comp : 1; + uint32_t enable_cdef : 1; + uint32_t mono_chrome : 1; + uint32_t color_range : 1; + uint32_t subsampling_x : 1; + uint32_t subsampling_y : 1; + uint32_t chroma_sample_position : 1; + uint32_t film_grain_params_present : 1; + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved : 16; + } fields; + uint32_t value; + } seq_info_fields; + + /** \brief Picture level information + */ + + /** \brief buffer description of decoded current picture + */ + int current_frame; + + /** \brief display buffer of current picture + * Used for film grain applied decoded picture. + * Valid only when apply_grain equals 1. + */ + int current_display_picture; + + /** \brief number of anchor frames for large scale tile + * This parameter gives the number of entries of anchor_frames_list[]. + * Value range [0..128]. + */ + uint8_t anchor_frames_num; + + /** \brief anchor frame list for large scale tile + * For large scale tile applications, the anchor frames could come from + * previously decoded frames in current sequence (aka. internal), or + * from external sources. + * For external anchor frames, application should call API + * vaCreateBuffer() to generate frame buffers and populate them with + * pixel frames. And this process may happen multiple times. + * The array anchor_frames_list[] is used to register all the available + * anchor frames from both external and internal, up to the current + * frame instance. If a previously registered anchor frame is no longer + * needed, it should be removed from the list. 
But it does not prevent + * applications from replacing the frame buffer with new anchor frames. + * Please note that the internal anchor frames may not still be present + * in the current DPB buffer. But if it is in the anchor_frames_list[], + * it should not be replaced with other frames or removed from memory + * until it is not shown in the list. + * This number of entries of the list is given by parameter anchor_frames_num. + */ + int* anchor_frames_list; + + /** \brief Picture resolution minus 1 + * Picture original resolution. If SuperRes is enabled, + * this is the upscaled resolution. + * value range [0..65535] + */ + uint16_t frame_width_minus1; + uint16_t frame_height_minus1; + + /** \brief Output frame buffer size in unit of tiles + * Valid only when large_scale_tile equals 1. + * value range [0..65535] + */ + uint16_t output_frame_width_in_tiles_minus_1; + uint16_t output_frame_height_in_tiles_minus_1; + + /** \brief Surface indices of reference frames in DPB. + * + * Contains a list of uncompressed frame buffer surface indices as references. + * Application needs to make sure all the entries point to valid frames + * except for intra frames by checking ref_frame_id[]. If missing frame + * is identified, application may choose to perform error recovery by + * pointing problematic index to an alternative frame buffer. + * Driver is not responsible to validate reference frames' id. + */ + int ref_frame_map[8]; + + /** \brief Reference frame indices. + * + * Contains a list of indices into ref_frame_map[8]. + * It specifies the reference frame correspondence. 
+ * The indices of the array are defined as [LAST_FRAME – LAST_FRAME, + * LAST2_FRAME – LAST_FRAME, …, ALTREF_FRAME – LAST_FRAME], where each + * symbol is defined as: + * enum{INTRA_FRAME = 0, LAST_FRAME, LAST2_FRAME, LAST3_FRAME, GOLDEN_FRAME, + * BWDREF_FRAME, ALTREF2_FRAME, ALTREF_FRAME}; + */ + uint8_t ref_frame_idx[7]; + + /** \brief primary reference frame index + * Index into ref_frame_idx[], specifying which reference frame contains + * propagated info that should be loaded at the start of the frame. + * When value equals PRIMARY_REF_NONE (7), it indicates there is + * no primary reference frame. + * value range [0..7] + */ + uint8_t primary_ref_frame; + uint8_t order_hint; + + RocdecAv1SegmentationStruct seg_info; + RocdecAv1FilmGrainStruct film_grain_info; + + /** \brief tile structure + * When uniform_tile_spacing_flag == 1, width_in_sbs_minus_1[] and + * height_in_sbs_minus_1[] should be ignored, which will be generated + * by driver based on tile_cols and tile_rows. + */ + uint8_t tile_cols; + uint8_t tile_rows; + + /* The width/height of a tile minus 1 in units of superblocks. Though the + * maximum number of tiles is 64, since ones of the last tile are computed + * from ones of the other tiles and frame_width/height, they are not + * necessarily specified. + */ + uint16_t width_in_sbs_minus_1[63]; + uint16_t height_in_sbs_minus_1[63]; + + /** \brief number of tiles minus 1 in large scale tile list + * Same as AV1 semantic element. + * Valid only when large_scale_tiles == 1. + */ + uint16_t tile_count_minus_1; + + /* specify the tile index for context updating */ + uint16_t context_update_tile_id; + + union + { + /** \brief flags for current picture + * same syntax and semantic as those in AV1 code + */ + struct + { + /** \brief Frame Type: + * 0: KEY_FRAME; + * 1: INTER_FRAME; + * 2: INTRA_ONLY_FRAME; + * 3: SWITCH_FRAME + * For SWITCH_FRAME, application shall set error_resilient_mode = 1, + * refresh_frame_flags, etc. appropriately. 
And driver will convert it + * to INTER_FRAME. + */ + uint32_t frame_type : 2; + uint32_t show_frame : 1; + uint32_t showable_frame : 1; + uint32_t error_resilient_mode : 1; + uint32_t disable_cdf_update : 1; + uint32_t allow_screen_content_tools : 1; + uint32_t force_integer_mv : 1; + uint32_t allow_intrabc : 1; + uint32_t use_superres : 1; + uint32_t allow_high_precision_mv : 1; + uint32_t is_motion_mode_switchable : 1; + uint32_t use_ref_frame_mvs : 1; + /* disable_frame_end_update_cdf is coded as refresh_frame_context. */ + uint32_t disable_frame_end_update_cdf : 1; + uint32_t uniform_tile_spacing_flag : 1; + uint32_t allow_warped_motion : 1; + /** \brief indicate if current frame in large scale tile mode */ + uint32_t large_scale_tile : 1; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved : 15; + } bits; + uint32_t value; + } pic_info_fields; + + /** \brief Super resolution scale denominator. + * When use_superres=1, superres_scale_denominator must be in the range [9..16]. + * When use_superres=0, superres_scale_denominator must be 8. + */ + uint8_t superres_scale_denominator; + + /** \brief Interpolation filter. + * value range [0..4] + */ + uint8_t interp_filter; + + /** \brief luma loop filter levels. + * value range [0..63]. + */ + uint8_t filter_level[2]; + + /** \brief chroma loop filter levels. + * value range [0..63]. + */ + uint8_t filter_level_u; + uint8_t filter_level_v; + + union + { + struct + { + /** \brief flags for reference pictures + * same syntax and semantic as those in AV1 code + */ + uint8_t sharpness_level : 3; + uint8_t mode_ref_delta_enabled : 1; + uint8_t mode_ref_delta_update : 1; + + /** \brief Reserved bytes for future use, must be zero */ + uint8_t reserved : 3; + } bits; + uint8_t value; + } loop_filter_info_fields; + + /** \brief The adjustment needed for the filter level based on + * the chosen reference frame. + * value range [-64..63]. 
+ */ + int8_t ref_deltas[8]; + + /** \brief The adjustment needed for the filter level based on + * the chosen mode. + * value range [-64..63]. + */ + int8_t mode_deltas[2]; + + /** \brief quantization + */ + /** \brief Y AC index + * value range [0..255] + */ + uint8_t base_qindex; + /** \brief Y DC delta from Y AC + * value range [-64..63] + */ + int8_t y_dc_delta_q; + /** \brief U DC delta from Y AC + * value range [-64..63] + */ + int8_t u_dc_delta_q; + /** \brief U AC delta from Y AC + * value range [-64..63] + */ + int8_t u_ac_delta_q; + /** \brief V DC delta from Y AC + * value range [-64..63] + */ + int8_t v_dc_delta_q; + /** \brief V AC delta from Y AC + * value range [-64..63] + */ + int8_t v_ac_delta_q; + + /** \brief quantization_matrix + */ + union + { + struct + { + uint16_t using_qmatrix : 1; + /** \brief qm level + * value range [0..15] + * Invalid if using_qmatrix equals 0. + */ + uint16_t qm_y : 4; + uint16_t qm_u : 4; + uint16_t qm_v : 4; + + /** \brief Reserved bytes for future use, must be zero */ + uint16_t reserved : 3; + } bits; + uint16_t value; + } qmatrix_fields; + + union + { + struct + { + /** \brief delta_q parameters + */ + uint32_t delta_q_present_flag : 1; + uint32_t log2_delta_q_res : 2; + + /** \brief delta_lf parameters + */ + uint32_t delta_lf_present_flag : 1; + uint32_t log2_delta_lf_res : 2; + + /** \brief CONFIG_LOOPFILTER_LEVEL + */ + uint32_t delta_lf_multi : 1; + + /** \brief read_tx_mode + * value range [0..2] + */ + uint32_t tx_mode : 2; + + /* AV1 frame reference mode semantic */ + uint32_t reference_select : 1; + + uint32_t reduced_tx_set_used : 1; + + uint32_t skip_mode_present : 1; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved : 20; + } bits; + uint32_t value; + } mode_control_fields; + + /** \brief CDEF parameters + */ + /* value range [0..3] */ + uint8_t cdef_damping_minus_3; + /* value range [0..3] */ + uint8_t cdef_bits; + + /** Encode cdef strength: + * + * The 
cdef_y_strengths[] and cdef_uv_strengths[] are expected to be packed + * with both primary and secondary strength. The secondary strength is + * given in the lower two bits and the primary strength is given in the next + * four bits. + * + * cdef_y_strengths[] & cdef_uv_strengths[] should be derived as: + * (cdef_y_strengths[]) = (cdef_y_pri_strength[] << 2) | (cdef_y_sec_strength[] & 0x03) + * (cdef_uv_strengths[]) = (cdef_uv_pri_strength[] << 2) | (cdef_uv_sec_strength[] & 0x03) + * In which, + * cdef_y_pri_strength[]/cdef_y_sec_strength[]/cdef_uv_pri_strength[]/cdef_uv_sec_strength[] are + * variables defined in AV1 Spec 5.9.19. The cdef_y_strengths[] & cdef_uv_strengths[] are + * corresponding to LIBAOM variables cm->cdef_strengths[] & cm->cdef_uv_strengths[] + * respectively. + */ + /* value range [0..63] */ + uint8_t cdef_y_strengths[8]; + /* value range [0..63] */ + uint8_t cdef_uv_strengths[8]; + + /** \brief loop restoration parameters + */ + union + { + struct + { + uint16_t yframe_restoration_type : 2; + uint16_t cbframe_restoration_type : 2; + uint16_t crframe_restoration_type : 2; + uint16_t lr_unit_shift : 2; + uint16_t lr_uv_shift : 1; + + /** \brief Reserved bytes for future use, must be zero */ + uint16_t reserved : 7; + } bits; + uint16_t value; + } loop_restoration_fields; + + /** \brief global motion + */ + RocdecAv1WarpedMotionParams wm[7]; + + /**@}*/ + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved[8]; +} RocdecAv1PicParams; + +/***********************************************************/ +//! \struct RocdecAv1SliceParams +//! \ingroup group_amd_rocdecode +//! AV1 slice parameter buffer +//! This structure is configured to be the same as VA-API VASliceParameterBufferAV1 structure. +//! This structure conveys parameters related to bit stream data and should be sent once per tile. +//! It uses the name RocdecAv1SliceParams to be consistent with other codec, but actually means +//! RocdecTileParameterAV1. 
Slice data buffer of VASliceDataBufferType is used to send the +//! bitstream. +/***********************************************************/ +typedef struct _RocdecAv1SliceParams +{ + /** \brief The byte count of current tile in the bitstream buffer, + * starting from first byte of the buffer. + * It uses the name slice_data_size to be consistent with other codec, + * but actually means tile_data_size. + */ + uint32_t slice_data_size; + /** + * offset to the first byte of the data buffer. + */ + uint32_t slice_data_offset; + /** + * see VA_SLICE_DATA_FLAG_XXX definitions + */ + uint32_t slice_data_flag; + + uint16_t tile_row; + uint16_t tile_column; + + uint16_t tg_start; + uint16_t tg_end; + + /** \brief anchor frame index for large scale tile. + * index into an array AnchorFrames of the frames that the tile uses + * for prediction. + * valid only when large_scale_tile equals 1. + */ + uint8_t anchor_frame_idx; + + /** \brief tile index in the tile list. + * Valid only when large_scale_tile is enabled. + * Driver uses this field to decide the tile output location. + */ + uint16_t tile_idx_in_tile_list; + + /** \brief Reserved bytes for future use, must be zero */ + uint32_t reserved[4]; +} RocdecAv1SliceParams; + +/******************************************************************************************/ +//! \struct _RocdecPicParams +//! \ingroup group_amd_rocdecode +//! Picture parameters for decoding +//! This structure is used in rocDecDecodePicture API +//! 
IN for rocDecDecodePicture +/******************************************************************************************/ +typedef struct _RocdecPicParams +{ + int pic_width; /**< IN: Coded frame width */ + int pic_height; /**< IN: Coded frame height */ + int curr_pic_idx; /**< IN: Output index of the current picture */ + int field_pic_flag; /**< IN: 0=frame picture, 1=field picture */ + int bottom_field_flag; /**< IN: 0=top field, 1=bottom field (ignored if field_pic_flag=0) */ + int second_field; /**< IN: Second field of a complementary field pair */ + // Bitstream data + uint32_t bitstream_data_len; /**< IN: Number of bytes in bitstream data buffer */ + const uint8_t* bitstream_data; /**< IN: Ptr to bitstream data for this picture (slice-layer) */ + uint32_t num_slices; /**< IN: Number of slices in this picture */ + + int ref_pic_flag; /**< IN: This picture is a reference picture */ + int intra_pic_flag; /**< IN: This picture is entirely intra coded */ + uint32_t reserved[30]; /**< Reserved for future use */ + + // IN: Codec-specific data + union + { + RocdecMpeg2PicParams mpeg2; /**< Also used for MPEG-1 */ + RocdecAvcPicParams avc; + RocdecHevcPicParams hevc; + RocdecVc1PicParams vc1; + RocdecJPEGPicParams jpeg; + RocdecVp9PicParams vp9; + RocdecAv1PicParams av1; + uint32_t codec_reserved[256]; + } pic_params; + + /*! \brief Variable size array. The user should allocate one slice param struct for each slice. + */ + union + { + // Todo: Add slice params defines for other codecs. + RocdecAvcSliceParams* avc; + RocdecHevcSliceParams* hevc; + RocdecVp9SliceParams* vp9; + RocdecAv1SliceParams* av1; + } slice_params; + + union + { + // Todo: Added IQ matrix defines for other codecs. + RocdecAvcIQMatrix avc; + RocdecHevcIQMatrix hevc; + } iq_matrix; +} RocdecPicParams; + +/******************************************************/ +//! \struct RocdecProcParams +//! \ingroup group_amd_rocdecode +//! Picture parameters for postprocessing +//! 
This structure is used in rocDecGetVideoFrame API +/******************************************************/ +typedef struct _RocdecProcParams +{ + int progressive_frame; /**< IN: Input is progressive (deinterlace_mode will be ignored) */ + int top_field_first; /**< IN: Input frame is top field first (1st field is top, 2nd field is + bottom) */ + uint32_t reserved_flags[2]; /**< Reserved for future use (set to zero) */ + + // The fields below are used for raw YUV input + uint64_t raw_input_dptr; /**< IN: Input HIP device ptr for raw YUV extensions */ + uint32_t raw_input_pitch; /**< IN: pitch in bytes of raw YUV input (should be aligned + appropriately) */ + uint32_t raw_input_format; /**< IN: Input YUV format (rocDecVideoCodec_enum) */ + uint64_t raw_output_dptr; /**< IN: Output HIP device mem ptr for raw YUV extensions */ + uint32_t raw_output_pitch; /**< IN: pitch in bytes of raw YUV output (should be aligned + appropriately) */ + uint32_t raw_output_format; /**< IN: Output YUV format (rocDecVideoCodec_enum) */ + uint32_t reserved[16]; /**< Reserved for future use (set to zero) */ +} RocdecProcParams; + +/*****************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecCreateDecoder(rocDecDecoderHandle *decoder_handle, +//! RocDecoderCreateInfo *decoder_create_info) \ingroup group_amd_rocdecode Create the decoder +//! object based on decoder_create_info. A handle to the created decoder is returned +/*****************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle, RocDecoderCreateInfo* decoder_create_info); + +/*****************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecDestroyDecoder(rocDecDecoderHandle decoder_handle) +//! \ingroup group_amd_rocdecode +//! 
Destroy the decoder object +/*****************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecDestroyDecoder(rocDecDecoderHandle decoder_handle); + +/**********************************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecGetDecoderCaps(RocdecDecodeCaps *decode_caps) +//! \ingroup group_amd_rocdecode +//! Queries decode capabilities of AMD's VCN decoder based on codec type, chroma_format and +//! BitDepthMinus8 parameters. +//! 1. Application fills IN parameters codec_type, chroma_format and BitDepthMinus8 of +//! RocdecDecodeCaps structure +//! 2. On calling rocDecGetDecoderCaps, driver fills OUT parameters (for GPU device) if the IN +//! parameters are supported +//! If IN parameters passed to the driver are not supported by AMD-VCN-HW, then all OUT params +//! are set to 0. +/**********************************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetDecoderCaps(RocdecDecodeCaps* decode_caps); + +/*****************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecDecodeFrame(rocDecDecoderHandle decoder_handle, RocdecPicParams +//! *pic_params) \ingroup group_amd_rocdecode Decodes a single picture. Submits the frame for HW +//! decoding. +/*****************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecDecodeFrame(rocDecDecoderHandle decoder_handle, RocdecPicParams* pic_params); + +/************************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecGetDecodeStatus(rocDecDecoderHandle decoder_handle, int +//! 
pic_idx, RocdecDecodeStatus* decode_status); \ingroup group_amd_rocdecode Get the decode status +//! for frame corresponding to pic_idx. API is currently supported for HEVC, AVC/H264 and JPEG +//! codecs. API returns ROCDEC_NOT_SUPPORTED error code for unsupported GPU or codec. +/************************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetDecodeStatus(rocDecDecoderHandle decoder_handle, + int pic_idx, + RocdecDecodeStatus* decode_status); + +/*********************************************************************************************************/ +//! \fn rocDecStatus ROCDECAPI rocDecReconfigureDecoder(rocDecDecoderHandle decoder_handle, +//! RocdecReconfigureDecoderInfo *reconfig_params) \ingroup group_amd_rocdecode Used to reuse single +//! decoder for multiple clips. Currently supports resolution change, resize params, target +//! area params change for same codec. Must be called during +//! RocdecParserParams::pfn_sequence_callback +/*********************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecReconfigureDecoder(rocDecDecoderHandle decoder_handle, + RocdecReconfigureDecoderInfo* reconfig_params); + +/************************************************************************************************************************/ +//! \fn extern rocDecStatus ROCDECAPI rocDecGetVideoFrame(rocDecDecoderHandle decoder_handle, int +//! pic_idx, +//! void *dev_mem_ptr[3], uint32_t *horizontal_pitch, +//! RocdecProcParams *vid_postproc_params); +//! \ingroup group_amd_rocdecode +//! Post-process and map video frame corresponding to pic_idx for use in HIP. Returns HIP device +//! pointer and associated pitch(horizontal stride) of the video frame. Returns device memory +//! pointers and pitch for each plane (Y, U and V) separately. horizontal_pitch is a pointer to an +//! 
unsigned 32-bit integer array of size 3. +/************************************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecGetVideoFrame(rocDecDecoderHandle decoder_handle, + int pic_idx, + void* dev_mem_ptr[3], + uint32_t* horizontal_pitch, + RocdecProcParams* vid_postproc_params); + +/*****************************************************************************************************/ +//! \fn const char* ROCDECAPI rocDecGetErrorName(rocDecStatus rocdec_status) +//! \ingroup group_amd_rocdecode +//! Return name of the specified error code in text form. +/*****************************************************************************************************/ +extern const char* ROCDECAPI +rocDecGetErrorName(rocDecStatus rocdec_status); + +#ifdef __cplusplus +} +#endif diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_api_trace.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_api_trace.h new file mode 100644 index 0000000000..a71a57cd43 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_api_trace.h @@ -0,0 +1,157 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once + +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() && __has_include() && __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +# include +# include +#else +# include +# include +# include +#endif + +// Define version macros for the rocDecode API dispatch table, specifying the MAJOR and STEP +// versions. +// +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! IMPORTANT !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// +// 1. When adding new functions to the rocDecode API dispatch table, always append the new function +// pointer +// to the end of the table and increment the dispatch table's version number. Never rearrange the +// order of the member variables in the dispatch table, as doing so will break the Application +// Binary Interface (ABI). +// 2. In critical situations where the type of an existing member variable in a dispatch table has +// been changed +// or removed due to a data type modification, it is important to increment the major version +// number of the rocDecode API dispatch table. If the function pointer type can no longer be +// declared, do not remove it. Instead, change the function pointer type to `void*` and ensure it +// is always initialized to `nullptr`. 
+// +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// + +// The major version number should ideally remain unchanged. Increment the +// ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION only for fundamental changes to the +// rocDecodeDispatchTable struct, such as altering the type or name of an existing member variable. +// Please DO NOT REMOVE it. +#define ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION 0 + +// Increment the ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION when new runtime API functions are added. +// If the corresponding ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION increases reset the +// ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION to zero. +#define ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION 1 + +// rocDecode API interface +typedef rocDecStatus(ROCDECAPI* PfnRocDecCreateVideoParser)(RocdecVideoParser* parser_handle, + RocdecParserParams* params); +typedef rocDecStatus(ROCDECAPI* PfnRocDecParseVideoData)(RocdecVideoParser parser_handle, + RocdecSourceDataPacket* packet); +typedef rocDecStatus(ROCDECAPI* PfnRocDecDestroyVideoParser)(RocdecVideoParser parser_handle); +typedef rocDecStatus(ROCDECAPI* PfnRocDecCreateDecoder)(rocDecDecoderHandle* decoder_handle, + RocDecoderCreateInfo* decoder_create_info); +typedef rocDecStatus(ROCDECAPI* PfnRocDecDestroyDecoder)(rocDecDecoderHandle decoder_handle); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetDecoderCaps)(RocdecDecodeCaps* decode_caps); +typedef rocDecStatus(ROCDECAPI* PfnRocDecDecodeFrame)(rocDecDecoderHandle decoder_handle, + RocdecPicParams* pic_params); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetDecodeStatus)(rocDecDecoderHandle decoder_handle, + int pic_idx, + RocdecDecodeStatus* decode_status); +typedef rocDecStatus(ROCDECAPI* PfnRocDecReconfigureDecoder)( + rocDecDecoderHandle decoder_handle, + RocdecReconfigureDecoderInfo* reconfig_params); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetVideoFrame)(rocDecDecoderHandle decoder_handle, + int pic_idx, + void* 
dev_mem_ptr[3], + uint32_t* horizontal_pitch, + RocdecProcParams* vid_postproc_params); +typedef const char*(ROCDECAPI* PfnRocDecGetErrorName)(rocDecStatus rocdec_status); +typedef rocDecStatus(ROCDECAPI* PfnRocDecCreateBitstreamReader)( + RocdecBitstreamReader* bs_reader_handle, + const char* input_file_path); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetBitstreamCodecType)( + RocdecBitstreamReader bs_reader_handle, + rocDecVideoCodec* codec_type); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetBitstreamBitDepth)( + RocdecBitstreamReader bs_reader_handle, + int* bit_depth); +typedef rocDecStatus(ROCDECAPI* PfnRocDecGetBitstreamPicData)( + RocdecBitstreamReader bs_reader_handle, + uint8_t** pic_data, + int* pic_size, + int64_t* pts); +typedef rocDecStatus(ROCDECAPI* PfnRocDecDestroyBitstreamReader)( + RocdecBitstreamReader bs_reader_handle); + +// rocDecode API dispatch table +struct RocDecodeDispatchTable +{ + // ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 0 + size_t size; + PfnRocDecCreateVideoParser pfn_rocdec_create_video_parser; + PfnRocDecParseVideoData pfn_rocdec_parse_video_data; + PfnRocDecDestroyVideoParser pfn_rocdec_destroy_video_parser; + PfnRocDecCreateDecoder pfn_rocdec_create_decoder; + PfnRocDecDestroyDecoder pfn_rocdec_destroy_decoder; + PfnRocDecGetDecoderCaps pfn_rocdec_get_gecoder_caps; + PfnRocDecDecodeFrame pfn_rocdec_decode_frame; + PfnRocDecGetDecodeStatus pfn_rocdec_get_decode_status; + PfnRocDecReconfigureDecoder pfn_rocdec_reconfigure_decoder; + PfnRocDecGetVideoFrame pfn_rocdec_get_video_frame; + PfnRocDecGetErrorName pfn_rocdec_get_error_name; + // PLEASE DO NOT EDIT ABOVE! 
+ // ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 1 + PfnRocDecCreateBitstreamReader pfn_rocdec_create_bitstream_reader; + PfnRocDecGetBitstreamCodecType pfn_rocdec_get_bitstream_codec_type; + PfnRocDecGetBitstreamBitDepth pfn_rocdec_get_bitstream_bit_depth; + PfnRocDecGetBitstreamPicData pfn_rocdec_get_bitstream_pic_data; + PfnRocDecDestroyBitstreamReader pfn_rocdec_destroy_bitstream_reader; + // PLEASE DO NOT EDIT ABOVE! + // ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 2 + + // ******************************************************************************************* + // // + // READ BELOW + // ******************************************************************************************* + // // Please keep this text at the end of the structure: + + // 1. Do not reorder any existing members. + // 2. Increase the step version definition before adding new members. + // 3. Insert new members under the appropriate step version comment. + // 4. Generate a comment for the next step version. + // 5. Add a "PLEASE DO NOT EDIT ABOVE!" comment. + // ******************************************************************************************* + // // +}; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_version.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_version.h new file mode 100644 index 0000000000..7a2a78fc88 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocdecode_version.h @@ -0,0 +1,60 @@ +/* +Copyright (c) 2024 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef ROCDECODE_VERSION_H +#define ROCDECODE_VERSION_H + +/*! 
+ * \file + * \brief rocDecode version + * \defgroup group_rocdecode_version rocDecode Version + * \brief rocDecode version + */ + +#ifdef __cplusplus +extern "C" { +#endif +/* NOTE: Match version with CMakeLists.txt */ +#define ROCDECODE_MAJOR_VERSION 0 +#define ROCDECODE_MINOR_VERSION 10 +#define ROCDECODE_MICRO_VERSION 0 + +/** + * ROCDECODE_CHECK_VERSION: + * @major: major version, like 1 in 1.2.3 + * @minor: minor version, like 2 in 1.2.3 + * @micro: micro version, like 3 in 1.2.3 + * + * Evaluates to %TRUE if the version of rocDecode is greater than or + * equal to @major.@minor.@micro (the micro component is compared with >=) + */ +#define ROCDECODE_CHECK_VERSION(major, minor, micro) \ + (ROCDECODE_MAJOR_VERSION > (major) || \ + (ROCDECODE_MAJOR_VERSION == (major) && ROCDECODE_MINOR_VERSION > (minor)) || \ + (ROCDECODE_MAJOR_VERSION == (major) && ROCDECODE_MINOR_VERSION == (minor) && \ + ROCDECODE_MICRO_VERSION >= (micro))) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocparser.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocparser.h new file mode 100644 index 0000000000..b2960393b9 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/details/rocparser.h @@ -0,0 +1,349 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +#else +# include +#endif + +/*! + * \file + * \brief The AMD rocParser Library. + * + * \defgroup group_rocparser rocDecode Parser: AMD ROCm Video Parser API + * \brief The AMD rocDecode video parser for AMD’s GPUs. + * \defgroup group_rocdec_struct rocDecode Parser Structs: AMD ROCm Video Parser Structs + * \brief The AMD rocDecode video parser structs + */ + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +/*********************************************************************************/ +//! Handle of the rocDecode video parser (RocdecVideoParser) +//! 
Used in subsequent API calls after rocDecCreateDecoder +/*********************************************************************************/ + +typedef void* RocdecVideoParser; +typedef uint64_t RocdecTimeStamp; + +/** + * @brief ROCDEC_VIDEO_FORMAT struct + * @ingroup group_rocdec_struct + * Used in Parser callback API + */ +typedef struct +{ + rocDecVideoCodec codec; /**< OUT: Compression format */ + /** + * OUT: frame rate = numerator / denominator (for example: 30000/1001) + */ + struct + { + /**< OUT: frame rate numerator (0 = unspecified or variable frame rate) */ + uint32_t numerator; + /**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */ + uint32_t denominator; + } frame_rate; + uint8_t progressive_sequence; /**< OUT: 0=interlaced, 1=progressive */ + uint8_t bit_depth_luma_minus8; /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for + 12-bitdepth */ + uint8_t bit_depth_chroma_minus8; /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for + 12-bitdepth */ + uint8_t min_num_decode_surfaces; /**< OUT: Minimum number of decode surfaces to be allocated for + correct decoding. The client can send this value in + num_decode_surfaces. This guarantees correct functionality + and optimal video memory usage but not necessarily the best + performance, which depends on the design of the overall + application. The optimal number of decode surfaces (in terms + of performance and memory utilization) should be decided by + experimentation for each application, but it cannot go below + min_num_decode_surfaces. If this value is used for + num_decode_surfaces then it must be returned to parser + during sequence callback. 
*/ + uint32_t coded_width; /**< OUT: coded frame width in pixels */ + uint32_t coded_height; /**< OUT: coded frame height in pixels */ + /** + * area of the frame that should be displayed + * typical example: + * coded_width = 1920, coded_height = 1088 + * display_area = { 0,0,1920,1080 } + */ + struct + { + int left; /**< OUT: left position of display rect */ + int top; /**< OUT: top position of display rect */ + int right; /**< OUT: right position of display rect */ + int bottom; /**< OUT: bottom position of display rect */ + } display_area; + + rocDecVideoChromaFormat chroma_format; /**< OUT: Chroma format */ + uint32_t bitrate; /**< OUT: video bitrate (bps, 0=unknown) */ + /** + * OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc) + */ + struct + { + int x; + int y; + } display_aspect_ratio; + /** + * Video Signal Description + * Refer section E.2.1 (VUI parameters semantics) of H264 spec file + */ + struct + { + uint8_t video_format : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, + 5-Unspecified */ + uint8_t video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma + range */ + uint8_t reserved_zero_bits : 4; /**< Reserved bits */ + uint8_t color_primaries; /**< OUT: chromaticity coordinates of source primaries */ + uint8_t transfer_characteristics; /**< OUT: opto-electronic transfer characteristic of the + source picture */ + uint8_t matrix_coefficients; /**< OUT: used in deriving luma and chroma signals from RGB + primaries */ + } video_signal_description; + uint32_t seqhdr_data_length; /**< OUT: Additional bytes following (RocdecVideoFormatEx) */ +} RocdecVideoFormat; + +/****************************************************************/ +//! \ingroup group_rocdec_struct +//! \struct RocdecVideoFormatEx +//! Video format including raw sequence header information +//! 
Used in rocDecCreateVideoParser API +/****************************************************************/ +typedef struct +{ + RocdecVideoFormat format; /**< OUT: RocdecVideoFormat structure */ + uint32_t max_width; + uint32_t max_height; + uint8_t raw_seqhdr_data[1024]; /**< OUT: Sequence header data */ +} RocdecVideoFormatEx; + +/***************************************************************/ +//! \enum RocdecVideoPacketFlags +//! Data packet flags +//! Used in RocdecSourceDataPacket structure +/***************************************************************/ +typedef enum +{ + ROCDEC_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */ + ROCDEC_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */ + ROCDEC_PKT_DISCONTINUITY = 0x04, /**< Set when a discontinuity has to be signalled */ + ROCDEC_PKT_ENDOFPICTURE = + 0x08, /**< Set when the packet contains exactly one frame or one field */ + ROCDEC_PKT_NOTIFY_EOS = + 0x10, /**< If this flag is set along with ROCDEC_PKT_ENDOFSTREAM, an additional (dummy) + display callback will be invoked with null value of ROCDECPARSERDISPINFO which + should be interpreted as end of the stream. */ +} RocdecVideoPacketFlags; + +/*****************************************************************************/ +//! \ingroup group_rocdec_struct +//! \struct RocdecSourceDataPacket +//! Data Packet +//! Used in rocDecParseVideoData API +//! 
IN for rocDecParseVideoData +/*****************************************************************************/ +typedef struct _RocdecSourceDataPacket +{ + uint32_t flags; /**< IN: Combination of ROCDEC_PKT_XXX flags */ + uint32_t + payload_size; /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */ + const uint8_t* + payload; /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */ + RocdecTimeStamp pts; /**< IN: Presentation time stamp (10MHz clock), only valid if + ROCDEC_PKT_TIMESTAMP flag is set */ +} RocdecSourceDataPacket; + +/**********************************************************************************/ +/*! \brief Timing Info struct + * \ingroup group_rocdec_struct + * \struct RocdecParserDispInfo + * \Used in rocdecParseVideoData API with PFNVIDDISPLAYCALLBACK pfn_display_picture + */ +/**********************************************************************************/ +typedef struct _RocdecParserDispInfo +{ + int picture_index; /**< OUT: Index of the current picture */ + int progressive_frame; /**< OUT: 1 if progressive frame; 0 otherwise */ + int top_field_first; /**< OUT: 1 if top field is displayed first; 0 otherwise */ + int repeat_first_field; /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame + tripling, -1=unpaired field) */ + RocdecTimeStamp pts; /**< OUT: Presentation time stamp */ +} RocdecParserDispInfo; + +/** + * @brief RocdecOperatingPointInfo struct + * @ingroup group_rocdec_struct + * Operating point information of scalable bitstream + */ +typedef struct _RocdecOperatingPointInfo +{ + rocDecVideoCodec codec; + union + { + struct + { + uint8_t operating_points_cnt; + uint8_t reserved24_bits[3]; + uint16_t operating_points_idc[32]; + } av1; + uint8_t codec_reserved[1024]; + }; +} RocdecOperatingPointInfo; + +/**********************************************************************************/ +//! \ingroup group_rocdec_struct +//! \struct RocdecSeiMessage; +//! 
Used in RocdecSeiMessageInfo structure +/**********************************************************************************/ +typedef struct _RocdecSeiMessage +{ + uint8_t sei_message_type; /**< OUT: SEI Message Type */ + uint8_t reserved[3]; + uint32_t sei_message_size; /**< OUT: SEI Message Size */ +} RocdecSeiMessage; + +/**********************************************************************************/ +//! \ingroup group_rocdec_struct +//! \struct RocdecSeiMessageInfo +//! Used in rocDecParseVideoData API with PFNVIDSEIMSGCALLBACK pfn_get_sei_msg +/**********************************************************************************/ +typedef struct _RocdecSeiMessageInfo +{ + void* sei_data; /**< OUT: SEI Message Data */ + RocdecSeiMessage* sei_message; /**< OUT: SEI Message Info */ + uint32_t sei_message_count; /**< OUT: SEI Message Count */ + uint32_t picIdx; /**< OUT: SEI Message Pic Index */ +} RocdecSeiMessageInfo; + +/** + * @brief Parser callbacks + * \ The parser will call these synchronously from within rocDecParseVideoData(), whenever there is + * sequence change or a picture \ is ready to be decoded and/or displayed. \ Return values from + * these callbacks are interpreted as below. If the callbacks return failure, it will be propagated + * by \ rocDecParseVideoData() to the application. \ Parser picks default operating point as 0 and + * outputAllLayers flag as 0 if PFNVIDOPPOINTCALLBACK is not set or return value is \ -1 or invalid + * operating point. 
\ PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of + * parser (set by RocdecParserParams::max_num_decode_surfaces \ while creating parser) \ + * PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded \ PFNVIDDISPLAYCALLBACK : 0: fail, >=1: + * succeeded \ PFNVIDOPPOINTCALLBACK : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit + * 10-10: outputAllLayers, bit 11-30: reserved) \ PFNVIDSEIMSGCALLBACK : 0: fail, >=1: succeeded + */ +typedef int(ROCDECAPI* PFNVIDSEQUENCECALLBACK)(void*, RocdecVideoFormat*); +typedef int(ROCDECAPI* PFNVIDDECODECALLBACK)(void*, RocdecPicParams*); +typedef int(ROCDECAPI* PFNVIDDISPLAYCALLBACK)(void*, RocdecParserDispInfo*); +// typedef int (ROCDECAPI *PFNVIDOPPOINTCALLBACK)(void *, RocdecOperatingPointInfo*); // +// reserved for future (AV1 specific) +typedef int(ROCDECAPI* PFNVIDSEIMSGCALLBACK)(void*, RocdecSeiMessageInfo*); + +/** + * \brief Parameters for creating a video parser. + * \ingroup group_rocdec_struct + * \Used in rocDecCreateVideoParser API + */ +typedef struct _RocdecParserParams +{ + rocDecVideoCodec codec_type; /**< IN: rocDecVideoCodec_XXX */ + uint32_t max_num_decode_surfaces; /**< IN: Max # of decode surfaces (parser will cycle through + these) */ + uint32_t clock_rate; /**< IN: Timestamp units in Hz (0=default=10000000Hz) */ + uint32_t error_threshold; /**< IN: % Error threshold (0-100) for calling pfn_decode_picture + (100=always call pfn_decode_picture even if picture bitstream + is fully corrupted) */ + uint32_t max_display_delay; /**< IN: Max display queue delay (improves pipelining of decode with + display) 0 = no delay (recommended values: 2..4) */ + uint32_t annex_b : 1; /**< IN: AV1 annexB stream */ + uint32_t reserved : 31; /**< Reserved for future use - set to zero */ + uint32_t reserved_1[4]; /**< IN: Reserved for future use - set to 0 */ + void* user_data; /**< IN: User data for callbacks */ + PFNVIDSEQUENCECALLBACK pfn_sequence_callback; /**< IN: Called before decoding frames and/or +
whenever there is a fmt change */ + PFNVIDDECODECALLBACK pfn_decode_picture; /**< IN: Called when a picture is ready to be decoded + (decode order) */ + PFNVIDDISPLAYCALLBACK pfn_display_picture; /**< IN: Called whenever a picture is ready to be + displayed (display order) */ + PFNVIDSEIMSGCALLBACK + pfn_get_sei_msg; /**< IN: Called when all SEI messages are parsed for particular frame */ + void* reserved_2[5]; /**< Reserved for future use - set to NULL */ + RocdecVideoFormatEx* + ext_video_info; /**< IN: [Optional] sequence header data from system layer */ +} RocdecParserParams; + +/************************************************************************************************/ +//! \ingroup group_rocparser +//! \fn rocDecStatus ROCDECAPI rocDecCreateVideoParser(RocdecVideoParser *parser_handle, +//! RocdecParserParams *params) Create video parser object and initialize +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecCreateVideoParser(RocdecVideoParser* parser_handle, RocdecParserParams* params); + +/************************************************************************************************/ +//! \ingroup group_rocparser +//! \fn rocDecStatus ROCDECAPI rocDecParseVideoData(RocdecVideoParser parser_handle, +//! RocdecSourceDataPacket *packet) Parse the video data from the source data packet. Extracts +//! parameter sets like SPS, PPS, bitstream etc. from packet and calls back pfn_decode_picture with +//! RocdecPicParams data for kicking off HW decoding; calls back pfn_sequence_callback with +//! RocdecVideoFormat data for initial sequence header or when the decoder encounters a video format +//!
change; calls back pfn_display_picture with RocdecParserDispInfo data to display a video frame +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecParseVideoData(RocdecVideoParser parser_handle, RocdecSourceDataPacket* packet); + +/************************************************************************************************/ +//! \ingroup group_rocparser +//! \fn rocDecStatus ROCDECAPI rocDecParserMarkFrameForReuse(RocdecVideoParser parser_handle, int +//! pic_idx) Mark frame with index pic_idx in parser's buffer pool for reuse (means the frame has +//! been consumed) +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecParserMarkFrameForReuse(RocdecVideoParser parser_handle, int pic_idx); + +/************************************************************************************************/ +//! \ingroup group_rocparser +//! \fn rocDecStatus ROCDECAPI rocDecDestroyVideoParser(RocdecVideoParser parser_handle) +//! Destroy the video parser object +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI +rocDecDestroyVideoParser(RocdecVideoParser parser_handle); + +#if defined(__cplusplus) +} +#endif /* __cplusplus */ diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/table_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/table_id.h new file mode 100644 index 0000000000..185d04e34b --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocdecode/table_id.h @@ -0,0 +1,31 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +// NOLINTNEXTLINE(performance-enum-size) +typedef enum +{ + ROCPROFILER_ROCDECODE_TABLE_ID_NONE = -1, + ROCPROFILER_ROCDECODE_TABLE_ID = 0, + ROCPROFILER_ROCDECODE_TABLE_ID_LAST, +} rocprofiler_rocdecode_table_id_t; diff --git a/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp b/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp index 27f943c774..1c3efbdd90 100644 --- a/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp @@ -163,5 +163,7 @@ using counter_records_buffered_output_t = using pc_sampling_host_trap_buffered_output_t = buffered_output; +using rocdecode_buffered_output_t = + buffered_output; } // namespace tool } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/output/domain_type.cpp b/projects/rocprofiler-sdk/source/lib/output/domain_type.cpp index f476189150..65934ea6b2 100644 --- a/projects/rocprofiler-sdk/source/lib/output/domain_type.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/domain_type.cpp @@ -61,6 +61,7 @@ DEFINE_BUFFER_TYPE_NAME(PC_SAMPLING_HOST_TRAP, "PC_SAMPLING_HOST_TRAP", "pc_sampling_host_trap", "pc_sampling_host_trap_stats") +DEFINE_BUFFER_TYPE_NAME(ROCDECODE, "ROCDECODE_API", "rocdecode_api_trace", "rocdecode_api_stats") #undef DEFINE_BUFFER_TYPE_NAME diff --git a/projects/rocprofiler-sdk/source/lib/output/domain_type.hpp b/projects/rocprofiler-sdk/source/lib/output/domain_type.hpp index 28b41b376a..c8a8c08b81 100644 --- a/projects/rocprofiler-sdk/source/lib/output/domain_type.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/domain_type.hpp @@ -37,6 +37,7 @@ enum class domain_type MEMORY_ALLOCATION, COUNTER_VALUES, PC_SAMPLING_HOST_TRAP, + ROCDECODE, LAST, }; diff --git a/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp b/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp index ad7080083a..2aa6ce91ab 100644 --- 
a/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp @@ -722,6 +722,48 @@ generate_csv(const output_config& cfg, } } +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) +{ + if(data.empty()) return; + + if(cfg.stats && stats) + write_stats(get_stats_output_file(cfg, domain_type::ROCDECODE), stats.entries); + + auto ofs = tool::csv_output_file{cfg, + domain_type::ROCDECODE, + tool::csv::api_csv_encoder{}, + {"Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + for(auto ditr : data) + { + for(auto record : data.get(ditr)) + { + auto row_ss = std::stringstream{}; + auto api_name = tool_metadata.get_operation_name(record.kind, record.operation); + rocprofiler::tool::csv::api_csv_encoder::write_row( + row_ss, + tool_metadata.get_kind_name(record.kind), + api_name, + tool_metadata.process_id, + record.thread_id, + record.correlation_id.internal, + record.start_timestamp, + record.end_timestamp); + + ofs << row_ss.str(); + } + } +} + void generate_csv(const output_config& cfg, const metadata& tool_metadata, diff --git a/projects/rocprofiler-sdk/source/lib/output/generateCSV.hpp b/projects/rocprofiler-sdk/source/lib/output/generateCSV.hpp index 9c03a6821a..ac57f9951e 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateCSV.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateCSV.hpp @@ -87,6 +87,12 @@ generate_csv(const output_config& cfg, const generator& data, const stats_entry_t& stats); +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); + void generate_csv(const output_config& cfg, const metadata& tool_metadata, diff --git a/projects/rocprofiler-sdk/source/lib/output/generateJSON.cpp b/projects/rocprofiler-sdk/source/lib/output/generateJSON.cpp index 
b2846f88ca..a6c7aa504e 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateJSON.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateJSON.cpp @@ -187,7 +187,8 @@ write_json(json_output& json_ar, generator scratch_memory_gen, generator rccl_api_gen, generator memory_allocation_gen, - generator pc_sampling_gen) + generator pc_sampling_gen, + generator rocdecode_api_gen) { // summary @@ -229,6 +230,7 @@ write_json(json_output& json_ar, json_ar(cereal::make_nvp("memory_allocation", memory_allocation_gen)); json_ar(cereal::make_nvp("scratch_memory", scratch_memory_gen)); json_ar(cereal::make_nvp("pc_sample_host_trap", pc_sampling_gen)); + json_ar(cereal::make_nvp("rocdecode_api", rocdecode_api_gen)); json_ar.finishNode(); } } diff --git a/projects/rocprofiler-sdk/source/lib/output/generateJSON.hpp b/projects/rocprofiler-sdk/source/lib/output/generateJSON.hpp index 93baa8c3fc..f253254763 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateJSON.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateJSON.hpp @@ -94,6 +94,8 @@ write_json(json_output& json generator scratch_memory_gen, generator rccl_api_gen, generator memory_allocation_gen, - generator pc_sampling_gen); + generator pc_sampling_gen, + generator rocdecode_api_gen); + } // namespace tool } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/output/generateOTF2.cpp b/projects/rocprofiler-sdk/source/lib/output/generateOTF2.cpp index 555bf15041..d926354fc0 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateOTF2.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateOTF2.cpp @@ -367,7 +367,8 @@ write_otf2( std::deque* marker_api_data, std::deque* /*scratch_memory_data*/, std::deque* rccl_api_data, - std::deque* memory_allocation_data) + std::deque* memory_allocation_data, + std::deque* rocdecode_api_data) { namespace sdk = ::rocprofiler::sdk; @@ -418,6 +419,8 @@ write_otf2( tids.emplace(itr.thread_id); for(auto itr : *rccl_api_data) 
tids.emplace(itr.thread_id); + for(auto itr : *rocdecode_api_data) + tids.emplace(itr.thread_id); for(auto itr : *memory_copy_data) { @@ -614,6 +617,7 @@ write_otf2( add_event_data(hip_api_data, sdk::category::hip_api{}); add_event_data(marker_api_data, sdk::category::marker_api{}); add_event_data(rccl_api_data, sdk::category::rccl_api{}); + add_event_data(rocdecode_api_data, sdk::category::rocdecode_api{}); } for(auto itr : *memory_copy_data) diff --git a/projects/rocprofiler-sdk/source/lib/output/generateOTF2.hpp b/projects/rocprofiler-sdk/source/lib/output/generateOTF2.hpp index 1b47b4c73b..dba7db1889 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateOTF2.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateOTF2.hpp @@ -46,6 +46,7 @@ write_otf2( std::deque* marker_api_data, std::deque* scratch_memory_data, std::deque* rccl_api_data, - std::deque* memory_allocation_data); + std::deque* memory_allocation_data, + std::deque* rocdecode_api_data); } // namespace tool } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.cpp b/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.cpp index 0cbfb01068..72a40b0319 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.cpp @@ -72,7 +72,8 @@ write_perfetto( const generator& marker_api_gen, const generator& /*scratch_memory_gen*/, const generator& rccl_api_gen, - const generator& memory_allocation_gen) + const generator& memory_allocation_gen, + const generator& rocdecode_api_gen) { namespace sdk = ::rocprofiler::sdk; @@ -168,6 +169,9 @@ write_perfetto( for(auto ditr : rccl_api_gen) for(auto itr : rccl_api_gen.get(ditr)) tids.emplace(itr.thread_id); + for(auto ditr : rocdecode_api_gen) + for(auto itr : rocdecode_api_gen.get(ditr)) + tids.emplace(itr.thread_id); for(auto ditr : memory_copy_gen) for(auto itr : memory_copy_gen.get(ditr)) @@ -399,6 +403,37 @@ 
write_perfetto( tracing_session->FlushBlocking(); } + for(auto ditr : rocdecode_api_gen) + for(auto itr : rocdecode_api_gen.get(ditr)) + { + auto name = buffer_names.at(itr.kind, itr.operation); + auto& track = thread_tracks.at(itr.thread_id); + + TRACE_EVENT_BEGIN(sdk::perfetto_category::name, + ::perfetto::StaticString(name.data()), + track, + itr.start_timestamp, + ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), + "begin_ns", + itr.start_timestamp, + "end_ns", + itr.end_timestamp, + "delta_ns", + (itr.end_timestamp - itr.start_timestamp), + "tid", + itr.thread_id, + "kind", + itr.kind, + "operation", + itr.operation, + "corr_id", + itr.correlation_id.internal); + TRACE_EVENT_END(sdk::perfetto_category::name, + track, + itr.end_timestamp); + tracing_session->FlushBlocking(); + } + for(auto ditr : memory_copy_gen) for(auto itr : memory_copy_gen.get(ditr)) { diff --git a/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.hpp b/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.hpp index 9074dd6b3b..d6ede1cd2d 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.hpp @@ -46,6 +46,7 @@ write_perfetto( const generator& marker_api_gen, const generator& scratch_memory_gen, const generator& rccl_api_gen, - const generator& memory_allocation_gen); + const generator& memory_allocation_gen, + const generator& rocdecode_api_gen); } // namespace tool } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/output/generateStats.cpp b/projects/rocprofiler-sdk/source/lib/output/generateStats.cpp index a8ab73cb3f..f01bed60b6 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateStats.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateStats.cpp @@ -228,6 +228,24 @@ generate_stats(const output_config& /*cfg*/, return get_stats(rccl_stats); } +stats_entry_t +generate_stats(const output_config& /*cfg*/, + const metadata& 
tool_metadata, + const generator& data) +{ + auto rocdecode_stats = stats_map_t{}; + for(auto ditr : data) + { + for(auto record : data.get(ditr)) + { + auto api_name = tool_metadata.get_operation_name(record.kind, record.operation); + rocdecode_stats[api_name] += (record.end_timestamp - record.start_timestamp); + } + } + + return get_stats(rocdecode_stats); +} + namespace { void diff --git a/projects/rocprofiler-sdk/source/lib/output/generateStats.hpp b/projects/rocprofiler-sdk/source/lib/output/generateStats.hpp index 7a025a9192..1df9855777 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateStats.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateStats.hpp @@ -75,6 +75,11 @@ generate_stats(const output_config& cfg, const metadata& tool_metadata, const generator& data); +stats_entry_t +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); + stats_entry_t generate_stats(const output_config& cfg, const metadata& tool_metadata, diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp index 6006343924..79e2b04f80 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp @@ -97,6 +97,7 @@ struct config : output_config bool hip_runtime_api_trace = get_env("ROCPROF_HIP_RUNTIME_API_TRACE", false); bool hip_compiler_api_trace = get_env("ROCPROF_HIP_COMPILER_API_TRACE", false); bool rccl_api_trace = get_env("ROCPROF_RCCL_API_TRACE", false); + bool rocdecode_api_trace = get_env("ROCPROF_ROCDECODE_API_TRACE", false); bool list_metrics = get_env("ROCPROF_LIST_METRICS", false); bool list_metrics_output_file = get_env("ROCPROF_OUTPUT_LIST_METRICS_FILE", false); bool pc_sampling_host_trap = false; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp 
b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp index 35cc4c4f33..f953039e02 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -139,18 +139,20 @@ struct buffer_ids rocprofiler_buffer_id_t scratch_memory = {}; rocprofiler_buffer_id_t rccl_api_trace = {}; rocprofiler_buffer_id_t pc_sampling_host_trap = {}; + rocprofiler_buffer_id_t rocdecode_api_trace = {}; auto as_array() const { - return std::array{hsa_api_trace, - hip_api_trace, - kernel_trace, - memory_copy_trace, - memory_allocation_trace, - counter_collection, - scratch_memory, - rccl_api_trace, - pc_sampling_host_trap}; + return std::array{hsa_api_trace, + hip_api_trace, + kernel_trace, + memory_copy_trace, + memory_allocation_trace, + counter_collection, + scratch_memory, + rccl_api_trace, + pc_sampling_host_trap, + rocdecode_api_trace}; } }; @@ -742,6 +744,13 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, tool::write_ring_buffer(*record, domain_type::RCCL); } + else if(header->kind == ROCPROFILER_BUFFER_TRACING_ROCDECODE_API) + { + auto* record = static_cast( + header->payload); + + tool::write_ring_buffer(*record, domain_type::ROCDECODE); + } else { ROCP_FATAL << fmt::format( @@ -1267,6 +1276,26 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) "Could not setup counting service"); } + if(tool::get_config().rocdecode_api_trace) + { + ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), + buffer_size, + buffer_watermark, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + buffered_tracing_callback, + tool_data, + &get_buffers().rocdecode_api_trace), + "buffer creation"); + + ROCPROFILER_CALL( + rocprofiler_configure_buffer_tracing_service(get_client_ctx(), + ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, + nullptr, + 0, + get_buffers().rocdecode_api_trace), + "buffer tracing service for ROCDecode api configure"); + } + if(tool::get_config().kernel_rename) 
{ auto rename_ctx = rocprofiler_context_id_t{0}; @@ -1441,6 +1470,8 @@ tool_fini(void* /*tool_data*/) tool::memory_allocation_buffered_output_t{tool::get_config().memory_allocation_trace}; auto counters_records_output = tool::counter_records_buffered_output_t{tool::get_config().counter_collection}; + auto rocdecode_output = + tool::rocdecode_buffered_output_t{tool::get_config().rocdecode_api_trace}; auto pc_sampling_host_trap_output = tool::pc_sampling_host_trap_buffered_output_t{tool::get_config().pc_sampling_host_trap}; @@ -1465,6 +1496,7 @@ tool_fini(void* /*tool_data*/) generate_output(rccl_output, contributions); generate_output(counters_output, contributions); generate_output(scratch_memory_output, contributions); + generate_output(rocdecode_output, contributions); generate_output(pc_sampling_host_trap_output, contributions); if(tool::get_config().stats && tool::get_config().csv_output) @@ -1491,7 +1523,8 @@ tool_fini(void* /*tool_data*/) scratch_memory_output.get_generator(), rccl_output.get_generator(), memory_allocation_output.get_generator(), - pc_sampling_host_trap_output.get_generator()); + pc_sampling_host_trap_output.get_generator(), + rocdecode_output.get_generator()); json_ar.finish_process(); tool::close_json(json_ar); @@ -1509,7 +1542,8 @@ tool_fini(void* /*tool_data*/) marker_output.get_generator(), scratch_memory_output.get_generator(), rccl_output.get_generator(), - memory_allocation_output.get_generator()); + memory_allocation_output.get_generator(), + rocdecode_output.get_generator()); } if(tool::get_config().otf2_output) @@ -1522,6 +1556,7 @@ tool_fini(void* /*tool_data*/) auto scratch_memory_elem_data = scratch_memory_output.load_all(); auto rccl_elem_data = rccl_output.load_all(); auto memory_allocation_elem_data = memory_allocation_output.load_all(); + auto rocdecode_elem_data = rocdecode_output.load_all(); tool::write_otf2(tool::get_config(), *tool_metadata, @@ -1534,7 +1569,8 @@ tool_fini(void* /*tool_data*/) &marker_elem_data, 
&scratch_memory_elem_data, &rccl_elem_data, - &memory_allocation_elem_data); + &memory_allocation_elem_data, + &rocdecode_elem_data); } if(tool::get_config().summary_output) @@ -1554,6 +1590,7 @@ tool_fini(void* /*tool_data*/) destroy_output(scratch_memory_output); destroy_output(rccl_output); destroy_output(counters_records_output); + destroy_output(rocdecode_output); destroy_output(pc_sampling_host_trap_output); if(destructors) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt index af6e69e157..880e8f45fc 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt @@ -52,6 +52,7 @@ add_subdirectory(tracing) add_subdirectory(kernel_dispatch) add_subdirectory(page_migration) add_subdirectory(rccl) +add_subdirectory(rocdecode) add_subdirectory(details) add_subdirectory(ompt) @@ -61,6 +62,7 @@ target_link_libraries( rocprofiler-sdk::rocprofiler-sdk-hip-nolink rocprofiler-sdk::rocprofiler-sdk-hsa-runtime-nolink rocprofiler-sdk::rocprofiler-sdk-rccl-nolink + rocprofiler-sdk::rocprofiler-sdk-rocdecode-nolink PRIVATE rocprofiler-sdk::rocprofiler-sdk-build-flags rocprofiler-sdk::rocprofiler-sdk-memcheck rocprofiler-sdk::rocprofiler-sdk-common-library diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp index 1dad1a567a..01ad9b885f 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp @@ -34,6 +34,7 @@ #include "lib/rocprofiler-sdk/page_migration/page_migration.hpp" #include "lib/rocprofiler-sdk/rccl/rccl.hpp" #include "lib/rocprofiler-sdk/registration.hpp" +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" #include "lib/rocprofiler-sdk/runtime_initialization.hpp" #include 
@@ -41,6 +42,7 @@ #include #include #include +#include #include #include @@ -91,6 +93,7 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(CORRELATION_ID_RETIREMENT) ROCPROFILER_BUFFER_TRACING_KIND_STRING(RCCL_API) ROCPROFILER_BUFFER_TRACING_KIND_STRING(OMPT) ROCPROFILER_BUFFER_TRACING_KIND_STRING(RUNTIME_INITIALIZATION) +ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCDECODE_API) template std::pair @@ -288,6 +291,11 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_ { return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; } + case ROCPROFILER_BUFFER_TRACING_ROCDECODE_API: + { + val = rocprofiler::rocdecode::name_by_id(operation); + break; + } }; if(!val) @@ -419,6 +427,11 @@ rocprofiler_iterate_buffer_tracing_kind_operations( { return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; } + case ROCPROFILER_BUFFER_TRACING_ROCDECODE_API: + { + ops = rocprofiler::rocdecode::get_ids(); + break; + } } for(const auto& itr : ops) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp index f3f65a99f4..713535216a 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp @@ -33,6 +33,7 @@ #include "lib/rocprofiler-sdk/ompt/ompt.hpp" #include "lib/rocprofiler-sdk/rccl/rccl.hpp" #include "lib/rocprofiler-sdk/registration.hpp" +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" #include "lib/rocprofiler-sdk/runtime_initialization.hpp" #include @@ -41,6 +42,7 @@ #include #include #include +#include #include #include @@ -88,6 +90,7 @@ ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MEMORY_ALLOCATION) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(RCCL_API) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(OMPT) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(RUNTIME_INITIALIZATION) +ROCPROFILER_CALLBACK_TRACING_KIND_STRING(ROCDECODE_API) template std::pair @@ -269,6 +272,12 @@ 
rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_trac case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION: { val = rocprofiler::runtime_init::name_by_id(operation); + break; + } + case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API: + { + val = rocprofiler::rocdecode::name_by_id(operation); + break; } }; @@ -397,6 +406,12 @@ rocprofiler_iterate_callback_tracing_kind_operations( case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION: { ops = rocprofiler::runtime_init::get_ids(); + break; + } + case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API: + { + ops = rocprofiler::rocdecode::get_ids(); + break; } }; @@ -539,6 +554,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args( case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY: case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION: case ROCPROFILER_CALLBACK_TRACING_RCCL_API: + case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API: case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION: { return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/intercept_table.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/intercept_table.cpp index b3507e1519..8bee97c169 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/intercept_table.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/intercept_table.cpp @@ -34,6 +34,7 @@ #include #include #include "lib/rocprofiler-sdk/rccl/rccl.hpp" +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" #include #include @@ -57,7 +58,8 @@ constexpr auto intercept_library_seq = library_sequence_t{}; + ROCPROFILER_RCCL_TABLE, + ROCPROFILER_ROCDECODE_TABLE>{}; // check that intercept_library_seq is up to date static_assert((1 << (intercept_library_seq.size() - 1)) == ROCPROFILER_TABLE_LAST, @@ -192,6 +194,11 @@ template void notify_intercept_table_registration(rocprofiler_intercept_table_t, uint64_t, uint64_t, std::tuple); + +template void 
notify_intercept_table_registration(rocprofiler_intercept_table_t, + uint64_t, + uint64_t, + std::tuple); } // namespace intercept_table } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp index af9c389c38..a60c3a947b 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp @@ -122,7 +122,8 @@ constexpr auto creation_notifier_library_seq = library_sequence_t{}; + ROCPROFILER_RCCL_LIBRARY, + ROCPROFILER_ROCDECODE_LIBRARY>{}; // check that creation_notifier_library_seq is up to date static_assert((1 << (creation_notifier_library_seq.size() - 1)) == ROCPROFILER_LIBRARY_LAST, diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp index 76988c68ee..fb99e63c8e 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp @@ -46,6 +46,7 @@ #include "lib/rocprofiler-sdk/pc_sampling/code_object.hpp" #include "lib/rocprofiler-sdk/pc_sampling/service.hpp" #include "lib/rocprofiler-sdk/rccl/rccl.hpp" +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" #include "lib/rocprofiler-sdk/runtime_initialization.hpp" #include @@ -908,6 +909,30 @@ rocprofiler_set_api_table(const char* name, rocprofiler::intercept_table::notify_intercept_table_registration( ROCPROFILER_RCCL_TABLE, lib_version, lib_instance, std::make_tuple(rccl_api)); } + else if(std::string_view{name} == "rocdecode") + { + // pass to rocdecode init + ROCP_ERROR_IF(num_tables > 1) + << "rocprofiler expected ROCDecode library to pass 1 API table, not " << num_tables; + + auto* rocdecode_api = static_cast(tables[0]); + + // any internal modifications to the rocdecodeApiFuncTable 
need to be done before we make + // the copy or else those modifications will be lost when ROCDecode API tracing is enabled + // because the ROCDecode API tracing invokes the function pointers from the copy below + rocprofiler::rocdecode::copy_table(rocdecode_api, lib_instance); + + // install rocprofiler API wrappers + rocprofiler::rocdecode::update_table(rocdecode_api); + + // Tracing notifications the runtime has initialized + rocprofiler::runtime_init::initialize( + ROCPROFILER_RUNTIME_INITIALIZATION_ROCDECODE, lib_version, lib_instance); + + // allow tools to install API wrappers + rocprofiler::intercept_table::notify_intercept_table_registration( + ROCPROFILER_ROCDECODE_TABLE, lib_version, lib_instance, std::make_tuple(rocdecode_api)); + } else { ROCP_ERROR << "rocprofiler does not accept API tables from " << name; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/CMakeLists.txt new file mode 100644 index 0000000000..3eeba36b33 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/CMakeLists.txt @@ -0,0 +1,6 @@ +set(ROCPROFILER_LIB_ROCDECODE_SOURCES abi.cpp rocdecode.cpp) +set(ROCPROFILER_LIB_ROCDECODE_HEADERS defines.hpp rocdecode.hpp) + +target_sources( + rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_ROCDECODE_SOURCES} + ${ROCPROFILER_LIB_ROCDECODE_HEADERS}) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/abi.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/abi.cpp new file mode 100644 index 0000000000..8579c2b9b2 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/abi.cpp @@ -0,0 +1,67 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" + +#include "lib/common/abi.hpp" +#include "lib/common/defines.hpp" + +#include +#include + +namespace rocprofiler +{ +namespace rocdecode +{ +static_assert(ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION == 0, + "Major version updated for ROCDecode dispatch table"); + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 0 +ROCP_SDK_ENFORCE_ABI_VERSIONING(::RocDecodeDispatchTable, 11); +#endif + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 1 +ROCP_SDK_ENFORCE_ABI_VERSIONING(::RocDecodeDispatchTable, 16); +#endif + +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_create_video_parser, 0) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_parse_video_data, 1) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_destroy_video_parser, 2) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_create_decoder, 3) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_destroy_decoder, 4) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_gecoder_caps, 5) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_decode_frame, 6) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_decode_status, 7) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_reconfigure_decoder, 8) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_video_frame, 9) +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_error_name, 10) + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1 +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_create_bitstream_reader, 11); +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_bitstream_codec_type, 12); +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_bitstream_bit_depth, 13); +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_bitstream_pic_data, 14); +ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_destroy_bitstream_reader, 15); +#endif + +} // namespace rocdecode +} // 
namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/defines.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/defines.hpp new file mode 100644 index 0000000000..f45885d51e --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/defines.hpp @@ -0,0 +1,216 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#include "lib/common/defines.hpp" + +#define ROCDECODE_API_INFO_DEFINITION_0( \ + ROCDECODE_TABLE, ROCDECODE_API_ID, ROCDECODE_FUNC, ROCDECODE_FUNC_PTR) \ + namespace rocprofiler \ + { \ + namespace rocdecode \ + { \ + template <> \ + struct rocdecode_api_info \ + : rocdecode_domain_info \ + { \ + static constexpr auto table_idx = ROCDECODE_TABLE; \ + static constexpr auto operation_idx = ROCDECODE_API_ID; \ + static constexpr auto name = #ROCDECODE_FUNC; \ + \ + using domain_type = rocdecode_domain_info; \ + using this_type = rocdecode_api_info; \ + using base_type = rocdecode_api_impl; \ + \ + using domain_type::callback_domain_idx; \ + using domain_type::buffered_domain_idx; \ + using domain_type::args_type; \ + using domain_type::retval_type; \ + using domain_type::callback_data_type; \ + \ + static constexpr auto offset() \ + { \ + return offsetof(rocdecode_table_lookup::type, ROCDECODE_FUNC_PTR); \ + } \ + \ + static_assert(offsetof(rocdecode_table_lookup::type, ROCDECODE_FUNC_PTR) == \ + (sizeof(size_t) + (operation_idx * sizeof(void*))), \ + "ABI error for " #ROCDECODE_FUNC); \ + \ + static auto& get_table() { return rocdecode_table_lookup{}(); } \ + \ + template \ + static auto& get_table(TableT& _v) \ + { \ + return rocdecode_table_lookup{}(_v); \ + } \ + \ + template \ + static auto& get_table_func(TableT& _table) \ + { \ + if constexpr(std::is_pointer::value) \ + { \ + assert(_table != nullptr && "nullptr to MARKER table for " #ROCDECODE_FUNC \ + " function"); \ + return _table->ROCDECODE_FUNC_PTR; \ + } \ + else \ + { \ + return _table.ROCDECODE_FUNC_PTR; \ + } \ + } \ + \ + static auto& get_table_func() { return get_table_func(get_table()); } \ + \ + template \ + static auto& get_api_data_args(DataT& _data) \ + { \ + return _data.ROCDECODE_FUNC; \ + } \ + \ + template \ + static auto get_functor(RetT (*)(Args...)) \ + { \ + return &base_type::functor; \ + } \ + \ + static std::vector as_arg_addr(callback_data_type) { return 
std::vector{}; } \ + \ + static std::vector as_arg_list(callback_data_type, int32_t) \ + { \ + return {}; \ + } \ + }; \ + } \ + } + +#define ROCDECODE_API_INFO_DEFINITION_V( \ + ROCDECODE_TABLE, ROCDECODE_API_ID, ROCDECODE_FUNC, ROCDECODE_FUNC_PTR, ...) \ + namespace rocprofiler \ + { \ + namespace rocdecode \ + { \ + template <> \ + struct rocdecode_api_info \ + : rocdecode_domain_info \ + { \ + static constexpr auto table_idx = ROCDECODE_TABLE; \ + static constexpr auto operation_idx = ROCDECODE_API_ID; \ + static constexpr auto name = #ROCDECODE_FUNC; \ + \ + using domain_type = rocdecode_domain_info; \ + using this_type = rocdecode_api_info; \ + using base_type = rocdecode_api_impl; \ + \ + static constexpr auto callback_domain_idx = domain_type::callback_domain_idx; \ + static constexpr auto buffered_domain_idx = domain_type::buffered_domain_idx; \ + \ + using domain_type::args_type; \ + using domain_type::retval_type; \ + using domain_type::callback_data_type; \ + \ + static constexpr auto offset() \ + { \ + return offsetof(rocdecode_table_lookup::type, ROCDECODE_FUNC_PTR); \ + } \ + \ + static_assert(offsetof(rocdecode_table_lookup::type, ROCDECODE_FUNC_PTR) == \ + (sizeof(size_t) + (operation_idx * sizeof(void*))), \ + "ABI error for " #ROCDECODE_FUNC); \ + \ + static auto& get_table() { return rocdecode_table_lookup{}(); } \ + \ + template \ + static auto& get_table(TableT& _v) \ + { \ + return rocdecode_table_lookup{}(_v); \ + } \ + \ + template \ + static auto& get_table_func(TableT& _table) \ + { \ + if constexpr(std::is_pointer::value) \ + { \ + assert(_table != nullptr && "nullptr to MARKER table for " #ROCDECODE_FUNC \ + " function"); \ + return _table->ROCDECODE_FUNC_PTR; \ + } \ + else \ + { \ + return _table.ROCDECODE_FUNC_PTR; \ + } \ + } \ + \ + static auto& get_table_func() { return get_table_func(get_table()); } \ + \ + template \ + static auto& get_api_data_args(DataT& _data) \ + { \ + return _data.ROCDECODE_FUNC; \ + } \ + \ + template \ + 
static auto get_functor(RetT (*)(Args...)) \ + { \ + return &base_type::functor; \ + } \ + \ + static std::vector as_arg_addr(callback_data_type trace_data) \ + { \ + return std::vector{ \ + GET_ADDR_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)}; \ + } \ + }; \ + } \ + } + +#define ROCDECODE_API_TABLE_LOOKUP_DEFINITION(TABLE_ID, TYPE) \ + namespace rocprofiler \ + { \ + namespace rocdecode \ + { \ + namespace \ + { \ + template <> \ + auto* get_table() \ + { \ + return get_table_impl(); \ + } \ + } \ + \ + template <> \ + struct rocdecode_table_lookup \ + { \ + using type = TYPE; \ + auto& operator()(type& _v) const { return _v; } \ + auto& operator()(type* _v) const { return *_v; } \ + auto& operator()() const { return (*this)(get_table()); } \ + }; \ + \ + template <> \ + struct rocdecode_table_id_lookup \ + { \ + static constexpr auto value = TABLE_ID; \ + }; \ + } \ + } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.cpp new file mode 100644 index 0000000000..38b1e9bfec --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.cpp @@ -0,0 +1,560 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" +#include "lib/common/defines.hpp" +#include "lib/common/static_object.hpp" +#include "lib/rocprofiler-sdk/buffer.hpp" +#include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/hip/hip.hpp" +#include "lib/rocprofiler-sdk/hip/utils.hpp" +#include "lib/rocprofiler-sdk/registration.hpp" +#include "lib/rocprofiler-sdk/tracing/tracing.hpp" + +#include +#include +#include +#include + +#include +#include +// must be included after runtime api +#include + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace rocdecode +{ +namespace +{ +struct null_type +{}; + +template +auto +get_default_retval() +{ + if constexpr(std::is_pointer::value) + { + Tp v = nullptr; + return v; + } + else if constexpr(std::is_same::value) + return ROCDEC_RUNTIME_ERROR; + else if constexpr(std::is_same::value) + return "UnknownString"; + else + static_assert(std::is_empty::value, "Error! unsupported return type"); +} + +template +void +set_data_retval(DataT& _data, Tp _val) +{ + if constexpr(std::is_same::value) + { + _data.rocDecStatus_retval = _val; + } + else if constexpr(std::is_same::value) + { + _data.const_charp_retval = _val; + } + else + { + static_assert(std::is_empty::value, "Error! 
unsupported return type"); + } +} + +template +Tp* +get_table_impl() +{ + static auto*& _v = common::static_object::construct(common::init_public_api_struct(Tp{})); + return _v; +} + +template +auto* +get_table(); + +} // namespace + +template +template +auto +rocdecode_api_impl::set_data_args(DataArgsT& _data_args, Args... args) +{ + if constexpr(sizeof...(Args) == 0) + _data_args.no_args.empty = '\0'; + else + _data_args = DataArgsT{args...}; +} + +template +template +auto +rocdecode_api_impl::exec(FuncT&& _func, Args&&... args) +{ + using return_type = std::decay_t>; + + if(_func) + { + if constexpr(std::is_void::value) + { + _func(std::forward(args)...); + return null_type{}; + } + else + { + return _func(std::forward(args)...); + } + } + + using info_type = rocdecode_api_info; + ROCP_ERROR << "nullptr to next rocdecode function for " << info_type::name << " (" + << info_type::operation_idx << ")"; + + return get_default_retval(); +} + +template +template +RetT +rocdecode_api_impl::functor(Args... 
args) +{ + using info_type = rocdecode_api_info; + using callback_api_data_t = typename rocdecode_domain_info::callback_data_type; + using buffered_api_data_t = typename rocdecode_domain_info::buffer_data_type; + + constexpr auto external_corr_id_domain_idx = + rocdecode_domain_info::external_correlation_id_domain_idx; + + if(registration::get_fini_status() != 0) + { + [[maybe_unused]] auto _ret = exec(info_type::get_table_func(), std::forward(args)...); + if constexpr(!std::is_void::value) + return _ret; + else + return; + } + + constexpr auto ref_count = 2; + auto thr_id = common::get_tid(); + auto callback_contexts = tracing::callback_context_data_vec_t{}; + auto buffered_contexts = tracing::buffered_context_data_vec_t{}; + auto external_corr_ids = tracing::external_correlation_id_map_t{}; + + tracing::populate_contexts(info_type::callback_domain_idx, + info_type::buffered_domain_idx, + info_type::operation_idx, + callback_contexts, + buffered_contexts, + external_corr_ids); + + if(callback_contexts.empty() && buffered_contexts.empty()) + { + [[maybe_unused]] auto _ret = exec(info_type::get_table_func(), std::forward(args)...); + if constexpr(!std::is_void::value) + return _ret; + else + return; + } + + auto buffer_record = common::init_public_api_struct(buffered_api_data_t{}); + auto tracer_data = common::init_public_api_struct(callback_api_data_t{}); + auto* corr_id = tracing::correlation_service::construct(ref_count); + auto internal_corr_id = corr_id->internal; + + tracing::populate_external_correlation_ids(external_corr_ids, + thr_id, + external_corr_id_domain_idx, + info_type::operation_idx, + internal_corr_id); + + // invoke the callbacks + if(!callback_contexts.empty()) + { + set_data_args(info_type::get_api_data_args(tracer_data.args), std::forward(args)...); + + tracing::execute_phase_enter_callbacks(callback_contexts, + thr_id, + internal_corr_id, + external_corr_ids, + info_type::callback_domain_idx, + info_type::operation_idx, + tracer_data); + } + 
+ // enter callback may update the external correlation id field + tracing::update_external_correlation_ids( + external_corr_ids, thr_id, external_corr_id_domain_idx); + + // record the start timestamp as close to the function call as possible + if(!buffered_contexts.empty()) + { + buffer_record.start_timestamp = common::timestamp_ns(); + } + + // decrement the reference count before invoking + corr_id->sub_ref_count(); + + auto _ret = exec(info_type::get_table_func(), std::forward(args)...); + + // record the end timestamp as close to the function call as possible + if(!buffered_contexts.empty()) + { + buffer_record.end_timestamp = common::timestamp_ns(); + } + + if(!callback_contexts.empty()) + { + set_data_retval(tracer_data.retval, _ret); + + tracing::execute_phase_exit_callbacks(callback_contexts, + external_corr_ids, + info_type::callback_domain_idx, + info_type::operation_idx, + tracer_data); + } + + if(!buffered_contexts.empty()) + { + tracing::execute_buffer_record_emplace(buffered_contexts, + thr_id, + internal_corr_id, + external_corr_ids, + info_type::buffered_domain_idx, + info_type::operation_idx, + buffer_record); + } + + // decrement the reference count after usage in the callback/buffers + corr_id->sub_ref_count(); + + context::pop_latest_correlation_id(corr_id); + + if constexpr(!std::is_void::value) return _ret; +} +} // namespace rocdecode +} // namespace rocprofiler + +#define ROCPROFILER_LIB_ROCPROFILER_SDK_ROCDECODE_ROCDECODE_CPP_IMPL 1 + +// template specializations +#include "rocdecode.def.cpp" + +namespace rocprofiler +{ +namespace rocdecode +{ +namespace +{ +template +const char* +name_by_id(const uint32_t id, std::index_sequence) +{ + if(OpIdx == id) return rocdecode_api_info::name; + + if constexpr(sizeof...(OpIdxTail) > 0) + return name_by_id(id, std::index_sequence{}); + else + return nullptr; +} + +template +uint32_t +id_by_name(const char* name, std::index_sequence) +{ + if(std::string_view{rocdecode_api_info::name} == 
std::string_view{name}) + return rocdecode_api_info::operation_idx; + + if constexpr(sizeof...(OpIdxTail) > 0) + return id_by_name(name, std::index_sequence{}); + else + return rocdecode_domain_info::none; +} + +template +void +get_ids(std::vector& _id_list, std::index_sequence) +{ + auto _idx = rocdecode_api_info::operation_idx; + if(_idx < rocdecode_domain_info::last) _id_list.emplace_back(_idx); + + if constexpr(sizeof...(OpIdxTail) > 0) + get_ids(_id_list, std::index_sequence{}); +} + +template +void +get_names(std::vector& _name_list, std::index_sequence) +{ + auto&& _name = rocdecode_api_info::name; + if(_name != nullptr && strnlen(_name, 1) > 0) _name_list.emplace_back(_name); + + if constexpr(sizeof...(OpIdxTail) > 0) + get_names(_name_list, std::index_sequence{}); +} + +template +void +iterate_args(const uint32_t id, + const DataT& data, + rocprofiler_callback_tracing_operation_args_cb_t func, + int32_t max_deref, + void* user_data, + std::index_sequence) +{ + if(OpIdx == id) + { + using info_type = rocdecode_api_info; + auto&& arg_list = info_type::as_arg_list(data, max_deref); + auto&& arg_addr = info_type::as_arg_addr(data); + for(size_t i = 0; i < std::min(arg_list.size(), arg_addr.size()); ++i) + { + auto ret = func(info_type::callback_domain_idx, // kind + id, // operation + i, // arg_number + arg_addr.at(i), // arg_value_addr + arg_list.at(i).indirection_level, // indirection + arg_list.at(i).type, // arg_type + arg_list.at(i).name, // arg_name + arg_list.at(i).value.c_str(), // arg_value_str + arg_list.at(i).dereference_count, // num deref in str + user_data); + if(ret != 0) break; + } + return; + } + if constexpr(sizeof...(OpIdxTail) > 0) + iterate_args( + id, data, func, max_deref, user_data, std::index_sequence{}); +} + +bool +should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain, + rocprofiler_buffer_tracing_kind_t _buffered_domain, + int _operation) +{ + // we loop over all the *registered* contexts and see if any of them, 
at any point in time, + // might require callback or buffered API tracing + for(const auto& itr : context::get_registered_contexts()) + { + if(!itr) continue; + + // if there is a callback tracer enabled for the given domain and op, we need to wrap + if(itr->callback_tracer && itr->callback_tracer->domains(_callback_domain) && + itr->callback_tracer->domains(_callback_domain, _operation)) + return true; + + // if there is a buffered tracer enabled for the given domain and op, we need to wrap + if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_domain) && + itr->buffered_tracer->domains(_buffered_domain, _operation)) + return true; + } + return false; +} + +template +void +copy_table(Tp* _orig, uint64_t _tbl_instance, std::integral_constant) +{ + using table_type = typename rocdecode_table_lookup::type; + + if constexpr(std::is_same::value) + { + auto _info = rocdecode_api_info{}; + + // make sure we don't access a field that doesn't exist in input table + if(_info.offset() >= _orig->size) return; + + // 1. get the sub-table containing the function pointer in original table + // 2. get reference to function pointer in sub-table in original table + auto& _orig_table = _info.get_table(_orig); + auto& _orig_func = _info.get_table_func(_orig_table); + // 3. get the sub-table containing the function pointer in saved table + // 4. get reference to function pointer in sub-table in saved table + // 5. 
save the original function in the saved table + auto& _copy_table = _info.get_table(*get_table()); + auto& _copy_func = _info.get_table_func(_copy_table); + + ROCP_FATAL_IF(_copy_func && _tbl_instance == 0) + << _info.name << " has non-null function pointer " << _copy_func + << " despite this being the first instance of the library being copies"; + + if(!_copy_func) + { + ROCP_TRACE << "copying table entry for " << _info.name; + _copy_func = _orig_func; + } + else + { + ROCP_TRACE << "skipping copying table entry for " << _info.name + << " from table instance " << _tbl_instance; + } + } +} + +template +void +update_table(Tp* _orig, std::integral_constant) +{ + using table_type = typename rocdecode_table_lookup::type; + + if constexpr(std::is_same::value) + { + auto _info = rocdecode_api_info{}; + + // make sure we don't access a field that doesn't exist in input table + if(_info.offset() >= _orig->size) return; + + // check to see if there are any contexts which enable this operation in the ROCDecode API domain + if(!should_wrap_functor( + _info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx)) + return; + + ROCP_TRACE << "updating table entry for " << _info.name; + + // 1. get the sub-table containing the function pointer in original table + // 2. get reference to function pointer in sub-table in original table + // 3.
update function pointer with wrapper + auto& _table = _info.get_table(_orig); + auto& _func = _info.get_table_func(_table); + _func = _info.get_functor(_func); + } +} + +template +void +copy_table(Tp* _orig, uint64_t _tbl_instance, std::index_sequence) +{ + copy_table(_orig, _tbl_instance, std::integral_constant{}); + if constexpr(sizeof...(OpIdxTail) > 0) + copy_table(_orig, _tbl_instance, std::index_sequence{}); +} + +template +void +update_table(Tp* _orig, std::index_sequence) +{ + update_table(_orig, std::integral_constant{}); + if constexpr(sizeof...(OpIdxTail) > 0) + update_table(_orig, std::index_sequence{}); +} +} // namespace + +// check out the assembly here... this compiles to a switch statement +template +const char* +name_by_id(uint32_t id) +{ + return name_by_id(id, + std::make_index_sequence::last>{}); +} + +template +uint32_t +id_by_name(const char* name) +{ + return id_by_name(name, + std::make_index_sequence::last>{}); +} + +template +std::vector +get_ids() +{ + constexpr auto last_api_id = rocdecode_domain_info::last; + auto _data = std::vector{}; + _data.reserve(last_api_id); + get_ids(_data, std::make_index_sequence{}); + return _data; +} + +template +std::vector +get_names() +{ + constexpr auto last_api_id = rocdecode_domain_info::last; + auto _data = std::vector{}; + _data.reserve(last_api_id); + get_names(_data, std::make_index_sequence{}); + return _data; +} + +template +void +iterate_args(uint32_t id, + const rocprofiler_callback_tracing_rocdecode_api_data_t& data, + rocprofiler_callback_tracing_operation_args_cb_t callback, + int32_t max_deref, + void* user_data) +{ + if(callback) + iterate_args(id, + data, + callback, + max_deref, + user_data, + std::make_index_sequence::last>{}); +} + +template +void +copy_table(TableT* _orig, uint64_t _tbl_instance) +{ + constexpr auto TableIdx = rocdecode_table_id_lookup::value; + if(_orig) + copy_table(_orig, + _tbl_instance, + std::make_index_sequence::last>{}); +} + +template +void 
+update_table(TableT* _orig) +{ + constexpr auto TableIdx = rocdecode_table_id_lookup::value; + if(_orig) + update_table(_orig, + std::make_index_sequence::last>{}); +} + +using rocdecode_api_data_t = rocprofiler_callback_tracing_rocdecode_api_data_t; +using rocdecode_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t; + +#define INSTANTIATE_ROCDECODE_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \ + template void copy_table(TABLE_TYPE * _tbl, uint64_t _instv); \ + template void update_table(TABLE_TYPE * _tbl); \ + template const char* name_by_id(uint32_t); \ + template uint32_t id_by_name(const char*); \ + template std::vector get_ids(); \ + template std::vector get_names(); + +INSTANTIATE_ROCDECODE_TABLE_FUNC(rocdecode_api_func_table_t, ROCPROFILER_ROCDECODE_TABLE_ID) +} // namespace rocdecode +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.def.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.def.cpp new file mode 100644 index 0000000000..51c81f3f22 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.def.cpp @@ -0,0 +1,90 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "lib/rocprofiler-sdk/rocdecode/defines.hpp" +#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp" + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace rocdecode +{ +template <> +struct rocdecode_domain_info +{ + using args_type = rocprofiler_rocdecode_api_args_t; + using retval_type = rocprofiler_rocdecode_api_retval_t; + using callback_data_type = rocprofiler_callback_tracing_rocdecode_api_data_t; + using buffer_data_type = rocprofiler_buffer_tracing_rocdecode_api_record_t; +}; + +template <> +struct rocdecode_domain_info +: rocdecode_domain_info +{ + using enum_type = rocprofiler_rocdecode_api_id_t; // rocDecode operation ids + static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API; + static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_ROCDECODE_API; + static constexpr auto none = ROCPROFILER_ROCDECODE_API_ID_NONE; + static constexpr auto last = ROCPROFILER_ROCDECODE_API_ID_LAST; + static constexpr auto external_correlation_id_domain_idx = + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API; +}; + +} // namespace rocdecode +} // namespace rocprofiler + +#if defined(ROCPROFILER_LIB_ROCPROFILER_SDK_ROCDECODE_ROCDECODE_CPP_IMPL) && \ + ROCPROFILER_LIB_ROCPROFILER_SDK_ROCDECODE_ROCDECODE_CPP_IMPL == 1 + +// clang-format off +ROCDECODE_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_ROCDECODE_TABLE_ID, rocdecode_api_func_table_t) + +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID,
ROCPROFILER_ROCDECODE_API_ID_rocDecCreateVideoParser, rocDecCreateVideoParser, pfn_rocdec_create_video_parser, parser_handle, params) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecParseVideoData, rocDecParseVideoData, pfn_rocdec_parse_video_data, parser_handle, packet) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyVideoParser, rocDecDestroyVideoParser, pfn_rocdec_destroy_video_parser, parser_handle) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecCreateDecoder, rocDecCreateDecoder, pfn_rocdec_create_decoder, decoder_handle, decoder_create_info) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyDecoder, rocDecDestroyDecoder, pfn_rocdec_destroy_decoder, decoder_handle) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecoderCaps, rocDecGetDecoderCaps, pfn_rocdec_get_gecoder_caps, decode_caps) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDecodeFrame, rocDecDecodeFrame, pfn_rocdec_decode_frame, decoder_handle, pic_params) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecodeStatus, rocDecGetDecodeStatus, pfn_rocdec_get_decode_status, decoder_handle, pic_idx, decode_status) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecReconfigureDecoder, rocDecReconfigureDecoder, pfn_rocdec_reconfigure_decoder, decoder_handle, reconfig_params) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetVideoFrame, rocDecGetVideoFrame, pfn_rocdec_get_video_frame, decoder_handle, pic_idx, dev_mem_ptr, horizontal_pitch, vid_postproc_params) +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, 
ROCPROFILER_ROCDECODE_API_ID_rocDecGetErrorName, rocDecGetErrorName, pfn_rocdec_get_error_name, rocdec_status) + +#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1 +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecCreateBitstreamReader, rocDecCreateBitstreamReader, pfn_rocdec_create_bitstream_reader, bs_reader_handle, input_file_path); +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamCodecType, rocDecGetBitstreamCodecType, pfn_rocdec_get_bitstream_codec_type, bs_reader_handle, codec_type); +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamBitDepth, rocDecGetBitstreamBitDepth, pfn_rocdec_get_bitstream_bit_depth, bs_reader_handle, bit_depth); +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamPicData, rocDecGetBitstreamPicData, pfn_rocdec_get_bitstream_pic_data, bs_reader_handle, pic_data, pic_size, pts); +ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyBitstreamReader, rocDecDestroyBitstreamReader, pfn_rocdec_destroy_bitstream_reader, bs_reader_handle); +#endif +#else +# error \ + "Do not compile this file directly. It is included by lib/rocprofiler-sdk/rocdecode/rocdecode.cpp" +#endif + + diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.hpp new file mode 100644 index 0000000000..af4b96ca66 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.hpp @@ -0,0 +1,126 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE) +# if defined __has_include +# if __has_include() +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1 +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +# else +# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0 +# endif +#endif + +#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0 +# include +# include +# include +#else +# include +# include +# include +#endif + +#include + +#include +#include + +namespace rocprofiler +{ +namespace rocdecode +{ +using rocdecode_api_func_table_t = ::RocDecodeDispatchTable; + +struct ROCDecodeAPITable +{ + rocdecode_api_func_table_t* rocdecode_api_table = nullptr; +}; + +using rocdecode_api_table_t = ROCDecodeAPITable; + +rocdecode_api_table_t& +get_table(); + +template +struct rocdecode_table_lookup; + +template +struct rocdecode_table_id_lookup; + +template +struct rocdecode_domain_info; + +template +struct rocdecode_api_info; + +template +struct rocdecode_api_impl : rocdecode_domain_info +{ + template + static auto set_data_args(DataArgsT&, Args... args); + + template + static auto exec(FuncT&&, Args&&... args); + + template + static RetT functor(Args... 
args); +}; + +template +const char* +name_by_id(uint32_t id); + +template +uint32_t +id_by_name(const char* name); + +template +std::vector +get_names(); + +template +std::vector +get_ids(); + +template +void +iterate_args(uint32_t id, + const rocprofiler_callback_tracing_rocdecode_api_data_t& data, + rocprofiler_callback_tracing_operation_args_cb_t callback, + int32_t max_deref, + void* user_data); + +template +void +copy_table(TableT* _orig, uint64_t _tbl_instance); + +template +void +update_table(TableT* _orig); + +} // namespace rocdecode +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/runtime_initialization.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/runtime_initialization.cpp index 46d7f233a4..1e23621685 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/runtime_initialization.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/runtime_initialization.cpp @@ -57,6 +57,7 @@ SPECIALIZE_RUNTIME_INIT_INFO(HSA, "HSA runtime") SPECIALIZE_RUNTIME_INIT_INFO(HIP, "HIP runtime") SPECIALIZE_RUNTIME_INIT_INFO(MARKER, "Marker (ROCTx) runtime") SPECIALIZE_RUNTIME_INIT_INFO(RCCL, "RCCL runtime") +SPECIALIZE_RUNTIME_INIT_INFO(ROCDECODE, "ROCDecode runtime") #undef SPECIALIZE_RUNTIME_INIT_INFO diff --git a/projects/rocprofiler-sdk/tests/CMakeLists.txt b/projects/rocprofiler-sdk/tests/CMakeLists.txt index 292e5e8933..ef26a2975e 100644 --- a/projects/rocprofiler-sdk/tests/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/CMakeLists.txt @@ -62,6 +62,10 @@ add_subdirectory(thread-trace) add_subdirectory(pc_sampling) add_subdirectory(hip-graph-tracing) add_subdirectory(counter-collection) +if(ROCPROFILER_BUILD_ROCDECODE_TESTS) + add_subdirectory(rocdecode) +endif() + if(ROCPROFILER_BUILD_OPENMP_TESTS) add_subdirectory(openmp-tools) endif() diff --git a/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt index 8ae854798c..c065303ce6 100644 --- 
a/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt @@ -29,3 +29,6 @@ add_subdirectory(hsa-queue-dependency) add_subdirectory(hip-graph) add_subdirectory(hsa-memory-allocation) add_subdirectory(pc-sampling) +if(ROCPROFILER_BUILD_ROCDECODE_TESTS) + add_subdirectory(rocdecode) +endif() diff --git a/projects/rocprofiler-sdk/tests/bin/rocdecode/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/rocdecode/CMakeLists.txt new file mode 100644 index 0000000000..99ec32635b --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/rocdecode/CMakeLists.txt @@ -0,0 +1,43 @@ +# +# +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +if(NOT CMAKE_HIP_COMPILER) + find_program( + amdclangpp_EXECUTABLE + NAMES amdclang++ + HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATH_SUFFIXES bin llvm/bin NO_CACHE) + mark_as_advanced(amdclangpp_EXECUTABLE) + + if(amdclangpp_EXECUTABLE) + set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}") + endif() +endif() + +project(rocprofiler-tool-test-app-rocdecode LANGUAGES CXX HIP) + +foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) + if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "") + set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}") + endif() +endforeach() + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_HIP_STANDARD 17) +set(CMAKE_HIP_EXTENSIONS OFF) +set(CMAKE_HIP_STANDARD_REQUIRED ON) + +set_source_files_properties(rocdecode.cpp roc_video_dec.cpp PROPERTIES LANGUAGE HIP) +add_executable(rocdecode) +target_sources(rocdecode PRIVATE rocdecode.cpp roc_video_dec.cpp) + +find_package(Threads REQUIRED) +find_package(rocDecode REQUIRED) +target_link_libraries( + rocdecode PRIVATE rocprofiler-sdk::tests-build-flags Threads::Threads hsa-runtime64 + rocprofiler-sdk::tests-common-library rocDecode::rocDecode) diff --git a/projects/rocprofiler-sdk/tests/bin/rocdecode/roc_video_dec.cpp 
b/projects/rocprofiler-sdk/tests/bin/rocdecode/roc_video_dec.cpp new file mode 100644 index 0000000000..7921b4a5c7 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/rocdecode/roc_video_dec.cpp @@ -0,0 +1,1456 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "roc_video_dec.h" + +RocVideoDecoder::RocVideoDecoder(int device_id, + OutputSurfaceMemoryType out_mem_type, + rocDecVideoCodec codec, + bool force_zero_latency, + const Rect* p_crop_rect, + bool extract_user_sei_Message, + uint32_t disp_delay, + int max_width, + int max_height, + uint32_t clk_rate) +: device_id_{device_id} +, out_mem_type_(out_mem_type) +, b_extract_sei_message_(extract_user_sei_Message) +, b_force_zero_latency_(force_zero_latency) +, disp_delay_(disp_delay) +, codec_id_(codec) +, max_width_(max_width) +, max_height_(max_height) +{ + if(!InitHIP(device_id_)) + { + THROW("Failed to initilize the HIP"); + } + if(p_crop_rect) crop_rect_ = *p_crop_rect; + if(b_extract_sei_message_) + { + fp_sei_ = fopen("rocdec_sei_message.txt", "wb"); + curr_sei_message_ptr_ = new RocdecSeiMessageInfo; + memset(&sei_message_display_q_, 0, sizeof(sei_message_display_q_)); + } + // create rocdec videoparser + RocdecParserParams parser_params = {}; + parser_params.codec_type = codec_id_; + parser_params.max_num_decode_surfaces = + 1; // let the parser to determine the decode buffer pool size + parser_params.clock_rate = clk_rate; + parser_params.max_display_delay = disp_delay_; + parser_params.user_data = this; + parser_params.pfn_sequence_callback = HandleVideoSequenceProc; + parser_params.pfn_decode_picture = HandlePictureDecodeProc; + parser_params.pfn_display_picture = b_force_zero_latency_ ? NULL : HandlePictureDisplayProc; + parser_params.pfn_get_sei_msg = b_extract_sei_message_ ? 
HandleSEIMessagesProc : NULL; + ROCDEC_API_CALL(rocDecCreateVideoParser(&rocdec_parser_, &parser_params)); +} + +RocVideoDecoder::~RocVideoDecoder() +{ + auto start_time = StartTimer(); + if(curr_sei_message_ptr_) + { + delete curr_sei_message_ptr_; + curr_sei_message_ptr_ = nullptr; + } + + if(fp_sei_) + { + fclose(fp_sei_); + fp_sei_ = nullptr; + } + + if(rocdec_parser_) + { + rocDecDestroyVideoParser(rocdec_parser_); + rocdec_parser_ = nullptr; + } + + if(roc_decoder_) + { + rocDecDestroyDecoder(roc_decoder_); + roc_decoder_ = nullptr; + } + + if(curr_video_format_ptr_) + { + delete curr_video_format_ptr_; + curr_video_format_ptr_ = nullptr; + } + + std::lock_guard lock(mtx_vp_frame_); + if(out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL) + { + for(auto& p_frame : vp_frames_) + { + if(p_frame.frame_ptr) + { + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + hipError_t hip_status = hipFree(p_frame.frame_ptr); + if(hip_status != hipSuccess) + { + std::cerr << "ERROR: hipFree failed! (" << hip_status << ")" << std::endl; + } + } + else + delete[](p_frame.frame_ptr); + p_frame.frame_ptr = nullptr; + } + } + } + if(hip_stream_) + { + hipError_t hip_status = hipSuccess; + hip_status = hipStreamDestroy(hip_stream_); + if(hip_status != hipSuccess) + { + std::cerr << "ERROR: hipStream_Destroy failed! 
(" << hip_status << ")" << std::endl; + } + } + if(fp_out_) + { + fclose(fp_out_); + fp_out_ = nullptr; + } + + double elapsed_time = StopTimer(start_time); + AddDecoderSessionOverHead(std::this_thread::get_id(), elapsed_time); +} + +static const char* +GetVideoCodecString(rocDecVideoCodec e_codec) +{ + static struct + { + rocDecVideoCodec e_codec; + const char* name; + } aCodecName[] = { + {rocDecVideoCodec_MPEG1, "MPEG-1"}, + {rocDecVideoCodec_MPEG2, "MPEG-2"}, + {rocDecVideoCodec_MPEG4, "MPEG-4 (ASP)"}, + {rocDecVideoCodec_AVC, "AVC/H.264"}, + {rocDecVideoCodec_HEVC, "H.265/HEVC"}, + {rocDecVideoCodec_AV1, "AV1"}, + {rocDecVideoCodec_VP8, "VP8"}, + {rocDecVideoCodec_VP9, "VP9"}, + {rocDecVideoCodec_JPEG, "M-JPEG"}, + {rocDecVideoCodec_NumCodecs, "Invalid"}, + }; + + if(e_codec >= 0 && e_codec <= rocDecVideoCodec_NumCodecs) + { + return aCodecName[e_codec].name; + } + for(size_t i = rocDecVideoCodec_NumCodecs + 1; i < sizeof(aCodecName) / sizeof(aCodecName[0]); + i++) + { + if(e_codec == aCodecName[i].e_codec) + { + return aCodecName[e_codec].name; + } + } + return "Unknown"; +} + +/** + * @brief function to return the name from codec_id + * + * @param codec_id + * @return const char* + */ +const char* +RocVideoDecoder::GetCodecFmtName(rocDecVideoCodec codec_id) +{ + return GetVideoCodecString(codec_id); +} + +static const char* +GetSurfaceFormatString(rocDecVideoSurfaceFormat surface_format_id) +{ + static struct + { + rocDecVideoSurfaceFormat surf_fmt; + const char* name; + } SurfName[] = { + {rocDecVideoSurfaceFormat_NV12, "NV12"}, + {rocDecVideoSurfaceFormat_P016, "P016"}, + {rocDecVideoSurfaceFormat_YUV444, "YUV444"}, + {rocDecVideoSurfaceFormat_YUV444_16Bit, "YUV444_16Bit"}, + }; + + if(surface_format_id >= rocDecVideoSurfaceFormat_NV12 && + surface_format_id <= rocDecVideoSurfaceFormat_YUV444_16Bit) + return SurfName[surface_format_id].name; + else + return "Unknown"; +} + +/** + * @brief function to return the name from surface_format_id + * + * @param 
surface_format_id - enum for surface format + * @return const char* + */ +const char* +RocVideoDecoder::GetSurfaceFmtName(rocDecVideoSurfaceFormat surface_format_id) +{ + return GetSurfaceFormatString(surface_format_id); +} + +static const char* +GetVideoChromaFormatName(rocDecVideoChromaFormat e_chroma_format) +{ + static struct + { + rocDecVideoChromaFormat chroma_fmt; + const char* name; + } ChromaFormatName[] = { + {rocDecVideoChromaFormat_Monochrome, "YUV 400 (Monochrome)"}, + {rocDecVideoChromaFormat_420, "YUV 420"}, + {rocDecVideoChromaFormat_422, "YUV 422"}, + {rocDecVideoChromaFormat_444, "YUV 444"}, + }; + + if(e_chroma_format >= 0 && e_chroma_format <= rocDecVideoChromaFormat_444) + { + return ChromaFormatName[e_chroma_format].name; + } + return "Unknown"; +} + +static void +GetSurfaceStrideInternal(rocDecVideoSurfaceFormat surface_format, + uint32_t width, + uint32_t height, + uint32_t* pitch, + uint32_t* vstride) +{ + switch(surface_format) + { + case rocDecVideoSurfaceFormat_NV12: + *pitch = align(width, 256); + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_P016: + *pitch = align(width, 128) * 2; + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_YUV444: + *pitch = align(width, 256); + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_YUV444_16Bit: + *pitch = align(width, 128) * 2; + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_YUV420: + *pitch = align(width, 256); + *vstride = align(height, 16); + break; + case rocDecVideoSurfaceFormat_YUV420_16Bit: + *pitch = align(width, 128) * 2; + *vstride = align(height, 16); + break; + } + return; +} + +/* Return value from HandleVideoSequence() are interpreted as : + * 0: fail, 1: succeeded, > 1: override dpb size of parser (set by + * CUVIDPARSERPARAMS::max_num_decode_surfaces while creating parser) + */ +int +RocVideoDecoder::HandleVideoSequence(RocdecVideoFormat* p_video_format) +{ + if(p_video_format == nullptr) + 
{ + ROCDEC_THROW("Rocdec:: Invalid video format in HandleVideoSequence: ", + ROCDEC_INVALID_PARAMETER); + return 0; + } + auto start_time = StartTimer(); + input_video_info_str_.str(""); + input_video_info_str_.clear(); + input_video_info_str_ << "Input Video Information" << std::endl + << "\tCodec : " << GetCodecFmtName(p_video_format->codec) + << std::endl; + if(p_video_format->frame_rate.numerator && p_video_format->frame_rate.denominator) + { + input_video_info_str_ << "\tFrame rate : " << p_video_format->frame_rate.numerator << "/" + << p_video_format->frame_rate.denominator << " = " + << 1.0 * p_video_format->frame_rate.numerator / + p_video_format->frame_rate.denominator + << " fps" << std::endl; + } + input_video_info_str_ << "\tSequence : " + << (p_video_format->progressive_sequence ? "Progressive" : "Interlaced") + << std::endl + << "\tCoded size : [" << p_video_format->coded_width << ", " + << p_video_format->coded_height << "]" << std::endl + << "\tDisplay area : [" << p_video_format->display_area.left << ", " + << p_video_format->display_area.top << ", " + << p_video_format->display_area.right << ", " + << p_video_format->display_area.bottom << "]" << std::endl + << "\tChroma : " + << GetVideoChromaFormatName(p_video_format->chroma_format) << std::endl + << "\tBit depth : " << p_video_format->bit_depth_luma_minus8 + 8; + input_video_info_str_ << std::endl; + + int num_decode_surfaces = p_video_format->min_num_decode_surfaces; + + RocdecDecodeCaps decode_caps; + memset(&decode_caps, 0, sizeof(decode_caps)); + decode_caps.codec_type = p_video_format->codec; + decode_caps.chroma_format = p_video_format->chroma_format; + decode_caps.bit_depth_minus_8 = p_video_format->bit_depth_luma_minus8; + + rocDecGetDecoderCaps(&decode_caps); + if(!decode_caps.is_supported) + { + ROCDEC_THROW("rocDecode:: Codec not supported on this GPU ", ROCDEC_NOT_SUPPORTED); + return 0; + } + if((p_video_format->coded_width > decode_caps.max_width) || + 
(p_video_format->coded_height > decode_caps.max_height)) + { + std::ostringstream errorString; + errorString << std::endl + << "Resolution : " << p_video_format->coded_width << "x" + << p_video_format->coded_height << std::endl + << "Max Supported (wxh) : " << decode_caps.max_width << "x" + << decode_caps.max_height << std::endl + << "Resolution not supported on this GPU "; + const std::string cErr = errorString.str(); + ROCDEC_THROW(cErr, ROCDEC_NOT_SUPPORTED); + return 0; + } + if(curr_video_format_ptr_ == nullptr) + { + curr_video_format_ptr_ = new RocdecVideoFormat(); + } + // store current video format: this is required to call reconfigure from application in case of + // random seek + if(curr_video_format_ptr_) + memcpy(curr_video_format_ptr_, p_video_format, sizeof(RocdecVideoFormat)); + + if(coded_width_ && coded_height_) + { + // rocdecCreateDecoder() has been called before, and now there's possible config change + return ReconfigureDecoder(p_video_format); + } + // e_codec has been set in the constructor (for parser). Here it's set again for potential + // correction + codec_id_ = p_video_format->codec; + video_chroma_format_ = p_video_format->chroma_format; + bitdepth_minus_8_ = p_video_format->bit_depth_luma_minus8; + byte_per_pixel_ = bitdepth_minus_8_ > 0 ? 2 : 1; + + // Set the output surface format same as chroma format + if(video_chroma_format_ == rocDecVideoChromaFormat_420 || rocDecVideoChromaFormat_Monochrome) + video_surface_format_ = + bitdepth_minus_8_ ? rocDecVideoSurfaceFormat_P016 : rocDecVideoSurfaceFormat_NV12; + else if(video_chroma_format_ == rocDecVideoChromaFormat_444) + video_surface_format_ = bitdepth_minus_8_ ? rocDecVideoSurfaceFormat_YUV444_16Bit + : rocDecVideoSurfaceFormat_YUV444; + else if(video_chroma_format_ == rocDecVideoChromaFormat_422) + video_surface_format_ = rocDecVideoSurfaceFormat_NV12; + + // Check if output format supported. 
If not, check falback options + if(!(decode_caps.output_format_mask & (1 << video_surface_format_))) + { + if(decode_caps.output_format_mask & (1 << rocDecVideoSurfaceFormat_NV12)) + video_surface_format_ = rocDecVideoSurfaceFormat_NV12; + else if(decode_caps.output_format_mask & (1 << rocDecVideoSurfaceFormat_P016)) + video_surface_format_ = rocDecVideoSurfaceFormat_P016; + else if(decode_caps.output_format_mask & (1 << rocDecVideoSurfaceFormat_YUV444)) + video_surface_format_ = rocDecVideoSurfaceFormat_YUV444; + else if(decode_caps.output_format_mask & (1 << rocDecVideoSurfaceFormat_YUV444_16Bit)) + video_surface_format_ = rocDecVideoSurfaceFormat_YUV444_16Bit; + else + ROCDEC_THROW("No supported output format found", ROCDEC_NOT_SUPPORTED); + } + + coded_width_ = p_video_format->coded_width; + coded_height_ = p_video_format->coded_height; + disp_rect_.top = p_video_format->display_area.top; + disp_rect_.bottom = p_video_format->display_area.bottom; + disp_rect_.left = p_video_format->display_area.left; + disp_rect_.right = p_video_format->display_area.right; + disp_width_ = p_video_format->display_area.right - p_video_format->display_area.left; + disp_height_ = p_video_format->display_area.bottom - p_video_format->display_area.top; + + // AV1 has max width/height of sequence in sequence header + if(codec_id_ == rocDecVideoCodec_AV1 && p_video_format->seqhdr_data_length > 0) + { + // dont overwrite if it is already set from cmdline or reconfig.txt + if(!(static_cast(max_width_) > p_video_format->coded_width || + static_cast(max_height_) > p_video_format->coded_height)) + { + RocdecVideoFormatEx* vidFormatEx = (RocdecVideoFormatEx*) p_video_format; + max_width_ = vidFormatEx->max_width; + max_height_ = vidFormatEx->max_height; + } + } + if(max_width_ < (int) p_video_format->coded_width) max_width_ = p_video_format->coded_width; + if(max_height_ < (int) p_video_format->coded_height) max_height_ = p_video_format->coded_height; + + RocDecoderCreateInfo 
videoDecodeCreateInfo = {}; + videoDecodeCreateInfo.device_id = device_id_; + videoDecodeCreateInfo.codec_type = codec_id_; + videoDecodeCreateInfo.chroma_format = video_chroma_format_; + videoDecodeCreateInfo.output_format = video_surface_format_; + videoDecodeCreateInfo.bit_depth_minus_8 = bitdepth_minus_8_; + videoDecodeCreateInfo.num_decode_surfaces = num_decode_surfaces; + videoDecodeCreateInfo.width = coded_width_; + videoDecodeCreateInfo.height = coded_height_; + videoDecodeCreateInfo.max_width = max_width_; + videoDecodeCreateInfo.max_height = max_height_; + if(!(crop_rect_.right && crop_rect_.bottom)) + { + videoDecodeCreateInfo.display_rect.top = disp_rect_.top; + videoDecodeCreateInfo.display_rect.bottom = disp_rect_.bottom; + videoDecodeCreateInfo.display_rect.left = disp_rect_.left; + videoDecodeCreateInfo.display_rect.right = disp_rect_.right; + target_width_ = (disp_width_ + 1) & ~1; + target_height_ = (disp_height_ + 1) & ~1; + } + else + { + videoDecodeCreateInfo.display_rect.top = crop_rect_.top; + videoDecodeCreateInfo.display_rect.bottom = crop_rect_.bottom; + videoDecodeCreateInfo.display_rect.left = crop_rect_.left; + videoDecodeCreateInfo.display_rect.right = crop_rect_.right; + target_width_ = (crop_rect_.right - crop_rect_.left + 1) & ~1; + target_height_ = (crop_rect_.bottom - crop_rect_.top + 1) & ~1; + } + videoDecodeCreateInfo.target_width = target_width_; + videoDecodeCreateInfo.target_height = target_height_; + + chroma_height_ = (int) (ceil(target_height_ * GetChromaHeightFactor(video_surface_format_))); + num_chroma_planes_ = GetChromaPlaneCount(video_surface_format_); + if(video_chroma_format_ == rocDecVideoChromaFormat_Monochrome) num_chroma_planes_ = 0; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL || out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) + GetSurfaceStrideInternal(video_surface_format_, + p_video_format->coded_width, + p_video_format->coded_height, + &surface_stride_, + &surface_vstride_); + else + { + 
surface_stride_ = + videoDecodeCreateInfo.target_width * + byte_per_pixel_; // todo:: check if we need pitched memory for faster copy + } + chroma_vstride_ = (int) (ceil(surface_vstride_ * GetChromaHeightFactor(video_surface_format_))); + // fill output_surface_info_ + output_surface_info_.output_width = target_width_; + output_surface_info_.output_height = target_height_; + output_surface_info_.output_pitch = surface_stride_; + output_surface_info_.output_vstride = (out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + ? surface_vstride_ + : videoDecodeCreateInfo.target_height; + output_surface_info_.disp_rect = disp_rect_; + output_surface_info_.chroma_height = chroma_height_; + output_surface_info_.bit_depth = bitdepth_minus_8_ + 8; + output_surface_info_.bytes_per_pixel = byte_per_pixel_; + output_surface_info_.surface_format = video_surface_format_; + output_surface_info_.num_chroma_planes = num_chroma_planes_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_COPIED; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_HOST_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_HOST_COPIED; + } + else + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_NOT_MAPPED; + } + + input_video_info_str_ << "Video Decoding Params:" << std::endl + << "\tNum Surfaces : " << videoDecodeCreateInfo.num_decode_surfaces + << std::endl + << "\tCrop : [" << 
videoDecodeCreateInfo.display_rect.left << ", " + << videoDecodeCreateInfo.display_rect.top << ", " + << videoDecodeCreateInfo.display_rect.right << ", " + << videoDecodeCreateInfo.display_rect.bottom << "]" << std::endl + << "\tResize : " << videoDecodeCreateInfo.target_width << "x" + << videoDecodeCreateInfo.target_height << std::endl; + input_video_info_str_ << std::endl; + std::cout << input_video_info_str_.str(); + + ROCDEC_API_CALL(rocDecCreateDecoder(&roc_decoder_, &videoDecodeCreateInfo)); + double elapsed_time = StopTimer(start_time); + AddDecoderSessionOverHead(std::this_thread::get_id(), elapsed_time); + return num_decode_surfaces; +} + +/** + * @brief Function to set the Reconfig Params object + * + * @param p_reconfig_params: pointer to reconfig params struct + * @return true : success + * @return false : fail + */ +bool +RocVideoDecoder::SetReconfigParams(ReconfigParams* p_reconfig_params, bool b_force_reconfig_flush) +{ + if(!p_reconfig_params) + { + std::cerr << "ERROR: Invalid reconfig struct passed! " << std::endl; + return false; + } + // save it + p_reconfig_params_ = p_reconfig_params; + b_force_recofig_flush_ = b_force_reconfig_flush; + return true; +} + +/** + * @brief Function to force Reconfigure Flush: needed for random seeking to key frames + * + * @return int 1: Success 0: Fail + */ +int +RocVideoDecoder::FlushAndReconfigure() +{ + if(!p_reconfig_params_) + { + std::cerr << "ERROR: Reconfig params is not set! " << std::endl; + return 0; + } + if(!curr_video_format_ptr_) + { + std::cerr << "ERROR: video format is not initialized! " << std::endl; + return 0; + } + // call reconfigure + b_force_recofig_flush_ = true; // if not already set to force reconfigure + ReconfigureDecoder(curr_video_format_ptr_); + return true; +} + +/** + * @brief function to reconfigure decoder if there is a change in sequence params. 
+ * + * @param p_video_format + * @return int 1: success 0: fail + */ +int +RocVideoDecoder::ReconfigureDecoder(RocdecVideoFormat* p_video_format) +{ + if(p_video_format->codec != codec_id_) + { + ROCDEC_THROW("Reconfigure Not supported for codec change", ROCDEC_NOT_SUPPORTED); + return 0; + } + if(p_video_format->chroma_format != video_chroma_format_) + { + ROCDEC_THROW("Reconfigure Not supported for chroma format change", ROCDEC_NOT_SUPPORTED); + return 0; + } + if(p_video_format->bit_depth_luma_minus8 != bitdepth_minus_8_) + { + ROCDEC_THROW("Reconfigure Not supported for bit depth change", ROCDEC_NOT_SUPPORTED); + return 0; + } + bool is_decode_res_changed = !(p_video_format->coded_width == coded_width_ && + p_video_format->coded_height == coded_height_); + bool is_display_rect_changed = !(p_video_format->display_area.bottom == disp_rect_.bottom && + p_video_format->display_area.top == disp_rect_.top && + p_video_format->display_area.left == disp_rect_.left && + p_video_format->display_area.right == disp_rect_.right); + + if(!is_decode_res_changed && !is_display_rect_changed && !b_force_recofig_flush_) + { + return 1; + } + + // Flush and clear internal frame store to reconfigure when either coded size or display size + // has changed. 
+ if(p_reconfig_params_ && p_reconfig_params_->p_fn_reconfigure_flush) + num_frames_flushed_during_reconfig_ += p_reconfig_params_->p_fn_reconfigure_flush( + this, + p_reconfig_params_->reconfig_flush_mode, + static_cast(p_reconfig_params_->p_reconfig_user_struct)); + // clear the existing output buffers of different size + // note that app lose the remaining frames in the vp_frames/vp_frames_q in case application + // didn't set p_fn_reconfigure_flush_ callback + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + ReleaseInternalFrames(); + } + else + { + std::lock_guard lock(mtx_vp_frame_); + while(!vp_frames_.empty()) + { + DecFrameBuffer* p_frame = &vp_frames_.back(); + // pop decoded frame + vp_frames_.pop_back(); + if(p_frame->frame_ptr) + { + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + hipError_t hip_status = hipFree(p_frame->frame_ptr); + if(hip_status != hipSuccess) + std::cerr << "ERROR: hipFree failed! (" << hip_status << ")" << std::endl; + } + else + delete[](p_frame->frame_ptr); + } + } + } + output_frame_cnt_ = 0; // reset frame_count + if(is_decode_res_changed) + { + coded_width_ = p_video_format->coded_width; + coded_height_ = p_video_format->coded_height; + } + if(is_display_rect_changed) + { + disp_rect_.left = p_video_format->display_area.left; + disp_rect_.right = p_video_format->display_area.right; + disp_rect_.top = p_video_format->display_area.top; + disp_rect_.bottom = p_video_format->display_area.bottom; + disp_width_ = p_video_format->display_area.right - p_video_format->display_area.left; + disp_height_ = p_video_format->display_area.bottom - p_video_format->display_area.top; + chroma_height_ = static_cast( + std::ceil(target_height_ * GetChromaHeightFactor(video_surface_format_))); + if(!(crop_rect_.right && crop_rect_.bottom)) + { + target_width_ = (disp_width_ + 1) & ~1; + target_height_ = (disp_height_ + 1) & ~1; + } + else + { + target_width_ = (crop_rect_.right - crop_rect_.left + 1) & ~1; + target_height_ = 
(crop_rect_.bottom - crop_rect_.top + 1) & ~1; + } + } + + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL || out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) + { + GetSurfaceStrideInternal(video_surface_format_, + coded_width_, + coded_height_, + &surface_stride_, + &surface_vstride_); + } + else + { + surface_stride_ = target_width_ * byte_per_pixel_; + } + chroma_height_ = + static_cast(ceil(target_height_ * GetChromaHeightFactor(video_surface_format_))); + num_chroma_planes_ = GetChromaPlaneCount(video_surface_format_); + if(p_video_format->chroma_format == rocDecVideoChromaFormat_Monochrome) num_chroma_planes_ = 0; + chroma_vstride_ = static_cast( + std::ceil(surface_vstride_ * GetChromaHeightFactor(video_surface_format_))); + // Fill output_surface_info_ + output_surface_info_.output_width = target_width_; + output_surface_info_.output_height = target_height_; + output_surface_info_.output_pitch = surface_stride_; + output_surface_info_.output_vstride = + (out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) ? 
surface_vstride_ : target_height_; + output_surface_info_.disp_rect = disp_rect_; + output_surface_info_.chroma_height = chroma_height_; + output_surface_info_.bit_depth = bitdepth_minus_8_ + 8; + output_surface_info_.bytes_per_pixel = byte_per_pixel_; + output_surface_info_.surface_format = video_surface_format_; + output_surface_info_.num_chroma_planes = num_chroma_planes_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_DEV_COPIED; + } + else if(out_mem_type_ == OUT_SURFACE_MEM_HOST_COPIED) + { + output_surface_info_.output_surface_size_in_bytes = GetFrameSize(); + output_surface_info_.mem_type = OUT_SURFACE_MEM_HOST_COPIED; + } + else + { + output_surface_info_.output_surface_size_in_bytes = + surface_stride_ * (surface_vstride_ + (chroma_vstride_ * num_chroma_planes_)); + output_surface_info_.mem_type = OUT_SURFACE_MEM_NOT_MAPPED; + } + + // If the coded_width or coded_height hasn't changed but display resolution has changed, then + // need to update width and height for correct output with cropping. There is no need to + // reconfigure the decoder. 
+ if(!is_decode_res_changed && is_display_rect_changed) + { + return 1; + } + + RocdecReconfigureDecoderInfo reconfig_params = {}; + reconfig_params.width = coded_width_; + reconfig_params.height = coded_height_; + reconfig_params.target_width = target_width_; + reconfig_params.target_height = target_height_; + reconfig_params.num_decode_surfaces = p_video_format->min_num_decode_surfaces; + if(!(crop_rect_.right && crop_rect_.bottom)) + { + reconfig_params.display_rect.top = disp_rect_.top; + reconfig_params.display_rect.bottom = disp_rect_.bottom; + reconfig_params.display_rect.left = disp_rect_.left; + reconfig_params.display_rect.right = disp_rect_.right; + } + else + { + reconfig_params.display_rect.top = crop_rect_.top; + reconfig_params.display_rect.bottom = crop_rect_.bottom; + reconfig_params.display_rect.left = crop_rect_.left; + reconfig_params.display_rect.right = crop_rect_.right; + } + + if(roc_decoder_ == nullptr) + { + ROCDEC_THROW("Reconfigurition of the decoder detected but the decoder was not initialized " + "previoulsy!", + ROCDEC_NOT_SUPPORTED); + return 0; + } + ROCDEC_API_CALL(rocDecReconfigureDecoder(roc_decoder_, &reconfig_params)); + + input_video_info_str_.str(""); + input_video_info_str_.clear(); + input_video_info_str_ << "Input Video Resolution Changed:" << std::endl + << "\tCoded size : [" << p_video_format->coded_width << ", " + << p_video_format->coded_height << "]" << std::endl + << "\tDisplay area : [" << p_video_format->display_area.left << ", " + << p_video_format->display_area.top << ", " + << p_video_format->display_area.right << ", " + << p_video_format->display_area.bottom << "]" << std::endl; + input_video_info_str_ << std::endl; + input_video_info_str_ << "Video Decoding Params:" << std::endl + << "\tNum Surfaces : " << reconfig_params.num_decode_surfaces << std::endl + << "\tResize : " << reconfig_params.target_width << "x" + << reconfig_params.target_height << std::endl; + input_video_info_str_ << std::endl; + std::cout 
<< input_video_info_str_.str(); + + is_decoder_reconfigured_ = true; + return 1; +} + +/** + * @brief + * + * @param pPicParams + * @return int 1: success 0: fail + */ +int +RocVideoDecoder::HandlePictureDecode(RocdecPicParams* pPicParams) +{ + if(!roc_decoder_) + { + THROW("RocDecoder not initialized: failed with ErrCode: " + TOSTR(ROCDEC_NOT_INITIALIZED)); + } + pic_num_in_dec_order_[pPicParams->curr_pic_idx] = decode_poc_++; + ROCDEC_API_CALL(rocDecDecodeFrame(roc_decoder_, pPicParams)); + last_decode_surf_idx_ = pPicParams->curr_pic_idx; + decoded_pic_cnt_++; + if(b_force_zero_latency_ && ((!pPicParams->field_pic_flag) || (pPicParams->second_field))) + { + RocdecParserDispInfo disp_info; + memset(&disp_info, 0, sizeof(disp_info)); + disp_info.picture_index = pPicParams->curr_pic_idx; + disp_info.progressive_frame = !pPicParams->field_pic_flag; + disp_info.top_field_first = pPicParams->bottom_field_flag ^ 1; + HandlePictureDisplay(&disp_info); + } + return 1; +} + +/** + * @brief function to handle display picture + * + * @param pDispInfo + * @return int 0:fail 1: success + */ +int +RocVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo* pDispInfo) +{ + RocdecProcParams video_proc_params = {}; + video_proc_params.progressive_frame = pDispInfo->progressive_frame; + video_proc_params.top_field_first = pDispInfo->top_field_first; + + if(b_extract_sei_message_) + { + if(sei_message_display_q_[pDispInfo->picture_index].sei_data) + { + // Write SEI Message + uint8_t* sei_buffer = + (uint8_t*) (sei_message_display_q_[pDispInfo->picture_index].sei_data); + uint32_t sei_num_messages = + sei_message_display_q_[pDispInfo->picture_index].sei_message_count; + RocdecSeiMessage* sei_message = + sei_message_display_q_[pDispInfo->picture_index].sei_message; + if(fp_sei_) + { + for(uint32_t i = 0; i < sei_num_messages; i++) + { + if(codec_id_ == rocDecVideoCodec_AVC || codec_id_ == rocDecVideoCodec_HEVC) + { + switch(sei_message[i].sei_message_type) + { + case 
SEI_TYPE_TIME_CODE: + { + // todo:: check if we need to write timecode + } + break; + case SEI_TYPE_USER_DATA_UNREGISTERED: + { + fwrite(sei_buffer, sei_message[i].sei_message_size, 1, fp_sei_); + } + break; + } + } + if(codec_id_ == rocDecVideoCodec_AV1) + { + fwrite(sei_buffer, sei_message[i].sei_message_size, 1, fp_sei_); + } + sei_buffer += sei_message[i].sei_message_size; + } + } + free(sei_message_display_q_[pDispInfo->picture_index].sei_data); + sei_message_display_q_[pDispInfo->picture_index].sei_data = + NULL; // to avoid double free + free(sei_message_display_q_[pDispInfo->picture_index].sei_message); + sei_message_display_q_[pDispInfo->picture_index].sei_message = + NULL; // to avoid double free + } + } + if(out_mem_type_ != OUT_SURFACE_MEM_NOT_MAPPED) + { + void* src_dev_ptr[3] = {0}; + uint32_t src_pitch[3] = {0}; + ROCDEC_API_CALL(rocDecGetVideoFrame( + roc_decoder_, pDispInfo->picture_index, src_dev_ptr, src_pitch, &video_proc_params)); + RocdecDecodeStatus dec_status; + memset(&dec_status, 0, sizeof(dec_status)); + rocDecStatus result = + rocDecGetDecodeStatus(roc_decoder_, pDispInfo->picture_index, &dec_status); + if(result == ROCDEC_SUCCESS && + (dec_status.decode_status == rocDecodeStatus_Error || + dec_status.decode_status == rocDecodeStatus_Error_Concealed)) + { + std::cerr << "Decode Error occurred for picture: " + << pic_num_in_dec_order_[pDispInfo->picture_index] << std::endl; + } + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) + { + DecFrameBuffer dec_frame = {}; + dec_frame.frame_ptr = (uint8_t*) (src_dev_ptr[0]); + dec_frame.pts = pDispInfo->pts; + dec_frame.picture_index = pDispInfo->picture_index; + std::lock_guard lock(mtx_vp_frame_); + vp_frames_q_.push(dec_frame); + output_frame_cnt_++; + } + else + { + // copy the decoded surface info device or host + uint8_t* p_dec_frame = nullptr; + { + std::lock_guard lock(mtx_vp_frame_); + // if not enough frames in stock, allocate + if((unsigned) ++output_frame_cnt_ > vp_frames_.size()) + { 
+ num_alloced_frames_++; + DecFrameBuffer dec_frame = {}; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + // allocate device memory + HIP_API_CALL(hipMalloc((void**) &dec_frame.frame_ptr, GetFrameSize())); + } + else + { + dec_frame.frame_ptr = new uint8_t[GetFrameSize()]; + } + dec_frame.pts = pDispInfo->pts; + dec_frame.picture_index = pDispInfo->picture_index; + vp_frames_.push_back(dec_frame); + } + p_dec_frame = vp_frames_[output_frame_cnt_ - 1].frame_ptr; + } + // Copy luma data + uint32_t dst_pitch = disp_width_ * byte_per_pixel_; + uint8_t* p_src_ptr_y = static_cast(src_dev_ptr[0]) + + (disp_rect_.top + crop_rect_.top) * src_pitch[0] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + if(src_pitch[0] == dst_pitch) + { + int luma_size = src_pitch[0] * coded_height_; + HIP_API_CALL( + hipMemcpyDtoDAsync(p_dec_frame, p_src_ptr_y, luma_size, hip_stream_)); + } + else + { + // use 2d copy to copy an ROI + HIP_API_CALL(hipMemcpy2DAsync(p_dec_frame, + dst_pitch, + p_src_ptr_y, + src_pitch[0], + dst_pitch, + disp_height_, + hipMemcpyDeviceToDevice, + hip_stream_)); + } + } + else + HIP_API_CALL(hipMemcpy2DAsync(p_dec_frame, + dst_pitch, + p_src_ptr_y, + src_pitch[0], + dst_pitch, + disp_height_, + hipMemcpyDeviceToHost, + hip_stream_)); + + // Copy chroma plane ( ) + // rocDec output gives pointer to luma and chroma pointers seperated for the decoded + // frame + uint8_t* p_frame_uv = p_dec_frame + dst_pitch * disp_height_; + uint8_t* p_src_ptr_uv = + (num_chroma_planes_ == 1) + ? 
static_cast(src_dev_ptr[1]) + + ((disp_rect_.top + crop_rect_.top) >> 1) * src_pitch[1] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_ + : static_cast(src_dev_ptr[1]) + + (disp_rect_.top + crop_rect_.top) * src_pitch[1] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + if(src_pitch[1] == dst_pitch) + { + int chroma_size = chroma_height_ * dst_pitch; + HIP_API_CALL( + hipMemcpyDtoDAsync(p_frame_uv, p_src_ptr_uv, chroma_size, hip_stream_)); + } + else + { + // use 2d copy to copy an ROI + HIP_API_CALL(hipMemcpy2DAsync(p_frame_uv, + dst_pitch, + p_src_ptr_uv, + src_pitch[1], + dst_pitch, + chroma_height_, + hipMemcpyDeviceToDevice, + hip_stream_)); + } + } + else + HIP_API_CALL(hipMemcpy2DAsync(p_frame_uv, + dst_pitch, + p_src_ptr_uv, + src_pitch[1], + dst_pitch, + chroma_height_, + hipMemcpyDeviceToHost, + hip_stream_)); + + if(num_chroma_planes_ == 2) + { + uint8_t* p_frame_v = p_dec_frame + dst_pitch * (disp_height_ + chroma_height_); + uint8_t* p_src_ptr_v = static_cast(src_dev_ptr[2]) + + (disp_rect_.top + crop_rect_.top) * src_pitch[2] + + (disp_rect_.left + crop_rect_.left) * byte_per_pixel_; + if(out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) + { + if(src_pitch[2] == dst_pitch) + { + int chroma_size = chroma_height_ * dst_pitch; + HIP_API_CALL( + hipMemcpyDtoDAsync(p_frame_v, p_src_ptr_v, chroma_size, hip_stream_)); + } + else + { + // use 2d copy to copy an ROI + HIP_API_CALL(hipMemcpy2DAsync(p_frame_v, + dst_pitch, + p_src_ptr_v, + src_pitch[2], + dst_pitch, + chroma_height_, + hipMemcpyDeviceToDevice, + hip_stream_)); + } + } + else + HIP_API_CALL(hipMemcpy2DAsync(p_frame_v, + dst_pitch, + p_src_ptr_v, + src_pitch[2], + dst_pitch, + chroma_height_, + hipMemcpyDeviceToHost, + hip_stream_)); + } + + HIP_API_CALL(hipStreamSynchronize(hip_stream_)); + } + } + else + { + RocdecDecodeStatus dec_status; + memset(&dec_status, 0, sizeof(dec_status)); + rocDecStatus result = + 
rocDecGetDecodeStatus(roc_decoder_, pDispInfo->picture_index, &dec_status); + if(result == ROCDEC_SUCCESS && + (dec_status.decode_status == rocDecodeStatus_Error || + dec_status.decode_status == rocDecodeStatus_Error_Concealed)) + { + std::cerr << "Decode Error occurred for picture: " + << pic_num_in_dec_order_[pDispInfo->picture_index] << std::endl; + } + output_frame_cnt_++; + } + + return 1; +} + +int +RocVideoDecoder::GetSEIMessage(RocdecSeiMessageInfo* pSEIMessageInfo) +{ + uint32_t sei_num_mesages = pSEIMessageInfo->sei_message_count; + if(sei_num_mesages) + { + RocdecSeiMessage* p_sei_msg_info = pSEIMessageInfo->sei_message; + size_t total_SEI_buff_size = 0; + if((pSEIMessageInfo->picIdx < 0) || (pSEIMessageInfo->picIdx >= MAX_FRAME_NUM)) + { + ERR("Invalid picture index for SEI message: " + TOSTR(pSEIMessageInfo->picIdx)); + return 0; + } + for(uint32_t i = 0; i < sei_num_mesages; i++) + { + total_SEI_buff_size += p_sei_msg_info[i].sei_message_size; + } + if(!curr_sei_message_ptr_) + { + ERR("Out of Memory, Allocation failed for m_pCurrSEIMessage"); + return 0; + } + curr_sei_message_ptr_->sei_data = malloc(total_SEI_buff_size); + if(!curr_sei_message_ptr_->sei_data) + { + ERR("Out of Memory, Allocation failed for SEI Buffer"); + return 0; + } + memcpy(curr_sei_message_ptr_->sei_data, pSEIMessageInfo->sei_data, total_SEI_buff_size); + curr_sei_message_ptr_->sei_message = + (RocdecSeiMessage*) malloc(sizeof(RocdecSeiMessage) * sei_num_mesages); + if(!curr_sei_message_ptr_->sei_message) + { + free(curr_sei_message_ptr_->sei_data); + curr_sei_message_ptr_->sei_data = NULL; + return 0; + } + memcpy(curr_sei_message_ptr_->sei_message, + pSEIMessageInfo->sei_message, + sizeof(RocdecSeiMessage) * sei_num_mesages); + curr_sei_message_ptr_->sei_message_count = pSEIMessageInfo->sei_message_count; + sei_message_display_q_[pSEIMessageInfo->picIdx] = *curr_sei_message_ptr_; + } + return 1; +} + +int +RocVideoDecoder::DecodeFrame(const uint8_t* data, + size_t size, + int 
pkt_flags,
+                             int64_t        pts,
+                             int*           num_decoded_pics)
+{
+    // The counters are per call: they are incremented by the parser callbacks that run inside
+    // rocDecParseVideoData() below.
+    output_frame_cnt_     = 0;
+    output_frame_cnt_ret_ = 0;
+    decoded_pic_cnt_      = 0;
+
+    RocdecSourceDataPacket packet = {};
+    packet.payload                = data;
+    packet.payload_size           = size;
+    packet.flags                  = pkt_flags | ROCDEC_PKT_TIMESTAMP;
+    packet.pts                    = pts;
+    if(!data || size == 0)
+    {
+        // an empty packet marks the end of the stream
+        packet.flags |= ROCDEC_PKT_ENDOFSTREAM;
+    }
+    ROCDEC_API_CALL(rocDecParseVideoData(rocdec_parser_, &packet));
+    if(num_decoded_pics)
+    {
+        *num_decoded_pics = decoded_pic_cnt_;
+    }
+    return output_frame_cnt_;
+}
+
+/**
+ * @brief Returns the next frame produced by the last DecodeFrame() call.
+ *
+ * @param pts - optional; receives the timestamp of the returned frame
+ * @return uint8_t* - frame pointer, or nullptr when no frame is left
+ */
+uint8_t*
+RocVideoDecoder::GetFrame(int64_t* pts)
+{
+    if(output_frame_cnt_ <= 0) return nullptr;
+
+    std::lock_guard<std::mutex> lock(mtx_vp_frame_);
+    output_frame_cnt_--;
+    if(out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL && !vp_frames_q_.empty())
+    {
+        DecFrameBuffer& fb = vp_frames_q_.front();
+        if(pts) *pts = fb.pts;
+        return fb.frame_ptr;
+    }
+    // Never index past the frame store, even if the counters and the store got out of sync.
+    if(output_frame_cnt_ret_ >= 0 &&
+       static_cast<size_t>(output_frame_cnt_ret_) < vp_frames_.size())
+    {
+        const DecFrameBuffer& fb = vp_frames_[output_frame_cnt_ret_++];
+        if(pts) *pts = fb.pts;
+        return fb.frame_ptr;
+    }
+    return nullptr;
+}
+
+/**
+ * @brief function to release frame after use by the application: Only used with
+ * "OUT_SURFACE_MEM_DEV_INTERNAL"
+ *
+ * @param pTimestamp - timestamp of the frame to be released (unmapped)
+ * @param b_flushing - true when flushing; for copied output the oldest frame is then removed
+ *                     from the frame store
+ * @return true      - success
+ * @return false     - failed (no frame to release, or frame released out of order)
+ */
+
+bool
+RocVideoDecoder::ReleaseFrame(int64_t pTimestamp, bool b_flushing)
+{
+    if(out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) return true;  // nothing to do
+    if(out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL)
+    {
+        // if not flushing the buffers are re-used, so keep them
+        if(!b_flushing) return true;  // nothing to do
+
+        if(vp_frames_.empty())
+        {
+            std::cerr << "No decoded frame available to release" << std::endl;
+            return false;
+        }
+        if(pTimestamp != vp_frames_.front().pts)
+        {
+            std::cerr << "Decoded Frame is released out of order" << std::endl;
+            return false;
+        }
+        // NOTE(review): only the bookkeeping entry is dropped here; frame_ptr itself is not
+        // freed in this function - confirm who owns the buffer after a flush.
+        vp_frames_.erase(vp_frames_.begin());  // get rid of the frames from the framestore
+    }
+    // only needed when using internal mapped buffer
+    std::lock_guard<std::mutex> lock(mtx_vp_frame_);  // taken before the queue is inspected
+    if(!vp_frames_q_.empty())
+    {
+        if(pTimestamp != vp_frames_q_.front().pts)
+        {
+            std::cerr << "Decoded Frame is released out of order" << std::endl;
+            return false;
+        }
+        // pop decoded frame
+        vp_frames_q_.pop();
+    }
+    return true;
+}
+
+/**
+ * @brief function to release all internal frames and clear the q (used with reconfigure): Only used
+ * with "OUT_SURFACE_MEM_DEV_INTERNAL"
+ *
+ * @return true      - success (this implementation never fails)
+ */
+bool
+RocVideoDecoder::ReleaseInternalFrames()
+{
+    if(out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL) return true;  // nothing to do
+    // only needed when using internal mapped buffer
+    std::lock_guard<std::mutex> lock(mtx_vp_frame_);
+    while(!vp_frames_q_.empty())
+    {
+        // pop decoded frame
+        vp_frames_q_.pop();
+    }
+    return true;
+}
+
+void
+RocVideoDecoder::SaveFrameToFile(std::string        output_file_name,
+                                 void*              surf_mem,
+                                 OutputSurfaceInfo* surf_info,
+                                 size_t             rgb_image_size)
+{
+    uint8_t* hst_ptr           = nullptr;
+    bool     is_rgb            = (rgb_image_size != 0);
+    uint64_t output_image_size = is_rgb ? rgb_image_size : surf_info->output_surface_size_in_bytes;
+    if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL ||
+       surf_info->mem_type == OUT_SURFACE_MEM_DEV_COPIED)
+    {
+        // device surfaces are staged through a temporary host buffer before being written
+        if(hst_ptr == nullptr)
+        {
+            hst_ptr = new uint8_t[output_image_size];
+        }
+        hipError_t hip_status = hipSuccess;
+        hip_status            = hipMemcpyDtoH((void*) hst_ptr, surf_mem, output_image_size);
+        if(hip_status != hipSuccess)
+        {
+            std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hipGetErrorName(hip_status) << ")"
+                      << std::endl;
+            delete[] hst_ptr;
+            return;
+        }
+    }
+    else
+        hst_ptr = static_cast<uint8_t*>(surf_mem);
+
+    if(current_output_filename.empty())
+    {
+        current_output_filename = output_file_name;
+    }
+
+    // don't overwrite to the same file if reconfigure is detected for a resolution changes.
+    if(is_decoder_reconfigured_)
+    {
+        if(fp_out_)
+        {
+            fclose(fp_out_);
+            fp_out_ = nullptr;
+        }
+        // Append the width and height of the new stream to the old file name to create a file name
+        // to save the new frames do this only if resolution changes within a stream (e.g., decoding
+        // a multi-resolution stream using the videoDecode app) don't append to the output_file_name
+        // if multiple output file name is provided (e.g., decoding multi-files using the
+        // videDecodeMultiFiles)
+        if(!current_output_filename.compare(output_file_name))
+        {
+            std::string::size_type const pos(output_file_name.find_last_of('.'));
+            extra_output_file_count_++;
+            std::string to_append = "_" + std::to_string(surf_info->output_width) + "_" +
+                                    std::to_string(surf_info->output_height) + "_" +
+                                    std::to_string(extra_output_file_count_);
+            if(pos != std::string::npos)
+            {
+                output_file_name.insert(pos, to_append);
+            }
+            else
+            {
+                output_file_name += to_append;
+            }
+        }
+        is_decoder_reconfigured_ = false;
+    }
+
+    if(fp_out_ == nullptr)
+    {
+        fp_out_ = fopen(output_file_name.c_str(), "wb");
+        if(fp_out_ == nullptr)
+        {
+            // report the failure instead of silently dropping the frame
+            std::cerr << "ERROR: failed to open output file: " << output_file_name << std::endl;
+        }
+    }
+    if(fp_out_)
+    {
+        if(!is_rgb)
+        {
+            uint8_t* tmp_hst_ptr = hst_ptr;
+            if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL)
+            {
+                // skip to the first displayed/cropped pixel of the luma plane
+                tmp_hst_ptr += ((disp_rect_.top + crop_rect_.top) * surf_info->output_pitch) +
+                               (disp_rect_.left + crop_rect_.left) * surf_info->bytes_per_pixel;
+            }
+            auto img_width     = surf_info->output_width;
+            auto img_height    = surf_info->output_height;
+            auto output_stride = surf_info->output_pitch;
+            if(img_width * surf_info->bytes_per_pixel == output_stride &&
+               img_height == surf_info->output_vstride)
+            {
+                // no padding between rows or planes: dump the whole surface in one go
+                fwrite(hst_ptr, 1, output_image_size, fp_out_);
+            }
+            else
+            {
+                // padded surface: write row by row, skipping the padding
+                uint32_t width = surf_info->output_width * surf_info->bytes_per_pixel;
+                if(surf_info->bit_depth <= 16)
+                {
+                    for(uint32_t i = 0; i < surf_info->output_height; i++)
+                    {
+                        fwrite(tmp_hst_ptr, 1, width, fp_out_);
+                        tmp_hst_ptr += output_stride;
+                    }
+                    // dump chroma
+                    uint8_t* uv_hst_ptr = hst_ptr + output_stride * surf_info->output_vstride;
+                    if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL)
+                    {
+                        uv_hst_ptr +=
+                            (num_chroma_planes_ == 1)
+                                ? (((disp_rect_.top + crop_rect_.top) >> 1) *
+                                   surf_info->output_pitch) +
+                                      ((disp_rect_.left + crop_rect_.left) *
+                                       surf_info->bytes_per_pixel)
+                                : ((disp_rect_.top + crop_rect_.top) * surf_info->output_pitch) +
+                                      ((disp_rect_.left + crop_rect_.left) *
+                                       surf_info->bytes_per_pixel);
+                    }
+                    for(uint32_t i = 0; i < chroma_height_; i++)
+                    {
+                        fwrite(uv_hst_ptr, 1, width, fp_out_);
+                        uv_hst_ptr += output_stride;
+                    }
+                    if(num_chroma_planes_ == 2)
+                    {
+                        uv_hst_ptr =
+                            hst_ptr + output_stride * (surf_info->output_vstride + chroma_vstride_);
+                        if(surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL)
+                        {
+                            uv_hst_ptr +=
+                                ((disp_rect_.top + crop_rect_.top) * surf_info->output_pitch) +
+                                ((disp_rect_.left + crop_rect_.left) * surf_info->bytes_per_pixel);
+                        }
+                        for(uint32_t i = 0; i < chroma_height_; i++)
+                        {
+                            fwrite(uv_hst_ptr, 1, width, fp_out_);
+                            uv_hst_ptr += output_stride;
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            fwrite(hst_ptr, 1, rgb_image_size, fp_out_);
+        }
+    }
+
+    // hst_ptr is a buffer owned by this function only for the two device memory types (where it
+    // was allocated with new[]); for every other type it aliases the caller's surf_mem and must
+    // not be freed here.
+    const bool owns_host_copy = (surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL ||
+                                 surf_info->mem_type == OUT_SURFACE_MEM_DEV_COPIED);
+    if(hst_ptr && owns_host_copy)
+    {
+        delete[] hst_ptr;
+    }
+}
+
+/**
+ * @brief Closes the current output file (if any) so the next SaveFrameToFile() call opens a
+ * new one.
+ */
+void
+RocVideoDecoder::ResetSaveFrameToFile()
+{
+    if(fp_out_)
+    {
+        fclose(fp_out_);
+        fp_out_ = nullptr;
+    }
+}
+
+/**
+ * @brief Copies name, architecture and PCI identifiers out of the cached HIP device properties.
+ */
+void
+RocVideoDecoder::GetDeviceinfo(std::string& device_name,
+                               std::string& gcn_arch_name,
+                               int&         pci_bus_id,
+                               int&         pci_domain_id,
+                               int&         pci_device_id)
+{
+    device_name   = hip_dev_prop_.name;
+    gcn_arch_name = hip_dev_prop_.gcnArchName;
+    pci_bus_id    = hip_dev_prop_.pciBusID;
+    pci_domain_id = hip_dev_prop_.pciDomainID;
+    pci_device_id = hip_dev_prop_.pciDeviceID;
+}
+
+/**
+ * @brief Hands out a pointer to the decoder's output surface description.
+ *
+ * @param surface_info - receives the address of the internal OutputSurfaceInfo
+ * @return true  - *surface_info was set
+ * @return false - surface_info is null, or the display size is still zero
+ */
+bool
+RocVideoDecoder::GetOutputSurfaceInfo(OutputSurfaceInfo** surface_info)
+{
+    if(!surface_info) return false;
+    if(!disp_width_ || !disp_height_)
+    {
+        std::cerr << "ERROR: RocVideoDecoder is not intialized" << std::endl;
+        return false;
+    }
+    *surface_info = &output_surface_info_;
+    return true;
+}
+
+bool
+RocVideoDecoder::InitHIP(int device_id) +{ + HIP_API_CALL(hipGetDeviceCount(&num_devices_)); + if(num_devices_ < 1) + { + std::cerr << "ERROR: didn't find any GPU!" << std::endl; + return false; + } + HIP_API_CALL(hipSetDevice(device_id)); + HIP_API_CALL(hipGetDeviceProperties(&hip_dev_prop_, device_id)); + HIP_API_CALL(hipStreamCreate(&hip_stream_)); + return true; +} + +std::chrono::_V2::system_clock::time_point +RocVideoDecoder::StartTimer() +{ + return std::chrono::_V2::system_clock::now(); +} + +double +RocVideoDecoder::StopTimer(const std::chrono::_V2::system_clock::time_point& start_time) +{ + return std::chrono::duration(std::chrono::_V2::system_clock::now() - + start_time) + .count(); +} + +bool +RocVideoDecoder::CodecSupported(int device_id, rocDecVideoCodec codec_id, uint32_t bit_depth) +{ + RocdecDecodeCaps decode_caps; + decode_caps.device_id = device_id; + decode_caps.codec_type = codec_id; + decode_caps.chroma_format = rocDecVideoChromaFormat_420; + decode_caps.bit_depth_minus_8 = bit_depth - 8; + if(rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) + { + return false; + } + return true; +} + +void +RocVideoDecoder::WaitForDecodeCompletion() +{ + RocdecDecodeStatus dec_status; + memset(&dec_status, 0, sizeof(dec_status)); + do + { + rocDecGetDecodeStatus(roc_decoder_, last_decode_surf_idx_, &dec_status); + } while(dec_status.decode_status == rocDecodeStatus_InProgress); +} diff --git a/projects/rocprofiler-sdk/tests/bin/rocdecode/roc_video_dec.h b/projects/rocprofiler-sdk/tests/bin/rocdecode/roc_video_dec.h new file mode 100644 index 0000000000..90b7ae4b2d --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/rocdecode/roc_video_dec.h @@ -0,0 +1,648 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/*! + * \file + * \brief The AMD Video Decode Library. + * + * \defgroup group_amd_roc_video_dec rocDecode Video Decode: AMD Video Decode API + * \brief AMD The rocDecode video decoder for AMD’s GPUs. 
+ */ + +#define MAX_FRAME_NUM 16 + +typedef int(ROCDECAPI* PFNRECONFIGUEFLUSHCALLBACK)(void*, uint32_t, void*); + +typedef enum SeiAvcHevcPayloadType_enum +{ + SEI_TYPE_TIME_CODE = 136, + SEI_TYPE_USER_DATA_UNREGISTERED = 5 +} SeiAvcHevcPayloadType; + +typedef enum OutputSurfaceMemoryType_enum +{ + OUT_SURFACE_MEM_DEV_INTERNAL = + 0, /**< Internal interopped decoded surface memory(original mapped decoded surface) */ + OUT_SURFACE_MEM_DEV_COPIED = 1, /**< decoded output will be copied to a separate device memory + (the user doesn't need to call release) **/ + OUT_SURFACE_MEM_HOST_COPIED = 2, /**< decoded output will be copied to a separate host memory + (the user doesn't need to call release) **/ + OUT_SURFACE_MEM_NOT_MAPPED = 3 /**< < decoded output is not available (interop won't be used): + useful for decode only performance app*/ +} OutputSurfaceMemoryType; + +#define TOSTR(X) std::to_string(static_cast(X)) +#define STR(X) std::string(X) + +#if DBGINFO +# define INFO(X) \ + std::clog << "[INF] " \ + << " {" << __func__ << "} " \ + << " " << X << std::endl; +#else +# define INFO(X) ; +#endif +#define ERR(X) \ + std::cerr << "[ERR] " \ + << " {" << __func__ << "} " \ + << " " << X << std::endl; + +inline int +GetChromaPlaneCount(rocDecVideoSurfaceFormat surface_format) +{ + int num_planes = 1; + switch(surface_format) + { + case rocDecVideoSurfaceFormat_NV12: + case rocDecVideoSurfaceFormat_P016: num_planes = 1; break; + case rocDecVideoSurfaceFormat_YUV444: + case rocDecVideoSurfaceFormat_YUV444_16Bit: num_planes = 2; break; + case rocDecVideoSurfaceFormat_YUV420: + case rocDecVideoSurfaceFormat_YUV420_16Bit: num_planes = 2; break; + } + + return num_planes; +}; + +inline float +GetChromaHeightFactor(rocDecVideoSurfaceFormat surface_format) +{ + float factor = 0.5; + switch(surface_format) + { + case rocDecVideoSurfaceFormat_NV12: + case rocDecVideoSurfaceFormat_P016: + case rocDecVideoSurfaceFormat_YUV420: + case rocDecVideoSurfaceFormat_YUV420_16Bit: factor = 
0.5; break; + case rocDecVideoSurfaceFormat_YUV444: + case rocDecVideoSurfaceFormat_YUV444_16Bit: factor = 1.0; break; + } + + return factor; +}; + +class RocVideoDecodeException : public std::exception +{ +public: + explicit RocVideoDecodeException(const std::string& message, const int err_code) + : _message(message) + , _err_code(err_code) + {} + explicit RocVideoDecodeException(const std::string& message) + : _message(message) + , _err_code(-1) + {} + virtual const char* what() const throw() override { return _message.c_str(); } + int Geterror_code() const { return _err_code; } + +private: + std::string _message; + int _err_code; +}; + +#define ROCDEC_THROW(X, CODE) \ + throw RocVideoDecodeException(" { " + std::string(__func__) + " } " + X, CODE); +#define THROW(X) throw RocVideoDecodeException(" { " + std::string(__func__) + " } " + X); + +#define ROCDEC_API_CALL(rocDecAPI) \ + do \ + { \ + rocDecStatus error_code = rocDecAPI; \ + if(error_code != ROCDEC_SUCCESS) \ + { \ + std::ostringstream error_log; \ + error_log << #rocDecAPI << " returned " << rocDecGetErrorName(error_code) << " at " \ + << __FILE__ << ":" << __LINE__; \ + ROCDEC_THROW(error_log.str(), error_code); \ + } \ + } while(0) + +#define HIP_API_CALL(call) \ + do \ + { \ + hipError_t hip_status = call; \ + if(hip_status != hipSuccess) \ + { \ + const char* sz_err_name = NULL; \ + sz_err_name = hipGetErrorName(hip_status); \ + std::ostringstream error_log; \ + error_log << "hip API error " << sz_err_name; \ + ROCDEC_THROW(error_log.str(), hip_status); \ + } \ + } while(0) + +struct Rect +{ + int left; + int top; + int right; + int bottom; +}; + +struct Dim +{ + int w, h; +}; + +static inline int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + +typedef struct DecFrameBuffer_ +{ + uint8_t* frame_ptr; /**< device memory pointer for the decoded frame */ + int64_t pts; /**< timestamp for the decoded frame */ + int picture_index; /**< surface index for the 
decoded frame */ +} DecFrameBuffer; + +typedef struct OutputSurfaceInfoType +{ + uint32_t output_width; /**< Output width of decoded surface*/ + uint32_t output_height; /**< Output height of decoded surface*/ + uint32_t output_pitch; /**< Output pitch in bytes of luma plane, chroma pitch can be inferred + based on chromaFormat*/ + uint32_t output_vstride; /**< Output vertical stride in case of using internal mem pointer **/ + uint32_t chroma_height; /**< Chroma plane height **/ + Rect disp_rect; /**< Display area **/ + uint32_t bytes_per_pixel; /**< Output BytesPerPixel of decoded image*/ + uint32_t bit_depth; /**< Output BitDepth of the image*/ + uint32_t num_chroma_planes; /**< Output Chroma number of planes*/ + uint64_t output_surface_size_in_bytes; /**< Output Image Size in Bytes; including both luma and + chroma planes*/ + rocDecVideoSurfaceFormat surface_format; /**< Chroma format of the decoded image*/ + OutputSurfaceMemoryType mem_type; /**< Output mem_type of the surface*/ +} OutputSurfaceInfo; + +typedef struct ReconfigParams_t +{ + PFNRECONFIGUEFLUSHCALLBACK p_fn_reconfigure_flush; + void* p_reconfig_user_struct; + uint32_t reconfig_flush_mode; +} ReconfigParams; + +class RocVideoDecoder +{ +public: + /** + * @brief Construct a new Roc Video Decoder object + * + * @param device_id : device_id to initialize HIP and VCN + * @param out_mem_type : out_mem_type for the decoded surface + * @param codec : codec type + * @param force_zero_latency : to force zero latency (output in decoding orde) + * @param p_crop_rect : to crop output + * @param extract_user_SEI_Message : enable to extract SEI + * @param disp_delay : output delayed by #disp_delay surfaces + * @param max_width : Max. width for the output surface + * @param max_height : Max. 
height for the output surface + * @param clk_rate : FPS clock-rate + */ + RocVideoDecoder(int device_id, + OutputSurfaceMemoryType out_mem_type, + rocDecVideoCodec codec, + bool force_zero_latency = false, + const Rect* p_crop_rect = nullptr, + bool extract_user_SEI_Message = false, + uint32_t disp_delay = 0, + int max_width = 0, + int max_height = 0, + uint32_t clk_rate = 1000); + ~RocVideoDecoder(); + + rocDecVideoCodec GetCodecId() { return codec_id_; } + + hipStream_t GetStream() { return hip_stream_; } + + /** + * @brief Get the output frame width + */ + uint32_t GetWidth() + { + assert(disp_width_); + return disp_width_; + } + + /** + * @brief This function is used to get the actual decode width + */ + int GetDecodeWidth() + { + assert(coded_width_); + return coded_width_; + } + + /** + * @brief Get the output frame height + */ + uint32_t GetHeight() + { + assert(disp_height_); + return disp_height_; + } + + /** + * @brief This function is used to get the current chroma height. + */ + int GetChromaHeight() + { + assert(chroma_height_); + return chroma_height_; + } + + /** + * @brief This function is used to get the number of chroma planes. + */ + int GetNumChromaPlanes() + { + assert(num_chroma_planes_); + return num_chroma_planes_; + } + + /** + * @brief This function is used to get the current frame size based on pixel format. 
+ */ + virtual int GetFrameSize() + { + assert(disp_width_); + return disp_width_ * (disp_height_ + (chroma_height_ * num_chroma_planes_)) * + byte_per_pixel_; + } + + /** + * @brief Get the Bit Depth and BytesPerPixel associated with the pixel format + * + * @return uint32_t + */ + uint32_t GetBitDepth() + { + assert(bitdepth_minus_8_); + return (bitdepth_minus_8_ + 8); + } + uint32_t GetBytePerPixel() + { + assert(byte_per_pixel_); + return byte_per_pixel_; + } + /** + * @brief Functions to get the output surface attributes + */ + size_t GetSurfaceSize() + { + assert(surface_size_); + return surface_size_; + } + uint32_t GetSurfaceStride() + { + assert(surface_stride_); + return surface_stride_; + } + // RocDecImageFormat GetSubsampling() { return subsampling_; } + /** + * @brief Get the name of the output format + * + * @param codec_id + * @return std::string + */ + const char* GetCodecFmtName(rocDecVideoCodec codec_id); + + /** + * @brief function to return the name from surface_format_id + * + * @param surface_format_id - enum for surface format + * @return const char* + */ + const char* GetSurfaceFmtName(rocDecVideoSurfaceFormat surface_format_id); + + /** + * @brief Get the pointer to the Output Image Info + * + * @param surface_info ptr to output surface info + * @return true + * @return false + */ + bool GetOutputSurfaceInfo(OutputSurfaceInfo** surface_info); + + /** + * @brief Function to set the Reconfig Params object + * + * @param p_reconfig_params: pointer to reconfig params struct + * @return true : success + * @return false : fail + */ + bool SetReconfigParams(ReconfigParams* p_reconfig_params, bool b_force_reconfig_flush = false); + + /** + * @brief Function to force Reconfigure Flush: needed for random seeking to key frames + * + * @return int 1: Success 0: Fail + */ + int FlushAndReconfigure(); + /** + * @brief this function decodes a frame and returns the number of frames avalable for display + * + * @param data - pointer to the data buffer that 
is to be decode + * @param size - size of the data buffer in bytes + * @param pts - presentation timestamp + * @param flags - video packet flags + * @param num_decoded_pics - nummber of pictures decoded in this call + * @return int - num of frames to display + */ + virtual int DecodeFrame(const uint8_t* data, + size_t size, + int pkt_flags, + int64_t pts = 0, + int* num_decoded_pics = nullptr); + /** + * @brief This function returns a decoded frame and timestamp. This should be called in a loop + * fetching all the available frames + * + */ + virtual uint8_t* GetFrame(int64_t* pts); + + /** + * @brief function to release frame after use by the application: Only used with + * "OUT_SURFACE_MEM_DEV_INTERNAL" + * + * @param pTimestamp - timestamp of the frame to be released (unmapped) + * @param b_flushing - true when flushing + * @return true - success + * @return false - falied + */ + virtual bool ReleaseFrame(int64_t pTimestamp, bool b_flushing = false); + + /** + * @brief utility function to save image to a file + * + * @param output_file_name - file to write + * @param dev_mem - dev_memory pointer of the frame + * @param image_info - output image info + * @param is_output_RGB - to write in RGB + */ + // void SaveImage(std::string output_file_name, void* dev_mem, OutputImageInfo* image_info, bool + // is_output_RGB = 0); + + /** + * @brief Get the Device info for the current device + * + * @param device_name + * @param gcn_arch_name + * @param pci_bus_id + * @param pci_domain_id + * @param pci_device_id + */ + void GetDeviceinfo(std::string& device_name, + std::string& gcn_arch_name, + int& pci_bus_id, + int& pci_domain_id, + int& pci_device_id); + + /** + * @brief Helper function to dump decoded output surface to file + * + * @param output_file_name - Output file name + * @param dev_mem - pointer to surface memory + * @param surf_info - surface info + * @param rgb_image_size - image size for rgb (optional). 
A non_zero value indicates the + * surf_mem holds an rgb interleaved image and the entire size will be dumped to file + */ + virtual void SaveFrameToFile(std::string output_file_name, + void* surf_mem, + OutputSurfaceInfo* surf_info, + size_t rgb_image_size = 0); + + /** + * @brief Helper funtion to close a existing file and dump to new file in case of multiple files + * using same decoder + */ + virtual void ResetSaveFrameToFile(); + + /** + * @brief Get the Num Of Flushed Frames from video decoder object + * + * @return int32_t + */ + int32_t GetNumOfFlushedFrames() { return num_frames_flushed_during_reconfig_; } + + /*! \brief Function to wait for the decode completion of the last submitted picture + */ + void WaitForDecodeCompletion(); + + // Session overhead refers to decoder initialization and deinitialization time + void AddDecoderSessionOverHead(std::thread::id session_id, double duration) + { + session_overhead_[session_id] += duration; + } + double GetDecoderSessionOverHead(std::thread::id session_id) + { + if(session_overhead_.find(session_id) != session_overhead_.end()) + { + return session_overhead_[session_id]; + } + else + { + return 0; + } + } + + /** + * @brief Check if the given Video Codec is supported on the given GPU + * + * @return rocDecStatus + */ + bool CodecSupported(int device_id, rocDecVideoCodec codec_id, uint32_t bit_depth); + + /** + * @brief This function reconfigure decoder if there is a change in sequence params. 
+ */ + virtual int ReconfigureDecoder(RocdecVideoFormat* p_video_format); + +protected: + /** + * @brief Callback function to be registered for getting a callback when decoding of sequence + * starts + */ + static int ROCDECAPI HandleVideoSequenceProc(void* p_user_data, + RocdecVideoFormat* p_video_format) + { + return ((RocVideoDecoder*) p_user_data)->HandleVideoSequence(p_video_format); + } + + /** + * @brief Callback function to be registered for getting a callback when a decoded frame is + * ready to be decoded + */ + static int ROCDECAPI HandlePictureDecodeProc(void* p_user_data, RocdecPicParams* p_pic_params) + { + return ((RocVideoDecoder*) p_user_data)->HandlePictureDecode(p_pic_params); + } + + /** + * @brief Callback function to be registered for getting a callback when a decoded frame is + * available for display + */ + static int ROCDECAPI HandlePictureDisplayProc(void* p_user_data, + RocdecParserDispInfo* p_disp_info) + { + return ((RocVideoDecoder*) p_user_data)->HandlePictureDisplay(p_disp_info); + } + + /** + * @brief Callback function to be registered for getting a callback when all the unregistered + * user SEI Messages are parsed for a frame. + */ + static int ROCDECAPI HandleSEIMessagesProc(void* p_user_data, + RocdecSeiMessageInfo* p_sei_message_info) + { + return ((RocVideoDecoder*) p_user_data)->GetSEIMessage(p_sei_message_info); + } + + /** + * @brief This function gets called when a sequence is ready to be decoded. The function also + gets called when there is format change + */ + int HandleVideoSequence(RocdecVideoFormat* p_video_format); + + /** + * @brief This function gets called when a picture is ready to be decoded. cuvidDecodePicture + * is called from this function to decode the picture + */ + int HandlePictureDecode(RocdecPicParams* p_pic_params); + + /** + * @brief This function gets called after a picture is decoded and available for display. 
+ Frames are fetched and stored in internal buffer + */ + int HandlePictureDisplay(RocdecParserDispInfo* p_disp_info); + /** + * @brief This function gets called when all unregistered user SEI messages are parsed for a + * frame + */ + int GetSEIMessage(RocdecSeiMessageInfo* p_sei_message_info); + + /** + * @brief function to release all internal frames and clear the vp_frames_q_ (used with + * reconfigure): Only used with "OUT_SURFACE_MEM_DEV_INTERNAL" + * + * @return true - success + * @return false - falied + */ + bool ReleaseInternalFrames(); + + /** + * @brief Function to Initialize GPU-HIP + * + */ + bool InitHIP(int device_id); + + /** + * @brief Function to get start time + * + */ + std::chrono::_V2::system_clock::time_point StartTimer(); + + /** + * @brief Function to get elapsed time + * + */ + double StopTimer(const std::chrono::_V2::system_clock::time_point& start_time); + + int num_devices_; + int device_id_; + RocdecVideoParser rocdec_parser_ = nullptr; + rocDecDecoderHandle roc_decoder_ = nullptr; + OutputSurfaceMemoryType out_mem_type_ = OUT_SURFACE_MEM_DEV_INTERNAL; + bool b_extract_sei_message_ = false; + bool b_force_zero_latency_ = false; + uint32_t disp_delay_; + ReconfigParams* p_reconfig_params_ = nullptr; + bool b_force_recofig_flush_ = false; + int32_t num_frames_flushed_during_reconfig_ = 0; + hipDeviceProp_t hip_dev_prop_; + hipStream_t hip_stream_; + rocDecVideoCodec codec_id_ = rocDecVideoCodec_NumCodecs; + rocDecVideoChromaFormat video_chroma_format_ = rocDecVideoChromaFormat_420; + rocDecVideoSurfaceFormat video_surface_format_ = rocDecVideoSurfaceFormat_NV12; + RocdecSeiMessageInfo* curr_sei_message_ptr_ = nullptr; + RocdecSeiMessageInfo sei_message_display_q_[MAX_FRAME_NUM]; + RocdecVideoFormat* curr_video_format_ptr_ = nullptr; + int output_frame_cnt_ = 0, output_frame_cnt_ret_ = 0; + int decoded_pic_cnt_ = 0; + int decode_poc_ = 0, pic_num_in_dec_order_[MAX_FRAME_NUM]; + int num_alloced_frames_ = 0; + int last_decode_surf_idx_ = 
0; + std::ostringstream input_video_info_str_; + int bitdepth_minus_8_ = 0; + uint32_t byte_per_pixel_ = 1; + uint32_t coded_width_ = 0; + uint32_t disp_width_ = 0; + uint32_t coded_height_ = 0; + uint32_t disp_height_ = 0; + uint32_t target_width_ = 0; + uint32_t target_height_ = 0; + int max_width_ = 0, max_height_ = 0; + uint32_t chroma_height_ = 0, chroma_width_ = 0; + uint32_t num_chroma_planes_ = 0; + uint32_t num_components_ = 0; + uint32_t surface_stride_ = 0; + uint32_t surface_vstride_ = 0, + chroma_vstride_ = + 0; // vertical stride between planes: used when using internal dev memory + size_t surface_size_ = 0; + OutputSurfaceInfo output_surface_info_ = {}; + std::mutex mtx_vp_frame_; + std::vector vp_frames_; // vector of decoded frames + std::queue vp_frames_q_; + Rect disp_rect_ = {}; // displayable area specified in the bitstream + Rect crop_rect_ = {}; // user specified region of interest within diplayable area disp_rect_ + FILE* fp_sei_ = NULL; + FILE* fp_out_ = NULL; + bool is_decoder_reconfigured_ = false; + std::string current_output_filename = ""; + uint32_t extra_output_file_count_ = 0; + std::thread::id + decoder_session_id_; // Decoder session identifier. Used to gather session level stats. + std::unordered_map + session_overhead_; // Records session overhead of initialization+deinitialization time. + // Format is (thread id, duration) +}; diff --git a/projects/rocprofiler-sdk/tests/bin/rocdecode/rocdecode.cpp b/projects/rocprofiler-sdk/tests/bin/rocdecode/rocdecode.cpp new file mode 100644 index 0000000000..06cdf9defa --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/rocdecode/rocdecode.cpp @@ -0,0 +1,109 @@ +/* +Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. 
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include "roc_video_dec.h"
+
+// Test driver for rocDecode API tracing. Usage: <binary> -i <video-file>
+// Creates a bitstream reader and a RocVideoDecoder for the input, fetches one chunk of picture
+// data and submits it three times. Returns 1 if "-i" has no value or a rocDecode setup call fails.
+int
+main(int argc, char** argv)
+{
+    // Get input file
+    std::string input_file_path{};
+    for(int i = 1; i < argc; i++)
+    {
+        if(!strcmp(argv[i], "-i"))
+        {
+            if(++i == argc)
+            {
+                std::cerr << "Provide path to input file" << std::endl;
+                return 1;  // argv[argc] is a null pointer: never assign it to std::string
+            }
+            input_file_path = argv[i];
+        }
+    }
+    // Set up bitstreamreader
+    RocdecBitstreamReader bs_reader = nullptr;
+    rocDecVideoCodec rocdec_codec_id{};
+    int bit_depth{};
+    if(rocDecCreateBitstreamReader(&bs_reader, input_file_path.c_str()) != ROCDEC_SUCCESS)
+    {
+        std::cerr << "Failed to create the bitstream reader." << std::endl;
+        return 1;
+    }
+    if(rocDecGetBitstreamCodecType(bs_reader, &rocdec_codec_id) != ROCDEC_SUCCESS)
+    {
+        std::cerr << "Failed to get stream codec type." << std::endl;
+        return 1;
+    }
+    if(rocdec_codec_id >= rocDecVideoCodec_NumCodecs)
+    {
+        std::cerr << "Unsupported stream file type or codec type by the bitstream reader. Exiting."
+                  << std::endl;
+        return 1;
+    }
+    if(rocDecGetBitstreamBitDepth(bs_reader, &bit_depth) != ROCDEC_SUCCESS)
+    {
+        std::cerr << "Failed to get stream bit depth." << std::endl;
+        return 1;
+    }
+
+    // Set up video decoder (automatic storage: destroyed on every path out of main, no leak)
+    int device_id = 0;
+    OutputSurfaceMemoryType mem_type = OUT_SURFACE_MEM_DEV_INTERNAL;
+    bool b_force_zero_latency = false;
+    Rect* p_crop_rect = nullptr;
+    int disp_delay = 1;
+    bool b_extract_sei_messages = false;
+    RocVideoDecoder viddec(device_id,
+                           mem_type,
+                           rocdec_codec_id,
+                           b_force_zero_latency,
+                           p_crop_rect,
+                           b_extract_sei_messages,
+                           disp_delay);
+
+    uint8_t* pvideo = nullptr;
+    int n_video_bytes = 0;
+    int64_t pts = 0;
+    int pkg_flags = 0;
+    int decoded_pics = 0;
+    if(rocDecGetBitstreamPicData(bs_reader, &pvideo, &n_video_bytes, &pts) != ROCDEC_SUCCESS)
+    {
+        std::cerr << "Failed to get picture data." << std::endl;
+        return 1;
+    }
+    // Treat 0 bitstream size as end of stream indicator
+    if(n_video_bytes == 0)
+    {
+        pkg_flags |= ROCDEC_PKT_ENDOFSTREAM;
+    }
+    viddec.DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics);
+    viddec.DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics);
+    viddec.DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics);
+    if(bs_reader) rocDecDestroyBitstreamReader(bs_reader);
+}
diff --git a/projects/rocprofiler-sdk/tests/pytest-packages/tests/rocprofv3.py b/projects/rocprofiler-sdk/tests/pytest-packages/tests/rocprofv3.py
index ed08eb7f08..85f86f818b 100644
--- a/projects/rocprofiler-sdk/tests/pytest-packages/tests/rocprofv3.py
+++ b/projects/rocprofiler-sdk/tests/pytest-packages/tests/rocprofv3.py
@@ -26,7 +26,15 @@ from __future__ import absolute_import
 def test_perfetto_data(
     pftrace_data,
     json_data,
-    categories=("hip", "hsa", "marker", "kernel", "memory_copy", "memory_allocation"),
+    categories=(
+        "hip",
+        "hsa",
+        "marker",
+        "kernel",
+        "memory_copy",
+        "memory_allocation",
+        "rocdecode_api",
+    ),
 ):
 
     mapping = {
@@ -36,6 +44,7 @@ def test_perfetto_data(
         "kernel": ("kernel_dispatch", "kernel_dispatch"),
         "memory_copy": ("memory_copy", "memory_copy"),
         "memory_allocation": ("memory_allocation", "memory_allocation"),
+        "rocdecode_api": ("rocdecode_api", "rocdecode_api"),
     }
 
     # make sure they specified valid categories
@@ -73,6 +82,7 @@ def test_otf2_data(
         "kernel": ("kernel_dispatch", "kernel_dispatch"),
         "memory_copy": ("memory_copy", "memory_copy"),
         "memory_allocation": ("memory_allocation", "memory_allocation"),
+        "rocdecode_api": ("rocdecode_api", "rocdecode_api"),
     }
 
     # make sure they specified valid categories
diff --git a/projects/rocprofiler-sdk/tests/rocdecode/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocdecode/CMakeLists.txt
new file mode 100644
index 0000000000..85d9f6a5e8
--- /dev/null
+++ b/projects/rocprofiler-sdk/tests/rocdecode/CMakeLists.txt
@@ -0,0 +1,53 @@
+#
+#
+
+cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-tests-rocdecode-tracing + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) + +if(ROCPROFILER_MEMCHECK_PRELOAD_ENV) + set(PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}:$") +else() + set(PRELOAD_ENV "LD_PRELOAD=$") +endif() + +set(ROCDECODE_VIDEO_FILE + "${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H265.265") +if(NOT EXISTS "${ROCDECODE_VIDEO_FILE}") + message( + FATAL_ERROR + "Unable to find video file for rocdecode tests: ${ROCDECODE_VIDEO_FILE}") +endif() +add_test(NAME test-rocdecode-tracing-execute COMMAND $ -i + ${ROCDECODE_VIDEO_FILE}) + +set(rocdecode-tracing-env + "${PRELOAD_ENV}" + "ROCPROFILER_TOOL_OUTPUT_FILE=rocdecode-tracing-test.json" + "LD_LIBRARY_PATH=$:$ENV{LD_LIBRARY_PATH}" + ) + +set_tests_properties( + test-rocdecode-tracing-execute + PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT + "${rocdecode-tracing-env}" FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}") + +# copy to binary directory +rocprofiler_configure_pytest_files(COPY validate.py conftest.py CONFIG pytest.ini) + +add_test(NAME test-rocdecode-tracing-validate + COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input + ${CMAKE_CURRENT_BINARY_DIR}/rocdecode-tracing-test.json) + +set_tests_properties( + test-rocdecode-tracing-validate + PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS + test-rocdecode-tracing-execute FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}") diff --git a/projects/rocprofiler-sdk/tests/rocdecode/conftest.py b/projects/rocprofiler-sdk/tests/rocdecode/conftest.py new file mode 100644 index 0000000000..6924b2699e --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocdecode/conftest.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 + +import json +import pytest + +from rocprofiler_sdk.pytest_utils.dotdict import dotdict + + +def pytest_addoption(parser): + parser.addoption( + "--input", + 
action="store", + default="rocdecode-tracing-test.json", + help="Input JSON", + ) + + +@pytest.fixture +def input_data(request): + filename = request.config.getoption("--input") + with open(filename, "r") as inp: + return dotdict(json.load(inp)) diff --git a/projects/rocprofiler-sdk/tests/rocdecode/pytest.ini b/projects/rocprofiler-sdk/tests/rocdecode/pytest.ini new file mode 100644 index 0000000000..5e1e1c14a0 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocdecode/pytest.ini @@ -0,0 +1,5 @@ + +[pytest] +addopts = --durations=20 -rA -s -vv +testpaths = validate.py +pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages diff --git a/projects/rocprofiler-sdk/tests/rocdecode/validate.py b/projects/rocprofiler-sdk/tests/rocdecode/validate.py new file mode 100644 index 0000000000..e1619349ca --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocdecode/validate.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 + +import sys +import pytest + + +# helper function +def node_exists(name, data, min_len=1): + assert name in data + assert data[name] is not None + if isinstance(data[name], (list, tuple, dict, set)): + assert len(data[name]) >= min_len, f"{name}:\n{data}" + + +def test_data_structure(input_data): + """verify minimum amount of expected data is present""" + data = input_data + + node_exists("rocprofiler-sdk-json-tool", data) + + sdk_data = data["rocprofiler-sdk-json-tool"] + + node_exists("metadata", sdk_data) + node_exists("pid", sdk_data["metadata"]) + node_exists("main_tid", sdk_data["metadata"]) + node_exists("init_time", sdk_data["metadata"]) + node_exists("fini_time", sdk_data["metadata"]) + + node_exists("agents", sdk_data) + node_exists("call_stack", sdk_data) + node_exists("callback_records", sdk_data) + node_exists("buffer_records", sdk_data) + + node_exists("names", sdk_data["callback_records"]) + node_exists("hsa_api_traces", sdk_data["callback_records"]) + node_exists("hip_api_traces", sdk_data["callback_records"]) + 
node_exists("memory_allocations", sdk_data["callback_records"]) + node_exists("rocdecode_api_traces", sdk_data["callback_records"]) + + node_exists("names", sdk_data["buffer_records"]) + node_exists("hsa_api_traces", sdk_data["buffer_records"]) + node_exists("hip_api_traces", sdk_data["buffer_records"]) + node_exists("memory_allocations", sdk_data["buffer_records"]) + node_exists("rocdecode_api_traces", sdk_data["buffer_records"]) + + +def test_size_entries(input_data): + # check that size fields are > 0 but account for function arguments + # which are named "size" + def check_size(data, bt): + if "size" in data.keys(): + if isinstance(data["size"], str) and bt.endswith('["args"]'): + pass + else: + assert data["size"] > 0, f"origin: {bt}" + + # recursively check the entire data structure + def iterate_data(data, bt): + if isinstance(data, (list, tuple)): + for i, itr in enumerate(data): + if isinstance(itr, dict): + check_size(itr, f"{bt}[{i}]") + iterate_data(itr, f"{bt}[{i}]") + elif isinstance(data, dict): + check_size(data, f"{bt}") + for key, itr in data.items(): + iterate_data(itr, f'{bt}["{key}"]') + + # start recursive check over entire JSON dict + iterate_data(input_data, "input_data") + + +def test_timestamps(input_data): + """Verify starting timestamps are less than ending timestamps""" + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + cb_start = {} + cb_end = {} + for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]: + for itr in sdk_data["callback_records"][titr]: + cid = itr["correlation_id"]["internal"] + phase = itr["phase"] + if phase == 1: + cb_start[cid] = itr["timestamp"] + elif phase == 2: + cb_end[cid] = itr["timestamp"] + assert cb_start[cid] <= itr["timestamp"] + else: + assert phase == 1 or phase == 2 + + for itr in sdk_data["buffer_records"][titr]: + assert itr["start_timestamp"] <= itr["end_timestamp"] + + for titr in ["memory_allocations"]: + for itr in sdk_data["buffer_records"][titr]: + assert 
itr["start_timestamp"] < itr["end_timestamp"], f"[{titr}] {itr}" + assert itr["correlation_id"]["internal"] > 0, f"[{titr}] {itr}" + assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}" + assert ( + sdk_data["metadata"]["init_time"] < itr["start_timestamp"] + ), f"[{titr}] {itr}" + assert ( + sdk_data["metadata"]["init_time"] < itr["end_timestamp"] + ), f"[{titr}] {itr}" + assert ( + sdk_data["metadata"]["fini_time"] > itr["start_timestamp"] + ), f"[{titr}] {itr}" + assert ( + sdk_data["metadata"]["fini_time"] > itr["end_timestamp"] + ), f"[{titr}] {itr}" + + api_start = cb_start[itr["correlation_id"]["internal"]] + # api_end = cb_end[itr["correlation_id"]["internal"]] + assert api_start < itr["start_timestamp"], f"[{titr}] {itr}" + # assert api_end <= itr["end_timestamp"], f"[{titr}] {itr}" + + +def test_internal_correlation_ids(input_data): + """Assure correlation ids are unique""" + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + api_corr_ids = [] + for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]: + for itr in sdk_data["callback_records"][titr]: + api_corr_ids.append(itr["correlation_id"]["internal"]) + + for itr in sdk_data["buffer_records"][titr]: + api_corr_ids.append(itr["correlation_id"]["internal"]) + + api_corr_ids_sorted = sorted(api_corr_ids) + api_corr_ids_unique = list(set(api_corr_ids)) + + for itr in sdk_data["buffer_records"]["memory_allocations"]: + assert itr["correlation_id"]["internal"] in api_corr_ids_unique + + len_corr_id_unq = len(api_corr_ids_unique) + assert len(api_corr_ids) != len_corr_id_unq + assert max(api_corr_ids_sorted) == len_corr_id_unq + + +def test_external_correlation_ids(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + extern_corr_ids = [] + for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]: + for itr in sdk_data["callback_records"][titr]: + assert itr["correlation_id"]["external"] > 0 + assert itr["thread_id"] 
== itr["correlation_id"]["external"] + extern_corr_ids.append(itr["correlation_id"]["external"]) + + extern_corr_ids = list(set(sorted(extern_corr_ids))) + for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]: + for itr in sdk_data["buffer_records"][titr]: + assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}" + assert ( + itr["thread_id"] == itr["correlation_id"]["external"] + ), f"[{titr}] {itr}" + assert itr["thread_id"] in extern_corr_ids, f"[{titr}] {itr}" + assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}" + + for titr in ["memory_allocations"]: + for itr in sdk_data["buffer_records"][titr]: + assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}" + assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}" + + for itr in sdk_data["callback_records"][titr]: + assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}" + assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}" + + +def get_operation(record, kind_name, op_name=None): + for idx, itr in enumerate(record["names"]): + if kind_name == itr["kind"]: + if op_name is None: + return idx, itr["operations"] + else: + for oidx, oname in enumerate(itr["operations"]): + if op_name == oname: + return oidx + return None + + +def test_rocdecode_traces(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + callback_records = sdk_data["callback_records"] + buffer_records = sdk_data["buffer_records"] + + rocdecode_bf_traces = sdk_data["buffer_records"]["rocdecode_api_traces"] + rocdecode_api_bf_ops = get_operation(buffer_records, "ROCDECODE_API") + assert len(rocdecode_api_bf_ops[1]) == 16 + + rocdecode_cb_traces = sdk_data["callback_records"]["rocdecode_api_traces"] + rocdecode_api_cb_ops = get_operation(callback_records, "ROCDECODE_API") + + assert ( + rocdecode_api_bf_ops[1] == rocdecode_api_cb_ops[1] + and len(rocdecode_api_cb_ops[1]) == 16 + ) + + # check that buffer 
and callback records agree + phase_enter_count = 0 + phase_end_count = 0 + + api_calls = [] + + for api_call in rocdecode_cb_traces: + if api_call["phase"] == 1: + phase_enter_count += 1 + api_calls.append(rocdecode_api_cb_ops[1][api_call["operation"]]) + if api_call["phase"] == 2: + phase_end_count += 1 + + assert phase_enter_count == phase_end_count == len(rocdecode_bf_traces) + + for call in [ + "rocDecCreateBitstreamReader", + "rocDecGetBitstreamCodecType", + "rocDecGetBitstreamBitDepth", + "rocDecCreateVideoParser", + "rocDecGetBitstreamPicData", + "rocDecGetDecoderCaps", + "rocDecCreateDecoder", + "rocDecDecodeFrame", + "rocDecParseVideoData", + "rocDecGetVideoFrame", + "rocDecGetDecodeStatus", + "rocDecDestroyBitstreamReader", + ]: + assert call in api_calls + + +def test_retired_correlation_ids(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + def _sort_dict(inp): + return dict(sorted(inp.items())) + + api_corr_ids = {} + for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]: + for itr in sdk_data["buffer_records"][titr]: + corr_id = itr["correlation_id"]["internal"] + assert corr_id not in api_corr_ids.keys() + api_corr_ids[corr_id] = itr + + alloc_corr_ids = {} + for titr in ["memory_allocations"]: + for itr in sdk_data["buffer_records"][titr]: + corr_id = itr["correlation_id"]["internal"] + assert corr_id not in alloc_corr_ids.keys() + alloc_corr_ids[corr_id] = itr + + retired_corr_ids = {} + for itr in sdk_data["buffer_records"]["retired_correlation_ids"]: + corr_id = itr["internal_correlation_id"] + assert corr_id not in retired_corr_ids.keys() + retired_corr_ids[corr_id] = itr + + api_corr_ids = _sort_dict(api_corr_ids) + alloc_corr_ids = _sort_dict(alloc_corr_ids) + retired_corr_ids = _sort_dict(retired_corr_ids) + + for cid, itr in alloc_corr_ids.items(): + assert cid in retired_corr_ids.keys() + retired_ts = retired_corr_ids[cid]["timestamp"] + end_ts = itr["end_timestamp"] + assert (retired_ts - 
end_ts) > 0, f"correlation-id: {cid}, data: {itr}" + + for cid, itr in api_corr_ids.items(): + assert cid in retired_corr_ids.keys() + retired_ts = retired_corr_ids[cid]["timestamp"] + end_ts = itr["end_timestamp"] + assert (retired_ts - end_ts) > 0, f"correlation-id: {cid}, data: {itr}" + + assert len(api_corr_ids.keys()) == (len(retired_corr_ids.keys())) + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt index c5ae31e705..70b8fe84ea 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt @@ -36,3 +36,6 @@ add_subdirectory(roctracer-roctx) add_subdirectory(scratch-memory) add_subdirectory(pc-sampling) add_subdirectory(collection-period) +if(ROCPROFILER_BUILD_ROCDECODE_TESTS) + add_subdirectory(rocdecode-trace) +endif() diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/CMakeLists.txt new file mode 100644 index 0000000000..e2537b3d9d --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/CMakeLists.txt @@ -0,0 +1,52 @@ +# +# +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-tests-rocprofv3-rocdecode-tracing + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) + +rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py) + +string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}") + +set(rocdecode-tracing-env "${PRELOAD_ENV}") + +set(ROCDECODE_VIDEO_FILE + "${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H265.265") +if(NOT EXISTS "${ROCDECODE_VIDEO_FILE}") + message( + FATAL_ERROR + "Unable to find video file for rocdecode tests: ${ROCDECODE_VIDEO_FILE}") +endif() 
+add_test(
+    NAME rocprofv3-test-rocdecode-tracing-execute
+    COMMAND
+        $ --rocdecode-trace -d
+        ${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json otf2 pftrace
+        csv --log-level env -- $ -i ${ROCDECODE_VIDEO_FILE})
+
+set_tests_properties(
+    rocprofv3-test-rocdecode-tracing-execute
+    PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
+               "${rocdecode-tracing-env}" FAIL_REGULAR_EXPRESSION "threw an exception")
+
+add_test(
+    NAME rocprofv3-test-rocdecode-tracing-validate
+    COMMAND
+        ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
+        ${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_results.json --otf2-input
+        ${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_results.otf2 --pftrace-input
+        ${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_results.pftrace --csv-input
+        ${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_rocdecode_api_trace.csv)
+
+set_tests_properties(
+    rocprofv3-test-rocdecode-tracing-validate
+    PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
+               rocprofv3-test-rocdecode-tracing-execute FAIL_REGULAR_EXPRESSION
+               "AssertionError")
diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/conftest.py b/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/conftest.py
new file mode 100644
index 0000000000..6a5ef0dd0a
--- /dev/null
+++ b/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/conftest.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+import csv
+import json
+import os
+import pytest
+
+from rocprofiler_sdk.pytest_utils.dotdict import dotdict
+from rocprofiler_sdk.pytest_utils import collapse_dict_list
+from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader
+from rocprofiler_sdk.pytest_utils.otf2_reader import OTF2Reader
+
+
+def pytest_addoption(parser):
+    """Register the command-line options that locate the rocprofv3 output files."""
+    # Defaults use the same "rocdecode-trace/" directory that the CTest validate step passes
+    # explicitly; they are relative paths, so they only resolve from that build directory.
+    parser.addoption(
+        "--json-input",
+        action="store",
+        default="rocdecode-trace/out_results.json",
+        help="Input JSON",
+    )
+    parser.addoption(
+        "--otf2-input",
+        action="store",
+        default="rocdecode-trace/out_results.otf2",
+        help="Input OTF2",
+    )
+    parser.addoption(
+        "--pftrace-input",
+        action="store",
+        default="rocdecode-trace/out_results.pftrace",
+        help="Input pftrace file",
+    )
+    parser.addoption(
+        "--csv-input",
+        action="store",
+        default="rocdecode-trace/out_rocdecode_api_trace.csv",
+        help="Input CSV",
+    )
+
+
+@pytest.fixture
+def json_data(request):
+    """Load --json-input, pass it through collapse_dict_list and wrap the result in a dotdict."""
+    filename = request.config.getoption("--json-input")
+    with open(filename, "r") as inp:
+        return dotdict(collapse_dict_list(json.load(inp)))
+
+
+@pytest.fixture
+def csv_data(request):
+    """Return every row of --csv-input as a list of dicts keyed by the CSV header."""
+    filename = request.config.getoption("--csv-input")
+    with open(filename, "r", newline="") as inp:
+        return list(csv.DictReader(inp))
+
+
+@pytest.fixture
+def otf2_data(request):
+    """Return the first element of OTF2Reader.read() for --otf2-input (FileNotFoundError if absent)."""
+    filename = request.config.getoption("--otf2-input")
+    if not os.path.exists(filename):
+        raise FileNotFoundError(f"{filename} does not exist")
+    return OTF2Reader(filename).read()[0]
+
+
+@pytest.fixture
+def pftrace_data(request):
+    """Return the first element of PerfettoReader.read() for --pftrace-input."""
+    filename = request.config.getoption("--pftrace-input")
+    return PerfettoReader(filename).read()[0]
diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/pytest.ini b/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/pytest.ini
new file mode 100644
index 0000000000..5e1e1c14a0
--- /dev/null
+++ b/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/pytest.ini
@@ -0,0 +1,5 @@
+
+[pytest]
+addopts = --durations=20 -rA -s -vv
+testpaths = validate.py
+pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/validate.py b/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/validate.py
new file mode 100755
index 0000000000..8c2ac1a0f4
--- /dev/null
+++ b/projects/rocprofiler-sdk/tests/rocprofv3/rocdecode-trace/validate.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+
+import sys
+import pytest
+import json
+
+from collections import
defaultdict + + +# helper function +def node_exists(name, data, min_len=1): + assert name in data + assert data[name] is not None + if isinstance(data[name], (list, tuple, dict, set)): + assert len(data[name]) >= min_len + + +def get_operation(record, kind_name, op_name=None): + for idx, itr in enumerate(record["strings"]["buffer_records"]): + if kind_name == itr["kind"]: + if op_name is None: + return idx, itr["operations"] + else: + for oidx, oname in enumerate(itr["operations"]): + if op_name == oname: + return oidx + return None + + +def test_rocdeocde(json_data): + data = json_data["rocprofiler-sdk-tool"] + buffer_records = data["buffer_records"] + + rocdecode_data = buffer_records["rocdecode_api"] + + _, bf_op_names = get_operation(data, "ROCDECODE_API") + + assert len(bf_op_names) == 16 + + rocdecode_reported_agent_ids = set() + # check buffering data + for node in rocdecode_data: + assert "size" in node + assert "kind" in node + assert "operation" in node + assert "correlation_id" in node + assert "end_timestamp" in node + assert "start_timestamp" in node + assert "thread_id" in node + + assert node.size > 0 + assert node.thread_id > 0 + assert node.start_timestamp > 0 + assert node.end_timestamp > 0 + assert node.start_timestamp < node.end_timestamp + + assert data.strings.buffer_records[node.kind].kind == "ROCDECODE_API" + assert ( + data.strings.buffer_records[node.kind].operations[node.operation] + in bf_op_names + ) + + +def test_csv_data(csv_data): + assert len(csv_data) > 0, "Expected non-empty csv data" + + api_calls = [] + + for row in csv_data: + assert "Domain" in row, "'Domain' was not present in csv data for rocdecode-trace" + assert ( + "Function" in row + ), "'Function' was not present in csv data for rocdecode-trace" + assert ( + "Process_Id" in row + ), "'Process_Id' was not present in csv data for rocdecode-trace" + assert ( + "Thread_Id" in row + ), "'Thread_Id' was not present in csv data for rocdecode-trace" + assert ( + 
"Correlation_Id" in row + ), "'Correlation_Id' was not present in csv data for rocdecode-trace" + assert ( + "Start_Timestamp" in row + ), "'Start_Timestamp' was not present in csv data for rocdecode-trace" + assert ( + "End_Timestamp" in row + ), "'End_Timestamp' was not present in csv data for rocdecode-trace" + + api_calls.append(row["Function"]) + + assert row["Domain"] == "ROCDECODE_API" + assert int(row["Process_Id"]) > 0 + assert int(row["Thread_Id"]) > 0 + assert int(row["Start_Timestamp"]) > 0 + assert int(row["End_Timestamp"]) > 0 + assert int(row["Start_Timestamp"]) < int(row["End_Timestamp"]) + + for call in [ + "rocDecCreateBitstreamReader", + "rocDecGetBitstreamCodecType", + "rocDecGetBitstreamBitDepth", + "rocDecCreateVideoParser", + "rocDecGetBitstreamPicData", + "rocDecGetDecoderCaps", + "rocDecCreateDecoder", + "rocDecDecodeFrame", + "rocDecParseVideoData", + "rocDecGetVideoFrame", + "rocDecGetDecodeStatus", + "rocDecDestroyBitstreamReader", + ]: + assert call in api_calls + + +def test_perfetto_data(pftrace_data, json_data): + import rocprofiler_sdk.tests.rocprofv3 as rocprofv3 + + rocprofv3.test_perfetto_data( + pftrace_data, + json_data, + ("hip", "hsa", "memory_allocation", "rocdecode_api"), + ) + + +def test_otf2_data(otf2_data, json_data): + import rocprofiler_sdk.tests.rocprofv3 as rocprofv3 + + rocprofv3.test_otf2_data( + otf2_data, + json_data, + ("hip", "hsa", "memory_allocation", "rocdecode_api"), + ) + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code) diff --git a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp index d43fe1e979..662eef19d7 100644 --- a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp +++ b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp @@ -397,6 +397,23 @@ struct rccl_api_callback_record_t } }; +struct rocdecode_api_callback_record_t +{ + uint64_t timestamp = 0; + 
rocprofiler_callback_tracing_record_t record = {}; + rocprofiler_callback_tracing_rocdecode_api_data_t payload = {}; + callback_arg_array_t args = {}; + + template + void save(ArchiveT& ar) const + { + ar(cereal::make_nvp("timestamp", timestamp)); + cereal::save(ar, record); + ar(cereal::make_nvp("payload", payload)); + serialize_args(ar, args); + } +}; + struct ompt_callback_record_t { uint64_t timestamp = 0; @@ -555,6 +572,7 @@ auto kernel_dispatch_cb_records = std::deque{}; auto memory_allocation_cb_records = std::deque{}; auto rccl_api_cb_records = std::deque{}; +auto rocdecode_api_cb_records = std::deque{}; auto ompt_cb_records = std::deque{}; int @@ -824,6 +842,20 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, runtime_init_cb_records.emplace_back( runtime_init_callback_record_t{ts, record, *data, std::move(args)}); } + else if(record.kind == ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API) + { + auto* data = + static_cast(record.payload); + auto args = callback_arg_array_t{}; + if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) + rocprofiler_iterate_callback_tracing_kind_operation_args( + record, save_args, record.phase, &args); + + static auto _mutex = std::mutex{}; + auto _lk = std::unique_lock{_mutex}; + rocdecode_api_cb_records.emplace_back( + rocdecode_api_callback_record_t{ts, record, *data, std::move(args)}); + } else { throw std::runtime_error{"unsupported callback kind"}; @@ -843,8 +875,9 @@ auto scratch_memory_records = std::deque{}; auto corr_id_retire_records = std::deque{}; -auto rccl_api_bf_records = std::deque{}; -auto ompt_bf_records = std::deque{}; +auto rccl_api_bf_records = std::deque{}; +auto rocdecode_api_bf_records = std::deque{}; +auto ompt_bf_records = std::deque{}; void tool_tracing_buffered(rocprofiler_context_id_t /*context*/, @@ -971,6 +1004,13 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/, runtime_init_bf_records.emplace_back(*record); } + else if(header->kind == 
ROCPROFILER_BUFFER_TRACING_ROCDECODE_API) + { + auto* record = static_cast( + header->payload); + + rocdecode_api_bf_records.emplace_back(*record); + } else { throw std::runtime_error{ @@ -1069,6 +1109,9 @@ rocprofiler_context_id_t kernel_dispatch_buffered_ctx = {0}; rocprofiler_context_id_t page_migration_ctx = {0}; rocprofiler_context_id_t runtime_init_callback_ctx = {}; rocprofiler_context_id_t runtime_init_buffered_ctx = {}; +rocprofiler_context_id_t rocdecode_api_callback_ctx = {0}; +rocprofiler_context_id_t rocdecode_api_buffered_ctx = {0}; + // buffers rocprofiler_buffer_id_t runtime_init_buffered_buffer = {}; rocprofiler_buffer_id_t hsa_api_buffered_buffer = {}; @@ -1082,6 +1125,7 @@ rocprofiler_buffer_id_t counter_collection_buffer = {}; rocprofiler_buffer_id_t scratch_memory_buffer = {}; rocprofiler_buffer_id_t corr_id_retire_buffer = {}; rocprofiler_buffer_id_t rccl_api_buffered_buffer = {}; +rocprofiler_buffer_id_t rocdecode_api_buffer = {}; rocprofiler_buffer_id_t ompt_buffered_buffer = {}; auto contexts = std::unordered_map{ @@ -1107,10 +1151,12 @@ auto contexts = std::unordered_map{ {"SCRATCH_MEMORY", &scratch_memory_ctx}, {"CORRELATION_ID_RETIREMENT", &corr_id_retire_ctx}, {"RCCL_API_BUFFERED", &rccl_api_buffered_ctx}, + {"ROCDECODE_API_CALLBACK", &rocdecode_api_callback_ctx}, + {"ROCDECODE_API_BUFFERED", &rocdecode_api_buffered_ctx}, {"OMPT_BUFFERED", &ompt_buffered_ctx}, }; -auto buffers = std::array{&runtime_init_buffered_buffer, +auto buffers = std::array{&runtime_init_buffered_buffer, &hsa_api_buffered_buffer, &hip_api_buffered_buffer, &marker_api_buffered_buffer, @@ -1122,7 +1168,8 @@ auto buffers = std::array{&runtime_init_buffered_b &counter_collection_buffer, &corr_id_retire_buffer, &rccl_api_buffered_buffer, - &ompt_buffered_buffer}; + &ompt_buffered_buffer, + &rocdecode_api_buffer}; auto agents = std::vector{}; auto agents_map = std::unordered_map{}; @@ -1288,6 +1335,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* 
tool_data) nullptr), "rccl api callback tracing service configure"); + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(rocdecode_api_callback_ctx, + ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API, + nullptr, + 0, + tool_tracing_callback, + nullptr), + "rocdecode api callback tracing service configure"); + ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service(ompt_callback_ctx, ROCPROFILER_CALLBACK_TRACING_OMPT, @@ -1408,6 +1464,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) &rccl_api_buffered_buffer), "buffer creation"); + ROCPROFILER_CALL(rocprofiler_create_buffer(rocdecode_api_buffered_ctx, + buffer_size, + watermark, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + tool_tracing_buffered, + tool_data, + &rocdecode_api_buffer), + "buffer creation"); + ROCPROFILER_CALL(rocprofiler_create_buffer(ompt_buffered_ctx, buffer_size, watermark, @@ -1532,6 +1597,14 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) rccl_api_buffered_buffer), "buffer tracing service for rccl api configure"); + ROCPROFILER_CALL( + rocprofiler_configure_buffer_tracing_service(rocdecode_api_buffered_ctx, + ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, + nullptr, + 0, + rocdecode_api_buffer), + "buffer tracing service for rocdecode api configure"); + ROCPROFILER_CALL( rocprofiler_configure_buffer_tracing_service( ompt_buffered_ctx, ROCPROFILER_BUFFER_TRACING_OMPT, nullptr, 0, ompt_buffered_buffer), @@ -1701,7 +1774,8 @@ tool_fini(void* tool_data) << ", rccl_api_bf_records=" << rccl_api_bf_records.size() << ", ompt_bf_records=" << ompt_bf_records.size() << ", counter_collection_value_records=" << counter_collection_bf_records.size() - << "...\n" + << ", rocdecode_api_callback_records=" << rocdecode_api_cb_records.size() + << ", rocdecode_api_bf_records=" << rocdecode_api_bf_records.size() << "...\n" << std::flush; auto* _call_stack = static_cast(tool_data); @@ -1797,6 +1871,7 @@ write_json(call_stack_t* _call_stack) 
json_ar(cereal::make_nvp("kernel_dispatch", kernel_dispatch_cb_records)); json_ar(cereal::make_nvp("memory_copies", memory_copy_cb_records)); json_ar(cereal::make_nvp("memory_allocations", memory_allocation_cb_records)); + json_ar(cereal::make_nvp("rocdecode_api_traces", rocdecode_api_cb_records)); } catch(std::exception& e) { std::cerr << "[" << getpid() << "][" << __FUNCTION__ @@ -1823,6 +1898,7 @@ write_json(call_stack_t* _call_stack) json_ar(cereal::make_nvp("ompt_traces", ompt_bf_records)); json_ar(cereal::make_nvp("retired_correlation_ids", corr_id_retire_records)); json_ar(cereal::make_nvp("counter_collection", counter_collection_bf_records)); + json_ar(cereal::make_nvp("rocdecode_api_traces", rocdecode_api_bf_records)); } catch(std::exception& e) { std::cerr << "[" << getpid() << "][" << __FUNCTION__ @@ -1894,6 +1970,8 @@ write_perfetto() tids.emplace(itr.thread_id); for(auto itr : ompt_bf_records) tids.emplace(itr.thread_id); + for(auto itr : rocdecode_api_bf_records) + tids.emplace(itr.thread_id); for(auto itr : memory_copy_bf_records) { @@ -2147,6 +2225,47 @@ write_perfetto() itr.end_timestamp); } + for(auto itr : rocdecode_api_bf_records) + { + auto name = buffer_names.at(itr.kind, itr.operation); + auto& track = thread_tracks.at(itr.thread_id); + + auto _args = callback_arg_array_t{}; + auto ritr = std::find_if( + rocdecode_api_cb_records.begin(), + rocdecode_api_cb_records.end(), + [&itr](const auto& citr) { + return (citr.record.correlation_id.internal == itr.correlation_id.internal && + !citr.args.empty()); + }); + if(ritr != rocdecode_api_cb_records.end()) _args = ritr->args; + + TRACE_EVENT_BEGIN(sdk::perfetto_category::name, + ::perfetto::StaticString(name.data()), + track, + itr.start_timestamp, + ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), + "begin_ns", + itr.start_timestamp, + "tid", + itr.thread_id, + "kind", + itr.kind, + "operation", + itr.operation, + "corr_id", + itr.correlation_id.internal, + 
[&](::perfetto::EventContext ctx) { + for(const auto& aitr : _args) + sdk::add_perfetto_annotation(ctx, aitr.first, aitr.second); + }); + TRACE_EVENT_END(sdk::perfetto_category::name, + track, + itr.end_timestamp, + "end_ns", + itr.end_timestamp); + } + for(auto itr : ompt_bf_records) { auto name = buffer_names.at(itr.kind, itr.operation);