rocDecode API Tracing Support (#49)
* rocDecode API Tracing support
* Test bin file added to rocdecode. Need to add validate python methods
* Added option to not make rocDecode tests
* Added rocdecode and rocprofv3 tests
* Added csv test
* Address PR comments. Changed tests to use built-in rocstreambit decoder to remove ffmpeg dependency. Changed cmake option to disable tests rather than not build them. Tests work locally, but will fail until rocDecode is built with tracing enabled on CI
* Add option to avoid building rocdecode tests
* Added option to avoid building rocdecode bin file
* Merge conflict error
* CMake files changed in response to review comments. Attempting to implement callbacks.
* Turned off test building for rocdecode
* Minor fixes for review comments
* Review comments
* Updated formatting
* Document changes and format.hpp reversion. Need to remove iterate args support for now for later update.
* Remove iterate args support
* Remove iterate-args
* enforce abi versioning in macro if
* Fix doc error
* removed spaces to fix indentation error
---------
Co-authored-by: Madsen, Jonathan <Jonathan.Madsen@amd.com>
[ROCm/rocprofiler-sdk commit: e307b89ca4]
This commit is contained in:
committad av
GitHub
förälder
69d94c65a8
incheckning
3e1b8ba4ec
@@ -152,6 +152,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
|
||||
- `--collection-period` feature added in rocprofv3, to enable filtering using time.
|
||||
- `--collection-period-unit` feature added in rocprofv3, to allow the user to control time units used in collection period option.
|
||||
- Added deprecation notice for rocprofiler(v1) and rocprofiler(v2).
|
||||
- Added support for rocDecode API Tracing
|
||||
|
||||
### Changed
|
||||
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Locate rocDecode: the shared library and the directory that contains rocdecode.h.
# Both searches are seeded from ROCM_PATH.
find_library(rocDecode_LIBRARY NAMES rocdecode HINTS ${ROCM_PATH}/lib)
find_path(rocDecode_INCLUDE_DIR NAMES rocdecode.h PATHS ${ROCM_PATH}/include/rocdecode)

# Standard FOUND handling: rocDecode_FOUND is set only when both cache entries resolved.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
    rocDecode
    FOUND_VAR rocDecode_FOUND
    REQUIRED_VARS rocDecode_INCLUDE_DIR rocDecode_LIBRARY)

# Publish the imported interface target once, even if this module is included repeatedly.
if(rocDecode_FOUND AND NOT TARGET rocDecode::rocDecode)
    add_library(rocDecode::rocDecode INTERFACE IMPORTED)
    target_include_directories(rocDecode::rocDecode INTERFACE ${rocDecode_INCLUDE_DIR})
    target_link_libraries(rocDecode::rocDecode INTERFACE ${rocDecode_LIBRARY})
endif()

# Keep the raw search results out of the default cache view.
mark_as_advanced(rocDecode_INCLUDE_DIR rocDecode_LIBRARY)
|
||||
@@ -325,3 +325,23 @@ else()
|
||||
INTERFACE ROCPROFILER_SDK_USE_SYSTEM_RCCL=0)
|
||||
|
||||
endif()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
#
|
||||
# ROCDecode
|
||||
#
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
find_package(rocDecode)

# Use the system rocDecode headers only when the installation ships the API trace header
# (rocdecode_api_trace.h); otherwise mark the interface target with
# ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE=0.
#
# CMake variable and target names are case-sensitive. FindrocDecode.cmake defines
# rocDecode_INCLUDE_DIR and the imported target rocDecode::rocDecode, so exactly those
# spellings are used here: an all-caps ROCDECODE_INCLUDE_DIR expands to an empty string and
# a lower-case rocdecode::rocdecode target does not exist.
#
# NOTE(review): the find module searches ${ROCM_PATH}/include/rocdecode for rocdecode.h,
# so rocDecode_INCLUDE_DIR may already end in "rocdecode" -- confirm that the "rocdecode/"
# prefix in the path below matches the directory find_path() actually returns.
if(rocDecode_FOUND
   AND rocDecode_INCLUDE_DIR
   AND EXISTS "${rocDecode_INCLUDE_DIR}/rocdecode/amd_detail/rocdecode_api_trace.h")
    rocprofiler_config_nolink_target(
        rocprofiler-sdk-rocdecode-nolink rocDecode::rocDecode INTERFACE
        ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE=1)
else()
    target_compile_definitions(rocprofiler-sdk-rocdecode-nolink
                               INTERFACE ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE=0)

endif()
|
||||
|
||||
@@ -91,3 +91,6 @@ rocprofiler_add_interface_library(
|
||||
"rocprofiler-sdk-hsakmt without linking to HSAKMT library" IMPORTED)
|
||||
rocprofiler_add_interface_library(rocprofiler-sdk-rccl-nolink
|
||||
"RCCL headers without linking to RCCL library" IMPORTED)
|
||||
rocprofiler_add_interface_library(
|
||||
rocprofiler-sdk-rocdecode-nolink
|
||||
"ROCDECODE headers without linking to ROCDECODE library" IMPORTED)
|
||||
|
||||
@@ -59,6 +59,8 @@ if(ROCPROFILER_BUILD_TESTS)
|
||||
rocprofiler_add_option(
|
||||
ROCPROFILER_BUILD_GTEST
|
||||
"Enable building gtest (Google testing) library internally" ON ADVANCED)
|
||||
rocprofiler_add_option(ROCPROFILER_BUILD_ROCDECODE_TESTS
|
||||
"Enable building rocDecode tests" OFF ADVANCED)
|
||||
endif()
|
||||
|
||||
rocprofiler_add_option(ROCPROFILER_ENABLE_CLANG_TIDY "Enable clang-tidy checks" OFF
|
||||
|
||||
@@ -158,13 +158,13 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
|
||||
aggregate_tracing_options,
|
||||
"-r",
|
||||
"--runtime-trace",
|
||||
help="Collect tracing data for HIP runtime API, Marker (ROCTx) API, RCCL API, Memory operations (copies, scratch, and allocation), and Kernel dispatches. Similar to --sys-trace but without tracing HIP compiler API and the underlying HSA API.",
|
||||
help="Collect tracing data for HIP runtime API, Marker (ROCTx) API, RCCL API, ROCDecode API, Memory operations (copies, scratch, and allocation), and Kernel dispatches. Similar to --sys-trace but without tracing HIP compiler API and the underlying HSA API.",
|
||||
)
|
||||
add_parser_bool_argument(
|
||||
aggregate_tracing_options,
|
||||
"-s",
|
||||
"--sys-trace",
|
||||
help="Collect tracing data for HIP API, HSA API, Marker (ROCTx) API, RCCL API, Memory operations (copies, scratch, and allocations), and Kernel dispatches.",
|
||||
help="Collect tracing data for HIP API, HSA API, Marker (ROCTx) API, RCCL API, ROCDecode API, Memory operations (copies, scratch, and allocations), and Kernel dispatches.",
|
||||
)
|
||||
|
||||
pc_sampling_options = parser.add_argument_group("PC sampling options")
|
||||
@@ -245,6 +245,11 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
|
||||
"--kokkos-trace",
|
||||
help="Enable built-in Kokkos Tools support (implies --marker-trace and --kernel-rename)",
|
||||
)
|
||||
add_parser_bool_argument(
|
||||
basic_tracing_options,
|
||||
"--rocdecode-trace",
|
||||
help="For collecting ROCDecode Traces",
|
||||
)
|
||||
|
||||
extended_tracing_options = parser.add_argument_group("Granular tracing options")
|
||||
|
||||
@@ -761,6 +766,7 @@ def run(app_args, args, **kwargs):
|
||||
"memory_allocation_trace",
|
||||
"scratch_memory_trace",
|
||||
"rccl_trace",
|
||||
"rocdecode_trace",
|
||||
):
|
||||
setattr(args, itr, True)
|
||||
|
||||
@@ -773,6 +779,7 @@ def run(app_args, args, **kwargs):
|
||||
"memory_allocation_trace",
|
||||
"scratch_memory_trace",
|
||||
"rccl_trace",
|
||||
"rocdecode_trace",
|
||||
):
|
||||
setattr(args, itr, True)
|
||||
|
||||
@@ -796,6 +803,7 @@ def run(app_args, args, **kwargs):
|
||||
["hsa_finalizer_trace", "HSA_FINALIZER_EXT_API_TRACE"],
|
||||
["marker_trace", "MARKER_API_TRACE"],
|
||||
["rccl_trace", "RCCL_API_TRACE"],
|
||||
["rocdecode_trace", "ROCDECODE_API_TRACE"],
|
||||
["kernel_trace", "KERNEL_TRACE"],
|
||||
["memory_copy_trace", "MEMORY_COPY_TRACE"],
|
||||
["memory_allocation_trace", "MEMORY_ALLOCATION_TRACE"],
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
"Domain","Function","Process_Id","Thread_Id","Correlation_Id","Start_Timestamp","End_Timestamp"
|
||||
"ROCDECODE_API","rocDecCreateVideoParser",41688,41688,583,615449881677279,615449882001583
|
||||
"ROCDECODE_API","rocDecGetDecoderCaps",41688,41688,584,615449882016054,615449882163756
|
||||
"ROCDECODE_API","rocDecGetDecoderCaps",41688,41688,588,615449886038750,615449886050880
|
||||
"ROCDECODE_API","rocDecCreateDecoder",41688,41688,591,615449886084210,615450756910310
|
||||
"ROCDECODE_API","rocDecDecodeFrame",41688,41688,595,615450757036042,615450767147413
|
||||
"ROCDECODE_API","rocDecGetDecodeStatus",41688,41688,812,615450836779385,615450836779575
|
||||
|
@@ -55,11 +55,11 @@ Here is the sample of commonly used ``rocprofv3`` command-line options. Some opt
|
||||
- Output control
|
||||
|
||||
* - ``-r`` \| ``--runtime-trace``
|
||||
- Collects HIP (runtime), memory copy, memory allocation, marker, scratch memory, and kernel dispatch traces.
|
||||
- Collects HIP (runtime), memory copy, memory allocation, marker, RCCL, scratch memory, rocDecode, and kernel dispatch traces.
|
||||
- Application Tracing
|
||||
|
||||
* - ``-s`` \| ``--sys-trace``
|
||||
- Collects HIP, HSA, memory copy, memory allocation, marker, scratch memory, and kernel dispatch traces.
|
||||
- Collects HIP, HSA, memory copy, memory allocation, marker, RCCL, scratch memory, rocDecode, and kernel dispatch traces.
|
||||
- Application Tracing
|
||||
|
||||
* - ``--hip-trace``
|
||||
@@ -86,6 +86,10 @@ Here is the sample of commonly used ``rocprofv3`` command-line options. Some opt
|
||||
- Collects scratch memory operations traces.
|
||||
- Application tracing
|
||||
|
||||
* - ``--rocdecode-trace``
|
||||
- Collects rocDecode API traces.
|
||||
- Application tracing
|
||||
|
||||
* - ``--hsa-trace``
|
||||
- Collects HSA API traces.
|
||||
- Application tracing
|
||||
@@ -615,6 +619,28 @@ Here are the contents of ``rccl_api_trace.csv`` file:
|
||||
:widths: 10,10,10,10,10,20,20
|
||||
:header-rows: 1
|
||||
|
||||
rocDecode trace
|
||||
++++++++++++++++
|
||||
|
||||
`rocDecode <https://github.com/ROCm/rocDecode>`_ is a high-performance video decode SDK for AMD GPUs. This option traces the rocDecode API.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
rocprofv3 --rocdecode-trace -- <application_path>
|
||||
|
||||
The above command generates a ``rocdecode_api_trace`` file prefixed with the process ID.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
$ cat 41688_rocdecode_api_trace.csv
|
||||
|
||||
Here are the contents of ``rocdecode_api_trace.csv`` file:
|
||||
|
||||
.. csv-table:: rocDecode trace
|
||||
:file: /data/rocdecode_api_trace.csv
|
||||
:widths: 10,10,10,10,10,20,20
|
||||
:header-rows: 1
|
||||
|
||||
Post-processing tracing options
|
||||
++++++++++++++++++++++++++++++++
|
||||
|
||||
@@ -1336,3 +1362,15 @@ Properties
|
||||
- **`handle`** *(integer, required)*: Handle of the agent.
|
||||
- **`address`** *(string, required)*: Starting address of allocation.
|
||||
- **`allocation_size`** *(integer, required)*: Size of allocation.
|
||||
- **`rocDecode_api`** *(array)*: rocDecode API records.
|
||||
- **Items** *(object)*
|
||||
- **`size`** *(integer, required)*: Size of the rocDecode API record.
|
||||
- **`kind`** *(integer, required)*: Kind of the rocDecode API.
|
||||
- **`operation`** *(integer, required)*: Operation of the rocDecode API.
|
||||
- **`correlation_id`** *(object, required)*: Correlation ID information.
|
||||
- **`internal`** *(integer, required)*: Internal correlation ID.
|
||||
- **`external`** *(integer, required)*: External correlation ID.
|
||||
- **`start_timestamp`** *(integer, required)*: Start timestamp.
|
||||
- **`end_timestamp`** *(integer, required)*: End timestamp.
|
||||
- **`thread_id`** *(integer, required)*: Thread ID.
|
||||
|
||||
|
||||
@@ -1678,6 +1678,66 @@
|
||||
"address",
|
||||
"allocation_size"
|
||||
]
|
||||
}
|
||||
},
|
||||
"rocdecoder_api": {
|
||||
"type": "array",
|
||||
"description": "ROCDecode API records.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"size": {
|
||||
"type": "integer",
|
||||
"description": "Size of the rocDecode API record."
|
||||
},
|
||||
"kind": {
|
||||
"type": "integer",
|
||||
"description": "Kind of the rocDecode API."
|
||||
},
|
||||
"operation": {
|
||||
"type": "integer",
|
||||
"description": "Operation of the rocDecode API."
|
||||
},
|
||||
"correlation_id": {
|
||||
"type": "object",
|
||||
"description": "Correlation ID information.",
|
||||
"properties": {
|
||||
"internal": {
|
||||
"type": "integer",
|
||||
"description": "Internal correlation ID."
|
||||
},
|
||||
"external": {
|
||||
"type": "integer",
|
||||
"description": "External correlation ID."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"internal",
|
||||
"external"
|
||||
]
|
||||
},
|
||||
"start_timestamp": {
|
||||
"type": "integer",
|
||||
"description": "Start timestamp."
|
||||
},
|
||||
"end_timestamp": {
|
||||
"type": "integer",
|
||||
"description": "End timestamp."
|
||||
},
|
||||
"thread_id": {
|
||||
"type": "integer",
|
||||
"description": "Thread ID."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"size",
|
||||
"kind",
|
||||
"operation",
|
||||
"correlation_id",
|
||||
"start_timestamp",
|
||||
"end_timestamp",
|
||||
"thread_id"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,7 +65,10 @@
|
||||
"type": "boolean",
|
||||
"description": "For Collecting Memory Allocation Traces"
|
||||
},
|
||||
|
||||
"rocdecode_trace": {
|
||||
"type": "boolean",
|
||||
"description": "For Collecting rocDecode Traces"
|
||||
},
|
||||
"scratch_memory_trace": {
|
||||
"type": "boolean",
|
||||
"description": "For Collecting Scratch Memory operations Traces"
|
||||
@@ -101,9 +104,14 @@
|
||||
"description": "For Collecting HSA API Traces (Image-extenson API)"
|
||||
},
|
||||
|
||||
"runtime_trace" : {
|
||||
"type": "boolean",
|
||||
"description": "For collecting HIP (runtime), memory copy, memory allocation, marker, scratch memory, rocDecode, and Kernel dispatch traces."
|
||||
},
|
||||
|
||||
"sys_trace" : {
|
||||
"type": "boolean",
|
||||
"description": "For Collecting HIP, HSA, Marker (ROCTx), Memory copy, Memory allocation, Scratch memory, and Kernel dispatch traces"
|
||||
"description": "For Collecting HIP, HSA, Marker (ROCTx), Memory copy, Memory allocation, Scratch memory, rocDecode, and Kernel dispatch traces"
|
||||
},
|
||||
|
||||
"mangled_kernels": {
|
||||
|
||||
@@ -31,6 +31,7 @@ set(ROCPROFILER_HEADER_FILES
|
||||
profile_config.h
|
||||
registration.h
|
||||
rccl.h
|
||||
rocdecode.h
|
||||
spm.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/version.h)
|
||||
|
||||
@@ -44,6 +45,7 @@ add_subdirectory(hsa)
|
||||
add_subdirectory(marker)
|
||||
add_subdirectory(ompt)
|
||||
add_subdirectory(rccl)
|
||||
add_subdirectory(rocdecode)
|
||||
add_subdirectory(cxx)
|
||||
add_subdirectory(kfd)
|
||||
add_subdirectory(amd_detail)
|
||||
|
||||
@@ -182,6 +182,25 @@ typedef struct
|
||||
/// @brief Specification of the API function, e.g., ::rocprofiler_rccl_api_id_t
|
||||
} rocprofiler_buffer_tracing_rccl_api_record_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Buffer ROCDecode API Record.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint64_t size; ///< size of this struct
|
||||
rocprofiler_buffer_tracing_kind_t kind;
|
||||
rocprofiler_tracing_operation_t operation;
|
||||
rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record
|
||||
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
|
||||
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
|
||||
rocprofiler_thread_id_t thread_id; ///< id for thread generating this record
|
||||
|
||||
/// @var kind
|
||||
/// @brief ::ROCPROFILER_BUFFER_TRACING_ROCDECODE_API
|
||||
/// @var operation
|
||||
/// @brief Specification of the API function, e.g., ::rocprofiler_rocdecode_api_id_t
|
||||
} rocprofiler_buffer_tracing_rocdecode_api_record_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Buffer Memory Copy Tracer Record.
|
||||
*/
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#include <rocprofiler-sdk/marker.h>
|
||||
#include <rocprofiler-sdk/ompt.h>
|
||||
#include <rocprofiler-sdk/rccl.h>
|
||||
#include <rocprofiler-sdk/rocdecode.h>
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_amd_tool.h>
|
||||
@@ -108,6 +109,16 @@ typedef struct
|
||||
rocprofiler_rccl_api_retval_t retval;
|
||||
} rocprofiler_callback_tracing_rccl_api_data_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler ROCDecode API Callback Data.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint64_t size; ///< size of this struct
|
||||
rocprofiler_rocdecode_api_args_t args; ///< per-function argument union for the traced call
|
||||
rocprofiler_rocdecode_api_retval_t retval; ///< return value union for the traced call
|
||||
} rocprofiler_callback_tracing_rocdecode_api_data_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Code Object Load Tracer Callback Record.
|
||||
*/
|
||||
|
||||
@@ -82,6 +82,7 @@ ROCPROFILER_DEFINE_CATEGORY(category, openmp, "OpenMP")
|
||||
ROCPROFILER_DEFINE_CATEGORY(category, kernel_dispatch, "GPU kernel dispatch")
|
||||
ROCPROFILER_DEFINE_CATEGORY(category, memory_copy, "Async memory copy")
|
||||
ROCPROFILER_DEFINE_CATEGORY(category, memory_allocation, "Memory Allocation")
|
||||
ROCPROFILER_DEFINE_CATEGORY(category, rocdecode_api, "ROCDecode API function")
|
||||
|
||||
#define ROCPROFILER_PERFETTO_CATEGORIES \
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::hsa_api), \
|
||||
@@ -91,7 +92,8 @@ ROCPROFILER_DEFINE_CATEGORY(category, memory_allocation, "Memory Allocation")
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::openmp), \
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::kernel_dispatch), \
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::memory_copy), \
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::memory_allocation)
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::memory_allocation), \
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::rocdecode_api)
|
||||
|
||||
#include <perfetto.h>
|
||||
|
||||
|
||||
@@ -386,6 +386,21 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_rccl_api_data_t data)
|
||||
ROCP_SDK_SAVE_DATA_FIELD(retval);
|
||||
}
|
||||
|
||||
// Serializes a rocDecode return-value union: only the rocDecStatus_retval member is passed
// to ROCP_SDK_SAVE_DATA_FIELD.
// NOTE(review): ROCP_SDK_SAVE_DATA_FIELD is defined outside this view; presumably it writes
// data.<field> into `ar` under the field's name -- confirm. The union's other member
// (const_charp_retval) is not written here.
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_rocdecode_api_retval_t data)
|
||||
{
|
||||
ROCP_SDK_SAVE_DATA_FIELD(rocDecStatus_retval);
|
||||
}
|
||||
|
||||
// Serializes rocDecode callback-tracing data: the `size` and `retval` fields are saved.
// NOTE(review): the struct's `args` member is not passed to the macro here; presumably
// intentional (argument serialization not supported yet) -- confirm.
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_callback_tracing_rocdecode_api_data_t data)
|
||||
{
|
||||
ROCP_SDK_SAVE_DATA_FIELD(size);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(retval);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_callback_tracing_ompt_data_t data)
|
||||
@@ -479,6 +494,13 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_rccl_api_record_t data)
|
||||
save_buffer_tracing_api_record(ar, data);
|
||||
}
|
||||
|
||||
// Serializes a rocDecode buffer-tracing record by forwarding the archive and the record
// to save_buffer_tracing_api_record (defined outside this view).
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_buffer_tracing_rocdecode_api_record_t data)
|
||||
{
|
||||
save_buffer_tracing_api_record(ar, data);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_buffer_tracing_ompt_target_t data)
|
||||
|
||||
@@ -69,6 +69,7 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_RCCL_API, ///<
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_OMPT, ///<
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION, ///<
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API, ///<
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST,
|
||||
} rocprofiler_external_correlation_id_request_kind_t;
|
||||
|
||||
|
||||
@@ -176,6 +176,7 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
///< ::rocprofiler_memory_allocation_operation_t
|
||||
ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION, ///< Callback notifying that a runtime
|
||||
///< library has been initialized
|
||||
ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API, ///< rocDecode API Tracing
|
||||
ROCPROFILER_CALLBACK_TRACING_LAST,
|
||||
} rocprofiler_callback_tracing_kind_t;
|
||||
|
||||
@@ -207,6 +208,7 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
ROCPROFILER_BUFFER_TRACING_RUNTIME_INITIALIZATION, ///< Record indicating a runtime library has
|
||||
///< been initialized. @see
|
||||
///< ::rocprofiler_runtime_initialization_operation_t
|
||||
ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, ///< rocDecode tracing
|
||||
ROCPROFILER_BUFFER_TRACING_LAST,
|
||||
} rocprofiler_buffer_tracing_kind_t;
|
||||
|
||||
@@ -363,12 +365,13 @@ typedef enum
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
ROCPROFILER_LIBRARY = (1 << 0),
|
||||
ROCPROFILER_HSA_LIBRARY = (1 << 1),
|
||||
ROCPROFILER_HIP_LIBRARY = (1 << 2),
|
||||
ROCPROFILER_MARKER_LIBRARY = (1 << 3),
|
||||
ROCPROFILER_RCCL_LIBRARY = (1 << 4),
|
||||
ROCPROFILER_LIBRARY_LAST = ROCPROFILER_RCCL_LIBRARY,
|
||||
ROCPROFILER_LIBRARY = (1 << 0),
|
||||
ROCPROFILER_HSA_LIBRARY = (1 << 1),
|
||||
ROCPROFILER_HIP_LIBRARY = (1 << 2),
|
||||
ROCPROFILER_MARKER_LIBRARY = (1 << 3),
|
||||
ROCPROFILER_RCCL_LIBRARY = (1 << 4),
|
||||
ROCPROFILER_ROCDECODE_LIBRARY = (1 << 5),
|
||||
ROCPROFILER_LIBRARY_LAST = ROCPROFILER_ROCDECODE_LIBRARY,
|
||||
} rocprofiler_runtime_library_t;
|
||||
|
||||
/**
|
||||
@@ -384,7 +387,8 @@ typedef enum
|
||||
ROCPROFILER_MARKER_CONTROL_TABLE = (1 << 4),
|
||||
ROCPROFILER_MARKER_NAME_TABLE = (1 << 5),
|
||||
ROCPROFILER_RCCL_TABLE = (1 << 6),
|
||||
ROCPROFILER_TABLE_LAST = ROCPROFILER_RCCL_TABLE,
|
||||
ROCPROFILER_ROCDECODE_TABLE = (1 << 7),
|
||||
ROCPROFILER_TABLE_LAST = ROCPROFILER_ROCDECODE_TABLE,
|
||||
} rocprofiler_intercept_table_t;
|
||||
|
||||
/**
|
||||
@@ -392,11 +396,12 @@ typedef enum
|
||||
*/
|
||||
typedef enum // NOLINT(performance-enum-size)
|
||||
{
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_NONE = 0, ///< Unknown runtime initialization
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_HSA, ///< Application loaded HSA runtime
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_HIP, ///< Application loaded HIP runtime
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_MARKER, ///< Application loaded Marker (ROCTx) runtime
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_RCCL, ///< Application loaded RCCL runtime
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_NONE = 0, ///< Unknown runtime initialization
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_HSA, ///< Application loaded HSA runtime
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_HIP, ///< Application loaded HIP runtime
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_MARKER, ///< Application loaded Marker (ROCTx) runtime
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_RCCL, ///< Application loaded RCCL runtime
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_ROCDECODE, ///< Application loaded rocDecode runtime
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_LAST,
|
||||
} rocprofiler_runtime_initialization_operation_t;
|
||||
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/rocdecode/api_args.h>
|
||||
#include <rocprofiler-sdk/rocdecode/api_id.h>
|
||||
#include <rocprofiler-sdk/rocdecode/table_id.h>
|
||||
@@ -0,0 +1,13 @@
|
||||
#
|
||||
#
|
||||
# Installation of public rocDecode headers
|
||||
#
|
||||
#
|
||||
# Public rocDecode tracing headers, one per line so additions diff cleanly.
set(ROCPROFILER_ROCDECODE_HEADER_FILES
    api_args.h
    api_id.h
    table_id.h)

# Install them under <includedir>/rocprofiler-sdk/rocdecode as part of the development
# component.
install(FILES ${ROCPROFILER_ROCDECODE_HEADER_FILES}
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/rocdecode
        COMPONENT development)

# The details/ subdirectory carries its own install rules.
add_subdirectory(details)
|
||||
@@ -0,0 +1,165 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/defines.h>
|
||||
#include <rocprofiler-sdk/version.h>
|
||||
|
||||
#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE)
|
||||
# if defined __has_include
|
||||
# if __has_include(<rocdecode/rocparser.h>) && __has_include(<rocdecode/rocdecode.h>) && __has_include(<rocdecode/roc_bitstream_reader.h>)
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1
|
||||
# else
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0
|
||||
# endif
|
||||
# else
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0
|
||||
# include <rocdecode/roc_bitstream_reader.h>
|
||||
# include <rocdecode/rocdecode.h>
|
||||
# include <rocdecode/rocparser.h>
|
||||
#else
|
||||
# include <rocprofiler-sdk/rocdecode/details/roc_bitstream_reader.h>
|
||||
# include <rocprofiler-sdk/rocdecode/details/rocdecode.h>
|
||||
# include <rocprofiler-sdk/rocdecode/details/rocparser.h>
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
ROCPROFILER_EXTERN_C_INIT
|
||||
|
||||
// Empty struct has a size of 0 in C but size of 1 in C++.
|
||||
// This struct is added to the union members which represent
|
||||
// functions with no arguments to ensure ABI compatibility
|
||||
typedef struct rocprofiler_rocdecode_api_no_args
|
||||
{
|
||||
char empty; ///< placeholder member so the struct is never empty
|
||||
} rocprofiler_rocdecode_api_no_args;
|
||||
|
||||
/// @brief Storage for the return value of a traced rocDecode API call.
typedef union rocprofiler_rocdecode_api_retval_t
|
||||
{
|
||||
int32_t rocDecStatus_retval; ///< return value held as a 32-bit integer (presumably a rocDecStatus -- confirm)
|
||||
const char* const_charp_retval; ///< return value held as a C string (presumably for rocDecGetErrorName -- confirm)
|
||||
} rocprofiler_rocdecode_api_retval_t;
|
||||
|
||||
/**
 * @brief Union of argument packs for the traced rocDecode API functions.
 *
 * Each anonymous struct member is named after one rocDecode function and holds that
 * function's arguments using the rocDecode handle/parameter types.
 * NOTE(review): field order presumably mirrors each function's parameter list -- confirm
 * against the rocDecode headers.
 */
typedef union rocprofiler_rocdecode_api_args_t
|
||||
{
|
||||
struct
|
||||
{
|
||||
RocdecVideoParser* parser_handle;
|
||||
RocdecParserParams* params;
|
||||
} rocDecCreateVideoParser;
|
||||
|
||||
struct
|
||||
{
|
||||
RocdecVideoParser parser_handle;
|
||||
RocdecSourceDataPacket* packet;
|
||||
} rocDecParseVideoData;
|
||||
|
||||
struct
|
||||
{
|
||||
RocdecVideoParser parser_handle;
|
||||
} rocDecDestroyVideoParser;
|
||||
|
||||
struct
|
||||
{
|
||||
rocDecDecoderHandle* decoder_handle;
|
||||
RocDecoderCreateInfo* decoder_create_info;
|
||||
} rocDecCreateDecoder;
|
||||
|
||||
struct
|
||||
{
|
||||
rocDecDecoderHandle decoder_handle;
|
||||
} rocDecDestroyDecoder;
|
||||
|
||||
struct
|
||||
{
|
||||
RocdecDecodeCaps* decode_caps;
|
||||
} rocDecGetDecoderCaps;
|
||||
|
||||
struct
|
||||
{
|
||||
rocDecDecoderHandle decoder_handle;
|
||||
RocdecPicParams* pic_params;
|
||||
} rocDecDecodeFrame;
|
||||
|
||||
struct
|
||||
{
|
||||
rocDecDecoderHandle decoder_handle;
|
||||
int pic_idx;
|
||||
RocdecDecodeStatus* decode_status;
|
||||
} rocDecGetDecodeStatus;
|
||||
|
||||
struct
|
||||
{
|
||||
rocDecDecoderHandle decoder_handle;
|
||||
RocdecReconfigureDecoderInfo* reconfig_params;
|
||||
} rocDecReconfigureDecoder;
|
||||
|
||||
struct
|
||||
{
|
||||
rocDecDecoderHandle decoder_handle;
|
||||
int pic_idx;
|
||||
void** dev_mem_ptr;
|
||||
uint32_t* horizontal_pitch;
|
||||
RocdecProcParams* vid_postproc_params;
|
||||
} rocDecGetVideoFrame;
|
||||
struct
|
||||
{
|
||||
rocDecStatus rocdec_status;
|
||||
} rocDecGetErrorName;
|
||||
|
||||
// The bitstream-reader entries below exist only when
// ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1.
// NOTE(review): this file does not visibly include rocdecode_api_trace.h; if that header
// is where the macro is defined, an undefined macro evaluates to 0 here -- confirm the
// macro is in scope before this point.
#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1
|
||||
struct
|
||||
{
|
||||
RocdecBitstreamReader* bs_reader_handle;
|
||||
const char* input_file_path;
|
||||
} rocDecCreateBitstreamReader;
|
||||
struct
|
||||
{
|
||||
RocdecBitstreamReader bs_reader_handle;
|
||||
rocDecVideoCodec* codec_type;
|
||||
} rocDecGetBitstreamCodecType;
|
||||
struct
|
||||
{
|
||||
RocdecBitstreamReader bs_reader_handle;
|
||||
int* bit_depth;
|
||||
} rocDecGetBitstreamBitDepth;
|
||||
struct
|
||||
{
|
||||
RocdecBitstreamReader bs_reader_handle;
|
||||
uint8_t** pic_data;
|
||||
int* pic_size;
|
||||
int64_t* pts;
|
||||
} rocDecGetBitstreamPicData;
|
||||
struct
|
||||
{
|
||||
RocdecBitstreamReader bs_reader_handle;
|
||||
} rocDecDestroyBitstreamReader;
|
||||
#endif
|
||||
} rocprofiler_rocdecode_api_args_t;
|
||||
|
||||
ROCPROFILER_EXTERN_C_FINI
|
||||
@@ -0,0 +1,56 @@
|
||||
|
||||
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/version.h>
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler enumeration of rocDecode API tracing operations
|
||||
*/
|
||||
typedef enum // NOLINT(performance-enum-size)
|
||||
{
|
||||
ROCPROFILER_ROCDECODE_API_ID_NONE = -1,
|
||||
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecCreateVideoParser = 0,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecParseVideoData,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyVideoParser,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecCreateDecoder,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyDecoder,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecoderCaps,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecDecodeFrame,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecodeStatus,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecReconfigureDecoder,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecGetVideoFrame,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecGetErrorName,
|
||||
|
||||
#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecCreateBitstreamReader,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamCodecType,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamBitDepth,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamPicData,
|
||||
ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyBitstreamReader,
|
||||
#endif
|
||||
ROCPROFILER_ROCDECODE_API_ID_LAST,
|
||||
} rocprofiler_rocdecode_api_id_t;
|
||||
+13
@@ -0,0 +1,13 @@
|
||||
#
|
||||
#
|
||||
# Installation of public ROCDecode headers
|
||||
#
|
||||
#
|
||||
set(ROCPROFILER_ROCDECODE_DETAILS_HEADER_FILES
|
||||
rocdecode_api_trace.h rocdecode.h rocparser.h rocdecode_version.h
|
||||
roc_bitstream_reader.h)
|
||||
|
||||
install(
|
||||
FILES ${ROCPROFILER_ROCDECODE_DETAILS_HEADER_FILES}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/rocdecode/details
|
||||
COMPONENT development)
|
||||
+110
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE)
|
||||
# if defined __has_include
|
||||
# if __has_include(<rocdecode/rocdecode.h>)
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1
|
||||
# else
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0
|
||||
# endif
|
||||
# else
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0
|
||||
# include <rocdecode/rocdecode.h>
|
||||
#else
|
||||
# include <rocprofiler-sdk/rocdecode/details/rocdecode.h>
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief The AMD rocBitstreamReader Library.
|
||||
*
|
||||
* \defgroup group_roc_bitstream_reader rocDecode Parser: AMD ROCm Video Bitstream Reader API
|
||||
* \brief The AMD rocBitstreamReader is a toolkit to read picture data from bitstream files for
|
||||
* decoding on AMD’s GPUs.
|
||||
*/
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*********************************************************************************/
|
||||
//! HANDLE of rocBitstreamReader
|
||||
//! Used in subsequent API calls after rocDecCreateBitstreamReader
|
||||
/*********************************************************************************/
|
||||
typedef void* RocdecBitstreamReader;
|
||||
|
||||
/************************************************************************************************/
|
||||
//! \ingroup group_roc_bitstream_reader
|
||||
//! \fn rocDecStatus ROCDECAPI rocDecCreateBitstreamReader(RocdecBitstreamReader *bs_reader_handle,
|
||||
//! const char *input_file_path) Create video bitstream reader object and initialize
|
||||
/************************************************************************************************/
|
||||
extern rocDecStatus ROCDECAPI
|
||||
rocDecCreateBitstreamReader(RocdecBitstreamReader* bs_reader_handle, const char* input_file_path);
|
||||
|
||||
/************************************************************************************************/
|
||||
//! \ingroup group_roc_bitstream_reader
|
||||
//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamCodecType(RocdecBitstreamReader bs_reader_handle,
|
||||
//! rocDecVideoCodec *codec_type) Get the codec type of the bitstream
|
||||
/************************************************************************************************/
|
||||
extern rocDecStatus ROCDECAPI
|
||||
rocDecGetBitstreamCodecType(RocdecBitstreamReader bs_reader_handle, rocDecVideoCodec* codec_type);
|
||||
|
||||
/************************************************************************************************/
|
||||
//! \ingroup group_roc_bitstream_reader
|
||||
//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamBitDepth(RocdecBitstreamReader bs_reader_handle,
|
||||
//! int *bit_depth) Get the bit depth of the bitstream
|
||||
/************************************************************************************************/
|
||||
extern rocDecStatus ROCDECAPI
|
||||
rocDecGetBitstreamBitDepth(RocdecBitstreamReader bs_reader_handle, int* bit_depth);
|
||||
|
||||
/************************************************************************************************/
|
||||
//! \ingroup group_roc_bitstream_reader
|
||||
//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamPicData(RocdecBitstreamReader bs_reader_handle,
|
||||
//! uint8_t **pic_data, int *pic_size, int64_t *pts) Read one unit of picture data from the
|
||||
//! bitstream. The unit can be a frame or field for AVC/HEVC, a temporal unit for AV1, or a frame
|
||||
//! (including superframe) for VP9. The picture data unit is pointed by pic_data. The size of the
|
||||
//! unit is specified by pic_size. The presentation time stamp, if available, is given by pts.
|
||||
/************************************************************************************************/
|
||||
extern rocDecStatus ROCDECAPI
|
||||
rocDecGetBitstreamPicData(RocdecBitstreamReader bs_reader_handle,
|
||||
uint8_t** pic_data,
|
||||
int* pic_size,
|
||||
int64_t* pts);
|
||||
|
||||
/************************************************************************************************/
|
||||
//! \ingroup group_roc_bitstream_reader
|
||||
//! \fn rocDecStatus ROCDECAPI rocDecDestroyBitstreamReader(RocdecBitstreamReader bs_reader_handle)
|
||||
//! Destroy the bitstream reader object
|
||||
/************************************************************************************************/
|
||||
extern rocDecStatus ROCDECAPI
|
||||
rocDecDestroyBitstreamReader(RocdecBitstreamReader bs_reader_handle);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
+1888
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
+157
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE)
|
||||
# if defined __has_include
|
||||
# if __has_include(<rocdecode/rocparser.h>) && __has_include(<rocdecode/rocdecode.h>) && __has_include(<rocdecode/roc_bitstream_reader.h>)
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1
|
||||
# else
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0
|
||||
# endif
|
||||
# else
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0
|
||||
# include <rocdecode/roc_bitstream_reader.h>
|
||||
# include <rocdecode/rocdecode.h>
|
||||
# include <rocdecode/rocparser.h>
|
||||
#else
|
||||
# include <rocprofiler-sdk/rocdecode/details/roc_bitstream_reader.h>
|
||||
# include <rocprofiler-sdk/rocdecode/details/rocdecode.h>
|
||||
# include <rocprofiler-sdk/rocdecode/details/rocparser.h>
|
||||
#endif
|
||||
|
||||
// Define version macros for the rocDecode API dispatch table, specifying the MAJOR and STEP
|
||||
// versions.
|
||||
//
|
||||
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! IMPORTANT !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
//
|
||||
// 1. When adding new functions to the rocDecode API dispatch table, always append the new function
|
||||
// pointer
|
||||
// to the end of the table and increment the dispatch table's version number. Never rearrange the
|
||||
// order of the member variables in the dispatch table, as doing so will break the Application
|
||||
// Binary Interface (ABI).
|
||||
// 2. In critical situations where the type of an existing member variable in a dispatch table has
|
||||
// been changed
|
||||
// or removed due to a data type modification, it is important to increment the major version
|
||||
// number of the rocDecode API dispatch table. If the function pointer type can no longer be
|
||||
// declared, do not remove it. Instead, change the function pointer type to `void*` and ensure it
|
||||
// is always initialized to `nullptr`.
|
||||
//
|
||||
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
//
|
||||
|
||||
// The major version number should ideally remain unchanged. Increment the
|
||||
// ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION only for fundamental changes to the
|
||||
// rocDecodeDispatchTable struct, such as altering the type or name of an existing member variable.
|
||||
// Please DO NOT REMOVE it.
|
||||
#define ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION 0
|
||||
|
||||
// Increment the ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION when new runtime API functions are added.
|
||||
// If the corresponding ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION increases reset the
|
||||
// ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION to zero.
|
||||
#define ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION 1
|
||||
|
||||
// rocDecode API interface
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecCreateVideoParser)(RocdecVideoParser* parser_handle,
|
||||
RocdecParserParams* params);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecParseVideoData)(RocdecVideoParser parser_handle,
|
||||
RocdecSourceDataPacket* packet);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecDestroyVideoParser)(RocdecVideoParser parser_handle);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecCreateDecoder)(rocDecDecoderHandle* decoder_handle,
|
||||
RocDecoderCreateInfo* decoder_create_info);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecDestroyDecoder)(rocDecDecoderHandle decoder_handle);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecGetDecoderCaps)(RocdecDecodeCaps* decode_caps);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecDecodeFrame)(rocDecDecoderHandle decoder_handle,
|
||||
RocdecPicParams* pic_params);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecGetDecodeStatus)(rocDecDecoderHandle decoder_handle,
|
||||
int pic_idx,
|
||||
RocdecDecodeStatus* decode_status);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecReconfigureDecoder)(
|
||||
rocDecDecoderHandle decoder_handle,
|
||||
RocdecReconfigureDecoderInfo* reconfig_params);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecGetVideoFrame)(rocDecDecoderHandle decoder_handle,
|
||||
int pic_idx,
|
||||
void* dev_mem_ptr[3],
|
||||
uint32_t* horizontal_pitch,
|
||||
RocdecProcParams* vid_postproc_params);
|
||||
typedef const char*(ROCDECAPI* PfnRocDecGetErrorName)(rocDecStatus rocdec_status);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecCreateBitstreamReader)(
|
||||
RocdecBitstreamReader* bs_reader_handle,
|
||||
const char* input_file_path);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecGetBitstreamCodecType)(
|
||||
RocdecBitstreamReader bs_reader_handle,
|
||||
rocDecVideoCodec* codec_type);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecGetBitstreamBitDepth)(
|
||||
RocdecBitstreamReader bs_reader_handle,
|
||||
int* bit_depth);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecGetBitstreamPicData)(
|
||||
RocdecBitstreamReader bs_reader_handle,
|
||||
uint8_t** pic_data,
|
||||
int* pic_size,
|
||||
int64_t* pts);
|
||||
typedef rocDecStatus(ROCDECAPI* PfnRocDecDestroyBitstreamReader)(
|
||||
RocdecBitstreamReader bs_reader_handle);
|
||||
|
||||
// rocDecode API dispatch table
|
||||
struct RocDecodeDispatchTable
|
||||
{
|
||||
// ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 0
|
||||
size_t size;
|
||||
PfnRocDecCreateVideoParser pfn_rocdec_create_video_parser;
|
||||
PfnRocDecParseVideoData pfn_rocdec_parse_video_data;
|
||||
PfnRocDecDestroyVideoParser pfn_rocdec_destroy_video_parser;
|
||||
PfnRocDecCreateDecoder pfn_rocdec_create_decoder;
|
||||
PfnRocDecDestroyDecoder pfn_rocdec_destroy_decoder;
|
||||
PfnRocDecGetDecoderCaps pfn_rocdec_get_gecoder_caps;
|
||||
PfnRocDecDecodeFrame pfn_rocdec_decode_frame;
|
||||
PfnRocDecGetDecodeStatus pfn_rocdec_get_decode_status;
|
||||
PfnRocDecReconfigureDecoder pfn_rocdec_reconfigure_decoder;
|
||||
PfnRocDecGetVideoFrame pfn_rocdec_get_video_frame;
|
||||
PfnRocDecGetErrorName pfn_rocdec_get_error_name;
|
||||
// PLEASE DO NOT EDIT ABOVE!
|
||||
// ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 1
|
||||
PfnRocDecCreateBitstreamReader pfn_rocdec_create_bitstream_reader;
|
||||
PfnRocDecGetBitstreamCodecType pfn_rocdec_get_bitstream_codec_type;
|
||||
PfnRocDecGetBitstreamBitDepth pfn_rocdec_get_bitstream_bit_depth;
|
||||
PfnRocDecGetBitstreamPicData pfn_rocdec_get_bitstream_pic_data;
|
||||
PfnRocDecDestroyBitstreamReader pfn_rocdec_destroy_bitstream_reader;
|
||||
// PLEASE DO NOT EDIT ABOVE!
|
||||
// ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 2
|
||||
|
||||
// *******************************************************************************************
|
||||
// //
|
||||
// READ BELOW
|
||||
// *******************************************************************************************
|
||||
// // Please keep this text at the end of the structure:
|
||||
|
||||
// 1. Do not reorder any existing members.
|
||||
// 2. Increase the step version definition before adding new members.
|
||||
// 3. Insert new members under the appropriate step version comment.
|
||||
// 4. Generate a comment for the next step version.
|
||||
// 5. Add a "PLEASE DO NOT EDIT ABOVE!" comment.
|
||||
// *******************************************************************************************
|
||||
// //
|
||||
};
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
Copyright (c) 2024 - 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ROCDECODE_VERSION_H
|
||||
#define ROCDECODE_VERSION_H
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief rocDecode version
|
||||
* \defgroup group_rocdecode_version rocDecode Version
|
||||
* \brief rocDecode version
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/* NOTE: Match version with CMakeLists.txt */
|
||||
#define ROCDECODE_MAJOR_VERSION 0
|
||||
#define ROCDECODE_MINOR_VERSION 10
|
||||
#define ROCDECODE_MICRO_VERSION 0
|
||||
|
||||
/**
|
||||
* ROCDECODE_CHECK_VERSION:
|
||||
* @major: major version, like 1 in 1.2.3
|
||||
* @minor: minor version, like 2 in 1.2.3
|
||||
* @micro: micro version, like 3 in 1.2.3
|
||||
*
|
||||
* Evaluates to %TRUE if the version of rocDecode is greater than or equal to
|
||||
* @major, @minor and @micro
|
||||
*/
|
||||
#define ROCDECODE_CHECK_VERSION(major, minor, micro) \
|
||||
(ROCDECODE_MAJOR_VERSION > (major) || \
|
||||
(ROCDECODE_MAJOR_VERSION == (major) && ROCDECODE_MINOR_VERSION > (minor)) || \
|
||||
(ROCDECODE_MAJOR_VERSION == (major) && ROCDECODE_MINOR_VERSION == (minor) && \
|
||||
ROCDECODE_MICRO_VERSION >= (micro)))
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,349 @@
|
||||
/*
|
||||
Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE)
|
||||
# if defined __has_include
|
||||
# if __has_include(<rocdecode/rocdecode.h>)
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1
|
||||
# else
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0
|
||||
# endif
|
||||
# else
|
||||
# define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0
|
||||
# include <rocdecode/rocdecode.h>
|
||||
#else
|
||||
# include <rocprofiler-sdk/rocdecode/details/rocdecode.h>
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief The AMD rocParser Library.
|
||||
*
|
||||
* \defgroup group_rocparser rocDecode Parser: AMD ROCm Video Parser API
|
||||
* \brief The AMD rocDecode video parser for AMD’s GPUs.
|
||||
* \defgroup group_rocdec_struct rocDecode Parser Structs: AMD ROCm Video Parser Structs
|
||||
* \brief The AMD rocDecode video parser structs
|
||||
*/
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*********************************************************************************/
|
||||
//! HANDLE of RocdecVideoParser
|
||||
//! Used in subsequent API calls after rocDecCreateVideoParser
|
||||
/*********************************************************************************/
|
||||
|
||||
typedef void* RocdecVideoParser;
|
||||
typedef uint64_t RocdecTimeStamp;
|
||||
|
||||
/**
|
||||
* @brief ROCDEC_VIDEO_FORMAT struct
|
||||
* @ingroup group_rocdec_struct
|
||||
* Used in Parser callback API
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
rocDecVideoCodec codec; /**< OUT: Compression format */
|
||||
/**
|
||||
* OUT: frame rate = numerator / denominator (for example: 30000/1001)
|
||||
*/
|
||||
struct
|
||||
{
|
||||
/**< OUT: frame rate numerator (0 = unspecified or variable frame rate) */
|
||||
uint32_t numerator;
|
||||
/**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */
|
||||
uint32_t denominator;
|
||||
} frame_rate;
|
||||
uint8_t progressive_sequence; /**< OUT: 0=interlaced, 1=progressive */
|
||||
uint8_t bit_depth_luma_minus8; /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for
|
||||
12-bitdepth */
|
||||
uint8_t bit_depth_chroma_minus8; /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for
|
||||
12-bitdepth */
|
||||
uint8_t min_num_decode_surfaces; /**< OUT: Minimum number of decode surfaces to be allocated for
|
||||
correct decoding. The client can send this value in
|
||||
num_decode_surfaces. This guarantees correct functionality
|
||||
and optimal video memory usage but not necessarily the best
|
||||
performance, which depends on the design of the overall
|
||||
application. The optimal number of decode surfaces (in terms
|
||||
of performance and memory utilization) should be decided by
|
||||
experimentation for each application, but it cannot go below
|
||||
min_num_decode_surfaces. If this value is used for
|
||||
num_decode_surfaces then it must be returned to parser
|
||||
during sequence callback. */
|
||||
uint32_t coded_width; /**< OUT: coded frame width in pixels */
|
||||
uint32_t coded_height; /**< OUT: coded frame height in pixels */
|
||||
/**
|
||||
* area of the frame that should be displayed
|
||||
* typical example:
|
||||
* coded_width = 1920, coded_height = 1088
|
||||
* display_area = { 0,0,1920,1080 }
|
||||
*/
|
||||
struct
|
||||
{
|
||||
int left; /**< OUT: left position of display rect */
|
||||
int top; /**< OUT: top position of display rect */
|
||||
int right; /**< OUT: right position of display rect */
|
||||
int bottom; /**< OUT: bottom position of display rect */
|
||||
} display_area;
|
||||
|
||||
rocDecVideoChromaFormat chroma_format; /**< OUT: Chroma format */
|
||||
uint32_t bitrate; /**< OUT: video bitrate (bps, 0=unknown) */
|
||||
/**
|
||||
* OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc)
|
||||
*/
|
||||
struct
|
||||
{
|
||||
int x;
|
||||
int y;
|
||||
} display_aspect_ratio;
|
||||
/**
|
||||
* Video Signal Description
|
||||
* Refer section E.2.1 (VUI parameters semantics) of H264 spec file
|
||||
*/
|
||||
struct
|
||||
{
|
||||
uint8_t video_format : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC,
|
||||
5-Unspecified */
|
||||
uint8_t video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma
|
||||
range */
|
||||
uint8_t reserved_zero_bits : 4; /**< Reserved bits */
|
||||
uint8_t color_primaries; /**< OUT: chromaticity coordinates of source primaries */
|
||||
uint8_t transfer_characteristics; /**< OUT: opto-electronic transfer characteristic of the
|
||||
source picture */
|
||||
uint8_t matrix_coefficients; /**< OUT: used in deriving luma and chroma signals from RGB
|
||||
primaries */
|
||||
} video_signal_description;
|
||||
uint32_t seqhdr_data_length; /**< OUT: Additional bytes following (RocdecVideoFormatEx) */
|
||||
} RocdecVideoFormat;
|
||||
|
||||
/****************************************************************/
|
||||
//! \ingroup group_rocdec_struct
|
||||
//! \struct RocdecVideoFormatEx
|
||||
//! Video format including raw sequence header information
|
||||
//! Used in rocDecCreateVideoParser API
|
||||
/****************************************************************/
|
||||
typedef struct
|
||||
{
|
||||
RocdecVideoFormat format; /**< OUT: RocdecVideoFormat structure */
|
||||
uint32_t max_width;
|
||||
uint32_t max_height;
|
||||
uint8_t raw_seqhdr_data[1024]; /**< OUT: Sequence header data */
|
||||
} RocdecVideoFormatEx;
|
||||
|
||||
/***************************************************************/
|
||||
//! \enum RocdecVideoPacketFlags
|
||||
//! Data packet flags
|
||||
//! Used in RocdecSourceDataPacket structure
|
||||
/***************************************************************/
|
||||
typedef enum
|
||||
{
|
||||
ROCDEC_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */
|
||||
ROCDEC_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */
|
||||
ROCDEC_PKT_DISCONTINUITY = 0x04, /**< Set when a discontinuity has to be signalled */
|
||||
ROCDEC_PKT_ENDOFPICTURE =
|
||||
0x08, /**< Set when the packet contains exactly one frame or one field */
|
||||
ROCDEC_PKT_NOTIFY_EOS =
|
||||
0x10, /**< If this flag is set along with ROCDEC_PKT_ENDOFSTREAM, an additional (dummy)
|
||||
display callback will be invoked with null value of ROCDECPARSERDISPINFO which
|
||||
should be interpreted as end of the stream. */
|
||||
} RocdecVideoPacketFlags;
|
||||
|
||||
/*****************************************************************************/
|
||||
//! \ingroup group_rocdec_struct
|
||||
//! \struct RocdecSourceDataPacket
|
||||
//! Data Packet
|
||||
//! Used in rocDecParseVideoData API
|
||||
//! IN for rocDecParseVideoData
|
||||
/*****************************************************************************/
|
||||
typedef struct _RocdecSourceDataPacket
|
||||
{
|
||||
uint32_t flags; /**< IN: Combination of ROCDEC_PKT_XXX flags */
|
||||
uint32_t
|
||||
payload_size; /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */
|
||||
const uint8_t*
|
||||
payload; /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */
|
||||
RocdecTimeStamp pts; /**< IN: Presentation time stamp (10MHz clock), only valid if
|
||||
ROCDEC_PKT_TIMESTAMP flag is set */
|
||||
} RocdecSourceDataPacket;
|
||||
|
||||
/**********************************************************************************/
|
||||
/*! \brief Timing Info struct
|
||||
* \ingroup group_rocdec_struct
|
||||
* \struct RocdecParserDispInfo
|
||||
* Used in rocDecParseVideoData API with PFNVIDDISPLAYCALLBACK pfn_display_picture
|
||||
*/
|
||||
/**********************************************************************************/
|
||||
typedef struct _RocdecParserDispInfo
|
||||
{
|
||||
int picture_index; /**< OUT: Index of the current picture */
|
||||
int progressive_frame; /**< OUT: 1 if progressive frame; 0 otherwise */
|
||||
int top_field_first; /**< OUT: 1 if top field is displayed first; 0 otherwise */
|
||||
int repeat_first_field; /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame
|
||||
tripling, -1=unpaired field) */
|
||||
RocdecTimeStamp pts; /**< OUT: Presentation time stamp */
|
||||
} RocdecParserDispInfo;
|
||||
|
||||
/**
|
||||
* @brief RocdecOperatingPointInfo struct
|
||||
* @ingroup group_rocdec_struct
|
||||
* Operating point information of scalable bitstream
|
||||
*/
|
||||
typedef struct _RocdecOperatingPointInfo
|
||||
{
|
||||
rocDecVideoCodec codec;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint8_t operating_points_cnt;
|
||||
uint8_t reserved24_bits[3];
|
||||
uint16_t operating_points_idc[32];
|
||||
} av1;
|
||||
uint8_t codec_reserved[1024];
|
||||
};
|
||||
} RocdecOperatingPointInfo;
|
||||
|
||||
/**********************************************************************************/
|
||||
//! \ingroup group_rocdec_struct
|
||||
//! \struct RocdecSeiMessage
|
||||
//! Used in RocdecSeiMessageInfo structure
|
||||
/**********************************************************************************/
|
||||
typedef struct _RocdecSeiMessage
|
||||
{
|
||||
uint8_t sei_message_type; /**< OUT: SEI Message Type */
|
||||
uint8_t reserved[3];
|
||||
uint32_t sei_message_size; /**< OUT: SEI Message Size */
|
||||
} RocdecSeiMessage;
|
||||
|
||||
/**********************************************************************************/
|
||||
//! \ingroup group_rocdec_struct
|
||||
//! \struct RocdecSeiMessageInfo
|
||||
//! Used in rocDecParseVideoData API with PFNVIDSEIMSGCALLBACK pfn_get_sei_msg
|
||||
/**********************************************************************************/
|
||||
typedef struct _RocdecSeiMessageInfo
|
||||
{
|
||||
void* sei_data; /**< OUT: SEI Message Data */
|
||||
RocdecSeiMessage* sei_message; /**< OUT: SEI Message Info */
|
||||
uint32_t sei_message_count; /**< OUT: SEI Message Count */
|
||||
uint32_t picIdx; /**< OUT: SEI Message Pic Index */
|
||||
} RocdecSeiMessageInfo;
|
||||
|
||||
/**
|
||||
* @brief Parser callbacks
|
||||
* The parser will call these synchronously from within rocDecParseVideoData(), whenever there is
|
||||
* a sequence change or a picture is ready to be decoded and/or displayed. Return values from
|
||||
* these callbacks are interpreted as below. If the callbacks return failure, it will be propagated
|
||||
* by rocDecParseVideoData() to the application. Parser picks default operating point as 0 and
|
||||
* outputAllLayers flag as 0 if PFNVIDOPPOINTCALLBACK is not set or return value is -1 or invalid
|
||||
* operating point.
|
||||
* - PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser
|
||||
* (set by RocdecParserParams::max_num_decode_surfaces while creating parser)
|
||||
* - PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded
|
||||
* - PFNVIDDISPLAYCALLBACK : 0: fail, >=1: succeeded
|
||||
* - PFNVIDOPPOINTCALLBACK : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint,
|
||||
* bit 10-10: outputAllLayers, bit 11-30: reserved)
|
||||
* - PFNVIDSEIMSGCALLBACK : 0: fail, >=1: succeeded
|
||||
*/
|
||||
// Callback signatures used by the parser. The first (void*) argument is presumably the
// user_data pointer supplied in RocdecParserParams - TODO confirm.

// Sequence callback: return 0 on failure, 1 on success, > 1 to override the parser's dpb size.
typedef int(ROCDECAPI* PFNVIDSEQUENCECALLBACK)(void*, RocdecVideoFormat*);
|
||||
// Decode callback: return 0 on failure, >= 1 on success.
typedef int(ROCDECAPI* PFNVIDDECODECALLBACK)(void*, RocdecPicParams*);
|
||||
// Display callback: return 0 on failure, >= 1 on success.
typedef int(ROCDECAPI* PFNVIDDISPLAYCALLBACK)(void*, RocdecParserDispInfo*);
|
||||
// Operating-point callback is not declared yet; reserved for future use (AV1 specific):
|
||||
// typedef int (ROCDECAPI *PFNVIDOPPOINTCALLBACK)(void *, RocdecOperatingPointInfo*);
|
||||
// SEI message callback: return 0 on failure, >= 1 on success.
typedef int(ROCDECAPI* PFNVIDSEIMSGCALLBACK)(void*, RocdecSeiMessageInfo*);
|
||||
|
||||
/**
|
||||
* \brief Parameters for creating a video parser.
|
||||
* \ingroup group_rocdec_struct
|
||||
* Used in rocDecCreateVideoParser API
|
||||
*/
|
||||
typedef struct _RocdecParserParams
|
||||
{
|
||||
rocDecVideoCodec codec_type; /**< IN: rocDecVideoCodec_XXX */
|
||||
uint32_t max_num_decode_surfaces; /**< IN: Max # of decode surfaces (parser will cycle through these) */
|
||||
uint32_t clock_rate; /**< IN: Timestamp units in Hz (0=default=10000000Hz) */
|
||||
uint32_t error_threshold; /**< IN: % Error threshold (0-100) for calling pfn_decode_picture
                               (100 = always call pfn_decode_picture, even if the picture
                               bitstream is fully corrupted) */
|
||||
uint32_t max_display_delay; /**< IN: Max display queue delay (improves pipelining of decode with
                                 display); 0 = no delay (recommended values: 2..4) */
|
||||
uint32_t annex_b : 1; /**< IN: AV1 annexB stream */
|
||||
uint32_t reserved : 31; /**< Reserved for future use - set to zero */
|
||||
uint32_t reserved_1[4]; /**< IN: Reserved for future use - set to 0 */
|
||||
void* user_data; /**< IN: User data for callbacks */
|
||||
PFNVIDSEQUENCECALLBACK pfn_sequence_callback; /**< IN: Called before decoding frames and/or
                                                   whenever there is a format change */
|
||||
PFNVIDDECODECALLBACK pfn_decode_picture; /**< IN: Called when a picture is ready to be decoded
                                              (decode order) */
|
||||
PFNVIDDISPLAYCALLBACK pfn_display_picture; /**< IN: Called whenever a picture is ready to be
                                                displayed (display order) */
|
||||
PFNVIDSEIMSGCALLBACK
|
||||
pfn_get_sei_msg; /**< IN: Called when all SEI messages are parsed for a particular frame */
|
||||
void* reserved_2[5]; /**< Reserved for future use - set to NULL */
|
||||
RocdecVideoFormatEx*
|
||||
ext_video_info; /**< IN: [Optional] sequence header data from system layer */
|
||||
} RocdecParserParams;
|
||||
|
||||
/************************************************************************************************/
|
||||
//! \ingroup group_rocparser
|
||||
//! \fn rocDecStatus ROCDECAPI rocDecCreateVideoParser(RocdecVideoParser *parser_handle,
|
||||
//! RocdecParserParams *params) Create video parser object and initialize
/************************************************************************************************/
|
||||
extern rocDecStatus ROCDECAPI
|
||||
rocDecCreateVideoParser(RocdecVideoParser* parser_handle, RocdecParserParams* params);
|
||||
|
||||
/************************************************************************************************/
|
||||
//! \ingroup group_rocparser
|
||||
//! \fn rocDecStatus ROCDECAPI rocDecParseVideoData(RocdecVideoParser parser_handle,
|
||||
//! RocdecSourceDataPacket *packet) Parse the video data from the source data packet in packet. Extracts
|
||||
//! parameter sets like SPS, PPS, bitstream etc. from packet and calls back pfn_decode_picture with
|
||||
//! RocdecPicParams data for kicking off HW decoding; calls back pfn_sequence_callback with
|
||||
//! RocdecVideoFormat data for initial sequence header or when the decoder encounters a video format
|
||||
//! change; calls back pfn_display_picture with RocdecParserDispInfo data to display a video frame
/************************************************************************************************/
|
||||
extern rocDecStatus ROCDECAPI
|
||||
rocDecParseVideoData(RocdecVideoParser parser_handle, RocdecSourceDataPacket* packet);
|
||||
|
||||
/************************************************************************************************/
|
||||
//! \ingroup group_rocparser
|
||||
//! \fn rocDecStatus ROCDECAPI rocDecParserMarkFrameForReuse(RocdecVideoParser parser_handle, int
|
||||
//! pic_idx) Mark frame with index pic_idx in parser's buffer pool for reuse (means the frame has
|
||||
//! been consumed)
//! \param parser_handle parser whose buffer pool holds the frame
//! \param pic_idx index of the frame to mark for reuse
/************************************************************************************************/
|
||||
extern rocDecStatus ROCDECAPI
|
||||
rocDecParserMarkFrameForReuse(RocdecVideoParser parser_handle, int pic_idx);
|
||||
|
||||
/************************************************************************************************/
|
||||
//! \ingroup group_rocparser
|
||||
//! \fn rocDecStatus ROCDECAPI rocDecDestroyVideoParser(RocdecVideoParser parser_handle)
|
||||
//! Destroy the video parser object
//! \param parser_handle handle of the video parser object to destroy
/************************************************************************************************/
|
||||
extern rocDecStatus ROCDECAPI
|
||||
rocDecDestroyVideoParser(RocdecVideoParser parser_handle);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
@@ -0,0 +1,31 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
// Table identifiers for the rocDecode API. Only one table ID is defined here.
// NOLINTNEXTLINE(performance-enum-size)
typedef enum
|
||||
{
|
||||
ROCPROFILER_ROCDECODE_TABLE_ID_NONE = -1,  // value below the first valid ID; presumably "no table"
|
||||
ROCPROFILER_ROCDECODE_TABLE_ID = 0,  // the rocDecode table
|
||||
ROCPROFILER_ROCDECODE_TABLE_ID_LAST,  // one past the last defined ID (evaluates to 1)
|
||||
} rocprofiler_rocdecode_table_id_t;
|
||||
@@ -163,5 +163,7 @@ using counter_records_buffered_output_t =
|
||||
using pc_sampling_host_trap_buffered_output_t =
|
||||
buffered_output<rocprofiler::tool::rocprofiler_tool_pc_sampling_host_trap_record_t,
|
||||
domain_type::PC_SAMPLING_HOST_TRAP>;
|
||||
// buffered_output specialization for rocDecode API buffer-tracing records, tagged with
// domain_type::ROCDECODE
using rocdecode_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_rocdecode_api_record_t, domain_type::ROCDECODE>;
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -61,6 +61,7 @@ DEFINE_BUFFER_TYPE_NAME(PC_SAMPLING_HOST_TRAP,
|
||||
"PC_SAMPLING_HOST_TRAP",
|
||||
"pc_sampling_host_trap",
|
||||
"pc_sampling_host_trap_stats")
|
||||
DEFINE_BUFFER_TYPE_NAME(ROCDECODE, "ROCDECODE_API", "rocdecode_api_trace", "rocdecode_api_stats")
|
||||
|
||||
#undef DEFINE_BUFFER_TYPE_NAME
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@ enum class domain_type
|
||||
MEMORY_ALLOCATION,
|
||||
COUNTER_VALUES,
|
||||
PC_SAMPLING_HOST_TRAP,
|
||||
ROCDECODE,
|
||||
LAST,
|
||||
};
|
||||
|
||||
|
||||
@@ -722,6 +722,48 @@ generate_csv(const output_config& cfg,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_rocdecode_api_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(cfg.stats && stats)
|
||||
write_stats(get_stats_output_file(cfg, domain_type::ROCDECODE), stats.entries);
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::ROCDECODE,
|
||||
tool::csv::api_csv_encoder{},
|
||||
{"Domain",
|
||||
"Function",
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
for(auto ditr : data)
|
||||
{
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
rocprofiler::tool::csv::api_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_metadata.get_kind_name(record.kind),
|
||||
api_name,
|
||||
tool_metadata.process_id,
|
||||
record.thread_id,
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
|
||||
@@ -87,6 +87,12 @@ generate_csv(const output_config& cfg,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
// Writes the rocDecode API trace records in `data` to the ROCDECODE CSV output; the stats
// file is also written when cfg.stats is set and `stats` evaluates to true. Does nothing
// when `data` is empty.
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_rocdecode_api_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
|
||||
@@ -187,7 +187,8 @@ write_json(json_output& json_ar,
|
||||
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
|
||||
generator<rocprofiler_buffer_tracing_rccl_api_record_t> rccl_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_memory_allocation_record_t> memory_allocation_gen,
|
||||
generator<rocprofiler_tool_pc_sampling_host_trap_record_t> pc_sampling_gen)
|
||||
generator<rocprofiler_tool_pc_sampling_host_trap_record_t> pc_sampling_gen,
|
||||
generator<rocprofiler_buffer_tracing_rocdecode_api_record_t> rocdecode_api_gen)
|
||||
|
||||
{
|
||||
// summary
|
||||
@@ -229,6 +230,7 @@ write_json(json_output& json_ar,
|
||||
json_ar(cereal::make_nvp("memory_allocation", memory_allocation_gen));
|
||||
json_ar(cereal::make_nvp("scratch_memory", scratch_memory_gen));
|
||||
json_ar(cereal::make_nvp("pc_sample_host_trap", pc_sampling_gen));
|
||||
json_ar(cereal::make_nvp("rocdecode_api", rocdecode_api_gen));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,6 +94,8 @@ write_json(json_output& json
|
||||
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
|
||||
generator<rocprofiler_buffer_tracing_rccl_api_record_t> rccl_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_memory_allocation_record_t> memory_allocation_gen,
|
||||
generator<rocprofiler_tool_pc_sampling_host_trap_record_t> pc_sampling_gen);
|
||||
generator<rocprofiler_tool_pc_sampling_host_trap_record_t> pc_sampling_gen,
|
||||
generator<rocprofiler_buffer_tracing_rocdecode_api_record_t> rocdecode_api_gen);
|
||||
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -367,7 +367,8 @@ write_otf2(
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* /*scratch_memory_data*/,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data)
|
||||
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rocdecode_api_record_t>* rocdecode_api_data)
|
||||
{
|
||||
namespace sdk = ::rocprofiler::sdk;
|
||||
|
||||
@@ -418,6 +419,8 @@ write_otf2(
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto itr : *rccl_api_data)
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto itr : *rocdecode_api_data)
|
||||
tids.emplace(itr.thread_id);
|
||||
|
||||
for(auto itr : *memory_copy_data)
|
||||
{
|
||||
@@ -614,6 +617,7 @@ write_otf2(
|
||||
add_event_data(hip_api_data, sdk::category::hip_api{});
|
||||
add_event_data(marker_api_data, sdk::category::marker_api{});
|
||||
add_event_data(rccl_api_data, sdk::category::rccl_api{});
|
||||
add_event_data(rocdecode_api_data, sdk::category::rocdecode_api{});
|
||||
}
|
||||
|
||||
for(auto itr : *memory_copy_data)
|
||||
|
||||
@@ -46,6 +46,7 @@ write_otf2(
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* scratch_memory_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data);
|
||||
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rocdecode_api_record_t>* rocdecode_api_data);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -72,7 +72,8 @@ write_perfetto(
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& /*scratch_memory_gen*/,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_memory_allocation_record_t>& memory_allocation_gen)
|
||||
const generator<rocprofiler_buffer_tracing_memory_allocation_record_t>& memory_allocation_gen,
|
||||
const generator<rocprofiler_buffer_tracing_rocdecode_api_record_t>& rocdecode_api_gen)
|
||||
{
|
||||
namespace sdk = ::rocprofiler::sdk;
|
||||
|
||||
@@ -168,6 +169,9 @@ write_perfetto(
|
||||
for(auto ditr : rccl_api_gen)
|
||||
for(auto itr : rccl_api_gen.get(ditr))
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto ditr : rocdecode_api_gen)
|
||||
for(auto itr : rocdecode_api_gen.get(ditr))
|
||||
tids.emplace(itr.thread_id);
|
||||
|
||||
for(auto ditr : memory_copy_gen)
|
||||
for(auto itr : memory_copy_gen.get(ditr))
|
||||
@@ -399,6 +403,37 @@ write_perfetto(
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto ditr : rocdecode_api_gen)
|
||||
for(auto itr : rocdecode_api_gen.get(ditr))
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::rocdecode_api>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal);
|
||||
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::rocdecode_api>::name,
|
||||
track,
|
||||
itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto ditr : memory_copy_gen)
|
||||
for(auto itr : memory_copy_gen.get(ditr))
|
||||
{
|
||||
|
||||
@@ -46,6 +46,7 @@ write_perfetto(
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& scratch_memory_gen,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_memory_allocation_record_t>& memory_allocation_gen);
|
||||
const generator<rocprofiler_buffer_tracing_memory_allocation_record_t>& memory_allocation_gen,
|
||||
const generator<rocprofiler_buffer_tracing_rocdecode_api_record_t>& rocdecode_api_gen);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -228,6 +228,24 @@ generate_stats(const output_config& /*cfg*/,
|
||||
return get_stats(rccl_stats);
|
||||
}
|
||||
|
||||
/// Build the statistics entry for rocDecode API tracing.
///
/// Every record contributes its duration (end timestamp minus start timestamp) to the
/// accumulator keyed by the operation name; the accumulated map is then handed to
/// get_stats().
///
/// \param tool_metadata  used to translate (kind, operation) into the API name
/// \param data           generator over the buffered rocDecode API records
/// \return result of get_stats() for the accumulated per-API durations
stats_entry_t
generate_stats(const output_config& /*cfg*/,
               const metadata&                                                     tool_metadata,
               const generator<rocprofiler_buffer_tracing_rocdecode_api_record_t>& data)
{
    auto per_api = stats_map_t{};

    for(auto chunk : data)
    {
        for(auto entry : data.get(chunk))
        {
            auto operation_name = tool_metadata.get_operation_name(entry.kind, entry.operation);
            per_api[operation_name] += (entry.end_timestamp - entry.start_timestamp);
        }
    }

    return get_stats(per_api);
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void
|
||||
|
||||
@@ -75,6 +75,11 @@ generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_memory_allocation_record_t>& data);
|
||||
|
||||
// Accumulates (end_timestamp - start_timestamp) per rocDecode API operation name over all
// records in `data` and returns the resulting stats entry.
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_rocdecode_api_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
|
||||
@@ -97,6 +97,7 @@ struct config : output_config
|
||||
bool hip_runtime_api_trace = get_env("ROCPROF_HIP_RUNTIME_API_TRACE", false);
|
||||
bool hip_compiler_api_trace = get_env("ROCPROF_HIP_COMPILER_API_TRACE", false);
|
||||
bool rccl_api_trace = get_env("ROCPROF_RCCL_API_TRACE", false);
|
||||
bool rocdecode_api_trace = get_env("ROCPROF_ROCDECODE_API_TRACE", false);
|
||||
bool list_metrics = get_env("ROCPROF_LIST_METRICS", false);
|
||||
bool list_metrics_output_file = get_env("ROCPROF_OUTPUT_LIST_METRICS_FILE", false);
|
||||
bool pc_sampling_host_trap = false;
|
||||
|
||||
@@ -139,18 +139,20 @@ struct buffer_ids
|
||||
rocprofiler_buffer_id_t scratch_memory = {};
|
||||
rocprofiler_buffer_id_t rccl_api_trace = {};
|
||||
rocprofiler_buffer_id_t pc_sampling_host_trap = {};
|
||||
rocprofiler_buffer_id_t rocdecode_api_trace = {};
|
||||
|
||||
auto as_array() const
|
||||
{
|
||||
return std::array<rocprofiler_buffer_id_t, 9>{hsa_api_trace,
|
||||
hip_api_trace,
|
||||
kernel_trace,
|
||||
memory_copy_trace,
|
||||
memory_allocation_trace,
|
||||
counter_collection,
|
||||
scratch_memory,
|
||||
rccl_api_trace,
|
||||
pc_sampling_host_trap};
|
||||
return std::array<rocprofiler_buffer_id_t, 10>{hsa_api_trace,
|
||||
hip_api_trace,
|
||||
kernel_trace,
|
||||
memory_copy_trace,
|
||||
memory_allocation_trace,
|
||||
counter_collection,
|
||||
scratch_memory,
|
||||
rccl_api_trace,
|
||||
pc_sampling_host_trap,
|
||||
rocdecode_api_trace};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -742,6 +744,13 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
|
||||
|
||||
tool::write_ring_buffer(*record, domain_type::RCCL);
|
||||
}
|
||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_ROCDECODE_API)
|
||||
{
|
||||
auto* record = static_cast<rocprofiler_buffer_tracing_rocdecode_api_record_t*>(
|
||||
header->payload);
|
||||
|
||||
tool::write_ring_buffer(*record, domain_type::ROCDECODE);
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCP_FATAL << fmt::format(
|
||||
@@ -1267,6 +1276,26 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
"Could not setup counting service");
|
||||
}
|
||||
|
||||
if(tool::get_config().rocdecode_api_trace)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
||||
buffer_size,
|
||||
buffer_watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
buffered_tracing_callback,
|
||||
tool_data,
|
||||
&get_buffers().rocdecode_api_trace),
|
||||
"buffer creation");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_buffer_tracing_service(get_client_ctx(),
|
||||
ROCPROFILER_BUFFER_TRACING_ROCDECODE_API,
|
||||
nullptr,
|
||||
0,
|
||||
get_buffers().rocdecode_api_trace),
|
||||
"buffer tracing service for ROCDecode api configure");
|
||||
}
|
||||
|
||||
if(tool::get_config().kernel_rename)
|
||||
{
|
||||
auto rename_ctx = rocprofiler_context_id_t{0};
|
||||
@@ -1441,6 +1470,8 @@ tool_fini(void* /*tool_data*/)
|
||||
tool::memory_allocation_buffered_output_t{tool::get_config().memory_allocation_trace};
|
||||
auto counters_records_output =
|
||||
tool::counter_records_buffered_output_t{tool::get_config().counter_collection};
|
||||
auto rocdecode_output =
|
||||
tool::rocdecode_buffered_output_t{tool::get_config().rocdecode_api_trace};
|
||||
auto pc_sampling_host_trap_output =
|
||||
tool::pc_sampling_host_trap_buffered_output_t{tool::get_config().pc_sampling_host_trap};
|
||||
|
||||
@@ -1465,6 +1496,7 @@ tool_fini(void* /*tool_data*/)
|
||||
generate_output(rccl_output, contributions);
|
||||
generate_output(counters_output, contributions);
|
||||
generate_output(scratch_memory_output, contributions);
|
||||
generate_output(rocdecode_output, contributions);
|
||||
generate_output(pc_sampling_host_trap_output, contributions);
|
||||
|
||||
if(tool::get_config().stats && tool::get_config().csv_output)
|
||||
@@ -1491,7 +1523,8 @@ tool_fini(void* /*tool_data*/)
|
||||
scratch_memory_output.get_generator(),
|
||||
rccl_output.get_generator(),
|
||||
memory_allocation_output.get_generator(),
|
||||
pc_sampling_host_trap_output.get_generator());
|
||||
pc_sampling_host_trap_output.get_generator(),
|
||||
rocdecode_output.get_generator());
|
||||
json_ar.finish_process();
|
||||
|
||||
tool::close_json(json_ar);
|
||||
@@ -1509,7 +1542,8 @@ tool_fini(void* /*tool_data*/)
|
||||
marker_output.get_generator(),
|
||||
scratch_memory_output.get_generator(),
|
||||
rccl_output.get_generator(),
|
||||
memory_allocation_output.get_generator());
|
||||
memory_allocation_output.get_generator(),
|
||||
rocdecode_output.get_generator());
|
||||
}
|
||||
|
||||
if(tool::get_config().otf2_output)
|
||||
@@ -1522,6 +1556,7 @@ tool_fini(void* /*tool_data*/)
|
||||
auto scratch_memory_elem_data = scratch_memory_output.load_all();
|
||||
auto rccl_elem_data = rccl_output.load_all();
|
||||
auto memory_allocation_elem_data = memory_allocation_output.load_all();
|
||||
auto rocdecode_elem_data = rocdecode_output.load_all();
|
||||
|
||||
tool::write_otf2(tool::get_config(),
|
||||
*tool_metadata,
|
||||
@@ -1534,7 +1569,8 @@ tool_fini(void* /*tool_data*/)
|
||||
&marker_elem_data,
|
||||
&scratch_memory_elem_data,
|
||||
&rccl_elem_data,
|
||||
&memory_allocation_elem_data);
|
||||
&memory_allocation_elem_data,
|
||||
&rocdecode_elem_data);
|
||||
}
|
||||
|
||||
if(tool::get_config().summary_output)
|
||||
@@ -1554,6 +1590,7 @@ tool_fini(void* /*tool_data*/)
|
||||
destroy_output(scratch_memory_output);
|
||||
destroy_output(rccl_output);
|
||||
destroy_output(counters_records_output);
|
||||
destroy_output(rocdecode_output);
|
||||
destroy_output(pc_sampling_host_trap_output);
|
||||
|
||||
if(destructors)
|
||||
|
||||
@@ -52,6 +52,7 @@ add_subdirectory(tracing)
|
||||
add_subdirectory(kernel_dispatch)
|
||||
add_subdirectory(page_migration)
|
||||
add_subdirectory(rccl)
|
||||
add_subdirectory(rocdecode)
|
||||
add_subdirectory(details)
|
||||
add_subdirectory(ompt)
|
||||
|
||||
@@ -61,6 +62,7 @@ target_link_libraries(
|
||||
rocprofiler-sdk::rocprofiler-sdk-hip-nolink
|
||||
rocprofiler-sdk::rocprofiler-sdk-hsa-runtime-nolink
|
||||
rocprofiler-sdk::rocprofiler-sdk-rccl-nolink
|
||||
rocprofiler-sdk::rocprofiler-sdk-rocdecode-nolink
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk-build-flags
|
||||
rocprofiler-sdk::rocprofiler-sdk-memcheck
|
||||
rocprofiler-sdk::rocprofiler-sdk-common-library
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include "lib/rocprofiler-sdk/page_migration/page_migration.hpp"
|
||||
#include "lib/rocprofiler-sdk/rccl/rccl.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp"
|
||||
#include "lib/rocprofiler-sdk/runtime_initialization.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
@@ -41,6 +42,7 @@
|
||||
#include <rocprofiler-sdk/hsa/table_id.h>
|
||||
#include <rocprofiler-sdk/marker/table_id.h>
|
||||
#include <rocprofiler-sdk/rccl/table_id.h>
|
||||
#include <rocprofiler-sdk/rocdecode/table_id.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <atomic>
|
||||
@@ -91,6 +93,7 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(CORRELATION_ID_RETIREMENT)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(RCCL_API)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(OMPT)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(RUNTIME_INITIALIZATION)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCDECODE_API)
|
||||
|
||||
template <size_t Idx, size_t... Tail>
|
||||
std::pair<const char*, size_t>
|
||||
@@ -288,6 +291,11 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
case ROCPROFILER_BUFFER_TRACING_ROCDECODE_API:
|
||||
{
|
||||
val = rocprofiler::rocdecode::name_by_id<ROCPROFILER_ROCDECODE_TABLE_ID>(operation);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
if(!val)
|
||||
@@ -419,6 +427,11 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
case ROCPROFILER_BUFFER_TRACING_ROCDECODE_API:
|
||||
{
|
||||
ops = rocprofiler::rocdecode::get_ids<ROCPROFILER_ROCDECODE_TABLE_ID>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for(const auto& itr : ops)
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
#include "lib/rocprofiler-sdk/ompt/ompt.hpp"
|
||||
#include "lib/rocprofiler-sdk/rccl/rccl.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp"
|
||||
#include "lib/rocprofiler-sdk/runtime_initialization.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
@@ -41,6 +42,7 @@
|
||||
#include <rocprofiler-sdk/hsa/table_id.h>
|
||||
#include <rocprofiler-sdk/marker/table_id.h>
|
||||
#include <rocprofiler-sdk/rccl/table_id.h>
|
||||
#include <rocprofiler-sdk/rocdecode/table_id.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <atomic>
|
||||
@@ -88,6 +90,7 @@ ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MEMORY_ALLOCATION)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(RCCL_API)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(OMPT)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(RUNTIME_INITIALIZATION)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(ROCDECODE_API)
|
||||
|
||||
template <size_t Idx, size_t... Tail>
|
||||
std::pair<const char*, size_t>
|
||||
@@ -269,6 +272,12 @@ rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_trac
|
||||
case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION:
|
||||
{
|
||||
val = rocprofiler::runtime_init::name_by_id(operation);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API:
|
||||
{
|
||||
val = rocprofiler::rocdecode::name_by_id<ROCPROFILER_ROCDECODE_TABLE_ID>(operation);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -397,6 +406,12 @@ rocprofiler_iterate_callback_tracing_kind_operations(
|
||||
case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION:
|
||||
{
|
||||
ops = rocprofiler::runtime_init::get_ids();
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API:
|
||||
{
|
||||
ops = rocprofiler::rocdecode::get_ids<ROCPROFILER_ROCDECODE_TABLE_ID>();
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -539,6 +554,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args(
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY:
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION:
|
||||
case ROCPROFILER_CALLBACK_TRACING_RCCL_API:
|
||||
case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API:
|
||||
case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION:
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hip/amd_detail/hip_api_trace.hpp>
|
||||
#include "lib/rocprofiler-sdk/rccl/rccl.hpp"
|
||||
#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
@@ -57,7 +58,8 @@ constexpr auto intercept_library_seq = library_sequence_t<ROCPROFILER_HSA_TABLE,
|
||||
ROCPROFILER_MARKER_CORE_TABLE,
|
||||
ROCPROFILER_MARKER_CONTROL_TABLE,
|
||||
ROCPROFILER_MARKER_NAME_TABLE,
|
||||
ROCPROFILER_RCCL_TABLE>{};
|
||||
ROCPROFILER_RCCL_TABLE,
|
||||
ROCPROFILER_ROCDECODE_TABLE>{};
|
||||
|
||||
// check that intercept_library_seq is up to date
|
||||
static_assert((1 << (intercept_library_seq.size() - 1)) == ROCPROFILER_TABLE_LAST,
|
||||
@@ -192,6 +194,11 @@ template void notify_intercept_table_registration(rocprofiler_intercept_table_t,
|
||||
uint64_t,
|
||||
uint64_t,
|
||||
std::tuple<rcclApiFuncTable*>);
|
||||
|
||||
template void notify_intercept_table_registration(rocprofiler_intercept_table_t,
|
||||
uint64_t,
|
||||
uint64_t,
|
||||
std::tuple<RocDecodeDispatchTable*>);
|
||||
} // namespace intercept_table
|
||||
} // namespace rocprofiler
|
||||
|
||||
|
||||
@@ -122,7 +122,8 @@ constexpr auto creation_notifier_library_seq = library_sequence_t<ROCPROFILER_LI
|
||||
ROCPROFILER_HSA_LIBRARY,
|
||||
ROCPROFILER_HIP_LIBRARY,
|
||||
ROCPROFILER_MARKER_LIBRARY,
|
||||
ROCPROFILER_RCCL_LIBRARY>{};
|
||||
ROCPROFILER_RCCL_LIBRARY,
|
||||
ROCPROFILER_ROCDECODE_LIBRARY>{};
|
||||
|
||||
// check that creation_notifier_library_seq is up to date
|
||||
static_assert((1 << (creation_notifier_library_seq.size() - 1)) == ROCPROFILER_LIBRARY_LAST,
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/code_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/service.hpp"
|
||||
#include "lib/rocprofiler-sdk/rccl/rccl.hpp"
|
||||
#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp"
|
||||
#include "lib/rocprofiler-sdk/runtime_initialization.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/context.h>
|
||||
@@ -908,6 +909,30 @@ rocprofiler_set_api_table(const char* name,
|
||||
rocprofiler::intercept_table::notify_intercept_table_registration(
|
||||
ROCPROFILER_RCCL_TABLE, lib_version, lib_instance, std::make_tuple(rccl_api));
|
||||
}
|
||||
else if(std::string_view{name} == "rocdecode")
|
||||
{
|
||||
// pass to rocdecode init
|
||||
ROCP_ERROR_IF(num_tables > 1)
|
||||
<< "rocprofiler expected ROCDecode library to pass 1 API table, not " << num_tables;
|
||||
|
||||
auto* rocdecode_api = static_cast<RocDecodeDispatchTable*>(tables[0]);
|
||||
|
||||
// any internal modifications to the RocDecodeDispatchTable need to be done before we make
|
||||
// the copy or else those modifications will be lost when ROCDecode API tracing is enabled
|
||||
// because the ROCDecode API tracing invokes the function pointers from the copy below
|
||||
rocprofiler::rocdecode::copy_table(rocdecode_api, lib_instance);
|
||||
|
||||
// install rocprofiler API wrappers
|
||||
rocprofiler::rocdecode::update_table(rocdecode_api);
|
||||
|
||||
// Tracing notifications the runtime has initialized
|
||||
rocprofiler::runtime_init::initialize(
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_ROCDECODE, lib_version, lib_instance);
|
||||
|
||||
// allow tools to install API wrappers
|
||||
rocprofiler::intercept_table::notify_intercept_table_registration(
|
||||
ROCPROFILER_ROCDECODE_TABLE, lib_version, lib_instance, std::make_tuple(rocdecode_api));
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCP_ERROR << "rocprofiler does not accept API tables from " << name;
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
# Source and header files of the rocdecode component
set(ROCPROFILER_LIB_ROCDECODE_SOURCES abi.cpp rocdecode.cpp)
|
||||
set(ROCPROFILER_LIB_ROCDECODE_HEADERS defines.hpp rocdecode.hpp)
|
||||
|
||||
# Add both lists as PRIVATE sources of the rocprofiler-sdk-object-library target
target_sources(
|
||||
rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_ROCDECODE_SOURCES}
|
||||
${ROCPROFILER_LIB_ROCDECODE_HEADERS})
|
||||
@@ -0,0 +1,67 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp"
|
||||
|
||||
#include "lib/common/abi.hpp"
|
||||
#include "lib/common/defines.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/rocdecode.h>
|
||||
#include <rocprofiler-sdk/version.h>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace rocdecode
|
||||
{
|
||||
static_assert(ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION == 0,
|
||||
"Major version updated for ROCDecode dispatch table");
|
||||
|
||||
#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 0
|
||||
ROCP_SDK_ENFORCE_ABI_VERSIONING(::RocDecodeDispatchTable, 11);
|
||||
#endif
|
||||
|
||||
#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION == 1
|
||||
ROCP_SDK_ENFORCE_ABI_VERSIONING(::RocDecodeDispatchTable, 16);
|
||||
#endif
|
||||
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_create_video_parser, 0)
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_parse_video_data, 1)
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_destroy_video_parser, 2)
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_create_decoder, 3)
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_destroy_decoder, 4)
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_gecoder_caps, 5)
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_decode_frame, 6)
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_decode_status, 7)
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_reconfigure_decoder, 8)
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_video_frame, 9)
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_error_name, 10)
|
||||
|
||||
#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_create_bitstream_reader, 11);
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_bitstream_codec_type, 12);
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_bitstream_bit_depth, 13);
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_get_bitstream_pic_data, 14);
|
||||
ROCP_SDK_ENFORCE_ABI(::RocDecodeDispatchTable, pfn_rocdec_destroy_bitstream_reader, 15);
|
||||
#endif
|
||||
|
||||
} // namespace rocdecode
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,216 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/defines.hpp"
|
||||
|
||||
// Specializes rocdecode_api_info<TABLE, API_ID> for a rocDecode function whose
// arguments are not exposed individually (as_arg_addr / as_arg_list return
// empty containers).
//
//   ROCDECODE_TABLE    - table index used to look up the dispatch table type
//   ROCDECODE_API_ID   - operation index of the function within that table
//   ROCDECODE_FUNC     - public function name (stringified into `name`, and the
//                        member selected inside the args union)
//   ROCDECODE_FUNC_PTR - name of the function-pointer member in the table
//
// The static_assert verifies the member sits at sizeof(size_t) plus
// operation_idx pointer slots from the start of the table.
// NOTE: no `//` comments may appear inside the macro body because of the line
// continuations.
#define ROCDECODE_API_INFO_DEFINITION_0(                                                           \
    ROCDECODE_TABLE, ROCDECODE_API_ID, ROCDECODE_FUNC, ROCDECODE_FUNC_PTR)                         \
    namespace rocprofiler                                                                          \
    {                                                                                              \
    namespace rocdecode                                                                            \
    {                                                                                              \
    template <>                                                                                    \
    struct rocdecode_api_info<ROCDECODE_TABLE, ROCDECODE_API_ID>                                   \
    : rocdecode_domain_info<ROCDECODE_TABLE>                                                       \
    {                                                                                              \
        static constexpr auto table_idx     = ROCDECODE_TABLE;                                     \
        static constexpr auto operation_idx = ROCDECODE_API_ID;                                    \
        static constexpr auto name          = #ROCDECODE_FUNC;                                     \
                                                                                                   \
        using domain_type = rocdecode_domain_info<table_idx>;                                      \
        using this_type   = rocdecode_api_info<table_idx, operation_idx>;                          \
        using base_type   = rocdecode_api_impl<table_idx, operation_idx>;                          \
                                                                                                   \
        using domain_type::callback_domain_idx;                                                    \
        using domain_type::buffered_domain_idx;                                                    \
        using domain_type::args_type;                                                              \
        using domain_type::retval_type;                                                            \
        using domain_type::callback_data_type;                                                     \
                                                                                                   \
        static constexpr auto offset()                                                             \
        {                                                                                          \
            return offsetof(rocdecode_table_lookup<table_idx>::type, ROCDECODE_FUNC_PTR);          \
        }                                                                                          \
                                                                                                   \
        static_assert(offsetof(rocdecode_table_lookup<table_idx>::type, ROCDECODE_FUNC_PTR) ==     \
                          (sizeof(size_t) + (operation_idx * sizeof(void*))),                      \
                      "ABI error for " #ROCDECODE_FUNC);                                           \
                                                                                                   \
        static auto& get_table() { return rocdecode_table_lookup<table_idx>{}(); }                 \
                                                                                                   \
        template <typename TableT>                                                                 \
        static auto& get_table(TableT& _v)                                                         \
        {                                                                                          \
            return rocdecode_table_lookup<table_idx>{}(_v);                                        \
        }                                                                                          \
                                                                                                   \
        template <typename TableT>                                                                 \
        static auto& get_table_func(TableT& _table)                                                \
        {                                                                                          \
            if constexpr(std::is_pointer<TableT>::value)                                           \
            {                                                                                      \
                assert(_table != nullptr && "nullptr to rocdecode table for " #ROCDECODE_FUNC      \
                                            " function");                                          \
                return _table->ROCDECODE_FUNC_PTR;                                                 \
            }                                                                                      \
            else                                                                                   \
            {                                                                                      \
                return _table.ROCDECODE_FUNC_PTR;                                                  \
            }                                                                                      \
        }                                                                                          \
                                                                                                   \
        static auto& get_table_func() { return get_table_func(get_table()); }                      \
                                                                                                   \
        template <typename DataT>                                                                  \
        static auto& get_api_data_args(DataT& _data)                                               \
        {                                                                                          \
            return _data.ROCDECODE_FUNC;                                                           \
        }                                                                                          \
                                                                                                   \
        template <typename RetT, typename... Args>                                                 \
        static auto get_functor(RetT (*)(Args...))                                                 \
        {                                                                                          \
            return &base_type::functor<RetT, Args...>;                                             \
        }                                                                                          \
                                                                                                   \
        static std::vector<void*> as_arg_addr(callback_data_type) { return std::vector<void*>{}; } \
                                                                                                   \
        static std::vector<common::stringified_argument> as_arg_list(callback_data_type, int32_t)  \
        {                                                                                          \
            return {};                                                                             \
        }                                                                                          \
    };                                                                                             \
    }                                                                                              \
    }
|
||||
|
||||
// Specializes rocdecode_api_info<TABLE, API_ID> for a rocDecode function that
// takes arguments. The trailing variadic parameters name the members of the
// per-function args struct; as_arg_addr() returns their addresses via
// GET_ADDR_MEMBER_FIELDS.
//
//   ROCDECODE_TABLE    - table index used to look up the dispatch table type
//   ROCDECODE_API_ID   - operation index of the function within that table
//   ROCDECODE_FUNC     - public function name (stringified into `name`, and the
//                        member selected inside the args union)
//   ROCDECODE_FUNC_PTR - name of the function-pointer member in the table
//   ...                - argument member names
//
// The static_assert verifies the member sits at sizeof(size_t) plus
// operation_idx pointer slots from the start of the table.
// NOTE: no `//` comments may appear inside the macro body because of the line
// continuations.
#define ROCDECODE_API_INFO_DEFINITION_V(                                                           \
    ROCDECODE_TABLE, ROCDECODE_API_ID, ROCDECODE_FUNC, ROCDECODE_FUNC_PTR, ...)                    \
    namespace rocprofiler                                                                          \
    {                                                                                              \
    namespace rocdecode                                                                            \
    {                                                                                              \
    template <>                                                                                    \
    struct rocdecode_api_info<ROCDECODE_TABLE, ROCDECODE_API_ID>                                   \
    : rocdecode_domain_info<ROCDECODE_TABLE>                                                       \
    {                                                                                              \
        static constexpr auto table_idx     = ROCDECODE_TABLE;                                     \
        static constexpr auto operation_idx = ROCDECODE_API_ID;                                    \
        static constexpr auto name          = #ROCDECODE_FUNC;                                     \
                                                                                                   \
        using domain_type = rocdecode_domain_info<table_idx>;                                      \
        using this_type   = rocdecode_api_info<table_idx, operation_idx>;                          \
        using base_type   = rocdecode_api_impl<table_idx, operation_idx>;                          \
                                                                                                   \
        static constexpr auto callback_domain_idx = domain_type::callback_domain_idx;              \
        static constexpr auto buffered_domain_idx = domain_type::buffered_domain_idx;              \
                                                                                                   \
        using domain_type::args_type;                                                              \
        using domain_type::retval_type;                                                            \
        using domain_type::callback_data_type;                                                     \
                                                                                                   \
        static constexpr auto offset()                                                             \
        {                                                                                          \
            return offsetof(rocdecode_table_lookup<table_idx>::type, ROCDECODE_FUNC_PTR);          \
        }                                                                                          \
                                                                                                   \
        static_assert(offsetof(rocdecode_table_lookup<table_idx>::type, ROCDECODE_FUNC_PTR) ==     \
                          (sizeof(size_t) + (operation_idx * sizeof(void*))),                      \
                      "ABI error for " #ROCDECODE_FUNC);                                           \
                                                                                                   \
        static auto& get_table() { return rocdecode_table_lookup<table_idx>{}(); }                 \
                                                                                                   \
        template <typename TableT>                                                                 \
        static auto& get_table(TableT& _v)                                                         \
        {                                                                                          \
            return rocdecode_table_lookup<table_idx>{}(_v);                                        \
        }                                                                                          \
                                                                                                   \
        template <typename TableT>                                                                 \
        static auto& get_table_func(TableT& _table)                                                \
        {                                                                                          \
            if constexpr(std::is_pointer<TableT>::value)                                           \
            {                                                                                      \
                assert(_table != nullptr && "nullptr to rocdecode table for " #ROCDECODE_FUNC      \
                                            " function");                                          \
                return _table->ROCDECODE_FUNC_PTR;                                                 \
            }                                                                                      \
            else                                                                                   \
            {                                                                                      \
                return _table.ROCDECODE_FUNC_PTR;                                                  \
            }                                                                                      \
        }                                                                                          \
                                                                                                   \
        static auto& get_table_func() { return get_table_func(get_table()); }                      \
                                                                                                   \
        template <typename DataT>                                                                  \
        static auto& get_api_data_args(DataT& _data)                                               \
        {                                                                                          \
            return _data.ROCDECODE_FUNC;                                                           \
        }                                                                                          \
                                                                                                   \
        template <typename RetT, typename... Args>                                                 \
        static auto get_functor(RetT (*)(Args...))                                                 \
        {                                                                                          \
            return &base_type::functor<RetT, Args...>;                                             \
        }                                                                                          \
                                                                                                   \
        static std::vector<void*> as_arg_addr(callback_data_type trace_data)                       \
        {                                                                                          \
            return std::vector<void*>{                                                             \
                GET_ADDR_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)};          \
        }                                                                                          \
    };                                                                                             \
    }                                                                                              \
    }
|
||||
|
||||
// Ties a table index (TABLE_ID) to a dispatch-table type (TYPE) by emitting:
//   - a get_table<TABLE_ID>() specialization that forwards to get_table_impl<TYPE>()
//   - rocdecode_table_lookup<TABLE_ID>, a functor yielding a reference to a
//     table from a reference, a pointer, or (no argument) get_table<TABLE_ID>()
//   - rocdecode_table_id_lookup<TYPE>, the reverse mapping from type to index
// NOTE: no `//` comments may appear inside the macro body because of the line
// continuations.
#define ROCDECODE_API_TABLE_LOOKUP_DEFINITION(TABLE_ID, TYPE)                                      \
    namespace rocprofiler                                                                          \
    {                                                                                              \
    namespace rocdecode                                                                            \
    {                                                                                              \
    namespace                                                                                      \
    {                                                                                              \
    template <>                                                                                    \
    auto* get_table<TABLE_ID>()                                                                    \
    {                                                                                              \
        return get_table_impl<TYPE>();                                                             \
    }                                                                                              \
    }                                                                                              \
                                                                                                   \
    template <>                                                                                    \
    struct rocdecode_table_lookup<TABLE_ID>                                                        \
    {                                                                                              \
        using type = TYPE;                                                                         \
        auto& operator()(type& _tbl) const { return _tbl; }                                        \
        auto& operator()(type* _tbl) const { return *_tbl; }                                       \
        auto& operator()() const { return (*this)(get_table<TABLE_ID>()); }                        \
    };                                                                                             \
                                                                                                   \
    template <>                                                                                    \
    struct rocdecode_table_id_lookup<TYPE>                                                         \
    {                                                                                              \
        static constexpr auto value = TABLE_ID;                                                    \
    };                                                                                             \
    }                                                                                              \
    }
|
||||
@@ -0,0 +1,560 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp"
|
||||
#include "lib/common/defines.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/buffer.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/hip.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/utils.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
#include "lib/rocprofiler-sdk/tracing/tracing.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/buffer.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/rocdecode/table_id.h>
|
||||
|
||||
#include <hip/driver_types.h>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
// must be included after runtime api
|
||||
#include <hip/hip_deprecated.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace rocdecode
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Empty placeholder returned by exec() when the wrapped function returns void.
struct null_type
{
};
|
||||
|
||||
template <typename Tp>
|
||||
auto
|
||||
get_default_retval()
|
||||
{
|
||||
if constexpr(std::is_pointer<Tp>::value)
|
||||
{
|
||||
Tp v = nullptr;
|
||||
return v;
|
||||
}
|
||||
else if constexpr(std::is_same<Tp, rocDecStatus>::value)
|
||||
return ROCDEC_RUNTIME_ERROR;
|
||||
else if constexpr(std::is_same<Tp, const char*>::value)
|
||||
return "UnknownString";
|
||||
else
|
||||
static_assert(std::is_empty<Tp>::value, "Error! unsupported return type");
|
||||
}
|
||||
|
||||
template <typename DataT, typename Tp>
|
||||
void
|
||||
set_data_retval(DataT& _data, Tp _val)
|
||||
{
|
||||
if constexpr(std::is_same<Tp, rocDecStatus>::value)
|
||||
{
|
||||
_data.rocDecStatus_retval = _val;
|
||||
}
|
||||
else if constexpr(std::is_same<Tp, const char*>::value)
|
||||
{
|
||||
_data.const_charp_retval = _val;
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(std::is_empty<Tp>::value, "Error! unsupported return type");
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
Tp*
|
||||
get_table_impl()
|
||||
{
|
||||
static auto*& _v = common::static_object<Tp>::construct(common::init_public_api_struct(Tp{}));
|
||||
return _v;
|
||||
}
|
||||
|
||||
template <size_t TableIdx>
|
||||
auto*
|
||||
get_table();
|
||||
|
||||
} // namespace
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx>
|
||||
template <typename DataArgsT, typename... Args>
|
||||
auto
|
||||
rocdecode_api_impl<TableIdx, OpIdx>::set_data_args(DataArgsT& _data_args, Args... args)
|
||||
{
|
||||
if constexpr(sizeof...(Args) == 0)
|
||||
_data_args.no_args.empty = '\0';
|
||||
else
|
||||
_data_args = DataArgsT{args...};
|
||||
}
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx>
|
||||
template <typename FuncT, typename... Args>
|
||||
auto
|
||||
rocdecode_api_impl<TableIdx, OpIdx>::exec(FuncT&& _func, Args&&... args)
|
||||
{
|
||||
using return_type = std::decay_t<std::invoke_result_t<FuncT, Args...>>;
|
||||
|
||||
if(_func)
|
||||
{
|
||||
if constexpr(std::is_void<return_type>::value)
|
||||
{
|
||||
_func(std::forward<Args>(args)...);
|
||||
return null_type{};
|
||||
}
|
||||
else
|
||||
{
|
||||
return _func(std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
||||
using info_type = rocdecode_api_info<TableIdx, OpIdx>;
|
||||
ROCP_ERROR << "nullptr to next rocdecode function for " << info_type::name << " ("
|
||||
<< info_type::operation_idx << ")";
|
||||
|
||||
return get_default_retval<return_type>();
|
||||
}
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx>
|
||||
template <typename RetT, typename... Args>
|
||||
RetT
|
||||
rocdecode_api_impl<TableIdx, OpIdx>::functor(Args... args)
|
||||
{
|
||||
using info_type = rocdecode_api_info<TableIdx, OpIdx>;
|
||||
using callback_api_data_t = typename rocdecode_domain_info<TableIdx>::callback_data_type;
|
||||
using buffered_api_data_t = typename rocdecode_domain_info<TableIdx>::buffer_data_type;
|
||||
|
||||
constexpr auto external_corr_id_domain_idx =
|
||||
rocdecode_domain_info<TableIdx>::external_correlation_id_domain_idx;
|
||||
|
||||
if(registration::get_fini_status() != 0)
|
||||
{
|
||||
[[maybe_unused]] auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
|
||||
if constexpr(!std::is_void<RetT>::value)
|
||||
return _ret;
|
||||
else
|
||||
return;
|
||||
}
|
||||
|
||||
constexpr auto ref_count = 2;
|
||||
auto thr_id = common::get_tid();
|
||||
auto callback_contexts = tracing::callback_context_data_vec_t{};
|
||||
auto buffered_contexts = tracing::buffered_context_data_vec_t{};
|
||||
auto external_corr_ids = tracing::external_correlation_id_map_t{};
|
||||
|
||||
tracing::populate_contexts(info_type::callback_domain_idx,
|
||||
info_type::buffered_domain_idx,
|
||||
info_type::operation_idx,
|
||||
callback_contexts,
|
||||
buffered_contexts,
|
||||
external_corr_ids);
|
||||
|
||||
if(callback_contexts.empty() && buffered_contexts.empty())
|
||||
{
|
||||
[[maybe_unused]] auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
|
||||
if constexpr(!std::is_void<RetT>::value)
|
||||
return _ret;
|
||||
else
|
||||
return;
|
||||
}
|
||||
|
||||
auto buffer_record = common::init_public_api_struct(buffered_api_data_t{});
|
||||
auto tracer_data = common::init_public_api_struct(callback_api_data_t{});
|
||||
auto* corr_id = tracing::correlation_service::construct(ref_count);
|
||||
auto internal_corr_id = corr_id->internal;
|
||||
|
||||
tracing::populate_external_correlation_ids(external_corr_ids,
|
||||
thr_id,
|
||||
external_corr_id_domain_idx,
|
||||
info_type::operation_idx,
|
||||
internal_corr_id);
|
||||
|
||||
// invoke the callbacks
|
||||
if(!callback_contexts.empty())
|
||||
{
|
||||
set_data_args(info_type::get_api_data_args(tracer_data.args), std::forward<Args>(args)...);
|
||||
|
||||
tracing::execute_phase_enter_callbacks(callback_contexts,
|
||||
thr_id,
|
||||
internal_corr_id,
|
||||
external_corr_ids,
|
||||
info_type::callback_domain_idx,
|
||||
info_type::operation_idx,
|
||||
tracer_data);
|
||||
}
|
||||
|
||||
// enter callback may update the external correlation id field
|
||||
tracing::update_external_correlation_ids(
|
||||
external_corr_ids, thr_id, external_corr_id_domain_idx);
|
||||
|
||||
// record the start timestamp as close to the function call as possible
|
||||
if(!buffered_contexts.empty())
|
||||
{
|
||||
buffer_record.start_timestamp = common::timestamp_ns();
|
||||
}
|
||||
|
||||
// decrement the reference count before invoking
|
||||
corr_id->sub_ref_count();
|
||||
|
||||
auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
|
||||
|
||||
// record the end timestamp as close to the function call as possible
|
||||
if(!buffered_contexts.empty())
|
||||
{
|
||||
buffer_record.end_timestamp = common::timestamp_ns();
|
||||
}
|
||||
|
||||
if(!callback_contexts.empty())
|
||||
{
|
||||
set_data_retval(tracer_data.retval, _ret);
|
||||
|
||||
tracing::execute_phase_exit_callbacks(callback_contexts,
|
||||
external_corr_ids,
|
||||
info_type::callback_domain_idx,
|
||||
info_type::operation_idx,
|
||||
tracer_data);
|
||||
}
|
||||
|
||||
if(!buffered_contexts.empty())
|
||||
{
|
||||
tracing::execute_buffer_record_emplace(buffered_contexts,
|
||||
thr_id,
|
||||
internal_corr_id,
|
||||
external_corr_ids,
|
||||
info_type::buffered_domain_idx,
|
||||
info_type::operation_idx,
|
||||
buffer_record);
|
||||
}
|
||||
|
||||
// decrement the reference count after usage in the callback/buffers
|
||||
corr_id->sub_ref_count();
|
||||
|
||||
context::pop_latest_correlation_id(corr_id);
|
||||
|
||||
if constexpr(!std::is_void<RetT>::value) return _ret;
|
||||
}
|
||||
} // namespace rocdecode
|
||||
} // namespace rocprofiler
|
||||
|
||||
#define ROCPROFILER_LIB_ROCPROFILER_SDK_ROCDECODE_ROCDECODE_CPP_IMPL 1
|
||||
|
||||
// template specializations
|
||||
#include "rocdecode.def.cpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace rocdecode
|
||||
{
|
||||
namespace
|
||||
{
|
||||
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
|
||||
const char*
|
||||
name_by_id(const uint32_t id, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
{
|
||||
if(OpIdx == id) return rocdecode_api_info<TableIdx, OpIdx>::name;
|
||||
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
return name_by_id<TableIdx>(id, std::index_sequence<OpIdxTail...>{});
|
||||
else
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
|
||||
uint32_t
|
||||
id_by_name(const char* name, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
{
|
||||
if(std::string_view{rocdecode_api_info<TableIdx, OpIdx>::name} == std::string_view{name})
|
||||
return rocdecode_api_info<TableIdx, OpIdx>::operation_idx;
|
||||
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
return id_by_name<TableIdx>(name, std::index_sequence<OpIdxTail...>{});
|
||||
else
|
||||
return rocdecode_domain_info<TableIdx>::none;
|
||||
}
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
|
||||
void
|
||||
get_ids(std::vector<uint32_t>& _id_list, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
{
|
||||
auto _idx = rocdecode_api_info<TableIdx, OpIdx>::operation_idx;
|
||||
if(_idx < rocdecode_domain_info<TableIdx>::last) _id_list.emplace_back(_idx);
|
||||
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
get_ids<TableIdx>(_id_list, std::index_sequence<OpIdxTail...>{});
|
||||
}
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
|
||||
void
|
||||
get_names(std::vector<const char*>& _name_list, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
{
|
||||
auto&& _name = rocdecode_api_info<TableIdx, OpIdx>::name;
|
||||
if(_name != nullptr && strnlen(_name, 1) > 0) _name_list.emplace_back(_name);
|
||||
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
get_names<TableIdx>(_name_list, std::index_sequence<OpIdxTail...>{});
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename DataT, size_t OpIdx, size_t... OpIdxTail>
|
||||
void
|
||||
iterate_args(const uint32_t id,
|
||||
const DataT& data,
|
||||
rocprofiler_callback_tracing_operation_args_cb_t func,
|
||||
int32_t max_deref,
|
||||
void* user_data,
|
||||
std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
{
|
||||
if(OpIdx == id)
|
||||
{
|
||||
using info_type = rocdecode_api_info<TableIdx, OpIdx>;
|
||||
auto&& arg_list = info_type::as_arg_list(data, max_deref);
|
||||
auto&& arg_addr = info_type::as_arg_addr(data);
|
||||
for(size_t i = 0; i < std::min(arg_list.size(), arg_addr.size()); ++i)
|
||||
{
|
||||
auto ret = func(info_type::callback_domain_idx, // kind
|
||||
id, // operation
|
||||
i, // arg_number
|
||||
arg_addr.at(i), // arg_value_addr
|
||||
arg_list.at(i).indirection_level, // indirection
|
||||
arg_list.at(i).type, // arg_type
|
||||
arg_list.at(i).name, // arg_name
|
||||
arg_list.at(i).value.c_str(), // arg_value_str
|
||||
arg_list.at(i).dereference_count, // num deref in str
|
||||
user_data);
|
||||
if(ret != 0) break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
iterate_args<TableIdx>(
|
||||
id, data, func, max_deref, user_data, std::index_sequence<OpIdxTail...>{});
|
||||
}
|
||||
|
||||
bool
|
||||
should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain,
|
||||
rocprofiler_buffer_tracing_kind_t _buffered_domain,
|
||||
int _operation)
|
||||
{
|
||||
// we loop over all the *registered* contexts and see if any of them, at any point in time,
|
||||
// might require callback or buffered API tracing
|
||||
for(const auto& itr : context::get_registered_contexts())
|
||||
{
|
||||
if(!itr) continue;
|
||||
|
||||
// if there is a callback tracer enabled for the given domain and op, we need to wrap
|
||||
if(itr->callback_tracer && itr->callback_tracer->domains(_callback_domain) &&
|
||||
itr->callback_tracer->domains(_callback_domain, _operation))
|
||||
return true;
|
||||
|
||||
// if there is a buffered tracer enabled for the given domain and op, we need to wrap
|
||||
if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_domain) &&
|
||||
itr->buffered_tracer->domains(_buffered_domain, _operation))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx>
|
||||
void
|
||||
copy_table(Tp* _orig, uint64_t _tbl_instance, std::integral_constant<size_t, OpIdx>)
|
||||
{
|
||||
using table_type = typename rocdecode_table_lookup<TableIdx>::type;
|
||||
|
||||
if constexpr(std::is_same<table_type, Tp>::value)
|
||||
{
|
||||
auto _info = rocdecode_api_info<TableIdx, OpIdx>{};
|
||||
|
||||
// make sure we don't access a field that doesn't exist in input table
|
||||
if(_info.offset() >= _orig->size) return;
|
||||
|
||||
// 1. get the sub-table containing the function pointer in original table
|
||||
// 2. get reference to function pointer in sub-table in original table
|
||||
auto& _orig_table = _info.get_table(_orig);
|
||||
auto& _orig_func = _info.get_table_func(_orig_table);
|
||||
// 3. get the sub-table containing the function pointer in saved table
|
||||
// 4. get reference to function pointer in sub-table in saved table
|
||||
// 5. save the original function in the saved table
|
||||
auto& _copy_table = _info.get_table(*get_table<TableIdx>());
|
||||
auto& _copy_func = _info.get_table_func(_copy_table);
|
||||
|
||||
ROCP_FATAL_IF(_copy_func && _tbl_instance == 0)
|
||||
<< _info.name << " has non-null function pointer " << _copy_func
|
||||
<< " despite this being the first instance of the library being copies";
|
||||
|
||||
if(!_copy_func)
|
||||
{
|
||||
ROCP_TRACE << "copying table entry for " << _info.name;
|
||||
_copy_func = _orig_func;
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCP_TRACE << "skipping copying table entry for " << _info.name
|
||||
<< " from table instance " << _tbl_instance;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx>
|
||||
void
|
||||
update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
{
|
||||
using table_type = typename rocdecode_table_lookup<TableIdx>::type;
|
||||
|
||||
if constexpr(std::is_same<table_type, Tp>::value)
|
||||
{
|
||||
auto _info = rocdecode_api_info<TableIdx, OpIdx>{};
|
||||
|
||||
// make sure we don't access a field that doesn't exist in input table
|
||||
if(_info.offset() >= _orig->size) return;
|
||||
|
||||
// check to see if there are any contexts which enable this operation in the HIP API domain
|
||||
if(!should_wrap_functor(
|
||||
_info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx))
|
||||
return;
|
||||
|
||||
ROCP_TRACE << "updating table entry for " << _info.name;
|
||||
|
||||
// 1. get the sub-table containing the function pointer in original table
|
||||
// 2. get reference to function pointer in sub-table in original table
|
||||
// 3. update function pointer with wrapper
|
||||
auto& _table = _info.get_table(_orig);
|
||||
auto& _func = _info.get_table_func(_table);
|
||||
_func = _info.get_functor(_func);
|
||||
}
|
||||
}
|
||||
|
||||
// Applies the single-operation copy_table to every operation index in the
// sequence, in order.
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
void
copy_table(Tp* _orig, uint64_t _tbl_instance, std::index_sequence<OpIdx, OpIdxTail...>)
{
    // head of the sequence first ...
    copy_table<TableIdx>(_orig, _tbl_instance, std::integral_constant<size_t, OpIdx>{});
    // ... then each remaining index, left to right
    (copy_table<TableIdx>(_orig, _tbl_instance, std::integral_constant<size_t, OpIdxTail>{}), ...);
}
|
||||
|
||||
// Applies the single-operation update_table to every operation index in the
// sequence, in order.
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
void
update_table(Tp* _orig, std::index_sequence<OpIdx, OpIdxTail...>)
{
    // head of the sequence first ...
    update_table<TableIdx>(_orig, std::integral_constant<size_t, OpIdx>{});
    // ... then each remaining index, left to right
    (update_table<TableIdx>(_orig, std::integral_constant<size_t, OpIdxTail>{}), ...);
}
|
||||
} // namespace
|
||||
|
||||
// check out the assembly here... this compiles to a switch statement
|
||||
template <size_t TableIdx>
|
||||
const char*
|
||||
name_by_id(uint32_t id)
|
||||
{
|
||||
return name_by_id<TableIdx>(id,
|
||||
std::make_index_sequence<rocdecode_domain_info<TableIdx>::last>{});
|
||||
}
|
||||
|
||||
template <size_t TableIdx>
|
||||
uint32_t
|
||||
id_by_name(const char* name)
|
||||
{
|
||||
return id_by_name<TableIdx>(name,
|
||||
std::make_index_sequence<rocdecode_domain_info<TableIdx>::last>{});
|
||||
}
|
||||
|
||||
template <size_t TableIdx>
|
||||
std::vector<uint32_t>
|
||||
get_ids()
|
||||
{
|
||||
constexpr auto last_api_id = rocdecode_domain_info<TableIdx>::last;
|
||||
auto _data = std::vector<uint32_t>{};
|
||||
_data.reserve(last_api_id);
|
||||
get_ids<TableIdx>(_data, std::make_index_sequence<last_api_id>{});
|
||||
return _data;
|
||||
}
|
||||
|
||||
template <size_t TableIdx>
|
||||
std::vector<const char*>
|
||||
get_names()
|
||||
{
|
||||
constexpr auto last_api_id = rocdecode_domain_info<TableIdx>::last;
|
||||
auto _data = std::vector<const char*>{};
|
||||
_data.reserve(last_api_id);
|
||||
get_names<TableIdx>(_data, std::make_index_sequence<last_api_id>{});
|
||||
return _data;
|
||||
}
|
||||
|
||||
template <size_t TableIdx>
|
||||
void
|
||||
iterate_args(uint32_t id,
|
||||
const rocprofiler_callback_tracing_rocdecode_api_data_t& data,
|
||||
rocprofiler_callback_tracing_operation_args_cb_t callback,
|
||||
int32_t max_deref,
|
||||
void* user_data)
|
||||
{
|
||||
if(callback)
|
||||
iterate_args<TableIdx>(id,
|
||||
data,
|
||||
callback,
|
||||
max_deref,
|
||||
user_data,
|
||||
std::make_index_sequence<rocdecode_domain_info<TableIdx>::last>{});
|
||||
}
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
copy_table(TableT* _orig, uint64_t _tbl_instance)
|
||||
{
|
||||
constexpr auto TableIdx = rocdecode_table_id_lookup<TableT>::value;
|
||||
if(_orig)
|
||||
copy_table<TableIdx>(_orig,
|
||||
_tbl_instance,
|
||||
std::make_index_sequence<rocdecode_domain_info<TableIdx>::last>{});
|
||||
}
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
update_table(TableT* _orig)
|
||||
{
|
||||
constexpr auto TableIdx = rocdecode_table_id_lookup<TableT>::value;
|
||||
if(_orig)
|
||||
update_table<TableIdx>(_orig,
|
||||
std::make_index_sequence<rocdecode_domain_info<TableIdx>::last>{});
|
||||
}
|
||||
|
||||
using rocdecode_api_data_t = rocprofiler_callback_tracing_rocdecode_api_data_t;
|
||||
using rocdecode_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
|
||||
|
||||
#define INSTANTIATE_ROCDECODE_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \
|
||||
template void copy_table<TABLE_TYPE>(TABLE_TYPE * _tbl, uint64_t _instv); \
|
||||
template void update_table<TABLE_TYPE>(TABLE_TYPE * _tbl); \
|
||||
template const char* name_by_id<TABLE_IDX>(uint32_t); \
|
||||
template uint32_t id_by_name<TABLE_IDX>(const char*); \
|
||||
template std::vector<uint32_t> get_ids<TABLE_IDX>(); \
|
||||
template std::vector<const char*> get_names<TABLE_IDX>();
|
||||
|
||||
INSTANTIATE_ROCDECODE_TABLE_FUNC(rocdecode_api_func_table_t, ROCPROFILER_ROCDECODE_TABLE_ID)
|
||||
} // namespace rocdecode
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,90 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/rocdecode/defines.hpp"
|
||||
#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/external_correlation.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/rocdecode.h>
|
||||
#include <rocprofiler-sdk/rocdecode/table_id.h>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace rocdecode
|
||||
{
|
||||
template <>
|
||||
struct rocdecode_domain_info<ROCPROFILER_ROCDECODE_TABLE_ID_LAST>
|
||||
{
|
||||
using args_type = rocprofiler_rocdecode_api_args_t;
|
||||
using retval_type = rocprofiler_rocdecode_api_retval_t;
|
||||
using callback_data_type = rocprofiler_callback_tracing_rocdecode_api_data_t;
|
||||
using buffer_data_type = rocprofiler_buffer_tracing_rocdecode_api_record_t;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct rocdecode_domain_info<ROCPROFILER_ROCDECODE_TABLE_ID>
|
||||
: rocdecode_domain_info<ROCPROFILER_ROCDECODE_TABLE_ID_LAST>
|
||||
{
|
||||
using enum_type = rocprofiler_marker_core_api_id_t;
|
||||
static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API;
|
||||
static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_ROCDECODE_API;
|
||||
static constexpr auto none = ROCPROFILER_ROCDECODE_API_ID_NONE;
|
||||
static constexpr auto last = ROCPROFILER_ROCDECODE_API_ID_LAST;
|
||||
static constexpr auto external_correlation_id_domain_idx =
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API;
|
||||
};
|
||||
|
||||
} // namespace rocdecode
|
||||
} // namespace rocprofiler
|
||||
|
||||
#if defined(ROCPROFILER_LIB_ROCPROFILER_SDK_ROCDECODE_ROCDECODE_CPP_IMPL) && \
|
||||
ROCPROFILER_LIB_ROCPROFILER_SDK_ROCDECODE_ROCDECODE_CPP_IMPL == 1
|
||||
|
||||
// clang-format off
|
||||
ROCDECODE_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_ROCDECODE_TABLE_ID, rocdecode_api_func_table_t)
|
||||
|
||||
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecCreateVideoParser, rocDecCreateVideoParser, pfn_rocdec_create_video_parser, parser_handle, params)
|
||||
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecParseVideoData, rocDecParseVideoData, pfn_rocdec_parse_video_data, parser_handle, packet)
|
||||
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyVideoParser, rocDecDestroyVideoParser, pfn_rocdec_destroy_video_parser, parser_handle)
|
||||
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecCreateDecoder, rocDecCreateDecoder, pfn_rocdec_create_decoder, decoder_handle, decoder_create_info)
|
||||
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyDecoder, rocDecDestroyDecoder, pfn_rocdec_destroy_decoder, decoder_handle)
|
||||
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecoderCaps, rocDecGetDecoderCaps, pfn_rocdec_get_gecoder_caps, decode_caps)
|
||||
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDecodeFrame, rocDecDecodeFrame, pfn_rocdec_decode_frame, decoder_handle, pic_params)
|
||||
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetDecodeStatus, rocDecGetDecodeStatus, pfn_rocdec_get_decode_status, decoder_handle, pic_idx, decode_status)
|
||||
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecReconfigureDecoder, rocDecReconfigureDecoder, pfn_rocdec_reconfigure_decoder, decoder_handle, reconfig_params)
|
||||
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetVideoFrame, rocDecGetVideoFrame, pfn_rocdec_get_video_frame, decoder_handle, pic_idx, dev_mem_ptr, horizontal_pitch, vid_postproc_params)
|
||||
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetErrorName, rocDecGetErrorName, pfn_rocdec_get_error_name, rocdec_status)
|
||||
|
||||
// Bitstream-reader entry points: only registered when the dispatch table's step
// version is at least 1. Like the definitions above, these invocations take no
// trailing semicolon.
#if ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION >= 1
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecCreateBitstreamReader, rocDecCreateBitstreamReader, pfn_rocdec_create_bitstream_reader, bs_reader_handle, input_file_path)
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamCodecType, rocDecGetBitstreamCodecType, pfn_rocdec_get_bitstream_codec_type, bs_reader_handle, codec_type)
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamBitDepth, rocDecGetBitstreamBitDepth, pfn_rocdec_get_bitstream_bit_depth, bs_reader_handle, bit_depth)
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecGetBitstreamPicData, rocDecGetBitstreamPicData, pfn_rocdec_get_bitstream_pic_data, bs_reader_handle, pic_data, pic_size, pts)
ROCDECODE_API_INFO_DEFINITION_V(ROCPROFILER_ROCDECODE_TABLE_ID, ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyBitstreamReader, rocDecDestroyBitstreamReader, pfn_rocdec_destroy_bitstream_reader, bs_reader_handle)
#endif
|
||||
#else
|
||||
# error \
|
||||
"Do not compile this file directly. It is included by lib/rocprofiler-sdk/rocdecode/rocdecode.cpp"
|
||||
#endif
|
||||
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
// Auto-detect whether an installed rocDecode provides its API-trace header,
// unless the build already defined ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE.
// The header probed here has to be the same header that is included when the
// macro is > 0 (rocdecode/amd_detail/rocdecode_api_trace.h); probing a different
// file name would make the detection disagree with the include that follows.
#if !defined(ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE)
#    if defined __has_include
#        if __has_include(<rocdecode/amd_detail/rocdecode_api_trace.h>)
#            define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 1
#        else
#            define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0
#        endif
#    else
#        define ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE 0
#    endif
#endif
|
||||
|
||||
#if ROCPROFILER_SDK_USE_SYSTEM_ROCDECODE > 0
|
||||
# include <rocdecode/amd_detail/rocdecode_api_trace.h>
|
||||
# include <rocdecode/rocdecode.h>
|
||||
# include <rocdecode/rocparser.h>
|
||||
#else
|
||||
# include <rocprofiler-sdk/rocdecode/details/rocdecode.h>
|
||||
# include <rocprofiler-sdk/rocdecode/details/rocdecode_api_trace.h>
|
||||
# include <rocprofiler-sdk/rocdecode/details/rocparser.h>
|
||||
#endif
|
||||
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace rocdecode
|
||||
{
|
||||
using rocdecode_api_func_table_t = ::RocDecodeDispatchTable;
|
||||
|
||||
struct ROCDecodeAPITable
|
||||
{
|
||||
rocdecode_api_func_table_t* rocdecode_api_table = nullptr;
|
||||
};
|
||||
|
||||
using rocdecode_api_table_t = ROCDecodeAPITable;
|
||||
|
||||
rocdecode_api_table_t&
|
||||
get_table();
|
||||
|
||||
template <size_t OpIdx>
|
||||
struct rocdecode_table_lookup;
|
||||
|
||||
template <typename Tp>
|
||||
struct rocdecode_table_id_lookup;
|
||||
|
||||
template <size_t TableIdx>
|
||||
struct rocdecode_domain_info;
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx>
|
||||
struct rocdecode_api_info;
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx>
|
||||
struct rocdecode_api_impl : rocdecode_domain_info<TableIdx>
|
||||
{
|
||||
template <typename DataArgsT, typename... Args>
|
||||
static auto set_data_args(DataArgsT&, Args... args);
|
||||
|
||||
template <typename FuncT, typename... Args>
|
||||
static auto exec(FuncT&&, Args&&... args);
|
||||
|
||||
template <typename RetT, typename... Args>
|
||||
static RetT functor(Args... args);
|
||||
};
|
||||
|
||||
template <size_t TableIdx>
|
||||
const char*
|
||||
name_by_id(uint32_t id);
|
||||
|
||||
template <size_t TableIdx>
|
||||
uint32_t
|
||||
id_by_name(const char* name);
|
||||
|
||||
template <size_t TableIdx>
|
||||
std::vector<const char*>
|
||||
get_names();
|
||||
|
||||
template <size_t TableIdx>
|
||||
std::vector<uint32_t>
|
||||
get_ids();
|
||||
|
||||
template <size_t TableIdx>
|
||||
void
|
||||
iterate_args(uint32_t id,
|
||||
const rocprofiler_callback_tracing_rocdecode_api_data_t& data,
|
||||
rocprofiler_callback_tracing_operation_args_cb_t callback,
|
||||
int32_t max_deref,
|
||||
void* user_data);
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
copy_table(TableT* _orig, uint64_t _tbl_instance);
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
update_table(TableT* _orig);
|
||||
|
||||
} // namespace rocdecode
|
||||
} // namespace rocprofiler
|
||||
@@ -57,6 +57,7 @@ SPECIALIZE_RUNTIME_INIT_INFO(HSA, "HSA runtime")
|
||||
SPECIALIZE_RUNTIME_INIT_INFO(HIP, "HIP runtime")
|
||||
SPECIALIZE_RUNTIME_INIT_INFO(MARKER, "Marker (ROCTx) runtime")
|
||||
SPECIALIZE_RUNTIME_INIT_INFO(RCCL, "RCCL runtime")
|
||||
SPECIALIZE_RUNTIME_INIT_INFO(ROCDECODE, "ROCDecode runtime")
|
||||
|
||||
#undef SPECIALIZE_RUNTIME_INIT_INFO
|
||||
|
||||
|
||||
@@ -62,6 +62,10 @@ add_subdirectory(thread-trace)
|
||||
add_subdirectory(pc_sampling)
|
||||
add_subdirectory(hip-graph-tracing)
|
||||
add_subdirectory(counter-collection)
|
||||
if(ROCPROFILER_BUILD_ROCDECODE_TESTS)
|
||||
add_subdirectory(rocdecode)
|
||||
endif()
|
||||
|
||||
if(ROCPROFILER_BUILD_OPENMP_TESTS)
|
||||
add_subdirectory(openmp-tools)
|
||||
endif()
|
||||
|
||||
@@ -29,3 +29,6 @@ add_subdirectory(hsa-queue-dependency)
|
||||
add_subdirectory(hip-graph)
|
||||
add_subdirectory(hsa-memory-allocation)
|
||||
add_subdirectory(pc-sampling)
|
||||
if(ROCPROFILER_BUILD_ROCDECODE_TESTS)
|
||||
add_subdirectory(rocdecode)
|
||||
endif()
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
if(NOT CMAKE_HIP_COMPILER)
|
||||
find_program(
|
||||
amdclangpp_EXECUTABLE
|
||||
NAMES amdclang++
|
||||
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATH_SUFFIXES bin llvm/bin NO_CACHE)
|
||||
mark_as_advanced(amdclangpp_EXECUTABLE)
|
||||
|
||||
if(amdclangpp_EXECUTABLE)
|
||||
set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
project(rocprofiler-tool-test-app-rocdecode LANGUAGES CXX HIP)
|
||||
|
||||
foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
|
||||
if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "")
|
||||
set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_HIP_STANDARD 17)
|
||||
set(CMAKE_HIP_EXTENSIONS OFF)
|
||||
set(CMAKE_HIP_STANDARD_REQUIRED ON)
|
||||
|
||||
set_source_files_properties(rocdecode.cpp roc_video_dec.cpp PROPERTIES LANGUAGE HIP)
|
||||
add_executable(rocdecode)
|
||||
target_sources(rocdecode PRIVATE rocdecode.cpp roc_video_dec.cpp)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
find_package(rocDecode REQUIRED)
|
||||
target_link_libraries(
|
||||
rocdecode PRIVATE rocprofiler-sdk::tests-build-flags Threads::Threads hsa-runtime64
|
||||
rocprofiler-sdk::tests-common-library rocDecode::rocDecode)
|
||||
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
@@ -0,0 +1,648 @@
|
||||
/*
|
||||
Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <assert.h>
#include <hip/hip_runtime.h>
#include <rocdecode/rocdecode.h>
#include <rocdecode/rocparser.h>
#include <stdint.h>
#include <string.h>
#include <chrono>
#include <cstring>
#include <exception>
#include <iostream>
#include <mutex>
#include <queue>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief The AMD Video Decode Library.
|
||||
*
|
||||
* \defgroup group_amd_roc_video_dec rocDecode Video Decode: AMD Video Decode API
|
||||
* \brief The rocDecode video decoder for AMD GPUs.
|
||||
*/
|
||||
|
||||
#define MAX_FRAME_NUM 16
|
||||
|
||||
typedef int(ROCDECAPI* PFNRECONFIGUEFLUSHCALLBACK)(void*, uint32_t, void*);
|
||||
|
||||
typedef enum SeiAvcHevcPayloadType_enum
|
||||
{
|
||||
SEI_TYPE_TIME_CODE = 136,
|
||||
SEI_TYPE_USER_DATA_UNREGISTERED = 5
|
||||
} SeiAvcHevcPayloadType;
|
||||
|
||||
typedef enum OutputSurfaceMemoryType_enum
|
||||
{
|
||||
OUT_SURFACE_MEM_DEV_INTERNAL =
|
||||
0, /**< Internal interopped decoded surface memory(original mapped decoded surface) */
|
||||
OUT_SURFACE_MEM_DEV_COPIED = 1, /**< decoded output will be copied to a separate device memory
|
||||
(the user doesn't need to call release) **/
|
||||
OUT_SURFACE_MEM_HOST_COPIED = 2, /**< decoded output will be copied to a separate host memory
|
||||
(the user doesn't need to call release) **/
|
||||
OUT_SURFACE_MEM_NOT_MAPPED = 3 /**< < decoded output is not available (interop won't be used):
|
||||
useful for decode only performance app*/
|
||||
} OutputSurfaceMemoryType;
|
||||
|
||||
#define TOSTR(X) std::to_string(static_cast<int>(X))
|
||||
#define STR(X) std::string(X)
|
||||
|
||||
#if DBGINFO
|
||||
# define INFO(X) \
|
||||
std::clog << "[INF] " \
|
||||
<< " {" << __func__ << "} " \
|
||||
<< " " << X << std::endl;
|
||||
#else
|
||||
# define INFO(X) ;
|
||||
#endif
|
||||
#define ERR(X) \
|
||||
std::cerr << "[ERR] " \
|
||||
<< " {" << __func__ << "} " \
|
||||
<< " " << X << std::endl;
|
||||
|
||||
inline int
|
||||
GetChromaPlaneCount(rocDecVideoSurfaceFormat surface_format)
|
||||
{
|
||||
int num_planes = 1;
|
||||
switch(surface_format)
|
||||
{
|
||||
case rocDecVideoSurfaceFormat_NV12:
|
||||
case rocDecVideoSurfaceFormat_P016: num_planes = 1; break;
|
||||
case rocDecVideoSurfaceFormat_YUV444:
|
||||
case rocDecVideoSurfaceFormat_YUV444_16Bit: num_planes = 2; break;
|
||||
case rocDecVideoSurfaceFormat_YUV420:
|
||||
case rocDecVideoSurfaceFormat_YUV420_16Bit: num_planes = 2; break;
|
||||
}
|
||||
|
||||
return num_planes;
|
||||
};
|
||||
|
||||
inline float
|
||||
GetChromaHeightFactor(rocDecVideoSurfaceFormat surface_format)
|
||||
{
|
||||
float factor = 0.5;
|
||||
switch(surface_format)
|
||||
{
|
||||
case rocDecVideoSurfaceFormat_NV12:
|
||||
case rocDecVideoSurfaceFormat_P016:
|
||||
case rocDecVideoSurfaceFormat_YUV420:
|
||||
case rocDecVideoSurfaceFormat_YUV420_16Bit: factor = 0.5; break;
|
||||
case rocDecVideoSurfaceFormat_YUV444:
|
||||
case rocDecVideoSurfaceFormat_YUV444_16Bit: factor = 1.0; break;
|
||||
}
|
||||
|
||||
return factor;
|
||||
};
|
||||
|
||||
/**
 * @brief Exception type carrying a text message and an integer error code.
 *
 * The single-argument constructor stores -1 as the error code.
 */
class RocVideoDecodeException : public std::exception
{
public:
    /// @param message  text returned by what()
    /// @param err_code value returned by Geterror_code()
    explicit RocVideoDecodeException(const std::string& message, const int err_code)
    : _message(message)
    , _err_code(err_code)
    {}

    /// @param message text returned by what(); the error code is set to -1
    explicit RocVideoDecodeException(const std::string& message)
    : _message(message)
    , _err_code(-1)
    {}

    // noexcept replaces the deprecated dynamic exception specification throw();
    // `override` alone is sufficient, so the redundant `virtual` is dropped.
    const char* what() const noexcept override { return _message.c_str(); }

    /// @return the error code supplied at construction (or -1)
    int Geterror_code() const noexcept { return _err_code; }

private:
    std::string _message;
    int         _err_code;
};
|
||||
|
||||
#define ROCDEC_THROW(X, CODE) \
|
||||
throw RocVideoDecodeException(" { " + std::string(__func__) + " } " + X, CODE);
|
||||
#define THROW(X) throw RocVideoDecodeException(" { " + std::string(__func__) + " } " + X);
|
||||
|
||||
#define ROCDEC_API_CALL(rocDecAPI) \
|
||||
do \
|
||||
{ \
|
||||
rocDecStatus error_code = rocDecAPI; \
|
||||
if(error_code != ROCDEC_SUCCESS) \
|
||||
{ \
|
||||
std::ostringstream error_log; \
|
||||
error_log << #rocDecAPI << " returned " << rocDecGetErrorName(error_code) << " at " \
|
||||
<< __FILE__ << ":" << __LINE__; \
|
||||
ROCDEC_THROW(error_log.str(), error_code); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define HIP_API_CALL(call) \
|
||||
do \
|
||||
{ \
|
||||
hipError_t hip_status = call; \
|
||||
if(hip_status != hipSuccess) \
|
||||
{ \
|
||||
const char* sz_err_name = NULL; \
|
||||
sz_err_name = hipGetErrorName(hip_status); \
|
||||
std::ostringstream error_log; \
|
||||
error_log << "hip API error " << sz_err_name; \
|
||||
ROCDEC_THROW(error_log.str(), hip_status); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
/// Rectangle described by four integer edge coordinates, in the order
/// left, top, right, bottom.
struct Rect
{
    int left, top, right, bottom;
};
|
||||
|
||||
/// Pair of integer dimensions: `w` followed by `h`.
struct Dim
{
    int w;
    int h;
};
|
||||
|
||||
/// Rounds `value` up using the bit mask (alignment - 1). The result is a
/// multiple of `alignment` only when `alignment` is a power of two, because the
/// rounding is done by masking off the low bits.
static inline int
align(int value, int alignment)
{
    const int low_bits = alignment - 1;
    return (value + low_bits) & ~low_bits;
}
|
||||
|
||||
/// Bookkeeping record for one decoded frame.
struct DecFrameBuffer_
{
    uint8_t* frame_ptr;     /**< device memory pointer for the decoded frame */
    int64_t  pts;           /**< timestamp for the decoded frame */
    int      picture_index; /**< surface index for the decoded frame */
};

using DecFrameBuffer = DecFrameBuffer_;
|
||||
|
||||
typedef struct OutputSurfaceInfoType
|
||||
{
|
||||
uint32_t output_width; /**< Output width of decoded surface*/
|
||||
uint32_t output_height; /**< Output height of decoded surface*/
|
||||
uint32_t output_pitch; /**< Output pitch in bytes of luma plane, chroma pitch can be inferred
|
||||
based on chromaFormat*/
|
||||
uint32_t output_vstride; /**< Output vertical stride in case of using internal mem pointer **/
|
||||
uint32_t chroma_height; /**< Chroma plane height **/
|
||||
Rect disp_rect; /**< Display area **/
|
||||
uint32_t bytes_per_pixel; /**< Output BytesPerPixel of decoded image*/
|
||||
uint32_t bit_depth; /**< Output BitDepth of the image*/
|
||||
uint32_t num_chroma_planes; /**< Output Chroma number of planes*/
|
||||
uint64_t output_surface_size_in_bytes; /**< Output Image Size in Bytes; including both luma and
|
||||
chroma planes*/
|
||||
rocDecVideoSurfaceFormat surface_format; /**< Chroma format of the decoded image*/
|
||||
OutputSurfaceMemoryType mem_type; /**< Output mem_type of the surface*/
|
||||
} OutputSurfaceInfo;
|
||||
|
||||
typedef struct ReconfigParams_t
|
||||
{
|
||||
PFNRECONFIGUEFLUSHCALLBACK p_fn_reconfigure_flush;
|
||||
void* p_reconfig_user_struct;
|
||||
uint32_t reconfig_flush_mode;
|
||||
} ReconfigParams;
|
||||
|
||||
class RocVideoDecoder
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new Roc Video Decoder object
|
||||
*
|
||||
* @param device_id : device_id to initialize HIP and VCN
|
||||
* @param out_mem_type : out_mem_type for the decoded surface
|
||||
* @param codec : codec type
|
||||
* @param force_zero_latency : to force zero latency (output in decoding order)
|
||||
* @param p_crop_rect : to crop output
|
||||
* @param extract_user_SEI_Message : enable to extract SEI
|
||||
* @param disp_delay : output delayed by #disp_delay surfaces
|
||||
* @param max_width : Max. width for the output surface
|
||||
* @param max_height : Max. height for the output surface
|
||||
* @param clk_rate : FPS clock-rate
|
||||
*/
|
||||
RocVideoDecoder(int device_id,
|
||||
OutputSurfaceMemoryType out_mem_type,
|
||||
rocDecVideoCodec codec,
|
||||
bool force_zero_latency = false,
|
||||
const Rect* p_crop_rect = nullptr,
|
||||
bool extract_user_SEI_Message = false,
|
||||
uint32_t disp_delay = 0,
|
||||
int max_width = 0,
|
||||
int max_height = 0,
|
||||
uint32_t clk_rate = 1000);
|
||||
~RocVideoDecoder();
|
||||
|
||||
rocDecVideoCodec GetCodecId() { return codec_id_; }
|
||||
|
||||
hipStream_t GetStream() { return hip_stream_; }
|
||||
|
||||
/**
|
||||
* @brief Get the output frame width
|
||||
*/
|
||||
uint32_t GetWidth()
|
||||
{
|
||||
assert(disp_width_);
|
||||
return disp_width_;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief This function is used to get the actual decode width
|
||||
*/
|
||||
int GetDecodeWidth()
|
||||
{
|
||||
assert(coded_width_);
|
||||
return coded_width_;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the output frame height
|
||||
*/
|
||||
uint32_t GetHeight()
|
||||
{
|
||||
assert(disp_height_);
|
||||
return disp_height_;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief This function is used to get the current chroma height.
|
||||
*/
|
||||
int GetChromaHeight()
|
||||
{
|
||||
assert(chroma_height_);
|
||||
return chroma_height_;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief This function is used to get the number of chroma planes.
|
||||
*/
|
||||
int GetNumChromaPlanes()
|
||||
{
|
||||
assert(num_chroma_planes_);
|
||||
return num_chroma_planes_;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief This function is used to get the current frame size based on pixel format.
|
||||
*/
|
||||
virtual int GetFrameSize()
|
||||
{
|
||||
assert(disp_width_);
|
||||
return disp_width_ * (disp_height_ + (chroma_height_ * num_chroma_planes_)) *
|
||||
byte_per_pixel_;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the Bit Depth and BytesPerPixel associated with the pixel format
|
||||
*
|
||||
* @return uint32_t
|
||||
*/
|
||||
uint32_t GetBitDepth()
|
||||
{
|
||||
assert(bitdepth_minus_8_);
|
||||
return (bitdepth_minus_8_ + 8);
|
||||
}
|
||||
uint32_t GetBytePerPixel()
|
||||
{
|
||||
assert(byte_per_pixel_);
|
||||
return byte_per_pixel_;
|
||||
}
|
||||
/**
|
||||
* @brief Functions to get the output surface attributes
|
||||
*/
|
||||
size_t GetSurfaceSize()
|
||||
{
|
||||
assert(surface_size_);
|
||||
return surface_size_;
|
||||
}
|
||||
uint32_t GetSurfaceStride()
|
||||
{
|
||||
assert(surface_stride_);
|
||||
return surface_stride_;
|
||||
}
|
||||
// RocDecImageFormat GetSubsampling() { return subsampling_; }
|
||||
/**
|
||||
* @brief Get the name of the output format
|
||||
*
|
||||
* @param codec_id
|
||||
* @return std::string
|
||||
*/
|
||||
const char* GetCodecFmtName(rocDecVideoCodec codec_id);
|
||||
|
||||
/**
|
||||
* @brief function to return the name from surface_format_id
|
||||
*
|
||||
* @param surface_format_id - enum for surface format
|
||||
* @return const char*
|
||||
*/
|
||||
const char* GetSurfaceFmtName(rocDecVideoSurfaceFormat surface_format_id);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer to the Output Image Info
|
||||
*
|
||||
* @param surface_info ptr to output surface info
|
||||
* @return true
|
||||
* @return false
|
||||
*/
|
||||
bool GetOutputSurfaceInfo(OutputSurfaceInfo** surface_info);
|
||||
|
||||
/**
|
||||
* @brief Function to set the Reconfig Params object
|
||||
*
|
||||
* @param p_reconfig_params: pointer to reconfig params struct
|
||||
* @return true : success
|
||||
* @return false : fail
|
||||
*/
|
||||
bool SetReconfigParams(ReconfigParams* p_reconfig_params, bool b_force_reconfig_flush = false);
|
||||
|
||||
/**
|
||||
* @brief Function to force Reconfigure Flush: needed for random seeking to key frames
|
||||
*
|
||||
* @return int 1: Success 0: Fail
|
||||
*/
|
||||
int FlushAndReconfigure();
|
||||
/**
|
||||
* @brief this function decodes a frame and returns the number of frames available for display
|
||||
*
|
||||
* @param data - pointer to the data buffer that is to be decoded
|
||||
* @param size - size of the data buffer in bytes
|
||||
* @param pts - presentation timestamp
|
||||
* @param pkt_flags - video packet flags
|
||||
* @param num_decoded_pics - number of pictures decoded in this call
|
||||
* @return int - num of frames to display
|
||||
*/
|
||||
virtual int DecodeFrame(const uint8_t* data,
|
||||
size_t size,
|
||||
int pkt_flags,
|
||||
int64_t pts = 0,
|
||||
int* num_decoded_pics = nullptr);
|
||||
/**
|
||||
* @brief This function returns a decoded frame and timestamp. This should be called in a loop
|
||||
* fetching all the available frames
|
||||
*
|
||||
*/
|
||||
virtual uint8_t* GetFrame(int64_t* pts);
|
||||
|
||||
/**
|
||||
* @brief function to release frame after use by the application: Only used with
|
||||
* "OUT_SURFACE_MEM_DEV_INTERNAL"
|
||||
*
|
||||
* @param pTimestamp - timestamp of the frame to be released (unmapped)
|
||||
* @param b_flushing - true when flushing
|
||||
* @return true - success
|
||||
* @return false - failed
|
||||
*/
|
||||
virtual bool ReleaseFrame(int64_t pTimestamp, bool b_flushing = false);
|
||||
|
||||
/**
|
||||
* @brief utility function to save image to a file
|
||||
*
|
||||
* @param output_file_name - file to write
|
||||
* @param dev_mem - dev_memory pointer of the frame
|
||||
* @param image_info - output image info
|
||||
* @param is_output_RGB - to write in RGB
|
||||
*/
|
||||
// void SaveImage(std::string output_file_name, void* dev_mem, OutputImageInfo* image_info, bool
|
||||
// is_output_RGB = 0);
|
||||
|
||||
/**
|
||||
* @brief Get the Device info for the current device
|
||||
*
|
||||
* @param device_name
|
||||
* @param gcn_arch_name
|
||||
* @param pci_bus_id
|
||||
* @param pci_domain_id
|
||||
* @param pci_device_id
|
||||
*/
|
||||
void GetDeviceinfo(std::string& device_name,
|
||||
std::string& gcn_arch_name,
|
||||
int& pci_bus_id,
|
||||
int& pci_domain_id,
|
||||
int& pci_device_id);
|
||||
|
||||
/**
|
||||
* @brief Helper function to dump decoded output surface to file
|
||||
*
|
||||
* @param output_file_name - Output file name
|
||||
* @param surf_mem - pointer to surface memory
|
||||
* @param surf_info - surface info
|
||||
* @param rgb_image_size - image size for rgb (optional). A non_zero value indicates the
|
||||
* surf_mem holds an rgb interleaved image and the entire size will be dumped to file
|
||||
*/
|
||||
virtual void SaveFrameToFile(std::string output_file_name,
|
||||
void* surf_mem,
|
||||
OutputSurfaceInfo* surf_info,
|
||||
size_t rgb_image_size = 0);
|
||||
|
||||
/**
|
||||
* @brief Helper function to close an existing file and dump to new file in case of multiple files
|
||||
* using same decoder
|
||||
*/
|
||||
virtual void ResetSaveFrameToFile();
|
||||
|
||||
/**
|
||||
* @brief Get the Num Of Flushed Frames from video decoder object
|
||||
*
|
||||
* @return int32_t
|
||||
*/
|
||||
int32_t GetNumOfFlushedFrames() { return num_frames_flushed_during_reconfig_; }
|
||||
|
||||
/*! \brief Function to wait for the decode completion of the last submitted picture
|
||||
*/
|
||||
void WaitForDecodeCompletion();
|
||||
|
||||
// Session overhead refers to decoder initialization and deinitialization time
|
||||
void AddDecoderSessionOverHead(std::thread::id session_id, double duration)
|
||||
{
|
||||
session_overhead_[session_id] += duration;
|
||||
}
|
||||
double GetDecoderSessionOverHead(std::thread::id session_id)
|
||||
{
|
||||
if(session_overhead_.find(session_id) != session_overhead_.end())
|
||||
{
|
||||
return session_overhead_[session_id];
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Check if the given Video Codec is supported on the given GPU
|
||||
*
|
||||
* @return bool - whether the codec is supported
|
||||
*/
|
||||
bool CodecSupported(int device_id, rocDecVideoCodec codec_id, uint32_t bit_depth);
|
||||
|
||||
/**
|
||||
* @brief This function reconfigure decoder if there is a change in sequence params.
|
||||
*/
|
||||
virtual int ReconfigureDecoder(RocdecVideoFormat* p_video_format);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* @brief Callback function to be registered for getting a callback when decoding of sequence
|
||||
* starts
|
||||
*/
|
||||
static int ROCDECAPI HandleVideoSequenceProc(void* p_user_data, RocdecVideoFormat* p_video_format)
{
    // p_user_data is interpreted as the RocVideoDecoder instance to dispatch to
    auto* decoder = static_cast<RocVideoDecoder*>(p_user_data);
    return decoder->HandleVideoSequence(p_video_format);
}
|
||||
|
||||
/**
|
||||
* @brief Callback function to be registered for getting a callback when a picture is
|
||||
* ready to be decoded
|
||||
*/
|
||||
static int ROCDECAPI HandlePictureDecodeProc(void* p_user_data, RocdecPicParams* p_pic_params)
{
    // p_user_data is interpreted as the RocVideoDecoder instance to dispatch to
    auto* decoder = static_cast<RocVideoDecoder*>(p_user_data);
    return decoder->HandlePictureDecode(p_pic_params);
}
|
||||
|
||||
/**
|
||||
* @brief Callback function to be registered for getting a callback when a decoded frame is
|
||||
* available for display
|
||||
*/
|
||||
static int ROCDECAPI HandlePictureDisplayProc(void* p_user_data, RocdecParserDispInfo* p_disp_info)
{
    // p_user_data is interpreted as the RocVideoDecoder instance to dispatch to
    auto* decoder = static_cast<RocVideoDecoder*>(p_user_data);
    return decoder->HandlePictureDisplay(p_disp_info);
}
|
||||
|
||||
/**
|
||||
* @brief Callback function to be registered for getting a callback when all the unregistered
|
||||
* user SEI Messages are parsed for a frame.
|
||||
*/
|
||||
static int ROCDECAPI HandleSEIMessagesProc(void*                 p_user_data,
                                           RocdecSeiMessageInfo* p_sei_message_info)
{
    // The opaque user pointer is interpreted as the RocVideoDecoder to forward to.
    auto* decoder = static_cast<RocVideoDecoder*>(p_user_data);
    return decoder->GetSEIMessage(p_sei_message_info);
}
|
||||
|
||||
/**
|
||||
* @brief This function gets called when a sequence is ready to be decoded. The function also
|
||||
gets called when there is format change
|
||||
*/
|
||||
int HandleVideoSequence(RocdecVideoFormat* p_video_format);
|
||||
|
||||
/**
|
||||
* @brief This function gets called when a picture is ready to be decoded. rocDecDecodeFrame
|
||||
* is called from this function to decode the picture
|
||||
*/
|
||||
int HandlePictureDecode(RocdecPicParams* p_pic_params);
|
||||
|
||||
/**
|
||||
* @brief This function gets called after a picture is decoded and available for display.
|
||||
Frames are fetched and stored in internal buffer
|
||||
*/
|
||||
int HandlePictureDisplay(RocdecParserDispInfo* p_disp_info);
|
||||
/**
|
||||
* @brief This function gets called when all unregistered user SEI messages are parsed for a
|
||||
* frame
|
||||
*/
|
||||
int GetSEIMessage(RocdecSeiMessageInfo* p_sei_message_info);
|
||||
|
||||
/**
|
||||
* @brief function to release all internal frames and clear the vp_frames_q_ (used with
|
||||
* reconfigure): Only used with "OUT_SURFACE_MEM_DEV_INTERNAL"
|
||||
*
|
||||
* @return true - success
|
||||
* @return false - failed
|
||||
*/
|
||||
bool ReleaseInternalFrames();
|
||||
|
||||
/**
|
||||
* @brief Function to Initialize GPU-HIP
|
||||
*
|
||||
*/
|
||||
bool InitHIP(int device_id);
|
||||
|
||||
/**
 * @brief Function to get start time
 *
 * @return a std::chrono::system_clock time point
 */
// Spelled without the libstdc++-internal `_V2` inline namespace so the declaration
// also compiles against other standard libraries; it names the same type.
std::chrono::system_clock::time_point StartTimer();
|
||||
|
||||
/**
 * @brief Function to get elapsed time
 *
 * @param start_time a std::chrono::system_clock time point to measure from
 * @return the elapsed time as a double (units are defined by the implementation,
 *         which is not visible in this header)
 */
// Spelled without the libstdc++-internal `_V2` inline namespace so the declaration
// also compiles against other standard libraries; it names the same type.
double StopTimer(const std::chrono::system_clock::time_point& start_time);
|
||||
|
||||
int num_devices_;
|
||||
int device_id_;
|
||||
RocdecVideoParser rocdec_parser_ = nullptr;
|
||||
rocDecDecoderHandle roc_decoder_ = nullptr;
|
||||
OutputSurfaceMemoryType out_mem_type_ = OUT_SURFACE_MEM_DEV_INTERNAL;
|
||||
bool b_extract_sei_message_ = false;
|
||||
bool b_force_zero_latency_ = false;
|
||||
uint32_t disp_delay_;
|
||||
ReconfigParams* p_reconfig_params_ = nullptr;
|
||||
bool b_force_recofig_flush_ = false;
|
||||
int32_t num_frames_flushed_during_reconfig_ = 0;
|
||||
hipDeviceProp_t hip_dev_prop_;
|
||||
hipStream_t hip_stream_;
|
||||
rocDecVideoCodec codec_id_ = rocDecVideoCodec_NumCodecs;
|
||||
rocDecVideoChromaFormat video_chroma_format_ = rocDecVideoChromaFormat_420;
|
||||
rocDecVideoSurfaceFormat video_surface_format_ = rocDecVideoSurfaceFormat_NV12;
|
||||
RocdecSeiMessageInfo* curr_sei_message_ptr_ = nullptr;
|
||||
RocdecSeiMessageInfo sei_message_display_q_[MAX_FRAME_NUM];
|
||||
RocdecVideoFormat* curr_video_format_ptr_ = nullptr;
|
||||
int output_frame_cnt_ = 0, output_frame_cnt_ret_ = 0;
|
||||
int decoded_pic_cnt_ = 0;
|
||||
int decode_poc_ = 0, pic_num_in_dec_order_[MAX_FRAME_NUM];
|
||||
int num_alloced_frames_ = 0;
|
||||
int last_decode_surf_idx_ = 0;
|
||||
std::ostringstream input_video_info_str_;
|
||||
int bitdepth_minus_8_ = 0;
|
||||
uint32_t byte_per_pixel_ = 1;
|
||||
uint32_t coded_width_ = 0;
|
||||
uint32_t disp_width_ = 0;
|
||||
uint32_t coded_height_ = 0;
|
||||
uint32_t disp_height_ = 0;
|
||||
uint32_t target_width_ = 0;
|
||||
uint32_t target_height_ = 0;
|
||||
int max_width_ = 0, max_height_ = 0;
|
||||
uint32_t chroma_height_ = 0, chroma_width_ = 0;
|
||||
uint32_t num_chroma_planes_ = 0;
|
||||
uint32_t num_components_ = 0;
|
||||
uint32_t surface_stride_ = 0;
|
||||
uint32_t surface_vstride_ = 0,
|
||||
chroma_vstride_ =
|
||||
0; // vertical stride between planes: used when using internal dev memory
|
||||
size_t surface_size_ = 0;
|
||||
OutputSurfaceInfo output_surface_info_ = {};
|
||||
std::mutex mtx_vp_frame_;
|
||||
std::vector<DecFrameBuffer> vp_frames_; // vector of decoded frames
|
||||
std::queue<DecFrameBuffer> vp_frames_q_;
|
||||
Rect disp_rect_ = {}; // displayable area specified in the bitstream
|
||||
Rect crop_rect_ = {}; // user specified region of interest within diplayable area disp_rect_
|
||||
FILE* fp_sei_ = NULL;
|
||||
FILE* fp_out_ = NULL;
|
||||
bool is_decoder_reconfigured_ = false;
|
||||
std::string current_output_filename = "";
|
||||
uint32_t extra_output_file_count_ = 0;
|
||||
std::thread::id
|
||||
decoder_session_id_; // Decoder session identifier. Used to gather session level stats.
|
||||
std::unordered_map<std::thread::id, double>
|
||||
session_overhead_; // Records session overhead of initialization+deinitialization time.
|
||||
// Format is (thread id, duration)
|
||||
};
|
||||
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <rocdecode/roc_bitstream_reader.h>
|
||||
#include <rocdecode/rocdecode.h>
|
||||
#include <rocdecode/rocparser.h>
|
||||
#include <iostream>
|
||||
#include "roc_video_dec.h"
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
// Get input file
|
||||
std::string input_file_path{};
|
||||
for(int i = 1; i < argc; i++)
|
||||
{
|
||||
if(!strcmp(argv[i], "-i"))
|
||||
{
|
||||
if(++i == argc)
|
||||
{
|
||||
std::cerr << "Provide path to input file" << std::endl;
|
||||
}
|
||||
input_file_path = argv[i];
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Set up bitstreamreader
|
||||
RocdecBitstreamReader bs_reader = nullptr;
|
||||
rocDecVideoCodec rocdec_codec_id{};
|
||||
int bit_depth{};
|
||||
if(rocDecCreateBitstreamReader(&bs_reader, input_file_path.c_str()) != ROCDEC_SUCCESS)
|
||||
{
|
||||
std::cerr << "Failed to create the bitstream reader." << std::endl;
|
||||
return 1;
|
||||
}
|
||||
if(rocDecGetBitstreamCodecType(bs_reader, &rocdec_codec_id) != ROCDEC_SUCCESS)
|
||||
{
|
||||
std::cerr << "Failed to get stream codec type." << std::endl;
|
||||
return 1;
|
||||
}
|
||||
if(rocdec_codec_id >= rocDecVideoCodec_NumCodecs)
|
||||
{
|
||||
std::cerr << "Unsupported stream file type or codec type by the bitstream reader. Exiting."
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
if(rocDecGetBitstreamBitDepth(bs_reader, &bit_depth) != ROCDEC_SUCCESS)
|
||||
{
|
||||
std::cerr << "Failed to get stream bit depth." << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Set up video decoder
|
||||
int device_id = 0;
|
||||
OutputSurfaceMemoryType mem_type = OUT_SURFACE_MEM_DEV_INTERNAL;
|
||||
bool b_force_zero_latency = false;
|
||||
Rect* p_crop_rect = nullptr;
|
||||
int disp_delay = 1;
|
||||
bool b_extract_sei_messages = false;
|
||||
RocVideoDecoder* viddec = new RocVideoDecoder(device_id,
|
||||
mem_type,
|
||||
rocdec_codec_id,
|
||||
b_force_zero_latency,
|
||||
p_crop_rect,
|
||||
b_extract_sei_messages,
|
||||
disp_delay);
|
||||
|
||||
uint8_t* pvideo = nullptr;
|
||||
int n_video_bytes = 0;
|
||||
int64_t pts = 0;
|
||||
int pkg_flags = 0;
|
||||
int decoded_pics = 0;
|
||||
if(rocDecGetBitstreamPicData(bs_reader, &pvideo, &n_video_bytes, &pts) != ROCDEC_SUCCESS)
|
||||
{
|
||||
std::cerr << "Failed to get picture data." << std::endl;
|
||||
return 1;
|
||||
}
|
||||
// Treat 0 bitstream size as end of stream indicator
|
||||
if(n_video_bytes == 0)
|
||||
{
|
||||
pkg_flags |= ROCDEC_PKT_ENDOFSTREAM;
|
||||
}
|
||||
viddec->DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics);
|
||||
viddec->DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics);
|
||||
viddec->DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics);
|
||||
if(bs_reader)
|
||||
{
|
||||
rocDecDestroyBitstreamReader(bs_reader);
|
||||
}
|
||||
}
|
||||
@@ -26,7 +26,15 @@ from __future__ import absolute_import
|
||||
def test_perfetto_data(
|
||||
pftrace_data,
|
||||
json_data,
|
||||
categories=("hip", "hsa", "marker", "kernel", "memory_copy", "memory_allocation"),
|
||||
categories=(
|
||||
"hip",
|
||||
"hsa",
|
||||
"marker",
|
||||
"kernel",
|
||||
"memory_copy",
|
||||
"memory_allocation",
|
||||
"rocdecode_api",
|
||||
),
|
||||
):
|
||||
|
||||
mapping = {
|
||||
@@ -36,6 +44,7 @@ def test_perfetto_data(
|
||||
"kernel": ("kernel_dispatch", "kernel_dispatch"),
|
||||
"memory_copy": ("memory_copy", "memory_copy"),
|
||||
"memory_allocation": ("memory_allocation", "memory_allocation"),
|
||||
"rocdecode_api": ("rocdecode_api", "rocdecode_api"),
|
||||
}
|
||||
|
||||
# make sure they specified valid categories
|
||||
@@ -73,6 +82,7 @@ def test_otf2_data(
|
||||
"kernel": ("kernel_dispatch", "kernel_dispatch"),
|
||||
"memory_copy": ("memory_copy", "memory_copy"),
|
||||
"memory_allocation": ("memory_allocation", "memory_allocation"),
|
||||
"rocdecode_api": ("rocdecode_api", "rocdecode_api"),
|
||||
}
|
||||
|
||||
# make sure they specified valid categories
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
project(
|
||||
rocprofiler-tests-rocdecode-tracing
|
||||
LANGUAGES CXX
|
||||
VERSION 0.0.0)
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
|
||||
if(ROCPROFILER_MEMCHECK_PRELOAD_ENV)
|
||||
set(PRELOAD_ENV
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}:$<TARGET_FILE:rocprofiler-sdk-json-tool>")
|
||||
else()
|
||||
set(PRELOAD_ENV "LD_PRELOAD=$<TARGET_FILE:rocprofiler-sdk-json-tool>")
|
||||
endif()
|
||||
|
||||
set(ROCDECODE_VIDEO_FILE
|
||||
"${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H265.265")
|
||||
if(NOT EXISTS "${ROCDECODE_VIDEO_FILE}")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"Unable to find video file for rocdecode tests: ${ROCDECODE_VIDEO_FILE}")
|
||||
endif()
|
||||
add_test(NAME test-rocdecode-tracing-execute COMMAND $<TARGET_FILE:rocdecode> -i
|
||||
${ROCDECODE_VIDEO_FILE})
|
||||
|
||||
set(rocdecode-tracing-env
|
||||
"${PRELOAD_ENV}"
|
||||
"ROCPROFILER_TOOL_OUTPUT_FILE=rocdecode-tracing-test.json"
|
||||
"LD_LIBRARY_PATH=$<TARGET_FILE_DIR:rocprofiler-sdk::rocprofiler-sdk-shared-library>:$ENV{LD_LIBRARY_PATH}"
|
||||
)
|
||||
|
||||
set_tests_properties(
|
||||
test-rocdecode-tracing-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
|
||||
"${rocdecode-tracing-env}" FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
|
||||
|
||||
# copy to binary directory
|
||||
rocprofiler_configure_pytest_files(COPY validate.py conftest.py CONFIG pytest.ini)
|
||||
|
||||
add_test(NAME test-rocdecode-tracing-validate
|
||||
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/rocdecode-tracing-test.json)
|
||||
|
||||
set_tests_properties(
|
||||
test-rocdecode-tracing-validate
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
|
||||
test-rocdecode-tracing-execute FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
|
||||
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
|
||||
|
||||
|
||||
def pytest_addoption(parser):
    """Register the ``--input`` command-line option on the given parser."""
    option_settings = {
        "action": "store",
        "default": "rocdecode-tracing-test.json",
        "help": "Input JSON",
    }
    parser.addoption("--input", **option_settings)
|
||||
|
||||
|
||||
@pytest.fixture
def input_data(request):
    """Parse the JSON file named by ``--input`` and return it wrapped in a dotdict."""
    json_path = request.config.getoption("--input")
    with open(json_path, "r") as stream:
        parsed = json.load(stream)
    return dotdict(parsed)
|
||||
@@ -0,0 +1,5 @@
|
||||
|
||||
[pytest]
|
||||
addopts = --durations=20 -rA -s -vv
|
||||
testpaths = validate.py
|
||||
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
|
||||
@@ -0,0 +1,285 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
|
||||
# helper function
|
||||
def node_exists(name, data, min_len=1):
    """Assert that ``name`` is in ``data`` with a non-None value.

    When the value is a list, tuple, dict or set, it must also hold at least
    ``min_len`` entries.
    """
    assert name in data
    value = data[name]
    assert value is not None
    if not isinstance(value, (list, tuple, dict, set)):
        return
    assert len(value) >= min_len, f"{name}:\n{data}"
|
||||
|
||||
|
||||
def test_data_structure(input_data):
    """Check that the top-level nodes expected by the other tests are present and non-empty."""
    data = input_data

    node_exists("rocprofiler-sdk-json-tool", data)
    sdk_data = data["rocprofiler-sdk-json-tool"]

    node_exists("metadata", sdk_data)
    for field in ("pid", "main_tid", "init_time", "fini_time"):
        node_exists(field, sdk_data["metadata"])

    for section in ("agents", "call_stack", "callback_records", "buffer_records"):
        node_exists(section, sdk_data)

    # the same set of nodes is required in the callback records and then the buffer records
    record_nodes = (
        "names",
        "hsa_api_traces",
        "hip_api_traces",
        "memory_allocations",
        "rocdecode_api_traces",
    )
    for category in ("callback_records", "buffer_records"):
        for node in record_nodes:
            node_exists(node, sdk_data[category])
|
||||
|
||||
|
||||
def test_size_entries(input_data):
|
||||
# check that size fields are > 0 but account for function arguments
|
||||
# which are named "size"
|
||||
def check_size(data, bt):
|
||||
if "size" in data.keys():
|
||||
if isinstance(data["size"], str) and bt.endswith('["args"]'):
|
||||
pass
|
||||
else:
|
||||
assert data["size"] > 0, f"origin: {bt}"
|
||||
|
||||
# recursively check the entire data structure
|
||||
def iterate_data(data, bt):
|
||||
if isinstance(data, (list, tuple)):
|
||||
for i, itr in enumerate(data):
|
||||
if isinstance(itr, dict):
|
||||
check_size(itr, f"{bt}[{i}]")
|
||||
iterate_data(itr, f"{bt}[{i}]")
|
||||
elif isinstance(data, dict):
|
||||
check_size(data, f"{bt}")
|
||||
for key, itr in data.items():
|
||||
iterate_data(itr, f'{bt}["{key}"]')
|
||||
|
||||
# start recursive check over entire JSON dict
|
||||
iterate_data(input_data, "input_data")
|
||||
|
||||
|
||||
def test_timestamps(input_data):
|
||||
"""Verify starting timestamps are less than ending timestamps"""
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
cb_start = {}
|
||||
cb_end = {}
|
||||
for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]:
|
||||
for itr in sdk_data["callback_records"][titr]:
|
||||
cid = itr["correlation_id"]["internal"]
|
||||
phase = itr["phase"]
|
||||
if phase == 1:
|
||||
cb_start[cid] = itr["timestamp"]
|
||||
elif phase == 2:
|
||||
cb_end[cid] = itr["timestamp"]
|
||||
assert cb_start[cid] <= itr["timestamp"]
|
||||
else:
|
||||
assert phase == 1 or phase == 2
|
||||
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
assert itr["start_timestamp"] <= itr["end_timestamp"]
|
||||
|
||||
for titr in ["memory_allocations"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
assert itr["start_timestamp"] < itr["end_timestamp"], f"[{titr}] {itr}"
|
||||
assert itr["correlation_id"]["internal"] > 0, f"[{titr}] {itr}"
|
||||
assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}"
|
||||
assert (
|
||||
sdk_data["metadata"]["init_time"] < itr["start_timestamp"]
|
||||
), f"[{titr}] {itr}"
|
||||
assert (
|
||||
sdk_data["metadata"]["init_time"] < itr["end_timestamp"]
|
||||
), f"[{titr}] {itr}"
|
||||
assert (
|
||||
sdk_data["metadata"]["fini_time"] > itr["start_timestamp"]
|
||||
), f"[{titr}] {itr}"
|
||||
assert (
|
||||
sdk_data["metadata"]["fini_time"] > itr["end_timestamp"]
|
||||
), f"[{titr}] {itr}"
|
||||
|
||||
api_start = cb_start[itr["correlation_id"]["internal"]]
|
||||
# api_end = cb_end[itr["correlation_id"]["internal"]]
|
||||
assert api_start < itr["start_timestamp"], f"[{titr}] {itr}"
|
||||
# assert api_end <= itr["end_timestamp"], f"[{titr}] {itr}"
|
||||
|
||||
|
||||
def test_internal_correlation_ids(input_data):
|
||||
"""Assure correlation ids are unique"""
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
api_corr_ids = []
|
||||
for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]:
|
||||
for itr in sdk_data["callback_records"][titr]:
|
||||
api_corr_ids.append(itr["correlation_id"]["internal"])
|
||||
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
api_corr_ids.append(itr["correlation_id"]["internal"])
|
||||
|
||||
api_corr_ids_sorted = sorted(api_corr_ids)
|
||||
api_corr_ids_unique = list(set(api_corr_ids))
|
||||
|
||||
for itr in sdk_data["buffer_records"]["memory_allocations"]:
|
||||
assert itr["correlation_id"]["internal"] in api_corr_ids_unique
|
||||
|
||||
len_corr_id_unq = len(api_corr_ids_unique)
|
||||
assert len(api_corr_ids) != len_corr_id_unq
|
||||
assert max(api_corr_ids_sorted) == len_corr_id_unq
|
||||
|
||||
|
||||
def test_external_correlation_ids(input_data):
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
extern_corr_ids = []
|
||||
for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]:
|
||||
for itr in sdk_data["callback_records"][titr]:
|
||||
assert itr["correlation_id"]["external"] > 0
|
||||
assert itr["thread_id"] == itr["correlation_id"]["external"]
|
||||
extern_corr_ids.append(itr["correlation_id"]["external"])
|
||||
|
||||
extern_corr_ids = list(set(sorted(extern_corr_ids)))
|
||||
for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}"
|
||||
assert (
|
||||
itr["thread_id"] == itr["correlation_id"]["external"]
|
||||
), f"[{titr}] {itr}"
|
||||
assert itr["thread_id"] in extern_corr_ids, f"[{titr}] {itr}"
|
||||
assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}"
|
||||
|
||||
for titr in ["memory_allocations"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}"
|
||||
assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}"
|
||||
|
||||
for itr in sdk_data["callback_records"][titr]:
|
||||
assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}"
|
||||
assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}"
|
||||
|
||||
|
||||
def get_operation(record, kind_name, op_name=None):
    """Look up a kind, or one of its operations, in ``record["names"]``.

    Without ``op_name``: returns ``(index of the kind, its operations list)``.
    With ``op_name``: returns the index of that operation within the kind.
    Returns None when nothing matches.
    """
    for kind_index, entry in enumerate(record["names"]):
        if entry["kind"] != kind_name:
            continue
        operations = entry["operations"]
        if op_name is None:
            return kind_index, operations
        for op_index, candidate in enumerate(operations):
            if candidate == op_name:
                return op_index
    return None
|
||||
|
||||
|
||||
def test_rocdecode_traces(input_data):
    """Check the rocDecode API traces.

    The buffer and callback operation name tables must match and hold 16 entries,
    enter/exit callback counts must equal the number of buffer records, and a
    fixed set of rocDecode calls must appear among the entered calls.
    """
    sdk_data = input_data["rocprofiler-sdk-json-tool"]

    callback_records = sdk_data["callback_records"]
    buffer_records = sdk_data["buffer_records"]

    buffer_traces = sdk_data["buffer_records"]["rocdecode_api_traces"]
    _, buffer_op_names = get_operation(buffer_records, "ROCDECODE_API")
    assert len(buffer_op_names) == 16

    callback_traces = sdk_data["callback_records"]["rocdecode_api_traces"]
    _, callback_op_names = get_operation(callback_records, "ROCDECODE_API")

    assert buffer_op_names == callback_op_names and len(callback_op_names) == 16

    # check that buffer and callback records agree
    entered_calls = []
    exit_count = 0
    for record in callback_traces:
        phase = record["phase"]
        if phase == 1:
            entered_calls.append(callback_op_names[record["operation"]])
        if phase == 2:
            exit_count += 1

    assert len(entered_calls) == exit_count == len(buffer_traces)

    expected_calls = (
        "rocDecCreateBitstreamReader",
        "rocDecGetBitstreamCodecType",
        "rocDecGetBitstreamBitDepth",
        "rocDecCreateVideoParser",
        "rocDecGetBitstreamPicData",
        "rocDecGetDecoderCaps",
        "rocDecCreateDecoder",
        "rocDecDecodeFrame",
        "rocDecParseVideoData",
        "rocDecGetVideoFrame",
        "rocDecGetDecodeStatus",
        "rocDecDestroyBitstreamReader",
    )
    for call in expected_calls:
        assert call in entered_calls
|
||||
|
||||
|
||||
def test_retired_correlation_ids(input_data):
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
def _sort_dict(inp):
|
||||
return dict(sorted(inp.items()))
|
||||
|
||||
api_corr_ids = {}
|
||||
for titr in ["hsa_api_traces", "hip_api_traces", "rocdecode_api_traces"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
corr_id = itr["correlation_id"]["internal"]
|
||||
assert corr_id not in api_corr_ids.keys()
|
||||
api_corr_ids[corr_id] = itr
|
||||
|
||||
alloc_corr_ids = {}
|
||||
for titr in ["memory_allocations"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
corr_id = itr["correlation_id"]["internal"]
|
||||
assert corr_id not in alloc_corr_ids.keys()
|
||||
alloc_corr_ids[corr_id] = itr
|
||||
|
||||
retired_corr_ids = {}
|
||||
for itr in sdk_data["buffer_records"]["retired_correlation_ids"]:
|
||||
corr_id = itr["internal_correlation_id"]
|
||||
assert corr_id not in retired_corr_ids.keys()
|
||||
retired_corr_ids[corr_id] = itr
|
||||
|
||||
api_corr_ids = _sort_dict(api_corr_ids)
|
||||
alloc_corr_ids = _sort_dict(alloc_corr_ids)
|
||||
retired_corr_ids = _sort_dict(retired_corr_ids)
|
||||
|
||||
for cid, itr in alloc_corr_ids.items():
|
||||
assert cid in retired_corr_ids.keys()
|
||||
retired_ts = retired_corr_ids[cid]["timestamp"]
|
||||
end_ts = itr["end_timestamp"]
|
||||
assert (retired_ts - end_ts) > 0, f"correlation-id: {cid}, data: {itr}"
|
||||
|
||||
for cid, itr in api_corr_ids.items():
|
||||
assert cid in retired_corr_ids.keys()
|
||||
retired_ts = retired_corr_ids[cid]["timestamp"]
|
||||
end_ts = itr["end_timestamp"]
|
||||
assert (retired_ts - end_ts) > 0, f"correlation-id: {cid}, data: {itr}"
|
||||
|
||||
assert len(api_corr_ids.keys()) == (len(retired_corr_ids.keys()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run this file under pytest, stopping at the first failure and forwarding CLI arguments.
    pytest_args = ["-x", __file__]
    pytest_args += sys.argv[1:]
    sys.exit(pytest.main(pytest_args))
|
||||
@@ -36,3 +36,6 @@ add_subdirectory(roctracer-roctx)
|
||||
add_subdirectory(scratch-memory)
|
||||
add_subdirectory(pc-sampling)
|
||||
add_subdirectory(collection-period)
|
||||
if(ROCPROFILER_BUILD_ROCDECODE_TESTS)
|
||||
add_subdirectory(rocdecode-trace)
|
||||
endif()
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
project(
|
||||
rocprofiler-tests-rocprofv3-rocdecode-tracing
|
||||
LANGUAGES CXX
|
||||
VERSION 0.0.0)
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
|
||||
rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py)
|
||||
|
||||
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
|
||||
|
||||
set(rocdecode-tracing-env "${PRELOAD_ENV}")
|
||||
|
||||
set(ROCDECODE_VIDEO_FILE
|
||||
"${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H265.265")
|
||||
if(NOT EXISTS "${ROCDECODE_VIDEO_FILE}")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"Unable to find video file for rocdecode tests: ${ROCDECODE_VIDEO_FILE}")
|
||||
endif()
|
||||
add_test(
|
||||
NAME rocprofv3-test-rocdecode-tracing-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --rocdecode-trace -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json otf2 pftrace
|
||||
csv --log-level env -- $<TARGET_FILE:rocdecode> -i ${ROCDECODE_VIDEO_FILE})
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-rocdecode-tracing-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
|
||||
"${rocdecode-tracing-env}" FAIL_REGULAR_EXPRESSION "threw an exception")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-rocdecode-tracing-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_results.json --otf2-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_results.otf2 --pftrace-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_results.pftrace --csv-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/rocdecode-trace/out_rocdecode_api_trace.csv)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-rocdecode-tracing-validate
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
|
||||
rocprofv3-test-rocdecode-tracing-execute FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError")
|
||||
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import pytest
|
||||
|
||||
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
|
||||
from rocprofiler_sdk.pytest_utils import collapse_dict_list
|
||||
from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader
|
||||
from rocprofiler_sdk.pytest_utils.otf2_reader import OTF2Reader
|
||||
|
||||
|
||||
def pytest_addoption(parser):
    """Register the command-line options naming the rocprofv3 output files to validate.

    The defaults use the ``rocdecode-trace`` directory, matching the paths the
    CMake validate test passes for the ``%tag%-trace`` output directory.
    """
    options = (
        ("--json-input", "rocdecode-trace/out_results.json", "Input JSON"),
        ("--otf2-input", "rocdecode-trace/out_results.otf2", "Input OTF2"),
        ("--pftrace-input", "rocdecode-trace/out_results.pftrace", "Input pftrace file"),
        ("--csv-input", "rocdecode-trace/out_rocdecode_api_trace.csv", "Input CSV"),
    )
    for flag, default_path, help_text in options:
        parser.addoption(flag, action="store", default=default_path, help=help_text)
|
||||
|
||||
|
||||
@pytest.fixture
def json_data(request):
    """Parse the file named by ``--json-input``, pass it through collapse_dict_list, and wrap it in a dotdict."""
    json_path = request.config.getoption("--json-input")
    with open(json_path, "r") as stream:
        parsed = json.load(stream)
        collapsed = collapse_dict_list(parsed)
        return dotdict(collapsed)
|
||||
|
||||
|
||||
@pytest.fixture
def csv_data(request):
    """Read the file named by ``--csv-input`` and return its rows as a list of dicts keyed by column header."""
    csv_path = request.config.getoption("--csv-input")
    with open(csv_path, "r") as stream:
        rows = list(csv.DictReader(stream))
    return rows
|
||||
|
||||
|
||||
@pytest.fixture
def otf2_data(request):
    """Read the OTF2 trace named by ``--otf2-input`` and return the first element of the reader's result.

    Raises:
        FileNotFoundError: when the path does not exist.
    """
    filename = request.config.getoption("--otf2-input")
    if not os.path.exists(filename):
        # a missing path is "not found"; FileExistsError means the opposite condition
        raise FileNotFoundError(f"{filename} does not exist")
    return OTF2Reader(filename).read()[0]
|
||||
|
||||
|
||||
@pytest.fixture
def pftrace_data(request):
    """Read the perfetto trace named by ``--pftrace-input`` and return the first element of the reader's result."""
    pftrace_path = request.config.getoption("--pftrace-input")
    reader = PerfettoReader(pftrace_path)
    return reader.read()[0]
|
||||
@@ -0,0 +1,5 @@
|
||||
|
||||
[pytest]
|
||||
addopts = --durations=20 -rA -s -vv
|
||||
testpaths = validate.py
|
||||
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
|
||||
@@ -0,0 +1,138 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import pytest
|
||||
import json
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
# helper function
|
||||
def node_exists(name, data, min_len=1):
    """Assert that ``name`` is in ``data`` with a non-None value.

    When the value is a list, tuple, dict or set, it must also hold at least
    ``min_len`` entries.
    """
    assert name in data
    value = data[name]
    assert value is not None
    if not isinstance(value, (list, tuple, dict, set)):
        return
    assert len(value) >= min_len
|
||||
|
||||
|
||||
def get_operation(record, kind_name, op_name=None):
    """Look up a kind, or one of its operations, in ``record["strings"]["buffer_records"]``.

    Without ``op_name``: returns ``(index of the kind, its operations list)``.
    With ``op_name``: returns the index of that operation within the kind.
    Returns None when nothing matches.
    """
    for kind_index, entry in enumerate(record["strings"]["buffer_records"]):
        if entry["kind"] != kind_name:
            continue
        operations = entry["operations"]
        if op_name is None:
            return kind_index, operations
        for op_index, candidate in enumerate(operations):
            if candidate == op_name:
                return op_index
    return None
|
||||
|
||||
|
||||
def test_rocdeocde(json_data):
    """Check the rocDecode API buffer records in the rocprofv3 JSON output.

    The ROCDECODE_API operation table must hold 16 names, and every record must
    carry the expected fields, positive ordered timestamps, and a kind/operation
    that resolves to a ROCDECODE_API operation name.
    """
    data = json_data["rocprofiler-sdk-tool"]
    buffer_records = data["buffer_records"]

    rocdecode_data = buffer_records["rocdecode_api"]

    _, bf_op_names = get_operation(data, "ROCDECODE_API")

    assert len(bf_op_names) == 16

    # check buffering data
    for node in rocdecode_data:
        assert "size" in node
        assert "kind" in node
        assert "operation" in node
        assert "correlation_id" in node
        assert "end_timestamp" in node
        assert "start_timestamp" in node
        assert "thread_id" in node

        assert node.size > 0
        assert node.thread_id > 0
        assert node.start_timestamp > 0
        assert node.end_timestamp > 0
        assert node.start_timestamp < node.end_timestamp

        # the record's kind index must resolve to the ROCDECODE_API string table entry
        assert data.strings.buffer_records[node.kind].kind == "ROCDECODE_API"
        assert (
            data.strings.buffer_records[node.kind].operations[node.operation]
            in bf_op_names
        )
|
||||
|
||||
|
||||
def test_csv_data(csv_data):
|
||||
assert len(csv_data) > 0, "Expected non-empty csv data"
|
||||
|
||||
api_calls = []
|
||||
|
||||
for row in csv_data:
|
||||
assert "Domain" in row, "'Domain' was not present in csv data for rocdecode-trace"
|
||||
assert (
|
||||
"Function" in row
|
||||
), "'Function' was not present in csv data for rocdecode-trace"
|
||||
assert (
|
||||
"Process_Id" in row
|
||||
), "'Process_Id' was not present in csv data for rocdecode-trace"
|
||||
assert (
|
||||
"Thread_Id" in row
|
||||
), "'Thread_Id' was not present in csv data for rocdecode-trace"
|
||||
assert (
|
||||
"Correlation_Id" in row
|
||||
), "'Correlation_Id' was not present in csv data for rocdecode-trace"
|
||||
assert (
|
||||
"Start_Timestamp" in row
|
||||
), "'Start_Timestamp' was not present in csv data for rocdecode-trace"
|
||||
assert (
|
||||
"End_Timestamp" in row
|
||||
), "'End_Timestamp' was not present in csv data for rocdecode-trace"
|
||||
|
||||
api_calls.append(row["Function"])
|
||||
|
||||
assert row["Domain"] == "ROCDECODE_API"
|
||||
assert int(row["Process_Id"]) > 0
|
||||
assert int(row["Thread_Id"]) > 0
|
||||
assert int(row["Start_Timestamp"]) > 0
|
||||
assert int(row["End_Timestamp"]) > 0
|
||||
assert int(row["Start_Timestamp"]) < int(row["End_Timestamp"])
|
||||
|
||||
for call in [
|
||||
"rocDecCreateBitstreamReader",
|
||||
"rocDecGetBitstreamCodecType",
|
||||
"rocDecGetBitstreamBitDepth",
|
||||
"rocDecCreateVideoParser",
|
||||
"rocDecGetBitstreamPicData",
|
||||
"rocDecGetDecoderCaps",
|
||||
"rocDecCreateDecoder",
|
||||
"rocDecDecodeFrame",
|
||||
"rocDecParseVideoData",
|
||||
"rocDecGetVideoFrame",
|
||||
"rocDecGetDecodeStatus",
|
||||
"rocDecDestroyBitstreamReader",
|
||||
]:
|
||||
assert call in api_calls
|
||||
|
||||
|
||||
def test_perfetto_data(pftrace_data, json_data):
|
||||
import rocprofiler_sdk.tests.rocprofv3 as rocprofv3
|
||||
|
||||
rocprofv3.test_perfetto_data(
|
||||
pftrace_data,
|
||||
json_data,
|
||||
("hip", "hsa", "memory_allocation", "rocdecode_api"),
|
||||
)
|
||||
|
||||
|
||||
def test_otf2_data(otf2_data, json_data):
|
||||
import rocprofiler_sdk.tests.rocprofv3 as rocprofv3
|
||||
|
||||
rocprofv3.test_otf2_data(
|
||||
otf2_data,
|
||||
json_data,
|
||||
("hip", "hsa", "memory_allocation", "rocdecode_api"),
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run this file through pytest when executed directly: "-x" comes first,
    # then this file, then any extra command-line arguments; the pytest result
    # becomes the process exit status.
    pytest_args = ["-x", __file__]
    pytest_args.extend(sys.argv[1:])
    sys.exit(pytest.main(pytest_args))
|
||||
@@ -397,6 +397,23 @@ struct rccl_api_callback_record_t
|
||||
}
|
||||
};
|
||||
|
||||
struct rocdecode_api_callback_record_t
|
||||
{
|
||||
uint64_t timestamp = 0;
|
||||
rocprofiler_callback_tracing_record_t record = {};
|
||||
rocprofiler_callback_tracing_rocdecode_api_data_t payload = {};
|
||||
callback_arg_array_t args = {};
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
ar(cereal::make_nvp("timestamp", timestamp));
|
||||
cereal::save(ar, record);
|
||||
ar(cereal::make_nvp("payload", payload));
|
||||
serialize_args(ar, args);
|
||||
}
|
||||
};
|
||||
|
||||
struct ompt_callback_record_t
|
||||
{
|
||||
uint64_t timestamp = 0;
|
||||
@@ -555,6 +572,7 @@ auto kernel_dispatch_cb_records = std::deque<kernel_dispatch_callback_record_
|
||||
auto memory_copy_cb_records = std::deque<memory_copy_callback_record_t>{};
|
||||
auto memory_allocation_cb_records = std::deque<memory_allocation_callback_record_t>{};
|
||||
auto rccl_api_cb_records = std::deque<rccl_api_callback_record_t>{};
|
||||
auto rocdecode_api_cb_records = std::deque<rocdecode_api_callback_record_t>{};
|
||||
auto ompt_cb_records = std::deque<ompt_callback_record_t>{};
|
||||
|
||||
int
|
||||
@@ -824,6 +842,20 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
runtime_init_cb_records.emplace_back(
|
||||
runtime_init_callback_record_t{ts, record, *data, std::move(args)});
|
||||
}
|
||||
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API)
|
||||
{
|
||||
auto* data =
|
||||
static_cast<rocprofiler_callback_tracing_rocdecode_api_data_t*>(record.payload);
|
||||
auto args = callback_arg_array_t{};
|
||||
if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
|
||||
rocprofiler_iterate_callback_tracing_kind_operation_args(
|
||||
record, save_args, record.phase, &args);
|
||||
|
||||
static auto _mutex = std::mutex{};
|
||||
auto _lk = std::unique_lock<std::mutex>{_mutex};
|
||||
rocdecode_api_cb_records.emplace_back(
|
||||
rocdecode_api_callback_record_t{ts, record, *data, std::move(args)});
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error{"unsupported callback kind"};
|
||||
@@ -843,8 +875,9 @@ auto scratch_memory_records = std::deque<rocprofiler_buffer_tracing_scratch_memo
|
||||
auto page_migration_records = std::deque<rocprofiler_buffer_tracing_page_migration_record_t>{};
|
||||
auto corr_id_retire_records =
|
||||
std::deque<rocprofiler_buffer_tracing_correlation_id_retirement_record_t>{};
|
||||
auto rccl_api_bf_records = std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>{};
|
||||
auto ompt_bf_records = std::deque<rocprofiler_buffer_tracing_ompt_record_t>{};
|
||||
auto rccl_api_bf_records = std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>{};
|
||||
auto rocdecode_api_bf_records = std::deque<rocprofiler_buffer_tracing_rocdecode_api_record_t>{};
|
||||
auto ompt_bf_records = std::deque<rocprofiler_buffer_tracing_ompt_record_t>{};
|
||||
|
||||
void
|
||||
tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
|
||||
@@ -971,6 +1004,13 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
|
||||
|
||||
runtime_init_bf_records.emplace_back(*record);
|
||||
}
|
||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_ROCDECODE_API)
|
||||
{
|
||||
auto* record = static_cast<rocprofiler_buffer_tracing_rocdecode_api_record_t*>(
|
||||
header->payload);
|
||||
|
||||
rocdecode_api_bf_records.emplace_back(*record);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error{
|
||||
@@ -1069,6 +1109,9 @@ rocprofiler_context_id_t kernel_dispatch_buffered_ctx = {0};
|
||||
rocprofiler_context_id_t page_migration_ctx = {0};
|
||||
rocprofiler_context_id_t runtime_init_callback_ctx = {};
|
||||
rocprofiler_context_id_t runtime_init_buffered_ctx = {};
|
||||
rocprofiler_context_id_t rocdecode_api_callback_ctx = {0};
|
||||
rocprofiler_context_id_t rocdecode_api_buffered_ctx = {0};
|
||||
|
||||
// buffers
|
||||
rocprofiler_buffer_id_t runtime_init_buffered_buffer = {};
|
||||
rocprofiler_buffer_id_t hsa_api_buffered_buffer = {};
|
||||
@@ -1082,6 +1125,7 @@ rocprofiler_buffer_id_t counter_collection_buffer = {};
|
||||
rocprofiler_buffer_id_t scratch_memory_buffer = {};
|
||||
rocprofiler_buffer_id_t corr_id_retire_buffer = {};
|
||||
rocprofiler_buffer_id_t rccl_api_buffered_buffer = {};
|
||||
rocprofiler_buffer_id_t rocdecode_api_buffer = {};
|
||||
rocprofiler_buffer_id_t ompt_buffered_buffer = {};
|
||||
|
||||
auto contexts = std::unordered_map<std::string_view, rocprofiler_context_id_t*>{
|
||||
@@ -1107,10 +1151,12 @@ auto contexts = std::unordered_map<std::string_view, rocprofiler_context_id_t*>{
|
||||
{"SCRATCH_MEMORY", &scratch_memory_ctx},
|
||||
{"CORRELATION_ID_RETIREMENT", &corr_id_retire_ctx},
|
||||
{"RCCL_API_BUFFERED", &rccl_api_buffered_ctx},
|
||||
{"ROCDECODE_API_CALLBACK", &rocdecode_api_callback_ctx},
|
||||
{"ROCDECODE_API_BUFFERED", &rocdecode_api_buffered_ctx},
|
||||
{"OMPT_BUFFERED", &ompt_buffered_ctx},
|
||||
};
|
||||
|
||||
auto buffers = std::array<rocprofiler_buffer_id_t*, 13>{&runtime_init_buffered_buffer,
|
||||
auto buffers = std::array<rocprofiler_buffer_id_t*, 14>{&runtime_init_buffered_buffer,
|
||||
&hsa_api_buffered_buffer,
|
||||
&hip_api_buffered_buffer,
|
||||
&marker_api_buffered_buffer,
|
||||
@@ -1122,7 +1168,8 @@ auto buffers = std::array<rocprofiler_buffer_id_t*, 13>{&runtime_init_buffered_b
|
||||
&counter_collection_buffer,
|
||||
&corr_id_retire_buffer,
|
||||
&rccl_api_buffered_buffer,
|
||||
&ompt_buffered_buffer};
|
||||
&ompt_buffered_buffer,
|
||||
&rocdecode_api_buffer};
|
||||
|
||||
auto agents = std::vector<rocprofiler_agent_t>{};
|
||||
auto agents_map = std::unordered_map<rocprofiler_agent_id_t, rocprofiler_agent_t>{};
|
||||
@@ -1288,6 +1335,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
nullptr),
|
||||
"rccl api callback tracing service configure");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_tracing_service(rocdecode_api_callback_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API,
|
||||
nullptr,
|
||||
0,
|
||||
tool_tracing_callback,
|
||||
nullptr),
|
||||
"rocdecode api callback tracing service configure");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_tracing_service(ompt_callback_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_OMPT,
|
||||
@@ -1408,6 +1464,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
&rccl_api_buffered_buffer),
|
||||
"buffer creation");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(rocdecode_api_buffered_ctx,
|
||||
buffer_size,
|
||||
watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
tool_tracing_buffered,
|
||||
tool_data,
|
||||
&rocdecode_api_buffer),
|
||||
"buffer creation");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(ompt_buffered_ctx,
|
||||
buffer_size,
|
||||
watermark,
|
||||
@@ -1532,6 +1597,14 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
rccl_api_buffered_buffer),
|
||||
"buffer tracing service for rccl api configure");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_buffer_tracing_service(rocdecode_api_buffered_ctx,
|
||||
ROCPROFILER_BUFFER_TRACING_ROCDECODE_API,
|
||||
nullptr,
|
||||
0,
|
||||
rocdecode_api_buffer),
|
||||
"buffer tracing service for rocdecode api configure");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_buffer_tracing_service(
|
||||
ompt_buffered_ctx, ROCPROFILER_BUFFER_TRACING_OMPT, nullptr, 0, ompt_buffered_buffer),
|
||||
@@ -1701,7 +1774,8 @@ tool_fini(void* tool_data)
|
||||
<< ", rccl_api_bf_records=" << rccl_api_bf_records.size()
|
||||
<< ", ompt_bf_records=" << ompt_bf_records.size()
|
||||
<< ", counter_collection_value_records=" << counter_collection_bf_records.size()
|
||||
<< "...\n"
|
||||
<< ", rocdecode_api_callback_records=" << rocdecode_api_cb_records.size()
|
||||
<< ", rocdecode_api_bf_records=" << rocdecode_api_bf_records.size() << "...\n"
|
||||
<< std::flush;
|
||||
|
||||
auto* _call_stack = static_cast<call_stack_t*>(tool_data);
|
||||
@@ -1797,6 +1871,7 @@ write_json(call_stack_t* _call_stack)
|
||||
json_ar(cereal::make_nvp("kernel_dispatch", kernel_dispatch_cb_records));
|
||||
json_ar(cereal::make_nvp("memory_copies", memory_copy_cb_records));
|
||||
json_ar(cereal::make_nvp("memory_allocations", memory_allocation_cb_records));
|
||||
json_ar(cereal::make_nvp("rocdecode_api_traces", rocdecode_api_cb_records));
|
||||
} catch(std::exception& e)
|
||||
{
|
||||
std::cerr << "[" << getpid() << "][" << __FUNCTION__
|
||||
@@ -1823,6 +1898,7 @@ write_json(call_stack_t* _call_stack)
|
||||
json_ar(cereal::make_nvp("ompt_traces", ompt_bf_records));
|
||||
json_ar(cereal::make_nvp("retired_correlation_ids", corr_id_retire_records));
|
||||
json_ar(cereal::make_nvp("counter_collection", counter_collection_bf_records));
|
||||
json_ar(cereal::make_nvp("rocdecode_api_traces", rocdecode_api_bf_records));
|
||||
} catch(std::exception& e)
|
||||
{
|
||||
std::cerr << "[" << getpid() << "][" << __FUNCTION__
|
||||
@@ -1894,6 +1970,8 @@ write_perfetto()
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto itr : ompt_bf_records)
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto itr : rocdecode_api_bf_records)
|
||||
tids.emplace(itr.thread_id);
|
||||
|
||||
for(auto itr : memory_copy_bf_records)
|
||||
{
|
||||
@@ -2147,6 +2225,47 @@ write_perfetto()
|
||||
itr.end_timestamp);
|
||||
}
|
||||
|
||||
for(auto itr : rocdecode_api_bf_records)
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
auto _args = callback_arg_array_t{};
|
||||
auto ritr = std::find_if(
|
||||
rocdecode_api_cb_records.begin(),
|
||||
rocdecode_api_cb_records.end(),
|
||||
[&itr](const auto& citr) {
|
||||
return (citr.record.correlation_id.internal == itr.correlation_id.internal &&
|
||||
!citr.args.empty());
|
||||
});
|
||||
if(ritr != rocdecode_api_cb_records.end()) _args = ritr->args;
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::rocdecode_api>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal,
|
||||
[&](::perfetto::EventContext ctx) {
|
||||
for(const auto& aitr : _args)
|
||||
sdk::add_perfetto_annotation(ctx, aitr.first, aitr.second);
|
||||
});
|
||||
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::rocdecode_api>::name,
|
||||
track,
|
||||
itr.end_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp);
|
||||
}
|
||||
|
||||
for(auto itr : ompt_bf_records)
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
|
||||
Referens i nytt ärende
Block a user