HIP Streams to Queues Translation (#235)
* rocprofiler_stream_id_t: opaque handle for a stream
- e.g. HIP stream
- the same HIP stream may map to different HSA queues at different points in the application
- added to:
- rocprofiler_buffer_tracing_hip_api_record_t
- rocprofiler_buffer_tracing_memory_copy_record_t
- rocprofiler_callback_tracing_hip_api_data_t
- rocprofiler_callback_tracing_memory_copy_data_t
---------
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
Co-authored-by: Mark Meserve <mark.meserve@amd.com>
Co-authored-by: Elwazir, Ammar <Ammar.Elwazir@amd.com>
Co-authored-by: Ammar ELWazir <aelwazir@amd.com>
Co-authored-by: Jakaraddi, Manjunath <Manjunath.Jakaraddi@amd.com>
Co-authored-by: Bhardwaj, Gopesh <Gopesh.Bhardwaj@amd.com>
Co-authored-by: Nagaraj, Sriraksha <Sriraksha.Nagaraj@amd.com>
Co-authored-by: U, Srihari <Srihari.U@amd.com>
Co-authored-by: Madsen, Jonathan <Jonathan.Madsen@amd.com>
Co-authored-by: Welton, Benjamin <Benjamin.Welton@amd.com>
Co-authored-by: Benjamin Welton <ben@amd.com>
Co-authored-by: Indic, Vladimir <Vladimir.Indic@amd.com>
Co-authored-by: Benjamin Welton <bewelton@amd.com>
[ROCm/rocprofiler-sdk commit: ccd1e54293]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
c08db2daa1
Коммит
7aeaffd871
@@ -542,6 +542,12 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
|
||||
help="List available PC sampling configurations and metrics for counter collection. Backed by a valid YAML file. In earlier rocprof versions, this was known as --list-basic, --list-derived and --list-counters",
|
||||
)
|
||||
|
||||
add_parser_bool_argument(
|
||||
display_options,
|
||||
"--group-by-queue",
|
||||
help="For displaying the HIP streams that kernels and memory copy operations are submitted to rather than HSA queues.",
|
||||
)
|
||||
|
||||
advanced_options = parser.add_argument_group("Advanced options")
|
||||
|
||||
advanced_options.add_argument(
|
||||
@@ -1043,6 +1049,7 @@ def run(app_args, args, **kwargs):
|
||||
["memory_copy_trace", "MEMORY_COPY_TRACE"],
|
||||
["memory_allocation_trace", "MEMORY_ALLOCATION_TRACE"],
|
||||
["scratch_memory_trace", "SCRATCH_MEMORY_TRACE"],
|
||||
["group_by_queue", "GROUP_BY_QUEUE"],
|
||||
]
|
||||
).items():
|
||||
val = getattr(args, f"{opt}")
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
"Kind","Agent_Id","Queue_Id","Thread_Id","Dispatch_Id","Kernel_Id","Kernel_Name","Correlation_Id","Start_Timestamp","End_Timestamp","Private_Segment_Size","Group_Segment_Size","Workgroup_Size_X","Workgroup_Size_Y","Workgroup_Size_Z","Grid_Size_X","Grid_Size_Y","Grid_Size_Z"
|
||||
"KERNEL_DISPATCH",1,1,69,1,16,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1451,8819330200067564,8819330200116308,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",1,2,69,5,16,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1484,8819330200118678,8819330200219573,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",1,1,69,2,19,"subtract_kernel(float*, float const*, float const*, int, int)",1459,8819330200120456,8819330200223721,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",1,3,69,9,16,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1517,8819330200152902,8819330200283428,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",1,4,69,13,16,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1550,8819330200187127,8819330200320468,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",1,2,69,6,19,"subtract_kernel(float*, float const*, float const*, int, int)",1492,8819330200225499,8819330200364618,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",1,1,69,3,18,"multiply_kernel(float*, float const*, float const*, int, int)",1467,8819330200229796,8819330200369359,0,0,64,1,1,1024,1024,1
|
||||
|
||||
"Kind","Agent_Id","Queue_Id","Stream_Id","Thread_Id","Dispatch_Id","Kernel_Id","Kernel_Name","Correlation_Id","Start_Timestamp","End_Timestamp","Private_Segment_Size","Group_Segment_Size","Workgroup_Size_X","Workgroup_Size_Y","Workgroup_Size_Z","Grid_Size_X","Grid_Size_Y","Grid_Size_Z"
|
||||
"KERNEL_DISPATCH",2,1,1,21228,1,11,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1937,2414192765353337,2414192765369494,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",2,1,1,21228,2,14,"subtract_kernel(float*, float const*, float const*, int, int)",1945,2414192765424862,2414192765435326,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",2,1,1,21228,3,13,"multiply_kernel(float*, float const*, float const*, int, int)",1953,2414192765487486,2414192765497669,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",2,1,1,21228,4,12,"divide_kernel(float*, float const*, float const*, int, int)",1961,2414192765545619,2414192765555722,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",2,2,2,21228,5,11,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1969,2414192765608844,2414192765621674,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",2,2,2,21228,6,14,"subtract_kernel(float*, float const*, float const*, int, int)",1977,2414192765658519,2414192765669424,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",2,2,2,21228,7,13,"multiply_kernel(float*, float const*, float const*, int, int)",1985,2414192765715650,2414192765726795,0,0,64,1,1,1024,1024,1
|
||||
"KERNEL_DISPATCH",2,2,2,21228,8,12,"divide_kernel(float*, float const*, float const*, int, int)",1993,2414192765773422,2414192765784969,0,0,64,1,1,1024,1024,1
|
||||
|
||||
|
@@ -1,3 +1,5 @@
|
||||
"Kind","Direction","Source_Agent_Id","Destination_Agent_Id","Correlation_Id","Start_Timestamp","End_Timestamp"
|
||||
"MEMORY_COPY","MEMORY_COPY_HOST_TO_DEVICE",0,1,0,14955949675563,14955950239443
|
||||
"MEMORY_COPY","MEMORY_COPY_DEVICE_TO_HOST",1,0,0,14955952733485,14955953315285
|
||||
"Kind","Direction","Stream_Id","Source_Agent_Id","Destination_Agent_Id","Correlation_Id","Start_Timestamp","End_Timestamp"
|
||||
"MEMORY_COPY","MEMORY_COPY_HOST_TO_DEVICE",1,0,2,952,2414192684609085,2414192684710679
|
||||
"MEMORY_COPY","MEMORY_COPY_HOST_TO_DEVICE",1,0,2,960,2414192684873841,2414192684973470
|
||||
"MEMORY_COPY","MEMORY_COPY_HOST_TO_DEVICE",2,0,2,1066,2414192706436949,2414192706538622
|
||||
"MEMORY_COPY","MEMORY_COPY_HOST_TO_DEVICE",2,0,2,1074,2414192706592442,2414192706692312
|
||||
|
||||
|
@@ -354,7 +354,7 @@ Here are the contents of ``kernel_trace.csv`` file:
|
||||
|
||||
.. csv-table:: Kernel trace
|
||||
:file: /data/kernel_trace.csv
|
||||
:widths: 10,10,10,10,10,10,10,10,20,20,10,10,10,10,10,10,10,10
|
||||
:widths: 10,10,10,10,10,10,10,10,10,20,20,10,10,10,10,10,10,10,10
|
||||
:header-rows: 1
|
||||
|
||||
For the description of the fields in the output file, see :ref:`output-file-fields`.
|
||||
@@ -378,7 +378,7 @@ Here are the contents of ``memory_copy_trace.csv`` file:
|
||||
|
||||
.. csv-table:: Memory copy trace
|
||||
:file: /data/memory_copy_trace.csv
|
||||
:widths: 10,10,10,10,10,20,20
|
||||
:widths: 10,10,10,10,10,10,20,20
|
||||
:header-rows: 1
|
||||
|
||||
For the description of the fields in the output file, see :ref:`output-file-fields`.
|
||||
@@ -1086,6 +1086,9 @@ The following table lists the various fields or the columns in the output CSV fi
|
||||
* - Queue_Id
|
||||
- ROCm queue unique identifier to which the kernel was submitted.
|
||||
|
||||
* - Stream_Id
|
||||
- Identifies HIP stream ID to which kernel or memory copy operation was submitted. Defaults to 0 if the hip-stream-display option is not enabled
|
||||
|
||||
* - Private_Segment_Size
|
||||
- The amount of memory required in bytes for the combined private, spill, and arg segments for a work item.
|
||||
|
||||
|
||||
@@ -78,7 +78,12 @@ typedef struct
|
||||
{
|
||||
uint64_t size; ///< size of this struct
|
||||
rocprofiler_hip_api_args_t args;
|
||||
rocprofiler_hip_api_retval_t retval;
|
||||
rocprofiler_hip_api_retval_t retval; ///< return value of function call
|
||||
|
||||
/// @var args
|
||||
/// @brief Arguments of the function call. @see
|
||||
/// ::rocprofiler_iterate_callback_tracing_kind_operation_args for generic
|
||||
/// access/stringification of the arguments.
|
||||
} rocprofiler_callback_tracing_hip_api_data_t;
|
||||
|
||||
/**
|
||||
@@ -329,6 +334,15 @@ typedef struct rocprofiler_callback_tracing_runtime_initialization_data_t
|
||||
/// Version number is encoded as: (10000 * MAJOR) + (100 * MINOR) + PATCH
|
||||
} rocprofiler_callback_tracing_runtime_initialization_data_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Stream Handle Callback Data.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint64_t size; ///< size of this struct
|
||||
rocprofiler_stream_id_t stream_id; ///< HIP stream ID
|
||||
} rocprofiler_callback_tracing_stream_handle_data_t;
|
||||
|
||||
/**
|
||||
* @brief API Tracing callback function. This function is invoked twice per API function: once
|
||||
* before the function is invoked and once after the function is invoked. The external correlation
|
||||
|
||||
@@ -60,6 +60,7 @@ ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_stream_id_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_buffer_id_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_counter_id_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_profile_config_id_t)
|
||||
|
||||
@@ -97,6 +97,7 @@ ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_stream_id_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_buffer_id_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_counter_id_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_profile_config_id_t)
|
||||
@@ -114,6 +115,7 @@ ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_stream_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_buffer_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_counter_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_profile_config_id_t)
|
||||
@@ -141,6 +143,7 @@ ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_stream_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_buffer_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_counter_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_profile_config_id_t)
|
||||
@@ -158,6 +161,7 @@ ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_stream_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_buffer_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_counter_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_profile_config_id_t)
|
||||
@@ -190,6 +194,7 @@ ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_address_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_agent_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_queue_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_stream_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_buffer_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_counter_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_profile_config_id_t)
|
||||
|
||||
@@ -114,6 +114,13 @@ save(ArchiveT& ar, rocprofiler_queue_id_t data)
|
||||
ROCP_SDK_SAVE_DATA_FIELD(handle);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_stream_id_t data)
|
||||
{
|
||||
ROCP_SDK_SAVE_DATA_FIELD(handle);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_counter_id_t data)
|
||||
|
||||
@@ -178,6 +178,8 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
///< library has been initialized
|
||||
ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API, ///< rocDecode API Tracing
|
||||
ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API, ///< rocJPEG API Tracing
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API, ///< @see
|
||||
///< ::rocprofiler_hip_stream_operation_t
|
||||
ROCPROFILER_CALLBACK_TRACING_LAST,
|
||||
} rocprofiler_callback_tracing_kind_t;
|
||||
|
||||
@@ -209,8 +211,9 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
ROCPROFILER_BUFFER_TRACING_RUNTIME_INITIALIZATION, ///< Record indicating a runtime library has
|
||||
///< been initialized. @see
|
||||
///< ::rocprofiler_runtime_initialization_operation_t
|
||||
ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, ///< rocDecode tracing
|
||||
ROCPROFILER_BUFFER_TRACING_ROCJPEG_API, ///< rocJPEG tracing
|
||||
ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, ///< rocDecode tracing
|
||||
ROCPROFILER_BUFFER_TRACING_ROCJPEG_API, ///< rocJPEG tracing
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API, ///< Display HIP Stream
|
||||
ROCPROFILER_BUFFER_TRACING_LAST,
|
||||
} rocprofiler_buffer_tracing_kind_t;
|
||||
|
||||
@@ -226,6 +229,23 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
ROCPROFILER_CODE_OBJECT_LAST,
|
||||
} rocprofiler_code_object_operation_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Stream Handle Operations.
|
||||
*/
|
||||
typedef enum // NOLINT(performance-enum-size)
|
||||
{
|
||||
ROCPROFILER_HIP_STREAM_NONE = 0, ///< Unknown stream handle operation
|
||||
ROCPROFILER_HIP_STREAM_CREATE, ///< A stream handle is created
|
||||
ROCPROFILER_HIP_STREAM_DESTROY, ///< A stream handle is destroyed
|
||||
ROCPROFILER_HIP_STREAM_SET,
|
||||
ROCPROFILER_HIP_STREAM_LAST,
|
||||
|
||||
/// @var ROCPROFILER_HIP_STREAM_SET
|
||||
/// @brief Invokes callbacks before and after a HIP API, kernel dispatch, or memory copy
|
||||
/// operation that has a stream handle associated with it. HIP API calls will always have a
|
||||
/// stream, but kernel dispatches and memory copy operations may or may not.
|
||||
} rocprofiler_hip_stream_operation_t;
|
||||
|
||||
/**
|
||||
* @brief Memory Copy Operations.
|
||||
*/
|
||||
@@ -556,7 +576,7 @@ typedef union rocprofiler_uuid_t
|
||||
/**
|
||||
* @brief Context ID.
|
||||
*/
|
||||
typedef struct
|
||||
typedef struct rocprofiler_context_id_t
|
||||
{
|
||||
uint64_t handle;
|
||||
} rocprofiler_context_id_t;
|
||||
@@ -564,15 +584,23 @@ typedef struct
|
||||
/**
|
||||
* @brief Queue ID.
|
||||
*/
|
||||
typedef struct
|
||||
typedef struct rocprofiler_queue_id_t
|
||||
{
|
||||
uint64_t handle;
|
||||
} rocprofiler_queue_id_t;
|
||||
|
||||
/**
|
||||
* @brief Stream ID.
|
||||
*/
|
||||
typedef struct rocprofiler_stream_id_t
|
||||
{
|
||||
uint64_t handle;
|
||||
} rocprofiler_stream_id_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Record Correlation ID.
|
||||
*/
|
||||
typedef struct
|
||||
typedef struct rocprofiler_correlation_id_t
|
||||
{
|
||||
uint64_t internal;
|
||||
rocprofiler_user_data_t external;
|
||||
@@ -587,7 +615,7 @@ typedef struct
|
||||
* @struct rocprofiler_buffer_id_t
|
||||
* @brief Buffer ID.
|
||||
*/
|
||||
typedef struct
|
||||
typedef struct rocprofiler_buffer_id_t
|
||||
{
|
||||
uint64_t handle;
|
||||
} rocprofiler_buffer_id_t;
|
||||
@@ -595,7 +623,7 @@ typedef struct
|
||||
/**
|
||||
* @brief Agent Identifier
|
||||
*/
|
||||
typedef struct
|
||||
typedef struct rocprofiler_agent_id_t
|
||||
{
|
||||
uint64_t handle;
|
||||
} rocprofiler_agent_id_t;
|
||||
@@ -603,7 +631,7 @@ typedef struct
|
||||
/**
|
||||
* @brief Counter ID.
|
||||
*/
|
||||
typedef struct
|
||||
typedef struct rocprofiler_counter_id_t
|
||||
{
|
||||
uint64_t handle;
|
||||
} rocprofiler_counter_id_t;
|
||||
@@ -612,7 +640,7 @@ typedef struct
|
||||
* @brief Profile Configurations
|
||||
* @see rocprofiler_create_profile_config for how to create.
|
||||
*/
|
||||
typedef struct
|
||||
typedef struct rocprofiler_profile_config_id_t
|
||||
{
|
||||
uint64_t handle; // Opaque handle
|
||||
} rocprofiler_profile_config_id_t;
|
||||
|
||||
@@ -52,3 +52,5 @@ target_link_libraries(
|
||||
|
||||
set_target_properties(rocprofiler-sdk-common-library PROPERTIES OUTPUT_NAME
|
||||
rocprofiler-sdk-common)
|
||||
|
||||
add_subdirectory(details)
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
#
|
||||
# add details sources and headers to common library target
|
||||
#
|
||||
set(details_headers mpl.hpp)
|
||||
set(details_sources)
|
||||
|
||||
target_sources(rocprofiler-sdk-common-library PRIVATE ${details_sources}
|
||||
${details_headers})
|
||||
@@ -0,0 +1,211 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
namespace mpl
|
||||
{
|
||||
namespace impl
|
||||
{
|
||||
template <typename... Tp>
|
||||
struct type_list
|
||||
{
|
||||
static constexpr auto size() { return sizeof...(Tp); }
|
||||
};
|
||||
|
||||
template <typename InTuple, typename OutTuple>
|
||||
struct reverse;
|
||||
|
||||
template <template <typename...> class InTuple,
|
||||
typename InT,
|
||||
typename... InTail,
|
||||
template <typename...>
|
||||
class OutTuple,
|
||||
typename... OutTail>
|
||||
struct reverse<InTuple<InT, InTail...>, OutTuple<OutTail...>>
|
||||
: reverse<InTuple<InTail...>, OutTuple<InT, OutTail...>>
|
||||
{};
|
||||
|
||||
template <template <typename...> class InTuple,
|
||||
template <typename...>
|
||||
class OutTuple,
|
||||
typename... OutTail>
|
||||
struct reverse<InTuple<>, OutTuple<OutTail...>>
|
||||
{
|
||||
using type = OutTuple<OutTail...>;
|
||||
};
|
||||
|
||||
template <template <typename...> class InTuple, typename... InTail>
|
||||
struct reverse<InTuple<InTail...>, void> : reverse<InTuple<InTail...>, InTuple<>>
|
||||
{};
|
||||
|
||||
template <typename T>
|
||||
struct function_traits;
|
||||
|
||||
template <typename T>
|
||||
struct function_traits<T&> : function_traits<T>
|
||||
{};
|
||||
|
||||
template <typename R, typename... Args>
|
||||
struct function_traits<std::function<R(Args...)>>
|
||||
{
|
||||
static constexpr bool is_memfun = false;
|
||||
static constexpr bool is_const = false;
|
||||
static constexpr size_t nargs = sizeof...(Args);
|
||||
using result_type = R;
|
||||
using args_type = type_list<Args...>;
|
||||
using call_type = args_type;
|
||||
};
|
||||
|
||||
template <typename R, typename... Args>
|
||||
struct function_traits<R (*)(Args...)>
|
||||
{
|
||||
static constexpr bool is_memfun = false;
|
||||
static constexpr bool is_const = false;
|
||||
static constexpr size_t nargs = sizeof...(Args);
|
||||
using result_type = R;
|
||||
using args_type = type_list<Args...>;
|
||||
using call_type = args_type;
|
||||
};
|
||||
|
||||
template <typename R, typename... Args>
|
||||
struct function_traits<R(Args...)>
|
||||
{
|
||||
static constexpr bool is_memfun = false;
|
||||
static constexpr bool is_const = false;
|
||||
static constexpr size_t nargs = sizeof...(Args);
|
||||
using result_type = R;
|
||||
using args_type = type_list<Args...>;
|
||||
using call_type = args_type;
|
||||
};
|
||||
|
||||
// member function pointer
|
||||
template <typename C, typename R, typename... Args>
|
||||
struct function_traits<R (C::*)(Args...)>
|
||||
{
|
||||
static constexpr bool is_memfun = true;
|
||||
static constexpr bool is_const = false;
|
||||
static constexpr size_t nargs = sizeof...(Args);
|
||||
using result_type = R;
|
||||
using args_type = type_list<Args...>;
|
||||
using call_type = type_list<C&, Args...>;
|
||||
};
|
||||
|
||||
// const member function pointer
|
||||
template <typename C, typename R, typename... Args>
|
||||
struct function_traits<R (C::*)(Args...) const>
|
||||
{
|
||||
static constexpr bool is_memfun = true;
|
||||
static constexpr bool is_const = true;
|
||||
static constexpr size_t nargs = sizeof...(Args);
|
||||
using result_type = R;
|
||||
using args_type = type_list<Args...>;
|
||||
using call_type = type_list<C&, Args...>;
|
||||
};
|
||||
|
||||
// member object pointer
|
||||
template <typename C, typename R>
|
||||
struct function_traits<R(C::*)>
|
||||
{
|
||||
static constexpr bool is_memfun = true;
|
||||
static constexpr bool is_const = false;
|
||||
static const size_t nargs = 0;
|
||||
using result_type = R;
|
||||
using args_type = type_list<>;
|
||||
using call_type = type_list<C&>;
|
||||
};
|
||||
|
||||
#if __cplusplus >= 201703L
|
||||
|
||||
template <typename R, typename... Args>
|
||||
struct function_traits<std::function<R(Args...) noexcept>>
|
||||
{
|
||||
static constexpr bool is_memfun = false;
|
||||
static constexpr bool is_const = false;
|
||||
static constexpr size_t nargs = sizeof...(Args);
|
||||
using result_type = R;
|
||||
using args_type = type_list<Args...>;
|
||||
using call_type = args_type;
|
||||
};
|
||||
|
||||
template <typename R, typename... Args>
|
||||
struct function_traits<R (*)(Args...) noexcept>
|
||||
{
|
||||
static constexpr bool is_memfun = false;
|
||||
static constexpr bool is_const = false;
|
||||
static constexpr size_t nargs = sizeof...(Args);
|
||||
using result_type = R;
|
||||
using args_type = type_list<Args...>;
|
||||
using call_type = args_type;
|
||||
};
|
||||
|
||||
template <typename R, typename... Args>
|
||||
struct function_traits<R(Args...) noexcept>
|
||||
{
|
||||
static constexpr bool is_memfun = false;
|
||||
static constexpr bool is_const = false;
|
||||
static constexpr size_t nargs = sizeof...(Args);
|
||||
using result_type = R;
|
||||
using args_type = type_list<Args...>;
|
||||
using call_type = args_type;
|
||||
};
|
||||
|
||||
// member function pointer
|
||||
template <typename C, typename R, typename... Args>
|
||||
struct function_traits<R (C::*)(Args...) noexcept>
|
||||
{
|
||||
static constexpr bool is_memfun = true;
|
||||
static constexpr bool is_const = false;
|
||||
static constexpr size_t nargs = sizeof...(Args);
|
||||
using result_type = R;
|
||||
using args_type = type_list<Args...>;
|
||||
using call_type = type_list<C&, Args...>;
|
||||
};
|
||||
|
||||
// const member function pointer
|
||||
template <typename C, typename R, typename... Args>
|
||||
struct function_traits<R (C::*)(Args...) const noexcept>
|
||||
{
|
||||
static constexpr bool is_memfun = true;
|
||||
static constexpr bool is_const = true;
|
||||
static constexpr size_t nargs = sizeof...(Args);
|
||||
using result_type = R;
|
||||
using args_type = type_list<Args...>;
|
||||
using call_type = type_list<C&, Args...>;
|
||||
};
|
||||
|
||||
#endif
|
||||
} // namespace impl
|
||||
} // namespace mpl
|
||||
} // namespace common
|
||||
} // namespace rocprofiler
|
||||
@@ -22,6 +22,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/details/mpl.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
@@ -162,6 +164,15 @@ struct assert_false
|
||||
{
|
||||
static constexpr auto value = false;
|
||||
};
|
||||
|
||||
template <typename InTuple>
|
||||
using reverse = typename impl::reverse<InTuple, void>::type;
|
||||
|
||||
template <typename Tp>
|
||||
using function_traits = impl::function_traits<Tp>;
|
||||
|
||||
template <typename Tp>
|
||||
using function_args_t = typename impl::function_traits<Tp>::args_type;
|
||||
} // namespace mpl
|
||||
} // namespace common
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -25,6 +25,7 @@ set(TOOL_OUTPUT_HEADERS
|
||||
output_key.hpp
|
||||
output_stream.hpp
|
||||
statistics.hpp
|
||||
stream_info.hpp
|
||||
timestamps.hpp
|
||||
tmp_file_buffer.hpp
|
||||
tmp_file.hpp)
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "generator.hpp"
|
||||
#include "pc_sample_transform.hpp"
|
||||
#include "statistics.hpp"
|
||||
#include "stream_info.hpp"
|
||||
#include "tmp_file_buffer.hpp"
|
||||
|
||||
#include "lib/common/container/ring_buffer.hpp"
|
||||
@@ -140,11 +141,6 @@ using hip_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_hip_api_record_t, domain_type::HIP>;
|
||||
using hsa_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_hsa_api_record_t, domain_type::HSA>;
|
||||
using kernel_dispatch_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_kernel_dispatch_record_t,
|
||||
domain_type::KERNEL_DISPATCH>;
|
||||
using memory_copy_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_memory_copy_record_t, domain_type::MEMORY_COPY>;
|
||||
using marker_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_marker_api_record_t, domain_type::MARKER>;
|
||||
using rccl_buffered_output_t =
|
||||
@@ -167,5 +163,10 @@ using rocdecode_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_rocdecode_api_record_t, domain_type::ROCDECODE>;
|
||||
using rocjpeg_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_rocjpeg_api_record_t, domain_type::ROCJPEG>;
|
||||
using kernel_dispatch_buffered_output_with_stream_t =
|
||||
buffered_output<tool_buffer_tracing_kernel_dispatch_with_stream_record_t,
|
||||
domain_type::KERNEL_DISPATCH>;
|
||||
using memory_copy_buffered_output_with_stream_t =
|
||||
buffered_output<tool_buffer_tracing_memory_copy_with_stream_record_t, domain_type::MEMORY_COPY>;
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -93,9 +93,10 @@ struct tool_counter_record_t
|
||||
{
|
||||
using container_type = std::vector<tool_counter_value_t>;
|
||||
|
||||
uint64_t thread_id = 0;
|
||||
rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
|
||||
serialized_counter_record_t record = {};
|
||||
uint64_t thread_id = 0;
|
||||
rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
|
||||
serialized_counter_record_t record = {};
|
||||
uint64_t kernel_rename_val = {};
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
@@ -106,6 +107,7 @@ struct tool_counter_record_t
|
||||
ar(cereal::make_nvp("thread_id", thread_id));
|
||||
ar(cereal::make_nvp("dispatch_data", dispatch_data));
|
||||
ar(cereal::make_nvp("records", tmp));
|
||||
ar(cereal::make_nvp("kernel_rename_val", kernel_rename_val));
|
||||
}
|
||||
|
||||
container_type read() const;
|
||||
|
||||
@@ -99,18 +99,18 @@ struct csv_encoder
|
||||
}
|
||||
};
|
||||
|
||||
using api_csv_encoder = csv_encoder<7>;
|
||||
using agent_info_csv_encoder = csv_encoder<53>;
|
||||
using kernel_trace_csv_encoder = csv_encoder<18>;
|
||||
using counter_collection_csv_encoder = csv_encoder<19>;
|
||||
using memory_copy_csv_encoder = csv_encoder<7>;
|
||||
using memory_allocation_csv_encoder = csv_encoder<8>;
|
||||
using marker_csv_encoder = csv_encoder<7>;
|
||||
using list_basic_metrics_csv_encoder = csv_encoder<5>;
|
||||
using list_derived_metrics_csv_encoder = csv_encoder<5>;
|
||||
using scratch_memory_encoder = csv_encoder<8>;
|
||||
using stats_csv_encoder = csv_encoder<8>;
|
||||
using pc_sampling_host_trap_csv_encoder = csv_encoder<6>;
|
||||
using api_csv_encoder = csv_encoder<7>;
|
||||
using agent_info_csv_encoder = csv_encoder<53>;
|
||||
using counter_collection_csv_encoder = csv_encoder<19>;
|
||||
using memory_allocation_csv_encoder = csv_encoder<8>;
|
||||
using marker_csv_encoder = csv_encoder<7>;
|
||||
using list_basic_metrics_csv_encoder = csv_encoder<5>;
|
||||
using list_derived_metrics_csv_encoder = csv_encoder<5>;
|
||||
using scratch_memory_encoder = csv_encoder<8>;
|
||||
using stats_csv_encoder = csv_encoder<8>;
|
||||
using pc_sampling_host_trap_csv_encoder = csv_encoder<6>;
|
||||
using kernel_trace_with_stream_csv_encoder = csv_encoder<19>;
|
||||
using memory_copy_with_stream_csv_encoder = csv_encoder<8>;
|
||||
} // namespace csv
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -249,22 +249,22 @@ generate_csv(const output_config& cfg,
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(cfg.stats && stats)
|
||||
write_stats(get_stats_output_file(cfg, domain_type::KERNEL_DISPATCH), stats.entries);
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::KERNEL_DISPATCH,
|
||||
tool::csv::kernel_trace_csv_encoder{},
|
||||
tool::csv::kernel_trace_with_stream_csv_encoder{},
|
||||
{"Kind",
|
||||
"Agent_Id",
|
||||
"Queue_Id",
|
||||
"Stream_Id",
|
||||
"Thread_Id",
|
||||
"Dispatch_Id",
|
||||
"Kernel_Id",
|
||||
@@ -287,14 +287,14 @@ generate_csv(const output_config&
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto kernel_name = tool_metadata.get_kernel_name(record.dispatch_info.kernel_id,
|
||||
record.correlation_id.external.value);
|
||||
|
||||
rocprofiler::tool::csv::kernel_trace_csv_encoder::write_row(
|
||||
record.kernel_rename_val);
|
||||
rocprofiler::tool::csv::kernel_trace_with_stream_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_metadata.get_kind_name(record.kind),
|
||||
tool_metadata.get_agent_index(record.dispatch_info.agent_id, cfg.agent_index_value)
|
||||
.as_string(),
|
||||
record.dispatch_info.queue_id.handle,
|
||||
record.stream_id.handle,
|
||||
record.thread_id,
|
||||
record.dispatch_info.dispatch_id,
|
||||
record.dispatch_info.kernel_id,
|
||||
@@ -310,7 +310,6 @@ generate_csv(const output_config&
|
||||
record.dispatch_info.grid_size.x,
|
||||
record.dispatch_info.grid_size.y,
|
||||
record.dispatch_info.grid_size.z);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
@@ -400,10 +399,10 @@ generate_csv(const output_config& cfg,
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
@@ -412,9 +411,10 @@ generate_csv(const output_config& c
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::MEMORY_COPY,
|
||||
tool::csv::memory_copy_csv_encoder{},
|
||||
tool::csv::memory_copy_with_stream_csv_encoder{},
|
||||
{"Kind",
|
||||
"Direction",
|
||||
"Stream_Id",
|
||||
"Source_Agent_Id",
|
||||
"Destination_Agent_Id",
|
||||
"Correlation_Id",
|
||||
@@ -427,10 +427,11 @@ generate_csv(const output_config& c
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
rocprofiler::tool::csv::memory_copy_csv_encoder::write_row(
|
||||
rocprofiler::tool::csv::memory_copy_with_stream_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_metadata.get_kind_name(record.kind),
|
||||
api_name,
|
||||
record.stream_id.handle,
|
||||
tool_metadata.get_agent_index(record.src_agent_id, cfg.agent_index_value)
|
||||
.as_string(),
|
||||
tool_metadata.get_agent_index(record.dst_agent_id, cfg.agent_index_value)
|
||||
@@ -438,7 +439,6 @@ generate_csv(const output_config& c
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
@@ -626,7 +626,7 @@ generate_csv(const output_config& cfg,
|
||||
record.thread_id,
|
||||
magnitude(record.dispatch_data.dispatch_info.grid_size),
|
||||
record.dispatch_data.dispatch_info.kernel_id,
|
||||
tool_metadata.get_kernel_name(kernel_id, correlation_id.external.value),
|
||||
tool_metadata.get_kernel_name(kernel_id, record.kernel_rename_val),
|
||||
magnitude(record.dispatch_data.dispatch_info.workgroup_size),
|
||||
lds_block_size_v,
|
||||
record.dispatch_data.dispatch_info.private_segment_size,
|
||||
|
||||
@@ -40,10 +40,10 @@ generate_csv(const output_config& cfg,
|
||||
std::vector<agent_info>& data);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
@@ -58,10 +58,10 @@ generate_csv(const output_config& cfg,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
|
||||
@@ -185,11 +185,11 @@ void
|
||||
write_json(json_output& json_ar,
|
||||
const output_config& /*cfg*/,
|
||||
const metadata& /*tool_metadata*/,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t> kernel_dispatch_gen,
|
||||
generator<rocprofiler_buffer_tracing_memory_copy_record_t> memory_copy_gen,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
|
||||
generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t> kernel_dispatch_gen,
|
||||
generator<tool_buffer_tracing_memory_copy_with_stream_record_t> memory_copy_gen,
|
||||
generator<tool_counter_record_t> counter_collection_gen,
|
||||
generator<rocprofiler_buffer_tracing_marker_api_record_t> marker_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
|
||||
|
||||
@@ -81,14 +81,14 @@ void
|
||||
write_json(json_output&, const output_config& cfg, const metadata& tool_metadata, uint64_t pid);
|
||||
|
||||
void
|
||||
write_json(json_output& json_ar,
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t> kernel_dispatch_gen,
|
||||
generator<rocprofiler_buffer_tracing_memory_copy_record_t> memory_copy_gen,
|
||||
write_json(json_output& json_ar,
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
|
||||
generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t> kernel_dispatch_gen,
|
||||
generator<tool_buffer_tracing_memory_copy_with_stream_record_t> memory_copy_gen,
|
||||
generator<tool_counter_record_t> counter_collection_gen,
|
||||
generator<rocprofiler_buffer_tracing_marker_api_record_t> marker_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
|
||||
|
||||
@@ -356,15 +356,15 @@ create_attribute_list()
|
||||
|
||||
void
|
||||
write_otf2(
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
uint64_t pid,
|
||||
const std::vector<agent_info>& agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>* kernel_dispatch_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>* memory_copy_data,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
uint64_t pid,
|
||||
const std::vector<agent_info>& agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>* kernel_dispatch_data,
|
||||
std::deque<tool_buffer_tracing_memory_copy_with_stream_record_t>* memory_copy_data,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* /*scratch_memory_data*/,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data,
|
||||
@@ -676,8 +676,7 @@ write_otf2(
|
||||
const auto* sym = _get_kernel_sym_data(info);
|
||||
CHECK(sym != nullptr);
|
||||
|
||||
auto name =
|
||||
tool_metadata.get_kernel_name(info.kernel_id, itr.correlation_id.external.value);
|
||||
auto name = tool_metadata.get_kernel_name(info.kernel_id, itr.kernel_rename_val);
|
||||
_hash_data.emplace(
|
||||
get_hash_id(name),
|
||||
region_info{std::string{name}, OTF2_REGION_ROLE_FUNCTION, OTF2_PARADIGM_HIP});
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "agent_info.hpp"
|
||||
#include "metadata.hpp"
|
||||
#include "output_config.hpp"
|
||||
#include "stream_info.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
@@ -35,19 +36,19 @@ namespace tool
|
||||
{
|
||||
void
|
||||
write_otf2(
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
uint64_t pid,
|
||||
const std::vector<agent_info>& agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>* kernel_dispatch_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>* memory_copy_data,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* scratch_memory_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rocdecode_api_record_t>* rocdecode_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rocjpeg_api_record_t>* rocjpeg_api_data);
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
uint64_t pid,
|
||||
const std::vector<agent_info>& agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>* kernel_dispatch_data,
|
||||
std::deque<tool_buffer_tracing_memory_copy_with_stream_record_t>* memory_copy_data,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* scratch_memory_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rocdecode_api_record_t>* rocdecode_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rocjpeg_api_record_t>* rocjpeg_api_data);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -65,14 +65,14 @@ get_hash_id(Tp&& _val)
|
||||
|
||||
void
|
||||
write_perfetto(
|
||||
const output_config& ocfg,
|
||||
const metadata& tool_metadata,
|
||||
std::vector<agent_info> agent_data,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& kernel_dispatch_gen,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& memory_copy_gen,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
||||
const output_config& ocfg,
|
||||
const metadata& tool_metadata,
|
||||
std::vector<agent_info> agent_data,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& kernel_dispatch_gen,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& memory_copy_gen,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& /*scratch_memory_gen*/,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_memory_allocation_record_t>& memory_allocation_gen,
|
||||
@@ -139,6 +139,8 @@ write_perfetto(
|
||||
auto agent_thread_ids_alloc = std::unordered_map<rocprofiler_agent_id_t, std::set<uint64_t>>{};
|
||||
auto agent_queue_ids =
|
||||
std::unordered_map<rocprofiler_agent_id_t, std::unordered_set<rocprofiler_queue_id_t>>{};
|
||||
auto agent_stream_ids =
|
||||
std::unordered_map<rocprofiler_agent_id_t, std::unordered_set<rocprofiler_stream_id_t>>{};
|
||||
auto thread_indexes = std::unordered_map<rocprofiler_thread_id_t, uint64_t>{};
|
||||
|
||||
auto thread_tracks = std::unordered_map<rocprofiler_thread_id_t, ::perfetto::Track>{};
|
||||
@@ -151,6 +153,12 @@ write_perfetto(
|
||||
auto agent_queue_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::unordered_map<rocprofiler_queue_id_t, ::perfetto::Track>>{};
|
||||
auto agent_stream_compute_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::unordered_map<rocprofiler_stream_id_t, ::perfetto::Track>>{};
|
||||
auto agent_stream_copy_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::unordered_map<rocprofiler_stream_id_t, ::perfetto::Track>>{};
|
||||
|
||||
auto _get_agent = [&agent_data](rocprofiler_agent_id_t _id) -> const rocprofiler_agent_t* {
|
||||
for(const auto& itr : agent_data)
|
||||
@@ -184,7 +192,11 @@ write_perfetto(
|
||||
for(auto itr : memory_copy_gen.get(ditr))
|
||||
{
|
||||
tids.emplace(itr.thread_id);
|
||||
agent_thread_ids[itr.dst_agent_id].emplace(itr.thread_id);
|
||||
agent_stream_ids[itr.dst_agent_id].emplace(itr.stream_id);
|
||||
if(ocfg.group_by_queue)
|
||||
{
|
||||
agent_thread_ids[itr.dst_agent_id].emplace(itr.thread_id);
|
||||
}
|
||||
}
|
||||
|
||||
for(auto ditr : memory_allocation_gen)
|
||||
@@ -198,7 +210,11 @@ write_perfetto(
|
||||
for(auto itr : kernel_dispatch_gen.get(ditr))
|
||||
{
|
||||
tids.emplace(itr.thread_id);
|
||||
agent_queue_ids[itr.dispatch_info.agent_id].emplace(itr.dispatch_info.queue_id);
|
||||
agent_stream_ids[itr.dispatch_info.agent_id].emplace(itr.stream_id);
|
||||
if(ocfg.group_by_queue)
|
||||
{
|
||||
agent_queue_ids[itr.dispatch_info.agent_id].emplace(itr.dispatch_info.queue_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -276,6 +292,57 @@ write_perfetto(
|
||||
}
|
||||
}
|
||||
|
||||
for(const auto& aitr : agent_stream_ids)
|
||||
{
|
||||
for(auto sitr : aitr.second)
|
||||
{
|
||||
const auto* _agent = _get_agent(aitr.first);
|
||||
const auto stream_id = sitr.handle;
|
||||
|
||||
{
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "COMPUTE AGENT [" << _agent->logical_node_id << "] STREAM [" << stream_id
|
||||
<< "] ";
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << "(CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << "(GPU)";
|
||||
else
|
||||
_namess << "(UNK)";
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
_desc.set_name(_namess.str());
|
||||
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
agent_stream_compute_tracks[aitr.first].emplace(sitr, _track);
|
||||
}
|
||||
|
||||
{
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "COPY to AGENT [" << _agent->logical_node_id << "] STREAM [" << stream_id
|
||||
<< "] ";
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << "(CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << "(GPU)";
|
||||
else
|
||||
_namess << "(UNK)";
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
_desc.set_name(_namess.str());
|
||||
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
agent_stream_copy_tracks[aitr.first].emplace(sitr, _track);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// trace events
|
||||
{
|
||||
auto buffer_names = sdk::get_buffer_tracing_names();
|
||||
@@ -471,13 +538,22 @@ write_perfetto(
|
||||
for(auto ditr : memory_copy_gen)
|
||||
for(auto itr : memory_copy_gen.get(ditr))
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = agent_thread_tracks.at(itr.dst_agent_id).at(itr.thread_id);
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
|
||||
::perfetto::Track* _track = nullptr;
|
||||
if(ocfg.group_by_queue)
|
||||
{
|
||||
_track = &agent_thread_tracks.at(itr.dst_agent_id).at(itr.thread_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
_track = &agent_stream_copy_tracks.at(itr.dst_agent_id).at(itr.stream_id);
|
||||
}
|
||||
|
||||
TRACE_EVENT_BEGIN(
|
||||
sdk::perfetto_category<sdk::category::memory_copy>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
*_track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
@@ -503,8 +579,9 @@ write_perfetto(
|
||||
"tid",
|
||||
itr.thread_id);
|
||||
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::memory_copy>::name,
|
||||
track,
|
||||
*_track,
|
||||
itr.end_timestamp);
|
||||
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
for(auto ditr : kernel_dispatch_gen)
|
||||
@@ -516,7 +593,7 @@ write_perfetto(
|
||||
rocprofiler_agent_id_t,
|
||||
std::unordered_map<
|
||||
rocprofiler_queue_id_t,
|
||||
std::vector<rocprofiler_buffer_tracing_kernel_dispatch_record_t*>>>{};
|
||||
std::vector<tool_buffer_tracing_kernel_dispatch_with_stream_record_t*>>>{};
|
||||
for(auto& itr : generator)
|
||||
{
|
||||
const auto& info = itr.dispatch_info;
|
||||
@@ -544,8 +621,18 @@ write_perfetto(
|
||||
|
||||
CHECK(sym != nullptr);
|
||||
|
||||
auto name = std::string_view{sym->kernel_name};
|
||||
auto& track = agent_queue_tracks.at(info.agent_id).at(info.queue_id);
|
||||
auto name = std::string_view{sym->kernel_name};
|
||||
|
||||
::perfetto::Track* _track = nullptr;
|
||||
if(ocfg.group_by_queue)
|
||||
{
|
||||
_track = &agent_queue_tracks.at(info.agent_id).at(info.queue_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
_track =
|
||||
&agent_stream_compute_tracks.at(info.agent_id).at((*it)->stream_id);
|
||||
}
|
||||
|
||||
// Temporary fix until timestamp issues are resolved: Set timestamps to be
|
||||
// halfway between ending timestamp and starting timestamp of overlapping
|
||||
@@ -579,7 +666,7 @@ write_perfetto(
|
||||
TRACE_EVENT_BEGIN(
|
||||
sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
|
||||
::perfetto::StaticString(demangled.at(name).c_str()),
|
||||
track,
|
||||
*_track,
|
||||
current.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(current.correlation_id.internal),
|
||||
"begin_ns",
|
||||
@@ -613,7 +700,7 @@ write_perfetto(
|
||||
info.grid_size.x * info.grid_size.y * info.grid_size.z);
|
||||
TRACE_EVENT_END(
|
||||
sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
|
||||
track,
|
||||
*_track,
|
||||
current.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "generator.hpp"
|
||||
#include "metadata.hpp"
|
||||
#include "output_config.hpp"
|
||||
#include "stream_info.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
@@ -36,16 +37,16 @@ namespace tool
|
||||
{
|
||||
void
|
||||
write_perfetto(
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
std::vector<agent_info> agent_data,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& kernel_dispatch_gen,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& memory_copy_gen,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& scratch_memory_gen,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen,
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
std::vector<agent_info> agent_data,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& kernel_dispatch_gen,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& memory_copy_gen,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& scratch_memory_gen,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_memory_allocation_record_t>& memory_allocation_gen,
|
||||
const generator<rocprofiler_buffer_tracing_rocdecode_api_record_t>& rocdecode_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_rocjpeg_api_record_t>& rocjpeg_api_gen);
|
||||
|
||||
@@ -63,8 +63,8 @@ get_stats(const stats_map_t& data_v)
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data)
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data)
|
||||
{
|
||||
auto kernel_stats = stats_map_t{};
|
||||
for(auto ditr : data)
|
||||
@@ -72,7 +72,7 @@ generate_stats(const output_config& /*cfg*/,
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto kernel_name = tool_metadata.get_kernel_name(record.dispatch_info.kernel_id,
|
||||
record.correlation_id.external.value);
|
||||
record.kernel_rename_val);
|
||||
|
||||
kernel_stats[kernel_name] += (record.end_timestamp - record.start_timestamp);
|
||||
}
|
||||
@@ -119,8 +119,8 @@ generate_stats(const output_config& /*cfg*/,
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data)
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data)
|
||||
{
|
||||
auto memory_copy_stats = stats_map_t{};
|
||||
for(auto ditr : data)
|
||||
|
||||
@@ -25,15 +25,16 @@
|
||||
#include "generator.hpp"
|
||||
#include "metadata.hpp"
|
||||
#include "statistics.hpp"
|
||||
#include "stream_info.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data);
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
@@ -46,9 +47,9 @@ generate_stats(const output_config& cfg
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data);
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
|
||||
@@ -58,12 +58,12 @@ output_config::parse_env()
|
||||
common::get_env("ROCPROF_PERFETTO_SHMEM_SIZE_HINT_KB", perfetto_shmem_size_hint);
|
||||
perfetto_buffer_size = common::get_env("ROCPROF_PERFETTO_BUFFER_SIZE_KB", perfetto_buffer_size);
|
||||
|
||||
output_path = common::get_env("ROCPROF_OUTPUT_PATH", output_path);
|
||||
output_file = common::get_env("ROCPROF_OUTPUT_FILE_NAME", output_file);
|
||||
tmp_directory = common::get_env("ROCPROF_TMPDIR", tmp_directory);
|
||||
kernel_rename = common::get_env("ROCPROF_KERNEL_RENAME", false);
|
||||
|
||||
auto to_upper = [](std::string val) {
|
||||
output_path = common::get_env("ROCPROF_OUTPUT_PATH", output_path);
|
||||
output_file = common::get_env("ROCPROF_OUTPUT_FILE_NAME", output_file);
|
||||
tmp_directory = common::get_env("ROCPROF_TMPDIR", tmp_directory);
|
||||
kernel_rename = common::get_env("ROCPROF_KERNEL_RENAME", false);
|
||||
group_by_queue = common::get_env("ROCPROF_GROUP_BY_QUEUE", false);
|
||||
auto to_upper = [](std::string val) {
|
||||
for(auto& vitr : val)
|
||||
vitr = toupper(vitr);
|
||||
return val;
|
||||
|
||||
@@ -69,6 +69,7 @@ struct output_config
|
||||
bool otf2_output = false;
|
||||
bool summary_output = false;
|
||||
bool kernel_rename = false;
|
||||
bool group_by_queue = false;
|
||||
uint64_t stats_summary_unit_value = 1;
|
||||
size_t perfetto_shmem_size_hint = defaults::perfetto_shmem_size_hint_kb;
|
||||
size_t perfetto_buffer_size = defaults::perfetto_buffer_size_kb;
|
||||
|
||||
@@ -0,0 +1,118 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/logging.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/buffer_tracing.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/cxx/hash.hpp>
|
||||
#include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
#include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
struct tool_buffer_tracing_kernel_dispatch_with_stream_record_t
|
||||
: rocprofiler_buffer_tracing_kernel_dispatch_record_t
|
||||
{
|
||||
using base_type = rocprofiler_buffer_tracing_kernel_dispatch_record_t;
|
||||
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t(
|
||||
const base_type& _base,
|
||||
const rocprofiler_stream_id_t& _stream_id,
|
||||
const uint64_t& _kernel_rename_val)
|
||||
: base_type{_base}
|
||||
, stream_id{_stream_id}
|
||||
, kernel_rename_val{_kernel_rename_val}
|
||||
{}
|
||||
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t();
|
||||
~tool_buffer_tracing_kernel_dispatch_with_stream_record_t() = default;
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t(
|
||||
const tool_buffer_tracing_kernel_dispatch_with_stream_record_t&) = default;
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t(
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t&&) noexcept = default;
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t& operator =(
|
||||
const tool_buffer_tracing_kernel_dispatch_with_stream_record_t&) = default;
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t& operator =(
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t&&) noexcept = default;
|
||||
|
||||
rocprofiler_stream_id_t stream_id = {};
|
||||
uint64_t kernel_rename_val = {};
|
||||
};
|
||||
|
||||
struct tool_buffer_tracing_memory_copy_with_stream_record_t
|
||||
: rocprofiler_buffer_tracing_memory_copy_record_t
|
||||
{
|
||||
using base_type = rocprofiler_buffer_tracing_memory_copy_record_t;
|
||||
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t(const base_type& _base,
|
||||
const rocprofiler_stream_id_t& _stream_id)
|
||||
: base_type{_base}
|
||||
, stream_id{_stream_id}
|
||||
{}
|
||||
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t();
|
||||
~tool_buffer_tracing_memory_copy_with_stream_record_t() = default;
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t(
|
||||
const tool_buffer_tracing_memory_copy_with_stream_record_t&) = default;
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t(
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t&&) noexcept = default;
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t& operator =(
|
||||
const tool_buffer_tracing_memory_copy_with_stream_record_t&) = default;
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t& operator =(
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t&&) noexcept = default;
|
||||
|
||||
rocprofiler_stream_id_t stream_id = {};
|
||||
};
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
namespace cereal
|
||||
{
|
||||
#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD))
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar,
|
||||
const ::rocprofiler::tool::tool_buffer_tracing_kernel_dispatch_with_stream_record_t& data)
|
||||
{
|
||||
cereal::save(ar, static_cast<const rocprofiler_buffer_tracing_kernel_dispatch_record_t&>(data));
|
||||
SAVE_DATA_FIELD(stream_id);
|
||||
SAVE_DATA_FIELD(kernel_rename_val);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar,
|
||||
const ::rocprofiler::tool::tool_buffer_tracing_memory_copy_with_stream_record_t& data)
|
||||
{
|
||||
cereal::save(ar, static_cast<const rocprofiler_buffer_tracing_memory_copy_record_t&>(data));
|
||||
SAVE_DATA_FIELD(stream_id);
|
||||
}
|
||||
|
||||
#undef SAVE_DATA_FIELD
|
||||
} // namespace cereal
|
||||
@@ -4,9 +4,9 @@
|
||||
|
||||
rocprofiler_activate_clang_tidy()
|
||||
|
||||
set(TOOL_HEADERS config.hpp helper.hpp)
|
||||
set(TOOL_HEADERS config.hpp helper.hpp stream_stack.hpp)
|
||||
|
||||
set(TOOL_SOURCES config.cpp main.c tool.cpp)
|
||||
set(TOOL_SOURCES config.cpp main.c tool.cpp stream_stack.cpp)
|
||||
|
||||
add_library(rocprofiler-sdk-tool SHARED)
|
||||
target_sources(rocprofiler-sdk-tool PRIVATE ${TOOL_SOURCES} ${TOOL_HEADERS})
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "stream_stack.hpp"
|
||||
|
||||
#include "lib/common/container/small_vector.hpp"
|
||||
#include "lib/common/static_tl_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/stream.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
namespace stream
|
||||
{
|
||||
namespace
|
||||
{
|
||||
auto*
|
||||
get_stream_stack()
|
||||
{
|
||||
static thread_local auto*& _v =
|
||||
common::static_tl_object<common::container::small_vector<rocprofiler_stream_id_t>>::
|
||||
construct(rocprofiler_stream_id_t{.handle = 0});
|
||||
return _v;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void
|
||||
push_stream_id(rocprofiler_stream_id_t id)
|
||||
{
|
||||
CHECK_NOTNULL(get_stream_stack())->emplace_back(id);
|
||||
}
|
||||
|
||||
void
|
||||
pop_stream_id()
|
||||
{
|
||||
CHECK_NOTNULL(get_stream_stack())->pop_back();
|
||||
}
|
||||
|
||||
rocprofiler_stream_id_t
|
||||
get_stream_id()
|
||||
{
|
||||
return CHECK_NOTNULL(get_stream_stack())->back();
|
||||
}
|
||||
|
||||
bool
|
||||
stream_stack_empty()
|
||||
{
|
||||
return CHECK_NOTNULL(get_stream_stack())->empty();
|
||||
}
|
||||
|
||||
} // namespace stream
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,46 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
namespace stream
|
||||
{
|
||||
rocprofiler_stream_id_t
|
||||
get_stream_id();
|
||||
|
||||
void
|
||||
push_stream_id(rocprofiler_stream_id_t id);
|
||||
|
||||
void
|
||||
pop_stream_id();
|
||||
|
||||
bool
|
||||
stream_stack_empty();
|
||||
} // namespace stream
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -22,11 +22,13 @@
|
||||
|
||||
#include "config.hpp"
|
||||
#include "helper.hpp"
|
||||
#include "stream_stack.hpp"
|
||||
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/scope_destructor.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/common/string_entry.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
@@ -43,6 +45,7 @@
|
||||
#include "lib/output/generateStats.hpp"
|
||||
#include "lib/output/output_stream.hpp"
|
||||
#include "lib/output/statistics.hpp"
|
||||
#include "lib/output/stream_info.hpp"
|
||||
#include "lib/output/tmp_file.hpp"
|
||||
#include "lib/output/tmp_file_buffer.hpp"
|
||||
#include "lib/rocprofiler-sdk-att/att_lib_wrapper.hpp"
|
||||
@@ -209,6 +212,32 @@ thread_local auto thread_dispatch_rename_dtor = common::scope_destructor{[]() {
|
||||
thread_dispatch_rename = nullptr;
|
||||
}};
|
||||
|
||||
// Stores stream_ids and kernel_rename_vals for kernel-rename service and hip stream display service
|
||||
struct kernel_rename_and_stream_display_pair
|
||||
{
|
||||
uint64_t kernel_rename_val{0};
|
||||
rocprofiler_stream_id_t stream_id{.handle = 0};
|
||||
};
|
||||
auto kernel_rename_and_stream_display_pair_dtors =
|
||||
new std::vector<kernel_rename_and_stream_display_pair*>{};
|
||||
|
||||
auto
|
||||
get_kernel_rename_and_stream_display_pair_lock()
|
||||
{
|
||||
static auto _mutex = std::mutex{};
|
||||
return std::unique_lock<std::mutex>{_mutex};
|
||||
}
|
||||
|
||||
void
|
||||
add_kernel_rename_and_stream_display_pairs(kernel_rename_and_stream_display_pair* ptr)
|
||||
{
|
||||
auto lock = get_kernel_rename_and_stream_display_pair_lock();
|
||||
if(ptr != nullptr && kernel_rename_and_stream_display_pair_dtors != nullptr)
|
||||
{
|
||||
kernel_rename_and_stream_display_pair_dtors->emplace_back(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
add_kernel_target(uint64_t _kern_id, const std::unordered_set<uint32_t>& range)
|
||||
{
|
||||
@@ -357,23 +386,49 @@ collection_period_cntrl(std::promise<void>&& _promise, rocprofiler_context_id_t
|
||||
}
|
||||
|
||||
int
|
||||
set_kernel_rename_correlation_id(rocprofiler_thread_id_t thr_id,
|
||||
rocprofiler_context_id_t ctx_id,
|
||||
rocprofiler_external_correlation_id_request_kind_t kind,
|
||||
rocprofiler_tracing_operation_t op,
|
||||
uint64_t internal_corr_id,
|
||||
rocprofiler_user_data_t* external_corr_id,
|
||||
void* user_data)
|
||||
set_kernel_rename_and_stream_display_correlation_id(
|
||||
rocprofiler_thread_id_t thr_id,
|
||||
rocprofiler_context_id_t ctx_id,
|
||||
rocprofiler_external_correlation_id_request_kind_t kind,
|
||||
rocprofiler_tracing_operation_t op,
|
||||
uint64_t internal_corr_id,
|
||||
rocprofiler_user_data_t* external_corr_id,
|
||||
void* user_data)
|
||||
{
|
||||
ROCP_FATAL_IF(kind != ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH)
|
||||
ROCP_FATAL_IF(kind != ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH &&
|
||||
kind != ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY)
|
||||
<< "unexpected kind: " << kind;
|
||||
// Check whether services are enabled
|
||||
const bool kernel_rename_service_enabled =
|
||||
tool::get_config().kernel_rename && thread_dispatch_rename != nullptr &&
|
||||
!thread_dispatch_rename->empty() &&
|
||||
kind == ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH;
|
||||
const bool hip_stream_display_enabled =
|
||||
!tool::get_config().group_by_queue &&
|
||||
kernel_rename_and_stream_display_pair_dtors != nullptr &&
|
||||
!rocprofiler::tool::stream::stream_stack_empty();
|
||||
|
||||
if(thread_dispatch_rename != nullptr && !thread_dispatch_rename->empty())
|
||||
kernel_rename_and_stream_display_pair* kernel_rename_and_stream_display_vals = nullptr;
|
||||
if(kernel_rename_service_enabled || hip_stream_display_enabled)
|
||||
{
|
||||
kernel_rename_and_stream_display_vals = new kernel_rename_and_stream_display_pair{};
|
||||
}
|
||||
// Get value for kernel rename service
|
||||
if(kernel_rename_service_enabled && kernel_rename_and_stream_display_vals != nullptr)
|
||||
{
|
||||
auto val = thread_dispatch_rename->top();
|
||||
if(tool_metadata) tool_metadata->add_external_correlation_id(val);
|
||||
external_corr_id->value = val;
|
||||
kernel_rename_and_stream_display_vals->kernel_rename_val = val;
|
||||
}
|
||||
// Get stream ID from stream HIP display service
|
||||
if(hip_stream_display_enabled && kernel_rename_and_stream_display_vals != nullptr)
|
||||
{
|
||||
auto stream_id = rocprofiler::tool::stream::get_stream_id();
|
||||
kernel_rename_and_stream_display_vals->stream_id = stream_id;
|
||||
}
|
||||
// Set the external correlation id service to point to struct
|
||||
external_corr_id->ptr = kernel_rename_and_stream_display_vals;
|
||||
add_kernel_rename_and_stream_display_pairs(kernel_rename_and_stream_display_vals);
|
||||
|
||||
common::consume_args(thr_id, ctx_id, kind, op, internal_corr_id, user_data);
|
||||
|
||||
@@ -461,6 +516,54 @@ kernel_rename_callback(rocprofiler_callback_tracing_record_t record,
|
||||
common::consume_args(user_data, data);
|
||||
}
|
||||
|
||||
// Stores stream IDs onto stack when callback is triggered
|
||||
void
|
||||
hip_stream_display_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* user_data,
|
||||
void* data)
|
||||
{
|
||||
if(tool::get_config().group_by_queue ||
|
||||
record.kind != ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API)
|
||||
return;
|
||||
// Extract stream ID from record
|
||||
auto* stream_handle_data =
|
||||
static_cast<rocprofiler_callback_tracing_stream_handle_data_t*>(record.payload);
|
||||
auto stream_id = stream_handle_data->stream_id;
|
||||
// STREAM_HANDLE_CREATE and DESTROY are no-ops
|
||||
if(record.operation == ROCPROFILER_HIP_STREAM_CREATE)
|
||||
{
|
||||
ROCP_INFO
|
||||
<< "Entered hip_stream_display_callback function for ROCPROFILER_HIP_STREAM_CREATE";
|
||||
}
|
||||
else if(record.operation == ROCPROFILER_HIP_STREAM_DESTROY)
|
||||
{
|
||||
ROCP_INFO
|
||||
<< "Entered hip_stream_display_callback function for ROCPROFILER_HIP_STREAM_DESTROY";
|
||||
}
|
||||
else if(record.operation == ROCPROFILER_HIP_STREAM_SET)
|
||||
{
|
||||
// Push the stream ID onto the stream stack when before underlying HIP function is called
|
||||
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
||||
{
|
||||
ROCP_INFO << "Entered hip_stream_display_callback function for "
|
||||
"ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_ENTER";
|
||||
rocprofiler::tool::stream::push_stream_id(stream_id);
|
||||
}
|
||||
// Pop stream ID off of stream stack after underlying HIP function is completed
|
||||
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
|
||||
{
|
||||
ROCP_INFO << "Entered hip_stream_display_callback function for "
|
||||
"ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_EXIT";
|
||||
rocprofiler::tool::stream::pop_stream_id();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCP_FATAL << "Unsupported operation for ROCPROFILER_HIP_STREAM";
|
||||
}
|
||||
common::consume_args(user_data, data);
|
||||
}
|
||||
|
||||
void
|
||||
callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* user_data,
|
||||
@@ -759,7 +862,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
|
||||
rocprofiler_buffer_id_t /*buffer_id*/,
|
||||
rocprofiler_record_header_t** headers,
|
||||
size_t num_headers,
|
||||
void* /*user_data*/,
|
||||
void* /* user_data*/,
|
||||
uint64_t /*drop_count*/)
|
||||
{
|
||||
ROCP_INFO << "Executing buffered tracing callback for " << num_headers << " headers";
|
||||
@@ -776,8 +879,21 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
|
||||
{
|
||||
auto* record = static_cast<rocprofiler_buffer_tracing_kernel_dispatch_record_t*>(
|
||||
header->payload);
|
||||
|
||||
tool::write_ring_buffer(*record, domain_type::KERNEL_DISPATCH);
|
||||
rocprofiler_stream_id_t stream_id{.handle = 0};
|
||||
uint64_t kernel_rename_val = 0;
|
||||
if((!tool::get_config().group_by_queue || tool::get_config().kernel_rename) &&
|
||||
record->correlation_id.external.ptr != nullptr)
|
||||
{
|
||||
// Extract the stream id
|
||||
auto* kernel_stream_pair_ptr =
|
||||
static_cast<kernel_rename_and_stream_display_pair*>(
|
||||
record->correlation_id.external.ptr);
|
||||
stream_id = kernel_stream_pair_ptr->stream_id;
|
||||
kernel_rename_val = kernel_stream_pair_ptr->kernel_rename_val;
|
||||
}
|
||||
rocprofiler::tool::tool_buffer_tracing_kernel_dispatch_with_stream_record_t
|
||||
record_with_stream{*record, stream_id, kernel_rename_val};
|
||||
tool::write_ring_buffer(record_with_stream, domain_type::KERNEL_DISPATCH);
|
||||
}
|
||||
|
||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API ||
|
||||
@@ -794,8 +910,19 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
|
||||
{
|
||||
auto* record =
|
||||
static_cast<rocprofiler_buffer_tracing_memory_copy_record_t*>(header->payload);
|
||||
|
||||
tool::write_ring_buffer(*record, domain_type::MEMORY_COPY);
|
||||
rocprofiler_stream_id_t stream_id{.handle = 0};
|
||||
if(!tool::get_config().group_by_queue &&
|
||||
record->correlation_id.external.ptr != nullptr)
|
||||
{
|
||||
// Extract the stream id
|
||||
auto* kernel_stream_pair_ptr =
|
||||
static_cast<kernel_rename_and_stream_display_pair*>(
|
||||
record->correlation_id.external.ptr);
|
||||
stream_id = kernel_stream_pair_ptr->stream_id;
|
||||
}
|
||||
rocprofiler::tool::tool_buffer_tracing_memory_copy_with_stream_record_t
|
||||
record_with_stream{*record, stream_id};
|
||||
tool::write_ring_buffer(record_with_stream, domain_type::MEMORY_COPY);
|
||||
}
|
||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION)
|
||||
{
|
||||
@@ -1171,6 +1298,13 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da
|
||||
|
||||
counter_record.dispatch_data = dispatch_data;
|
||||
counter_record.thread_id = user_data.value;
|
||||
if(dispatch_data.correlation_id.external.ptr != nullptr)
|
||||
{
|
||||
// Extract the kernel id
|
||||
auto* kernel_stream_pair_ptr = static_cast<kernel_rename_and_stream_display_pair*>(
|
||||
dispatch_data.correlation_id.external.ptr);
|
||||
counter_record.kernel_rename_val = kernel_stream_pair_ptr->kernel_rename_val;
|
||||
}
|
||||
|
||||
auto serialized_records = std::vector<tool::tool_counter_value_t>{};
|
||||
serialized_records.reserve(record_count);
|
||||
@@ -1552,7 +1686,6 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
get_buffers().rocjpeg_api_trace),
|
||||
"buffer tracing service for ROCDecode api configure");
|
||||
}
|
||||
|
||||
if(tool::get_config().kernel_rename)
|
||||
{
|
||||
auto rename_ctx = rocprofiler_context_id_t{0};
|
||||
@@ -1573,16 +1706,36 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
"callback tracing service failed to configure");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(rename_ctx), "start context failed");
|
||||
}
|
||||
if(!tool::get_config().group_by_queue)
|
||||
{
|
||||
auto hip_stream_display_ctx = rocprofiler_context_id_t{0};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&hip_stream_display_ctx),
|
||||
"failed to create context");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
|
||||
hip_stream_display_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
|
||||
nullptr,
|
||||
0,
|
||||
hip_stream_display_callback,
|
||||
nullptr),
|
||||
"stream tracing configure failed");
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(hip_stream_display_ctx), "start context failed");
|
||||
}
|
||||
if(tool::get_config().kernel_rename || !tool::get_config().group_by_queue)
|
||||
{
|
||||
auto external_corr_id_request_kinds =
|
||||
std::array<rocprofiler_external_correlation_id_request_kind_t, 1>{
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH};
|
||||
std::array<rocprofiler_external_correlation_id_request_kind_t, 2>{
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH,
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_external_correlation_id_request_service(
|
||||
get_client_ctx(),
|
||||
external_corr_id_request_kinds.data(),
|
||||
external_corr_id_request_kinds.size(),
|
||||
set_kernel_rename_correlation_id,
|
||||
set_kernel_rename_and_stream_display_correlation_id,
|
||||
nullptr),
|
||||
"Could not configure external correlation id request service");
|
||||
}
|
||||
@@ -1715,16 +1868,17 @@ tool_fini(void* /*tool_data*/)
|
||||
rocprofiler_stop_context(get_client_ctx());
|
||||
flush();
|
||||
|
||||
auto kernel_dispatch_output =
|
||||
tool::kernel_dispatch_buffered_output_t{tool::get_config().kernel_trace};
|
||||
auto kernel_dispatch_with_stream_output =
|
||||
rocprofiler::tool::kernel_dispatch_buffered_output_with_stream_t{
|
||||
tool::get_config().kernel_trace};
|
||||
auto hsa_output = tool::hsa_buffered_output_t{tool::get_config().hsa_core_api_trace ||
|
||||
tool::get_config().hsa_amd_ext_api_trace ||
|
||||
tool::get_config().hsa_image_ext_api_trace ||
|
||||
tool::get_config().hsa_finalizer_ext_api_trace};
|
||||
auto hip_output = tool::hip_buffered_output_t{tool::get_config().hip_runtime_api_trace ||
|
||||
tool::get_config().hip_compiler_api_trace};
|
||||
auto memory_copy_output =
|
||||
tool::memory_copy_buffered_output_t{tool::get_config().memory_copy_trace};
|
||||
auto memory_copy_output_with_stream_output =
|
||||
tool::memory_copy_buffered_output_with_stream_t{tool::get_config().memory_copy_trace};
|
||||
auto marker_output = tool::marker_buffered_output_t{tool::get_config().marker_api_trace};
|
||||
auto counters_output =
|
||||
tool::counter_collection_buffered_output_t{tool::get_config().counter_collection};
|
||||
@@ -1748,10 +1902,10 @@ tool_fini(void* /*tool_data*/)
|
||||
uint64_t num_output = 0;
|
||||
auto contributions = domain_stats_vec_t{};
|
||||
|
||||
generate_output(kernel_dispatch_output, num_output, contributions);
|
||||
generate_output(kernel_dispatch_with_stream_output, num_output, contributions);
|
||||
generate_output(hsa_output, num_output, contributions);
|
||||
generate_output(hip_output, num_output, contributions);
|
||||
generate_output(memory_copy_output, num_output, contributions);
|
||||
generate_output(memory_copy_output_with_stream_output, num_output, contributions);
|
||||
generate_output(memory_allocation_output, num_output, contributions);
|
||||
generate_output(marker_output, num_output, contributions);
|
||||
generate_output(rccl_output, num_output, contributions);
|
||||
@@ -1825,8 +1979,8 @@ tool_fini(void* /*tool_data*/)
|
||||
contributions,
|
||||
hip_output.get_generator(),
|
||||
hsa_output.get_generator(),
|
||||
kernel_dispatch_output.get_generator(),
|
||||
memory_copy_output.get_generator(),
|
||||
kernel_dispatch_with_stream_output.get_generator(),
|
||||
memory_copy_output_with_stream_output.get_generator(),
|
||||
counters_output.get_generator(),
|
||||
marker_output.get_generator(),
|
||||
scratch_memory_output.get_generator(),
|
||||
@@ -1847,8 +2001,8 @@ tool_fini(void* /*tool_data*/)
|
||||
agents_output,
|
||||
hip_output.get_generator(),
|
||||
hsa_output.get_generator(),
|
||||
kernel_dispatch_output.get_generator(),
|
||||
memory_copy_output.get_generator(),
|
||||
kernel_dispatch_with_stream_output.get_generator(),
|
||||
memory_copy_output_with_stream_output.get_generator(),
|
||||
marker_output.get_generator(),
|
||||
scratch_memory_output.get_generator(),
|
||||
rccl_output.get_generator(),
|
||||
@@ -1861,8 +2015,8 @@ tool_fini(void* /*tool_data*/)
|
||||
{
|
||||
auto hip_elem_data = hip_output.load_all();
|
||||
auto hsa_elem_data = hsa_output.load_all();
|
||||
auto kernel_dispatch_elem_data = kernel_dispatch_output.load_all();
|
||||
auto memory_copy_elem_data = memory_copy_output.load_all();
|
||||
auto kernel_dispatch_elem_data = kernel_dispatch_with_stream_output.load_all();
|
||||
auto memory_copy_elem_data = memory_copy_output_with_stream_output.load_all();
|
||||
auto marker_elem_data = marker_output.load_all();
|
||||
auto scratch_memory_elem_data = scratch_memory_output.load_all();
|
||||
auto rccl_elem_data = rccl_output.load_all();
|
||||
@@ -1890,13 +2044,12 @@ tool_fini(void* /*tool_data*/)
|
||||
{
|
||||
tool::generate_stats(tool::get_config(), *tool_metadata, contributions);
|
||||
}
|
||||
|
||||
auto destroy_output = [](auto& _buffered_output_v) { _buffered_output_v.destroy(); };
|
||||
|
||||
destroy_output(kernel_dispatch_output);
|
||||
destroy_output(kernel_dispatch_with_stream_output);
|
||||
destroy_output(hsa_output);
|
||||
destroy_output(hip_output);
|
||||
destroy_output(memory_copy_output);
|
||||
destroy_output(memory_copy_output_with_stream_output);
|
||||
destroy_output(memory_allocation_output);
|
||||
destroy_output(marker_output);
|
||||
destroy_output(counters_output);
|
||||
@@ -1907,6 +2060,17 @@ tool_fini(void* /*tool_data*/)
|
||||
destroy_output(rocdecode_output);
|
||||
destroy_output(rocjpeg_output);
|
||||
|
||||
if(kernel_rename_and_stream_display_pair_dtors != nullptr)
|
||||
{
|
||||
for(auto& itr : *kernel_rename_and_stream_display_pair_dtors)
|
||||
{
|
||||
delete itr;
|
||||
itr = nullptr;
|
||||
}
|
||||
delete kernel_rename_and_stream_display_pair_dtors;
|
||||
kernel_rename_and_stream_display_pair_dtors = nullptr;
|
||||
}
|
||||
|
||||
if(destructors)
|
||||
{
|
||||
for(const auto& itr : *destructors)
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/domain.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/hip.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/stream.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/async_copy.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/memory_allocation.hpp"
|
||||
@@ -97,6 +98,7 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(OMPT)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(RUNTIME_INITIALIZATION)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCDECODE_API)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCJPEG_API)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_STREAM_API)
|
||||
|
||||
template <size_t Idx, size_t... Tail>
|
||||
std::pair<const char*, size_t>
|
||||
@@ -305,6 +307,11 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_
|
||||
val = rocprofiler::rocjpeg::name_by_id<ROCPROFILER_ROCJPEG_TABLE_ID_CORE>(operation);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API:
|
||||
{
|
||||
val = rocprofiler::hip::stream::name_by_id(operation);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
if(!val)
|
||||
@@ -446,6 +453,11 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
|
||||
ops = rocprofiler::rocjpeg::get_ids<ROCPROFILER_ROCJPEG_TABLE_ID_CORE>();
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API:
|
||||
{
|
||||
ops = rocprofiler::hip::stream::get_ids();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for(const auto& itr : ops)
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/domain.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/hip.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/stream.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/async_copy.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/memory_allocation.hpp"
|
||||
@@ -94,6 +95,7 @@ ROCPROFILER_CALLBACK_TRACING_KIND_STRING(OMPT)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(RUNTIME_INITIALIZATION)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(ROCDECODE_API)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(ROCJPEG_API)
|
||||
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(HIP_STREAM_API)
|
||||
|
||||
template <size_t Idx, size_t... Tail>
|
||||
std::pair<const char*, size_t>
|
||||
@@ -288,6 +290,11 @@ rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_trac
|
||||
val = rocprofiler::rocjpeg::name_by_id<ROCPROFILER_ROCJPEG_TABLE_ID_CORE>(operation);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API:
|
||||
{
|
||||
val = rocprofiler::hip::stream::name_by_id(operation);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
if(!val)
|
||||
@@ -427,6 +434,11 @@ rocprofiler_iterate_callback_tracing_kind_operations(
|
||||
ops = rocprofiler::rocjpeg::get_ids<ROCPROFILER_ROCJPEG_TABLE_ID_CORE>();
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API:
|
||||
{
|
||||
ops = rocprofiler::hip::stream::get_ids();
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
for(const auto& itr : ops)
|
||||
@@ -571,6 +583,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args(
|
||||
case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION:
|
||||
case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API:
|
||||
case ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API:
|
||||
case ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API:
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
set(ROCPROFILER_LIB_HIP_SOURCES abi.cpp hip.cpp)
|
||||
set(ROCPROFILER_LIB_HIP_HEADERS defines.hpp hip.hpp utils.hpp)
|
||||
set(ROCPROFILER_LIB_HIP_SOURCES abi.cpp hip.cpp stream.cpp)
|
||||
set(ROCPROFILER_LIB_HIP_HEADERS defines.hpp hip.hpp stream.hpp utils.hpp)
|
||||
|
||||
target_sources(rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_HIP_SOURCES}
|
||||
${ROCPROFILER_LIB_HIP_HEADERS})
|
||||
|
||||
@@ -46,6 +46,8 @@
|
||||
using domain_type::retval_type; \
|
||||
using domain_type::callback_data_type; \
|
||||
\
|
||||
static constexpr auto get_args_type() { return common::mpl::type_list<>{}; } \
|
||||
\
|
||||
static constexpr auto offset() \
|
||||
{ \
|
||||
return offsetof(hip_table_lookup<table_idx>::type, HIP_FUNC_PTR); \
|
||||
@@ -169,6 +171,12 @@
|
||||
return &base_type::functor<RetT, Args...>; \
|
||||
} \
|
||||
\
|
||||
static constexpr auto get_args_type() \
|
||||
{ \
|
||||
using func_t = decltype(get_table_func()); \
|
||||
return common::mpl::function_args_t<func_t>{}; \
|
||||
} \
|
||||
\
|
||||
static std::vector<void*> as_arg_addr(callback_data_type trace_data) \
|
||||
{ \
|
||||
return std::vector<void*>{ \
|
||||
|
||||
@@ -0,0 +1,512 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/hip/stream.hpp"
|
||||
#include "lib/common/container/small_vector.hpp"
|
||||
#include "lib/common/defines.hpp"
|
||||
#include "lib/common/scope_destructor.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/common/static_tl_object.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/buffer.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/utils.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
#include "lib/rocprofiler-sdk/tracing/tracing.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/buffer.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hip/runtime_api_id.h>
|
||||
#include <rocprofiler-sdk/hip/table_id.h>
|
||||
|
||||
#include <hip/driver_types.h>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
// must be included after runtime api
|
||||
#include <hip/hip_deprecated.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
#define ROCPROFILER_LIB_ROCPROFILER_HIP_HIP_CPP_IMPL 1
|
||||
|
||||
// template specializations
|
||||
#include "hip.def.cpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hip
|
||||
{
|
||||
namespace stream
|
||||
{
|
||||
using stream_map_t = std::unordered_map<hipStream_t, rocprofiler_stream_id_t>;
|
||||
|
||||
namespace
|
||||
{
|
||||
auto*
|
||||
get_stream_map()
|
||||
{
|
||||
static auto*& _v = common::static_object<common::Synchronized<stream_map_t>>::construct();
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto
|
||||
add_stream(hipStream_t stream)
|
||||
{
|
||||
return get_stream_map()->wlock(
|
||||
[](stream_map_t& _data, hipStream_t _stream) {
|
||||
if(_data.count(_stream) == 0)
|
||||
{
|
||||
auto idx = _data.size();
|
||||
ROCP_INFO << fmt::format("hipStream_t={} :: id={}.handle={}{}",
|
||||
static_cast<void*>(_stream),
|
||||
'{',
|
||||
idx,
|
||||
'}');
|
||||
_data.emplace(_stream, rocprofiler_stream_id_t{.handle = idx});
|
||||
}
|
||||
return _data.at(_stream);
|
||||
},
|
||||
stream);
|
||||
}
|
||||
|
||||
auto
|
||||
get_stream_id(hipStream_t stream)
|
||||
{
|
||||
return get_stream_map()->rlock(
|
||||
[](const stream_map_t& _data, hipStream_t _stream) { return _data.at(_stream); }, stream);
|
||||
}
|
||||
|
||||
// Map rocprofiler_hip_stream_operation_t to respective name
|
||||
template <size_t OpIdx>
|
||||
struct hip_stream_operation_name;
|
||||
|
||||
#define HIP_STREAM_OPERATION_NAME(ENUM) \
|
||||
template <> \
|
||||
struct hip_stream_operation_name<ROCPROFILER_HIP_STREAM_##ENUM> \
|
||||
{ \
|
||||
static constexpr auto name = "HIP_STREAM_" #ENUM; \
|
||||
static constexpr auto operation_idx = ROCPROFILER_HIP_STREAM_##ENUM; \
|
||||
};
|
||||
|
||||
HIP_STREAM_OPERATION_NAME(NONE)
|
||||
HIP_STREAM_OPERATION_NAME(CREATE)
|
||||
HIP_STREAM_OPERATION_NAME(DESTROY)
|
||||
HIP_STREAM_OPERATION_NAME(SET)
|
||||
#undef HIP_STREAM_OPERATION_NAME
|
||||
|
||||
template <size_t OpIdx, size_t... OpIdxTail>
|
||||
const char*
|
||||
name_by_id(const uint32_t id, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
{
|
||||
if(OpIdx == id) return hip_stream_operation_name<OpIdx>::name;
|
||||
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
return name_by_id(id, std::index_sequence<OpIdxTail...>{});
|
||||
else
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <size_t OpIdx, size_t... OpIdxTail>
|
||||
void
|
||||
get_ids(std::vector<uint32_t>& _id_list, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
{
|
||||
auto _idx = hip_stream_operation_name<OpIdx>::operation_idx;
|
||||
if(_idx < ROCPROFILER_HIP_STREAM_LAST) _id_list.emplace_back(_idx);
|
||||
|
||||
if constexpr(sizeof...(OpIdxTail) > 0) get_ids(_id_list, std::index_sequence<OpIdxTail...>{});
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
template <size_t TableIdx,
|
||||
size_t OpIdx,
|
||||
typename RetT,
|
||||
typename... Args,
|
||||
typename FuncT = RetT (*)(Args...)>
|
||||
FuncT create_write_functor(RetT (*func)(Args...))
|
||||
{
|
||||
using function_type = FuncT;
|
||||
|
||||
static function_type next_func = func;
|
||||
|
||||
return [](Args... args) -> RetT {
|
||||
using function_args_type = common::mpl::type_list<Args...>;
|
||||
|
||||
using callback_api_data_t = rocprofiler_callback_tracing_stream_handle_data_t;
|
||||
|
||||
constexpr auto external_corr_id_domain_idx =
|
||||
hip_domain_info<TableIdx>::external_correlation_id_domain_idx;
|
||||
|
||||
auto thr_id = common::get_tid();
|
||||
auto callback_contexts = tracing::callback_context_data_vec_t{};
|
||||
auto buffered_contexts = tracing::buffered_context_data_vec_t{};
|
||||
auto external_corr_ids = tracing::external_correlation_id_map_t{};
|
||||
|
||||
tracing::populate_contexts(ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API,
|
||||
callback_contexts,
|
||||
buffered_contexts,
|
||||
external_corr_ids);
|
||||
assert(buffered_contexts.empty() && "Stream tracing should not have any buffered contexts");
|
||||
|
||||
auto tracer_data = common::init_public_api_struct(callback_api_data_t{});
|
||||
auto internal_corr_id = 0;
|
||||
|
||||
constexpr auto stream_idx = common::mpl::index_of<hipStream_t*, function_args_type>::value;
|
||||
auto stream = std::get<stream_idx>(std::make_tuple(std::forward<Args>(args)...));
|
||||
|
||||
tracing::update_external_correlation_ids(
|
||||
external_corr_ids, thr_id, external_corr_id_domain_idx);
|
||||
|
||||
auto _ret = next_func(std::forward<Args>(args)...);
|
||||
if(!callback_contexts.empty())
|
||||
{
|
||||
if(stream)
|
||||
{
|
||||
tracer_data.stream_id = add_stream(*stream);
|
||||
}
|
||||
tracing::execute_phase_none_callbacks(callback_contexts,
|
||||
thr_id,
|
||||
internal_corr_id,
|
||||
external_corr_ids,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
|
||||
ROCPROFILER_HIP_STREAM_CREATE,
|
||||
tracer_data);
|
||||
}
|
||||
|
||||
if constexpr(!std::is_void<RetT>::value) return _ret;
|
||||
};
|
||||
}
|
||||
|
||||
template <size_t TableIdx,
|
||||
size_t OpIdx,
|
||||
typename RetT,
|
||||
typename... Args,
|
||||
typename FuncT = RetT (*)(Args...)>
|
||||
FuncT create_destroy_functor(RetT (*func)(Args...))
|
||||
{
|
||||
using function_type = FuncT;
|
||||
|
||||
static function_type next_func = func;
|
||||
|
||||
return [](Args... args) -> RetT {
|
||||
using function_args_type = common::mpl::type_list<Args...>;
|
||||
constexpr auto stream_idx = common::mpl::index_of<hipStream_t, function_args_type>::value;
|
||||
|
||||
using callback_api_data_t = rocprofiler_callback_tracing_stream_handle_data_t;
|
||||
|
||||
constexpr auto external_corr_id_domain_idx =
|
||||
hip_domain_info<TableIdx>::external_correlation_id_domain_idx;
|
||||
|
||||
auto thr_id = common::get_tid();
|
||||
auto callback_contexts = tracing::callback_context_data_vec_t{};
|
||||
auto buffered_contexts = tracing::buffered_context_data_vec_t{};
|
||||
auto external_corr_ids = tracing::external_correlation_id_map_t{};
|
||||
|
||||
tracing::populate_contexts(ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API,
|
||||
callback_contexts,
|
||||
buffered_contexts,
|
||||
external_corr_ids);
|
||||
assert(buffered_contexts.empty() && "Stream tracing should not have any buffered contexts");
|
||||
|
||||
auto tracer_data = common::init_public_api_struct(callback_api_data_t{});
|
||||
auto internal_corr_id = 0;
|
||||
|
||||
auto stream = std::get<stream_idx>(std::make_tuple(std::forward<Args>(args)...));
|
||||
|
||||
tracing::update_external_correlation_ids(
|
||||
external_corr_ids, thr_id, external_corr_id_domain_idx);
|
||||
|
||||
auto _ret = next_func(std::forward<Args>(args)...);
|
||||
|
||||
if(!callback_contexts.empty())
|
||||
{
|
||||
tracer_data.stream_id = get_stream_id(stream);
|
||||
tracing::execute_phase_none_callbacks(callback_contexts,
|
||||
thr_id,
|
||||
internal_corr_id,
|
||||
external_corr_ids,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
|
||||
ROCPROFILER_HIP_STREAM_DESTROY,
|
||||
tracer_data);
|
||||
}
|
||||
|
||||
if constexpr(!std::is_void<RetT>::value) return _ret;
|
||||
};
|
||||
}
|
||||
|
||||
template <size_t TableIdx,
|
||||
size_t OpIdx,
|
||||
typename RetT,
|
||||
typename... Args,
|
||||
typename FuncT = RetT (*)(Args...)>
|
||||
FuncT create_read_functor(RetT (*func)(Args...))
|
||||
{
|
||||
using function_type = FuncT;
|
||||
|
||||
static function_type next_func = func;
|
||||
|
||||
return [](Args... args) -> RetT {
|
||||
using function_args_type = common::mpl::type_list<Args...>;
|
||||
constexpr auto stream_idx = common::mpl::index_of<hipStream_t, function_args_type>::value;
|
||||
|
||||
using callback_api_data_t = rocprofiler_callback_tracing_stream_handle_data_t;
|
||||
|
||||
constexpr auto external_corr_id_domain_idx =
|
||||
hip_domain_info<TableIdx>::external_correlation_id_domain_idx;
|
||||
|
||||
auto thr_id = common::get_tid();
|
||||
auto callback_contexts = tracing::callback_context_data_vec_t{};
|
||||
auto buffered_contexts = tracing::buffered_context_data_vec_t{};
|
||||
auto external_corr_ids = tracing::external_correlation_id_map_t{};
|
||||
|
||||
tracing::populate_contexts(ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API,
|
||||
callback_contexts,
|
||||
buffered_contexts,
|
||||
external_corr_ids);
|
||||
|
||||
assert(buffered_contexts.empty() && "Stream tracing should not have any buffered contexts");
|
||||
|
||||
auto tracer_data = common::init_public_api_struct(callback_api_data_t{});
|
||||
auto internal_corr_id = 0;
|
||||
|
||||
auto stream = std::get<stream_idx>(std::make_tuple(std::forward<Args>(args)...));
|
||||
|
||||
if(!callback_contexts.empty())
|
||||
{
|
||||
tracer_data.stream_id = get_stream_id(stream);
|
||||
tracing::execute_phase_enter_callbacks(callback_contexts,
|
||||
thr_id,
|
||||
internal_corr_id,
|
||||
external_corr_ids,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
|
||||
ROCPROFILER_HIP_STREAM_SET,
|
||||
tracer_data);
|
||||
}
|
||||
|
||||
tracing::update_external_correlation_ids(
|
||||
external_corr_ids, thr_id, external_corr_id_domain_idx);
|
||||
|
||||
auto _ret = next_func(std::forward<Args>(args)...);
|
||||
|
||||
if(!callback_contexts.empty())
|
||||
{
|
||||
tracing::execute_phase_exit_callbacks(callback_contexts,
|
||||
external_corr_ids,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
|
||||
ROCPROFILER_HIP_STREAM_SET,
|
||||
tracer_data);
|
||||
}
|
||||
|
||||
if constexpr(!std::is_void<RetT>::value) return _ret;
|
||||
};
|
||||
}
|
||||
} // namespace stream
|
||||
} // namespace hip
|
||||
} // namespace rocprofiler
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hip
|
||||
{
|
||||
namespace stream
|
||||
{
|
||||
namespace
|
||||
{
|
||||
bool
|
||||
enable_stream_stack()
|
||||
{
|
||||
if(hsa::enable_queue_intercept()) return true;
|
||||
|
||||
for(const auto& itr : context::get_registered_contexts())
|
||||
{
|
||||
if(itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY) ||
|
||||
itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API) ||
|
||||
itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API) ||
|
||||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API) ||
|
||||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) ||
|
||||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API) ||
|
||||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
enable_compiler_stream_stack()
|
||||
{
|
||||
for(const auto& itr : context::get_registered_contexts())
|
||||
{
|
||||
if(itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API) ||
|
||||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx>
|
||||
void
|
||||
update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
{
|
||||
using table_type = typename hip_table_lookup<TableIdx>::type;
|
||||
using info_type = hip_api_info<TableIdx, OpIdx>;
|
||||
using function_args_type = decltype(info_type::get_args_type());
|
||||
|
||||
if constexpr(std::is_same<table_type, Tp>::value &&
|
||||
(common::mpl::is_one_of<hipStream_t, function_args_type>::value ||
|
||||
common::mpl::is_one_of<hipStream_t*, function_args_type>::value))
|
||||
{
|
||||
auto _info = info_type{};
|
||||
|
||||
// make sure we don't access a field that doesn't exist in input table
|
||||
if(_info.offset() >= _orig->size) return;
|
||||
|
||||
ROCP_TRACE << "updating table entry for " << _info.name;
|
||||
|
||||
constexpr auto num_args = function_args_type::size();
|
||||
|
||||
if constexpr(common::mpl::is_one_of<hipStream_t, function_args_type>::value)
|
||||
{
|
||||
constexpr auto stream_idx =
|
||||
common::mpl::index_of<hipStream_t, function_args_type>::value;
|
||||
constexpr auto rstream_idx =
|
||||
common::mpl::index_of<hipStream_t, common::mpl::reverse<function_args_type>>::value;
|
||||
|
||||
// index_of finds the first argument of that type. So find the first and last
|
||||
// arg of the given type and make sure it resolves to the same distance
|
||||
assert(stream_idx == (num_args - rstream_idx - 1) &&
|
||||
"function has more than one stream argument");
|
||||
|
||||
// don't wrap the compiler API functions unless HIP compiler API tracing is enabled
|
||||
if constexpr(TableIdx == ROCPROFILER_HIP_TABLE_ID_Compiler)
|
||||
{
|
||||
if(!enable_compiler_stream_stack()) return;
|
||||
}
|
||||
|
||||
// 1. get the sub-table containing the function pointer in original table
|
||||
// 2. get reference to function pointer in sub-table in original table
|
||||
// 3. update function pointer with wrapper
|
||||
auto& _table = _info.get_table(_orig);
|
||||
auto& _func = _info.get_table_func(_table);
|
||||
constexpr auto is_hip_destroy_func =
|
||||
std::is_same<decltype(info_type::operation_idx),
|
||||
rocprofiler_hip_runtime_api_id_t>::value &&
|
||||
(static_cast<rocprofiler_hip_runtime_api_id_t>(info_type::operation_idx) ==
|
||||
ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamDestroy);
|
||||
if constexpr(is_hip_destroy_func)
|
||||
{
|
||||
_func = create_destroy_functor<TableIdx, OpIdx>(_func);
|
||||
}
|
||||
else
|
||||
{
|
||||
_func = create_read_functor<TableIdx, OpIdx>(_func);
|
||||
}
|
||||
}
|
||||
else if constexpr(common::mpl::is_one_of<hipStream_t*, function_args_type>::value)
|
||||
{
|
||||
constexpr auto stream_idx =
|
||||
common::mpl::index_of<hipStream_t*, function_args_type>::value;
|
||||
constexpr auto rstream_idx =
|
||||
common::mpl::index_of<hipStream_t*,
|
||||
common::mpl::reverse<function_args_type>>::value;
|
||||
|
||||
// index_of finds the first argument of that type. So find the first and last
|
||||
// arg of the given type and make sure it resolves to the same distance
|
||||
assert(stream_idx == (num_args - rstream_idx - 1) &&
|
||||
"function has more than one stream argument");
|
||||
|
||||
// don't wrap the compiler API functions unless HIP compiler API tracing is enabled
|
||||
if constexpr(TableIdx == ROCPROFILER_HIP_TABLE_ID_Compiler)
|
||||
{
|
||||
if(!enable_compiler_stream_stack()) return;
|
||||
}
|
||||
|
||||
// 1. get the sub-table containing the function pointer in original table
|
||||
// 2. get reference to function pointer in sub-table in original table
|
||||
// 3. update function pointer with wrapper
|
||||
auto& _table = _info.get_table(_orig);
|
||||
auto& _func = _info.get_table_func(_table);
|
||||
_func = create_write_functor<TableIdx, OpIdx>(_func);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
|
||||
void
|
||||
update_table(Tp* _orig, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
{
|
||||
update_table<TableIdx>(_orig, std::integral_constant<size_t, OpIdx>{});
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
update_table<TableIdx>(_orig, std::index_sequence<OpIdxTail...>{});
|
||||
}
|
||||
} // namespace
|
||||
|
||||
const char*
|
||||
name_by_id(uint32_t id)
|
||||
{
|
||||
return name_by_id(id, std::make_index_sequence<ROCPROFILER_HIP_STREAM_LAST>{});
|
||||
}
|
||||
|
||||
std::vector<uint32_t>
|
||||
get_ids()
|
||||
{
|
||||
constexpr auto last_id = ROCPROFILER_HIP_STREAM_LAST;
|
||||
auto _data = std::vector<uint32_t>{};
|
||||
_data.reserve(last_id);
|
||||
get_ids(_data, std::make_index_sequence<ROCPROFILER_HIP_STREAM_LAST>{});
|
||||
return _data;
|
||||
}
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
update_table(TableT* _orig)
|
||||
{
|
||||
add_stream(nullptr);
|
||||
|
||||
if(!enable_stream_stack()) return;
|
||||
|
||||
constexpr auto TableIdx = hip_table_id_lookup<TableT>::value;
|
||||
if(_orig)
|
||||
update_table<TableIdx>(_orig, std::make_index_sequence<hip_domain_info<TableIdx>::last>{});
|
||||
}
|
||||
|
||||
using hip_api_data_t = rocprofiler_callback_tracing_hip_api_data_t;
|
||||
using hip_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
|
||||
|
||||
#define INSTANTIATE_HIP_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \
|
||||
template void update_table<TABLE_TYPE>(TABLE_TYPE * _tbl);
|
||||
|
||||
INSTANTIATE_HIP_TABLE_FUNC(hip_runtime_api_table_t, ROCPROFILER_HIP_TABLE_ID_Runtime)
|
||||
INSTANTIATE_HIP_TABLE_FUNC(hip_compiler_api_table_t, ROCPROFILER_HIP_TABLE_ID_Compiler)
|
||||
} // namespace stream
|
||||
} // namespace hip
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,56 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <hip/hip_version.h>
|
||||
#include <hip/amd_detail/hip_api_trace.hpp>
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hip
|
||||
{
|
||||
namespace stream
|
||||
{
|
||||
using hip_compiler_api_table_t = HipCompilerDispatchTable;
|
||||
using hip_runtime_api_table_t = HipDispatchTable;
|
||||
|
||||
rocprofiler_stream_id_t
|
||||
get_stream_id();
|
||||
|
||||
const char*
|
||||
name_by_id(uint32_t id);
|
||||
|
||||
std::vector<uint32_t>
|
||||
get_ids();
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
update_table(TableT* _orig);
|
||||
} // namespace stream
|
||||
} // namespace hip
|
||||
} // namespace rocprofiler
|
||||
+27
-25
@@ -274,31 +274,7 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table)
|
||||
}
|
||||
}
|
||||
|
||||
auto enable_intercepter = false;
|
||||
for(const auto& itr : context::get_registered_contexts())
|
||||
{
|
||||
constexpr auto expected_context_size = 216UL;
|
||||
static_assert(
|
||||
sizeof(context::context) == expected_context_size,
|
||||
"If you added a new field to context struct, make sure there is a check here if it "
|
||||
"requires queue interception. Once you have done so, increment expected_context_size");
|
||||
|
||||
bool has_kernel_tracing = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH) ||
|
||||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH);
|
||||
|
||||
bool has_scratch_reporting = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY) ||
|
||||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY);
|
||||
|
||||
if(itr->counter_collection || itr->pc_sampler || has_kernel_tracing ||
|
||||
has_scratch_reporting || itr->device_counter_collection || itr->agent_thread_trace ||
|
||||
itr->dispatch_thread_trace)
|
||||
{
|
||||
enable_intercepter = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(enable_intercepter)
|
||||
if(enable_queue_intercept())
|
||||
{
|
||||
core_table.hsa_queue_create_fn = hsa::create_queue;
|
||||
core_table.hsa_queue_destroy_fn = hsa::destroy_queue;
|
||||
@@ -475,6 +451,32 @@ get_queue_controller()
|
||||
return controller;
|
||||
}
|
||||
|
||||
bool
|
||||
enable_queue_intercept()
|
||||
{
|
||||
for(const auto& itr : context::get_registered_contexts())
|
||||
{
|
||||
constexpr auto expected_context_size = 216UL;
|
||||
static_assert(
|
||||
sizeof(context::context) == expected_context_size,
|
||||
"If you added a new field to context struct, make sure there is a check here if it "
|
||||
"requires queue interception. Once you have done so, increment expected_context_size");
|
||||
|
||||
bool has_kernel_tracing = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH) ||
|
||||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH);
|
||||
|
||||
bool has_scratch_reporting = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY) ||
|
||||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY);
|
||||
|
||||
if(itr->counter_collection || itr->pc_sampler || has_kernel_tracing ||
|
||||
has_scratch_reporting || itr->device_counter_collection || itr->agent_thread_trace ||
|
||||
itr->dispatch_thread_trace)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
queue_controller_init(HsaApiTable* table)
|
||||
{
|
||||
|
||||
@@ -123,6 +123,9 @@ private:
|
||||
QueueController*
|
||||
get_queue_controller();
|
||||
|
||||
bool
|
||||
enable_queue_intercept();
|
||||
|
||||
void
|
||||
queue_controller_init(HsaApiTable* table);
|
||||
|
||||
@@ -134,6 +137,5 @@ queue_controller_sync();
|
||||
|
||||
void
|
||||
profiler_serializer_kernel_completion_signal(hsa_signal_t queue_block_signal);
|
||||
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#include "lib/rocprofiler-sdk/code_object/code_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/hip.hpp"
|
||||
#include "lib/rocprofiler-sdk/hip/stream.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/async_copy.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/memory_allocation.hpp"
|
||||
@@ -754,6 +755,8 @@ rocprofiler_is_finalized(int* status)
|
||||
rocprofiler_status_t
|
||||
rocprofiler_force_configure(rocprofiler_configure_func_t configure_func)
|
||||
{
|
||||
rocprofiler::registration::init_logging();
|
||||
|
||||
ROCP_INFO << "forcing rocprofiler configuration";
|
||||
|
||||
auto& forced_config = rocprofiler::registration::get_forced_configure();
|
||||
@@ -816,6 +819,9 @@ rocprofiler_set_api_table(const char* name,
|
||||
rocprofiler::runtime_init::initialize(
|
||||
ROCPROFILER_RUNTIME_INITIALIZATION_HIP, lib_version, lib_instance);
|
||||
|
||||
// install HIP stream deduction wrappers
|
||||
rocprofiler::hip::stream::update_table(hip_runtime_api_table);
|
||||
|
||||
// allow tools to install API wrappers
|
||||
rocprofiler::intercept_table::notify_intercept_table_registration(
|
||||
ROCPROFILER_HIP_RUNTIME_TABLE,
|
||||
@@ -841,6 +847,9 @@ rocprofiler_set_api_table(const char* name,
|
||||
// install rocprofiler API wrappers
|
||||
rocprofiler::hip::update_table(hip_compiler_api_table);
|
||||
|
||||
// install HIP stream deduction wrappers
|
||||
rocprofiler::hip::stream::update_table(hip_compiler_api_table);
|
||||
|
||||
// allow tools to install API wrappers
|
||||
rocprofiler::intercept_table::notify_intercept_table_registration(
|
||||
ROCPROFILER_HIP_COMPILER_TABLE,
|
||||
|
||||
@@ -41,4 +41,5 @@ add_subdirectory(rocjpeg-trace)
|
||||
if(TARGET att_decoder_testing)
|
||||
add_subdirectory(advanced-thread-trace)
|
||||
endif()
|
||||
add_subdirectory(hip-stream-display)
|
||||
add_subdirectory(agent-index)
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
project(
|
||||
rocprofiler-tests-rocprofv3-hip-stream-display
|
||||
LANGUAGES CXX
|
||||
VERSION 0.0.0)
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
find_package(rocDecode)
|
||||
|
||||
rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py)
|
||||
|
||||
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
|
||||
|
||||
set(hip-stream-display-env "${PRELOAD_ENV}")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-hip-stream-display-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --kernel-rename -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace
|
||||
--log-level env -- $<TARGET_FILE:transpose>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hip-stream-display-execute
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
ENVIRONMENT
|
||||
"${hip-stream-display-env}"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"threw an exception"
|
||||
DISABLED
|
||||
$<NOT:$<TARGET_EXISTS:transpose>>)
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-hip-stream-display-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-stream-display/out_results.json --pftrace-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-stream-display/out_results.pftrace)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hip-stream-display-validate
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DEPENDS
|
||||
rocprofv3-test-hip-stream-display-execute
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError"
|
||||
DISABLED
|
||||
$<NOT:$<TARGET_EXISTS:transpose>>)
|
||||
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import pytest
|
||||
|
||||
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
|
||||
from rocprofiler_sdk.pytest_utils import collapse_dict_list
|
||||
from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
"--json-input",
|
||||
action="store",
|
||||
default="hip-stream-display/out_results.json",
|
||||
help="Input JSON",
|
||||
)
|
||||
parser.addoption(
|
||||
"--pftrace-input",
|
||||
action="store",
|
||||
default="hip-stream-display/out_results.pftrace",
|
||||
help="Input pftrace file",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def json_data(request):
|
||||
filename = request.config.getoption("--json-input")
|
||||
if not os.path.isfile(filename):
|
||||
return pytest.skip("stream tracing unavailable")
|
||||
with open(filename, "r") as inp:
|
||||
return dotdict(collapse_dict_list(json.load(inp)))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pftrace_data(request):
|
||||
filename = request.config.getoption("--pftrace-input")
|
||||
if not os.path.isfile(filename):
|
||||
return pytest.skip("stream tracing unavailable")
|
||||
return PerfettoReader(filename).read()[0]
|
||||
@@ -0,0 +1,5 @@
|
||||
|
||||
[pytest]
|
||||
addopts = --durations=20 -rA -s -vv
|
||||
testpaths = validate.py
|
||||
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
|
||||
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import pytest
|
||||
import json
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
# helper function
|
||||
def node_exists(name, data, min_len=1):
|
||||
assert name in data
|
||||
assert data[name] is not None
|
||||
if isinstance(data[name], (list, tuple, dict, set)):
|
||||
assert len(data[name]) >= min_len
|
||||
|
||||
|
||||
def get_operation(record, kind_name, op_name=None):
|
||||
for idx, itr in enumerate(record["strings"]["buffer_records"]):
|
||||
if kind_name == itr["kind"]:
|
||||
if op_name is None:
|
||||
return idx, itr["operations"]
|
||||
else:
|
||||
for oidx, oname in enumerate(itr["operations"]):
|
||||
if op_name == oname:
|
||||
return oidx
|
||||
return None
|
||||
|
||||
|
||||
def test_stream_trace(json_data):
|
||||
data = json_data["rocprofiler-sdk-tool"]
|
||||
buffer_records = data["buffer_records"]
|
||||
|
||||
kernel_dispatch_data = buffer_records["kernel_dispatch"]
|
||||
memory_copies_data = buffer_records["memory_copies"]
|
||||
assert len(kernel_dispatch_data) > 0
|
||||
assert len(memory_copies_data) > 0
|
||||
|
||||
# Expect stream ids to be set to 1 or 2 for transpose executable
|
||||
expected_stream_ids = set((1, 2))
|
||||
# check buffering data
|
||||
for titr in (kernel_dispatch_data, memory_copies_data):
|
||||
for node in rocdecode_data:
|
||||
assert "size" in node
|
||||
assert "kind" in node
|
||||
assert "operation" in node
|
||||
assert "correlation_id" in node
|
||||
assert "end_timestamp" in node
|
||||
assert "start_timestamp" in node
|
||||
assert "thread_id" in node
|
||||
assert "_stream_id" in node
|
||||
|
||||
assert node.size > 0
|
||||
assert node.thread_id > 0
|
||||
assert node.start_timestamp > 0
|
||||
assert node.end_timestamp > 0
|
||||
assert node.start_timestamp < node.end_timestamp
|
||||
|
||||
assert node._stream_id.handle in expected_stream_ids
|
||||
|
||||
|
||||
def test_perfetto_data(pftrace_data, json_data):
|
||||
import rocprofiler_sdk.tests.rocprofv3 as rocprofv3
|
||||
|
||||
assert pftrace_data != None
|
||||
rocprofv3.test_perfetto_data(
|
||||
pftrace_data,
|
||||
json_data,
|
||||
("kernel", "memory_copy"),
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
|
||||
sys.exit(exit_code)
|
||||
@@ -137,7 +137,7 @@ def test_kernel_trace(json_data):
|
||||
external_corr_id = dispatch["correlation_id"]["external"]
|
||||
assert external_corr_id > 0
|
||||
|
||||
kernel_rename = get_kernel_rename(external_corr_id)
|
||||
kernel_rename = get_kernel_rename(dispatch.kernel_rename_val)
|
||||
assert kernel_rename is not None, f"{dispatch}"
|
||||
assert kernel_rename != kernel_name, f"{dispatch}"
|
||||
assert (
|
||||
|
||||
@@ -75,6 +75,7 @@ def test_kernel_trace(json_data):
|
||||
|
||||
def get_kernel_rename(corr_id):
|
||||
for itr in data.strings.correlation_id.external:
|
||||
print(itr)
|
||||
if itr.key == corr_id:
|
||||
return itr.value
|
||||
return None
|
||||
@@ -113,7 +114,7 @@ def test_kernel_trace(json_data):
|
||||
external_corr_id = dispatch["correlation_id"]["external"]
|
||||
assert external_corr_id > 0
|
||||
|
||||
kernel_rename = get_kernel_rename(external_corr_id)
|
||||
kernel_rename = get_kernel_rename(dispatch.kernel_rename_val)
|
||||
assert kernel_rename is not None, f"{dispatch}"
|
||||
assert kernel_rename != kernel_name, f"{dispatch}"
|
||||
assert (
|
||||
|
||||
Ссылка в новой задаче
Block a user