HIP Streams to Queues Translation (#235)

* rocprofiler_stream_id_t: opaque handle for a stream

- e.g. HIP stream
- the same HIP stream may map to different HSA queues at different points in the application
- added to:
  - rocprofiler_buffer_tracing_hip_api_record_t
  - rocprofiler_buffer_tracing_memory_copy_record_t
  - rocprofiler_callback_tracing_hip_api_data_t
  - rocprofiler_callback_tracing_memory_copy_data_t
---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
Co-authored-by: Mark Meserve <mark.meserve@amd.com>
Co-authored-by: Elwazir, Ammar <Ammar.Elwazir@amd.com>
Co-authored-by: Ammar ELWazir <aelwazir@amd.com>
Co-authored-by: Jakaraddi, Manjunath <Manjunath.Jakaraddi@amd.com>
Co-authored-by: Bhardwaj, Gopesh <Gopesh.Bhardwaj@amd.com>
Co-authored-by: Nagaraj, Sriraksha <Sriraksha.Nagaraj@amd.com>
Co-authored-by: U, Srihari <Srihari.U@amd.com>
Co-authored-by: Madsen, Jonathan <Jonathan.Madsen@amd.com>
Co-authored-by: Welton, Benjamin <Benjamin.Welton@amd.com>
Co-authored-by: Benjamin Welton <ben@amd.com>
Co-authored-by: Indic, Vladimir <Vladimir.Indic@amd.com>
Co-authored-by: Benjamin Welton <bewelton@amd.com>

[ROCm/rocprofiler-sdk commit: ccd1e54293]
Этот коммит содержится в:
Trowbridge, Ian
2025-03-14 04:45:13 -05:00
коммит произвёл GitHub
родитель c08db2daa1
Коммит 7aeaffd871
50 изменённых файлов: 1812 добавлений и 220 удалений
+7
Просмотреть файл
@@ -542,6 +542,12 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
help="List available PC sampling configurations and metrics for counter collection. Backed by a valid YAML file. In earlier rocprof versions, this was known as --list-basic, --list-derived and --list-counters",
)
add_parser_bool_argument(
display_options,
"--group-by-queue",
help="For displaying the HIP streams that kernels and memory copy operations are submitted to rather than HSA queues.",
)
advanced_options = parser.add_argument_group("Advanced options")
advanced_options.add_argument(
@@ -1043,6 +1049,7 @@ def run(app_args, args, **kwargs):
["memory_copy_trace", "MEMORY_COPY_TRACE"],
["memory_allocation_trace", "MEMORY_ALLOCATION_TRACE"],
["scratch_memory_trace", "SCRATCH_MEMORY_TRACE"],
["group_by_queue", "GROUP_BY_QUEUE"],
]
).items():
val = getattr(args, f"{opt}")
+9 -9
Просмотреть файл
@@ -1,9 +1,9 @@
"Kind","Agent_Id","Queue_Id","Thread_Id","Dispatch_Id","Kernel_Id","Kernel_Name","Correlation_Id","Start_Timestamp","End_Timestamp","Private_Segment_Size","Group_Segment_Size","Workgroup_Size_X","Workgroup_Size_Y","Workgroup_Size_Z","Grid_Size_X","Grid_Size_Y","Grid_Size_Z"
"KERNEL_DISPATCH",1,1,69,1,16,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1451,8819330200067564,8819330200116308,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",1,2,69,5,16,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1484,8819330200118678,8819330200219573,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",1,1,69,2,19,"subtract_kernel(float*, float const*, float const*, int, int)",1459,8819330200120456,8819330200223721,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",1,3,69,9,16,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1517,8819330200152902,8819330200283428,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",1,4,69,13,16,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1550,8819330200187127,8819330200320468,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",1,2,69,6,19,"subtract_kernel(float*, float const*, float const*, int, int)",1492,8819330200225499,8819330200364618,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",1,1,69,3,18,"multiply_kernel(float*, float const*, float const*, int, int)",1467,8819330200229796,8819330200369359,0,0,64,1,1,1024,1024,1
"Kind","Agent_Id","Queue_Id","Stream_Id","Thread_Id","Dispatch_Id","Kernel_Id","Kernel_Name","Correlation_Id","Start_Timestamp","End_Timestamp","Private_Segment_Size","Group_Segment_Size","Workgroup_Size_X","Workgroup_Size_Y","Workgroup_Size_Z","Grid_Size_X","Grid_Size_Y","Grid_Size_Z"
"KERNEL_DISPATCH",2,1,1,21228,1,11,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1937,2414192765353337,2414192765369494,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",2,1,1,21228,2,14,"subtract_kernel(float*, float const*, float const*, int, int)",1945,2414192765424862,2414192765435326,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",2,1,1,21228,3,13,"multiply_kernel(float*, float const*, float const*, int, int)",1953,2414192765487486,2414192765497669,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",2,1,1,21228,4,12,"divide_kernel(float*, float const*, float const*, int, int)",1961,2414192765545619,2414192765555722,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",2,2,2,21228,5,11,"void addition_kernel<float>(float*, float const*, float const*, int, int)",1969,2414192765608844,2414192765621674,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",2,2,2,21228,6,14,"subtract_kernel(float*, float const*, float const*, int, int)",1977,2414192765658519,2414192765669424,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",2,2,2,21228,7,13,"multiply_kernel(float*, float const*, float const*, int, int)",1985,2414192765715650,2414192765726795,0,0,64,1,1,1024,1024,1
"KERNEL_DISPATCH",2,2,2,21228,8,12,"divide_kernel(float*, float const*, float const*, int, int)",1993,2414192765773422,2414192765784969,0,0,64,1,1,1024,1024,1
1 Kind Agent_Id Queue_Id Stream_Id Thread_Id Dispatch_Id Kernel_Id Kernel_Name Correlation_Id Start_Timestamp End_Timestamp Private_Segment_Size Group_Segment_Size Workgroup_Size_X Workgroup_Size_Y Workgroup_Size_Z Grid_Size_X Grid_Size_Y Grid_Size_Z
2 KERNEL_DISPATCH 1 2 1 1 69 21228 1 16 11 void addition_kernel<float>(float*, float const*, float const*, int, int) 1451 1937 8819330200067564 2414192765353337 8819330200116308 2414192765369494 0 0 64 1 1 1024 1024 1
3 KERNEL_DISPATCH 1 2 2 1 1 69 21228 5 2 16 14 void addition_kernel<float>(float*, float const*, float const*, int, int) subtract_kernel(float*, float const*, float const*, int, int) 1484 1945 8819330200118678 2414192765424862 8819330200219573 2414192765435326 0 0 64 1 1 1024 1024 1
4 KERNEL_DISPATCH 1 2 1 1 69 21228 2 3 19 13 subtract_kernel(float*, float const*, float const*, int, int) multiply_kernel(float*, float const*, float const*, int, int) 1459 1953 8819330200120456 2414192765487486 8819330200223721 2414192765497669 0 0 64 1 1 1024 1024 1
5 KERNEL_DISPATCH 1 2 3 1 1 69 21228 9 4 16 12 void addition_kernel<float>(float*, float const*, float const*, int, int) divide_kernel(float*, float const*, float const*, int, int) 1517 1961 8819330200152902 2414192765545619 8819330200283428 2414192765555722 0 0 64 1 1 1024 1024 1
6 KERNEL_DISPATCH 1 2 4 2 2 69 21228 13 5 16 11 void addition_kernel<float>(float*, float const*, float const*, int, int) 1550 1969 8819330200187127 2414192765608844 8819330200320468 2414192765621674 0 0 64 1 1 1024 1024 1
7 KERNEL_DISPATCH 1 2 2 2 69 21228 6 19 14 subtract_kernel(float*, float const*, float const*, int, int) 1492 1977 8819330200225499 2414192765658519 8819330200364618 2414192765669424 0 0 64 1 1 1024 1024 1
8 KERNEL_DISPATCH 1 2 1 2 2 69 21228 3 7 18 13 multiply_kernel(float*, float const*, float const*, int, int) 1467 1985 8819330200229796 2414192765715650 8819330200369359 2414192765726795 0 0 64 1 1 1024 1024 1
9 KERNEL_DISPATCH 2 2 2 21228 8 12 divide_kernel(float*, float const*, float const*, int, int) 1993 2414192765773422 2414192765784969 0 0 64 1 1 1024 1024 1
+5 -3
Просмотреть файл
@@ -1,3 +1,5 @@
"Kind","Direction","Source_Agent_Id","Destination_Agent_Id","Correlation_Id","Start_Timestamp","End_Timestamp"
"MEMORY_COPY","MEMORY_COPY_HOST_TO_DEVICE",0,1,0,14955949675563,14955950239443
"MEMORY_COPY","MEMORY_COPY_DEVICE_TO_HOST",1,0,0,14955952733485,14955953315285
"Kind","Direction","Stream_Id","Source_Agent_Id","Destination_Agent_Id","Correlation_Id","Start_Timestamp","End_Timestamp"
"MEMORY_COPY","MEMORY_COPY_HOST_TO_DEVICE",1,0,2,952,2414192684609085,2414192684710679
"MEMORY_COPY","MEMORY_COPY_HOST_TO_DEVICE",1,0,2,960,2414192684873841,2414192684973470
"MEMORY_COPY","MEMORY_COPY_HOST_TO_DEVICE",2,0,2,1066,2414192706436949,2414192706538622
"MEMORY_COPY","MEMORY_COPY_HOST_TO_DEVICE",2,0,2,1074,2414192706592442,2414192706692312
1 Kind Direction Stream_Id Source_Agent_Id Destination_Agent_Id Correlation_Id Start_Timestamp End_Timestamp
2 MEMORY_COPY MEMORY_COPY_HOST_TO_DEVICE 1 0 1 2 0 952 14955949675563 2414192684609085 14955950239443 2414192684710679
3 MEMORY_COPY MEMORY_COPY_DEVICE_TO_HOST MEMORY_COPY_HOST_TO_DEVICE 1 1 0 0 2 0 960 14955952733485 2414192684873841 14955953315285 2414192684973470
4 MEMORY_COPY MEMORY_COPY_HOST_TO_DEVICE 2 0 2 1066 2414192706436949 2414192706538622
5 MEMORY_COPY MEMORY_COPY_HOST_TO_DEVICE 2 0 2 1074 2414192706592442 2414192706692312
+5 -2
Просмотреть файл
@@ -354,7 +354,7 @@ Here are the contents of ``kernel_trace.csv`` file:
.. csv-table:: Kernel trace
:file: /data/kernel_trace.csv
:widths: 10,10,10,10,10,10,10,10,20,20,10,10,10,10,10,10,10,10
:widths: 10,10,10,10,10,10,10,10,10,20,20,10,10,10,10,10,10,10,10
:header-rows: 1
For the description of the fields in the output file, see :ref:`output-file-fields`.
@@ -378,7 +378,7 @@ Here are the contents of ``memory_copy_trace.csv`` file:
.. csv-table:: Memory copy trace
:file: /data/memory_copy_trace.csv
:widths: 10,10,10,10,10,20,20
:widths: 10,10,10,10,10,10,20,20
:header-rows: 1
For the description of the fields in the output file, see :ref:`output-file-fields`.
@@ -1086,6 +1086,9 @@ The following table lists the various fields or the columns in the output CSV fi
* - Queue_Id
- ROCm queue unique identifier to which the kernel was submitted.
* - Stream_Id
- Identifies HIP stream ID to which kernel or memory copy operation was submitted. Defaults to 0 if the hip-stream-display option is not enabled
* - Private_Segment_Size
- The amount of memory required in bytes for the combined private, spill, and arg segments for a work item.
+15 -1
Просмотреть файл
@@ -78,7 +78,12 @@ typedef struct
{
uint64_t size; ///< size of this struct
rocprofiler_hip_api_args_t args;
rocprofiler_hip_api_retval_t retval;
rocprofiler_hip_api_retval_t retval; ///< return value of function call
/// @var args
/// @brief Arguments of the function call. @see
/// ::rocprofiler_iterate_callback_tracing_kind_operation_args for generic
/// access/stringification of the arguments.
} rocprofiler_callback_tracing_hip_api_data_t;
/**
@@ -329,6 +334,15 @@ typedef struct rocprofiler_callback_tracing_runtime_initialization_data_t
/// Version number is encoded as: (10000 * MAJOR) + (100 * MINOR) + PATCH
} rocprofiler_callback_tracing_runtime_initialization_data_t;
/**
* @brief ROCProfiler Stream Handle Callback Data.
*/
typedef struct
{
uint64_t size; ///< size of this struct
rocprofiler_stream_id_t stream_id; ///< HIP stream ID
} rocprofiler_callback_tracing_stream_handle_data_t;
/**
* @brief API Tracing callback function. This function is invoked twice per API function: once
* before the function is invoked and once after the function is invoked. The external correlation
+1
Просмотреть файл
@@ -60,6 +60,7 @@ ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_context_id_t)
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_agent_id_t)
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_address_t)
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_queue_id_t)
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_stream_id_t)
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_buffer_id_t)
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_counter_id_t)
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_profile_config_id_t)
+5
Просмотреть файл
@@ -97,6 +97,7 @@ ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_context_id_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_address_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_agent_id_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_queue_id_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_stream_id_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_buffer_id_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_counter_id_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_profile_config_id_t)
@@ -114,6 +115,7 @@ ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_context_id_t)
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_address_t)
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_agent_id_t)
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_queue_id_t)
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_stream_id_t)
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_buffer_id_t)
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_counter_id_t)
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_profile_config_id_t)
@@ -141,6 +143,7 @@ ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_context_id_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_address_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_agent_id_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_queue_id_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_stream_id_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_buffer_id_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_counter_id_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_profile_config_id_t)
@@ -158,6 +161,7 @@ ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_context_id_t)
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_address_t)
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_agent_id_t)
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_queue_id_t)
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_stream_id_t)
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_buffer_id_t)
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_counter_id_t)
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_profile_config_id_t)
@@ -190,6 +194,7 @@ ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_context_id_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_address_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_agent_id_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_queue_id_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_stream_id_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_buffer_id_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_counter_id_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_profile_config_id_t)
+7
Просмотреть файл
@@ -114,6 +114,13 @@ save(ArchiveT& ar, rocprofiler_queue_id_t data)
ROCP_SDK_SAVE_DATA_FIELD(handle);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_stream_id_t data)
{
ROCP_SDK_SAVE_DATA_FIELD(handle);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_counter_id_t data)
+37 -9
Просмотреть файл
@@ -178,6 +178,8 @@ typedef enum // NOLINT(performance-enum-size)
///< library has been initialized
ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API, ///< rocDecode API Tracing
ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API, ///< rocJPEG API Tracing
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API, ///< @see
///< ::rocprofiler_hip_stream_operation_t
ROCPROFILER_CALLBACK_TRACING_LAST,
} rocprofiler_callback_tracing_kind_t;
@@ -209,8 +211,9 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_BUFFER_TRACING_RUNTIME_INITIALIZATION, ///< Record indicating a runtime library has
///< been initialized. @see
///< ::rocprofiler_runtime_initialization_operation_t
ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, ///< rocDecode tracing
ROCPROFILER_BUFFER_TRACING_ROCJPEG_API, ///< rocJPEG tracing
ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, ///< rocDecode tracing
ROCPROFILER_BUFFER_TRACING_ROCJPEG_API, ///< rocJPEG tracing
ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API, ///< Display HIP Stream
ROCPROFILER_BUFFER_TRACING_LAST,
} rocprofiler_buffer_tracing_kind_t;
@@ -226,6 +229,23 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_CODE_OBJECT_LAST,
} rocprofiler_code_object_operation_t;
/**
* @brief ROCProfiler Stream Handle Operations.
*/
typedef enum // NOLINT(performance-enum-size)
{
ROCPROFILER_HIP_STREAM_NONE = 0, ///< Unknown stream handle operation
ROCPROFILER_HIP_STREAM_CREATE, ///< A stream handle is created
ROCPROFILER_HIP_STREAM_DESTROY, ///< A stream handle is destroyed
ROCPROFILER_HIP_STREAM_SET,
ROCPROFILER_HIP_STREAM_LAST,
/// @var ROCPROFILER_HIP_STREAM_SET
/// @brief Invokes callbacks before and after a HIP API, kernel dispatch, or memory copy
/// operation that has a stream handle associated with it. HIP API calls will always have a
/// stream, but kernel dispatches and memory copy operations may or may not.
} rocprofiler_hip_stream_operation_t;
/**
* @brief Memory Copy Operations.
*/
@@ -556,7 +576,7 @@ typedef union rocprofiler_uuid_t
/**
* @brief Context ID.
*/
typedef struct
typedef struct rocprofiler_context_id_t
{
uint64_t handle;
} rocprofiler_context_id_t;
@@ -564,15 +584,23 @@ typedef struct
/**
* @brief Queue ID.
*/
typedef struct
typedef struct rocprofiler_queue_id_t
{
uint64_t handle;
} rocprofiler_queue_id_t;
/**
* @brief Stream ID.
*/
typedef struct rocprofiler_stream_id_t
{
uint64_t handle;
} rocprofiler_stream_id_t;
/**
* @brief ROCProfiler Record Correlation ID.
*/
typedef struct
typedef struct rocprofiler_correlation_id_t
{
uint64_t internal;
rocprofiler_user_data_t external;
@@ -587,7 +615,7 @@ typedef struct
* @struct rocprofiler_buffer_id_t
* @brief Buffer ID.
*/
typedef struct
typedef struct rocprofiler_buffer_id_t
{
uint64_t handle;
} rocprofiler_buffer_id_t;
@@ -595,7 +623,7 @@ typedef struct
/**
* @brief Agent Identifier
*/
typedef struct
typedef struct rocprofiler_agent_id_t
{
uint64_t handle;
} rocprofiler_agent_id_t;
@@ -603,7 +631,7 @@ typedef struct
/**
* @brief Counter ID.
*/
typedef struct
typedef struct rocprofiler_counter_id_t
{
uint64_t handle;
} rocprofiler_counter_id_t;
@@ -612,7 +640,7 @@ typedef struct
* @brief Profile Configurations
* @see rocprofiler_create_profile_config for how to create.
*/
typedef struct
typedef struct rocprofiler_profile_config_id_t
{
uint64_t handle; // Opaque handle
} rocprofiler_profile_config_id_t;
+2
Просмотреть файл
@@ -52,3 +52,5 @@ target_link_libraries(
set_target_properties(rocprofiler-sdk-common-library PROPERTIES OUTPUT_NAME
rocprofiler-sdk-common)
add_subdirectory(details)
+8
Просмотреть файл
@@ -0,0 +1,8 @@
#
# add details sources and headers to common library target
#
set(details_headers mpl.hpp)
set(details_sources)
target_sources(rocprofiler-sdk-common-library PRIVATE ${details_sources}
${details_headers})
+211
Просмотреть файл
@@ -0,0 +1,211 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <cstddef>
#include <functional>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
namespace rocprofiler
{
namespace common
{
namespace mpl
{
namespace impl
{
template <typename... Tp>
struct type_list
{
static constexpr auto size() { return sizeof...(Tp); }
};
template <typename InTuple, typename OutTuple>
struct reverse;
template <template <typename...> class InTuple,
typename InT,
typename... InTail,
template <typename...>
class OutTuple,
typename... OutTail>
struct reverse<InTuple<InT, InTail...>, OutTuple<OutTail...>>
: reverse<InTuple<InTail...>, OutTuple<InT, OutTail...>>
{};
template <template <typename...> class InTuple,
template <typename...>
class OutTuple,
typename... OutTail>
struct reverse<InTuple<>, OutTuple<OutTail...>>
{
using type = OutTuple<OutTail...>;
};
template <template <typename...> class InTuple, typename... InTail>
struct reverse<InTuple<InTail...>, void> : reverse<InTuple<InTail...>, InTuple<>>
{};
template <typename T>
struct function_traits;
template <typename T>
struct function_traits<T&> : function_traits<T>
{};
template <typename R, typename... Args>
struct function_traits<std::function<R(Args...)>>
{
static constexpr bool is_memfun = false;
static constexpr bool is_const = false;
static constexpr size_t nargs = sizeof...(Args);
using result_type = R;
using args_type = type_list<Args...>;
using call_type = args_type;
};
template <typename R, typename... Args>
struct function_traits<R (*)(Args...)>
{
static constexpr bool is_memfun = false;
static constexpr bool is_const = false;
static constexpr size_t nargs = sizeof...(Args);
using result_type = R;
using args_type = type_list<Args...>;
using call_type = args_type;
};
template <typename R, typename... Args>
struct function_traits<R(Args...)>
{
static constexpr bool is_memfun = false;
static constexpr bool is_const = false;
static constexpr size_t nargs = sizeof...(Args);
using result_type = R;
using args_type = type_list<Args...>;
using call_type = args_type;
};
// member function pointer
template <typename C, typename R, typename... Args>
struct function_traits<R (C::*)(Args...)>
{
static constexpr bool is_memfun = true;
static constexpr bool is_const = false;
static constexpr size_t nargs = sizeof...(Args);
using result_type = R;
using args_type = type_list<Args...>;
using call_type = type_list<C&, Args...>;
};
// const member function pointer
template <typename C, typename R, typename... Args>
struct function_traits<R (C::*)(Args...) const>
{
static constexpr bool is_memfun = true;
static constexpr bool is_const = true;
static constexpr size_t nargs = sizeof...(Args);
using result_type = R;
using args_type = type_list<Args...>;
using call_type = type_list<C&, Args...>;
};
// member object pointer
template <typename C, typename R>
struct function_traits<R(C::*)>
{
static constexpr bool is_memfun = true;
static constexpr bool is_const = false;
static const size_t nargs = 0;
using result_type = R;
using args_type = type_list<>;
using call_type = type_list<C&>;
};
#if __cplusplus >= 201703L
template <typename R, typename... Args>
struct function_traits<std::function<R(Args...) noexcept>>
{
static constexpr bool is_memfun = false;
static constexpr bool is_const = false;
static constexpr size_t nargs = sizeof...(Args);
using result_type = R;
using args_type = type_list<Args...>;
using call_type = args_type;
};
template <typename R, typename... Args>
struct function_traits<R (*)(Args...) noexcept>
{
static constexpr bool is_memfun = false;
static constexpr bool is_const = false;
static constexpr size_t nargs = sizeof...(Args);
using result_type = R;
using args_type = type_list<Args...>;
using call_type = args_type;
};
template <typename R, typename... Args>
struct function_traits<R(Args...) noexcept>
{
static constexpr bool is_memfun = false;
static constexpr bool is_const = false;
static constexpr size_t nargs = sizeof...(Args);
using result_type = R;
using args_type = type_list<Args...>;
using call_type = args_type;
};
// member function pointer
template <typename C, typename R, typename... Args>
struct function_traits<R (C::*)(Args...) noexcept>
{
static constexpr bool is_memfun = true;
static constexpr bool is_const = false;
static constexpr size_t nargs = sizeof...(Args);
using result_type = R;
using args_type = type_list<Args...>;
using call_type = type_list<C&, Args...>;
};
// const member function pointer
template <typename C, typename R, typename... Args>
struct function_traits<R (C::*)(Args...) const noexcept>
{
static constexpr bool is_memfun = true;
static constexpr bool is_const = true;
static constexpr size_t nargs = sizeof...(Args);
using result_type = R;
using args_type = type_list<Args...>;
using call_type = type_list<C&, Args...>;
};
#endif
} // namespace impl
} // namespace mpl
} // namespace common
} // namespace rocprofiler
+11
Просмотреть файл
@@ -22,6 +22,8 @@
#pragma once
#include "lib/common/details/mpl.hpp"
#include <cstddef>
#include <string>
#include <string_view>
@@ -162,6 +164,15 @@ struct assert_false
{
static constexpr auto value = false;
};
template <typename InTuple>
using reverse = typename impl::reverse<InTuple, void>::type;
template <typename Tp>
using function_traits = impl::function_traits<Tp>;
template <typename Tp>
using function_args_t = typename impl::function_traits<Tp>::args_type;
} // namespace mpl
} // namespace common
} // namespace rocprofiler
+1
Просмотреть файл
@@ -25,6 +25,7 @@ set(TOOL_OUTPUT_HEADERS
output_key.hpp
output_stream.hpp
statistics.hpp
stream_info.hpp
timestamps.hpp
tmp_file_buffer.hpp
tmp_file.hpp)
+6 -5
Просмотреть файл
@@ -26,6 +26,7 @@
#include "generator.hpp"
#include "pc_sample_transform.hpp"
#include "statistics.hpp"
#include "stream_info.hpp"
#include "tmp_file_buffer.hpp"
#include "lib/common/container/ring_buffer.hpp"
@@ -140,11 +141,6 @@ using hip_buffered_output_t =
buffered_output<rocprofiler_buffer_tracing_hip_api_record_t, domain_type::HIP>;
using hsa_buffered_output_t =
buffered_output<rocprofiler_buffer_tracing_hsa_api_record_t, domain_type::HSA>;
using kernel_dispatch_buffered_output_t =
buffered_output<rocprofiler_buffer_tracing_kernel_dispatch_record_t,
domain_type::KERNEL_DISPATCH>;
using memory_copy_buffered_output_t =
buffered_output<rocprofiler_buffer_tracing_memory_copy_record_t, domain_type::MEMORY_COPY>;
using marker_buffered_output_t =
buffered_output<rocprofiler_buffer_tracing_marker_api_record_t, domain_type::MARKER>;
using rccl_buffered_output_t =
@@ -167,5 +163,10 @@ using rocdecode_buffered_output_t =
buffered_output<rocprofiler_buffer_tracing_rocdecode_api_record_t, domain_type::ROCDECODE>;
using rocjpeg_buffered_output_t =
buffered_output<rocprofiler_buffer_tracing_rocjpeg_api_record_t, domain_type::ROCJPEG>;
using kernel_dispatch_buffered_output_with_stream_t =
buffered_output<tool_buffer_tracing_kernel_dispatch_with_stream_record_t,
domain_type::KERNEL_DISPATCH>;
using memory_copy_buffered_output_with_stream_t =
buffered_output<tool_buffer_tracing_memory_copy_with_stream_record_t, domain_type::MEMORY_COPY>;
} // namespace tool
} // namespace rocprofiler
+5 -3
Просмотреть файл
@@ -93,9 +93,10 @@ struct tool_counter_record_t
{
using container_type = std::vector<tool_counter_value_t>;
uint64_t thread_id = 0;
rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
serialized_counter_record_t record = {};
uint64_t thread_id = 0;
rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
serialized_counter_record_t record = {};
uint64_t kernel_rename_val = {};
template <typename ArchiveT>
void save(ArchiveT& ar) const
@@ -106,6 +107,7 @@ struct tool_counter_record_t
ar(cereal::make_nvp("thread_id", thread_id));
ar(cereal::make_nvp("dispatch_data", dispatch_data));
ar(cereal::make_nvp("records", tmp));
ar(cereal::make_nvp("kernel_rename_val", kernel_rename_val));
}
container_type read() const;
+12 -12
Просмотреть файл
@@ -99,18 +99,18 @@ struct csv_encoder
}
};
using api_csv_encoder = csv_encoder<7>;
using agent_info_csv_encoder = csv_encoder<53>;
using kernel_trace_csv_encoder = csv_encoder<18>;
using counter_collection_csv_encoder = csv_encoder<19>;
using memory_copy_csv_encoder = csv_encoder<7>;
using memory_allocation_csv_encoder = csv_encoder<8>;
using marker_csv_encoder = csv_encoder<7>;
using list_basic_metrics_csv_encoder = csv_encoder<5>;
using list_derived_metrics_csv_encoder = csv_encoder<5>;
using scratch_memory_encoder = csv_encoder<8>;
using stats_csv_encoder = csv_encoder<8>;
using pc_sampling_host_trap_csv_encoder = csv_encoder<6>;
using api_csv_encoder = csv_encoder<7>;
using agent_info_csv_encoder = csv_encoder<53>;
using counter_collection_csv_encoder = csv_encoder<19>;
using memory_allocation_csv_encoder = csv_encoder<8>;
using marker_csv_encoder = csv_encoder<7>;
using list_basic_metrics_csv_encoder = csv_encoder<5>;
using list_derived_metrics_csv_encoder = csv_encoder<5>;
using scratch_memory_encoder = csv_encoder<8>;
using stats_csv_encoder = csv_encoder<8>;
using pc_sampling_host_trap_csv_encoder = csv_encoder<6>;
using kernel_trace_with_stream_csv_encoder = csv_encoder<19>;
using memory_copy_with_stream_csv_encoder = csv_encoder<8>;
} // namespace csv
} // namespace tool
} // namespace rocprofiler
+18 -18
Просмотреть файл
@@ -249,22 +249,22 @@ generate_csv(const output_config& cfg,
}
void
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data,
const stats_entry_t& stats)
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data,
const stats_entry_t& stats)
{
if(data.empty()) return;
if(cfg.stats && stats)
write_stats(get_stats_output_file(cfg, domain_type::KERNEL_DISPATCH), stats.entries);
auto ofs = tool::csv_output_file{cfg,
domain_type::KERNEL_DISPATCH,
tool::csv::kernel_trace_csv_encoder{},
tool::csv::kernel_trace_with_stream_csv_encoder{},
{"Kind",
"Agent_Id",
"Queue_Id",
"Stream_Id",
"Thread_Id",
"Dispatch_Id",
"Kernel_Id",
@@ -287,14 +287,14 @@ generate_csv(const output_config&
{
auto row_ss = std::stringstream{};
auto kernel_name = tool_metadata.get_kernel_name(record.dispatch_info.kernel_id,
record.correlation_id.external.value);
rocprofiler::tool::csv::kernel_trace_csv_encoder::write_row(
record.kernel_rename_val);
rocprofiler::tool::csv::kernel_trace_with_stream_csv_encoder::write_row(
row_ss,
tool_metadata.get_kind_name(record.kind),
tool_metadata.get_agent_index(record.dispatch_info.agent_id, cfg.agent_index_value)
.as_string(),
record.dispatch_info.queue_id.handle,
record.stream_id.handle,
record.thread_id,
record.dispatch_info.dispatch_id,
record.dispatch_info.kernel_id,
@@ -310,7 +310,6 @@ generate_csv(const output_config&
record.dispatch_info.grid_size.x,
record.dispatch_info.grid_size.y,
record.dispatch_info.grid_size.z);
ofs << row_ss.str();
}
}
@@ -400,10 +399,10 @@ generate_csv(const output_config& cfg,
}
void
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data,
const stats_entry_t& stats)
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data,
const stats_entry_t& stats)
{
if(data.empty()) return;
@@ -412,9 +411,10 @@ generate_csv(const output_config& c
auto ofs = tool::csv_output_file{cfg,
domain_type::MEMORY_COPY,
tool::csv::memory_copy_csv_encoder{},
tool::csv::memory_copy_with_stream_csv_encoder{},
{"Kind",
"Direction",
"Stream_Id",
"Source_Agent_Id",
"Destination_Agent_Id",
"Correlation_Id",
@@ -427,10 +427,11 @@ generate_csv(const output_config& c
{
auto row_ss = std::stringstream{};
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
rocprofiler::tool::csv::memory_copy_csv_encoder::write_row(
rocprofiler::tool::csv::memory_copy_with_stream_csv_encoder::write_row(
row_ss,
tool_metadata.get_kind_name(record.kind),
api_name,
record.stream_id.handle,
tool_metadata.get_agent_index(record.src_agent_id, cfg.agent_index_value)
.as_string(),
tool_metadata.get_agent_index(record.dst_agent_id, cfg.agent_index_value)
@@ -438,7 +439,6 @@ generate_csv(const output_config& c
record.correlation_id.internal,
record.start_timestamp,
record.end_timestamp);
ofs << row_ss.str();
}
}
@@ -626,7 +626,7 @@ generate_csv(const output_config& cfg,
record.thread_id,
magnitude(record.dispatch_data.dispatch_info.grid_size),
record.dispatch_data.dispatch_info.kernel_id,
tool_metadata.get_kernel_name(kernel_id, correlation_id.external.value),
tool_metadata.get_kernel_name(kernel_id, record.kernel_rename_val),
magnitude(record.dispatch_data.dispatch_info.workgroup_size),
lds_block_size_v,
record.dispatch_data.dispatch_info.private_segment_size,
+8 -8
Просмотреть файл
@@ -40,10 +40,10 @@ generate_csv(const output_config& cfg,
std::vector<agent_info>& data);
void
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data,
const stats_entry_t& stats);
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data,
const stats_entry_t& stats);
void
generate_csv(const output_config& cfg,
@@ -58,10 +58,10 @@ generate_csv(const output_config& cfg,
const stats_entry_t& stats);
void
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data,
const stats_entry_t& stats);
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data,
const stats_entry_t& stats);
void
generate_csv(const output_config& cfg,
+5 -5
Просмотреть файл
@@ -185,11 +185,11 @@ void
write_json(json_output& json_ar,
const output_config& /*cfg*/,
const metadata& /*tool_metadata*/,
const domain_stats_vec_t& domain_stats,
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t> kernel_dispatch_gen,
generator<rocprofiler_buffer_tracing_memory_copy_record_t> memory_copy_gen,
const domain_stats_vec_t& domain_stats,
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t> kernel_dispatch_gen,
generator<tool_buffer_tracing_memory_copy_with_stream_record_t> memory_copy_gen,
generator<tool_counter_record_t> counter_collection_gen,
generator<rocprofiler_buffer_tracing_marker_api_record_t> marker_api_gen,
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
+8 -8
Просмотреть файл
@@ -81,14 +81,14 @@ void
write_json(json_output&, const output_config& cfg, const metadata& tool_metadata, uint64_t pid);
void
write_json(json_output& json_ar,
const output_config& cfg,
const metadata& tool_metadata,
const domain_stats_vec_t& domain_stats,
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t> kernel_dispatch_gen,
generator<rocprofiler_buffer_tracing_memory_copy_record_t> memory_copy_gen,
write_json(json_output& json_ar,
const output_config& cfg,
const metadata& tool_metadata,
const domain_stats_vec_t& domain_stats,
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t> kernel_dispatch_gen,
generator<tool_buffer_tracing_memory_copy_with_stream_record_t> memory_copy_gen,
generator<tool_counter_record_t> counter_collection_gen,
generator<rocprofiler_buffer_tracing_marker_api_record_t> marker_api_gen,
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
+10 -11
Просмотреть файл
@@ -356,15 +356,15 @@ create_attribute_list()
void
write_otf2(
const output_config& cfg,
const metadata& tool_metadata,
uint64_t pid,
const std::vector<agent_info>& agent_data,
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>* kernel_dispatch_data,
std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>* memory_copy_data,
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
const output_config& cfg,
const metadata& tool_metadata,
uint64_t pid,
const std::vector<agent_info>& agent_data,
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
std::deque<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>* kernel_dispatch_data,
std::deque<tool_buffer_tracing_memory_copy_with_stream_record_t>* memory_copy_data,
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* /*scratch_memory_data*/,
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data,
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data,
@@ -676,8 +676,7 @@ write_otf2(
const auto* sym = _get_kernel_sym_data(info);
CHECK(sym != nullptr);
auto name =
tool_metadata.get_kernel_name(info.kernel_id, itr.correlation_id.external.value);
auto name = tool_metadata.get_kernel_name(info.kernel_id, itr.kernel_rename_val);
_hash_data.emplace(
get_hash_id(name),
region_info{std::string{name}, OTF2_REGION_ROLE_FUNCTION, OTF2_PARADIGM_HIP});
+15 -14
Просмотреть файл
@@ -25,6 +25,7 @@
#include "agent_info.hpp"
#include "metadata.hpp"
#include "output_config.hpp"
#include "stream_info.hpp"
#include <cstdint>
#include <deque>
@@ -35,19 +36,19 @@ namespace tool
{
void
write_otf2(
const output_config& cfg,
const metadata& tool_metadata,
uint64_t pid,
const std::vector<agent_info>& agent_data,
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>* kernel_dispatch_data,
std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>* memory_copy_data,
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* scratch_memory_data,
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data,
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data,
std::deque<rocprofiler_buffer_tracing_rocdecode_api_record_t>* rocdecode_api_data,
std::deque<rocprofiler_buffer_tracing_rocjpeg_api_record_t>* rocjpeg_api_data);
const output_config& cfg,
const metadata& tool_metadata,
uint64_t pid,
const std::vector<agent_info>& agent_data,
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
std::deque<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>* kernel_dispatch_data,
std::deque<tool_buffer_tracing_memory_copy_with_stream_record_t>* memory_copy_data,
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* scratch_memory_data,
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data,
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data,
std::deque<rocprofiler_buffer_tracing_rocdecode_api_record_t>* rocdecode_api_data,
std::deque<rocprofiler_buffer_tracing_rocjpeg_api_record_t>* rocjpeg_api_data);
} // namespace tool
} // namespace rocprofiler
+106 -19
Просмотреть файл
@@ -65,14 +65,14 @@ get_hash_id(Tp&& _val)
void
write_perfetto(
const output_config& ocfg,
const metadata& tool_metadata,
std::vector<agent_info> agent_data,
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& kernel_dispatch_gen,
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& memory_copy_gen,
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
const output_config& ocfg,
const metadata& tool_metadata,
std::vector<agent_info> agent_data,
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& kernel_dispatch_gen,
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& memory_copy_gen,
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& /*scratch_memory_gen*/,
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen,
const generator<rocprofiler_buffer_tracing_memory_allocation_record_t>& memory_allocation_gen,
@@ -139,6 +139,8 @@ write_perfetto(
auto agent_thread_ids_alloc = std::unordered_map<rocprofiler_agent_id_t, std::set<uint64_t>>{};
auto agent_queue_ids =
std::unordered_map<rocprofiler_agent_id_t, std::unordered_set<rocprofiler_queue_id_t>>{};
auto agent_stream_ids =
std::unordered_map<rocprofiler_agent_id_t, std::unordered_set<rocprofiler_stream_id_t>>{};
auto thread_indexes = std::unordered_map<rocprofiler_thread_id_t, uint64_t>{};
auto thread_tracks = std::unordered_map<rocprofiler_thread_id_t, ::perfetto::Track>{};
@@ -151,6 +153,12 @@ write_perfetto(
auto agent_queue_tracks =
std::unordered_map<rocprofiler_agent_id_t,
std::unordered_map<rocprofiler_queue_id_t, ::perfetto::Track>>{};
auto agent_stream_compute_tracks =
std::unordered_map<rocprofiler_agent_id_t,
std::unordered_map<rocprofiler_stream_id_t, ::perfetto::Track>>{};
auto agent_stream_copy_tracks =
std::unordered_map<rocprofiler_agent_id_t,
std::unordered_map<rocprofiler_stream_id_t, ::perfetto::Track>>{};
auto _get_agent = [&agent_data](rocprofiler_agent_id_t _id) -> const rocprofiler_agent_t* {
for(const auto& itr : agent_data)
@@ -184,7 +192,11 @@ write_perfetto(
for(auto itr : memory_copy_gen.get(ditr))
{
tids.emplace(itr.thread_id);
agent_thread_ids[itr.dst_agent_id].emplace(itr.thread_id);
agent_stream_ids[itr.dst_agent_id].emplace(itr.stream_id);
if(ocfg.group_by_queue)
{
agent_thread_ids[itr.dst_agent_id].emplace(itr.thread_id);
}
}
for(auto ditr : memory_allocation_gen)
@@ -198,7 +210,11 @@ write_perfetto(
for(auto itr : kernel_dispatch_gen.get(ditr))
{
tids.emplace(itr.thread_id);
agent_queue_ids[itr.dispatch_info.agent_id].emplace(itr.dispatch_info.queue_id);
agent_stream_ids[itr.dispatch_info.agent_id].emplace(itr.stream_id);
if(ocfg.group_by_queue)
{
agent_queue_ids[itr.dispatch_info.agent_id].emplace(itr.dispatch_info.queue_id);
}
}
}
@@ -276,6 +292,57 @@ write_perfetto(
}
}
for(const auto& aitr : agent_stream_ids)
{
for(auto sitr : aitr.second)
{
const auto* _agent = _get_agent(aitr.first);
const auto stream_id = sitr.handle;
{
auto _namess = std::stringstream{};
_namess << "COMPUTE AGENT [" << _agent->logical_node_id << "] STREAM [" << stream_id
<< "] ";
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
_namess << "(CPU)";
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
_namess << "(GPU)";
else
_namess << "(UNK)";
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
auto _desc = _track.Serialize();
_desc.set_name(_namess.str());
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
agent_stream_compute_tracks[aitr.first].emplace(sitr, _track);
}
{
auto _namess = std::stringstream{};
_namess << "COPY to AGENT [" << _agent->logical_node_id << "] STREAM [" << stream_id
<< "] ";
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
_namess << "(CPU)";
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
_namess << "(GPU)";
else
_namess << "(UNK)";
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
auto _desc = _track.Serialize();
_desc.set_name(_namess.str());
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
agent_stream_copy_tracks[aitr.first].emplace(sitr, _track);
}
}
}
// trace events
{
auto buffer_names = sdk::get_buffer_tracing_names();
@@ -471,13 +538,22 @@ write_perfetto(
for(auto ditr : memory_copy_gen)
for(auto itr : memory_copy_gen.get(ditr))
{
auto name = buffer_names.at(itr.kind, itr.operation);
auto& track = agent_thread_tracks.at(itr.dst_agent_id).at(itr.thread_id);
auto name = buffer_names.at(itr.kind, itr.operation);
::perfetto::Track* _track = nullptr;
if(ocfg.group_by_queue)
{
_track = &agent_thread_tracks.at(itr.dst_agent_id).at(itr.thread_id);
}
else
{
_track = &agent_stream_copy_tracks.at(itr.dst_agent_id).at(itr.stream_id);
}
TRACE_EVENT_BEGIN(
sdk::perfetto_category<sdk::category::memory_copy>::name,
::perfetto::StaticString(name.data()),
track,
*_track,
itr.start_timestamp,
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
"begin_ns",
@@ -503,8 +579,9 @@ write_perfetto(
"tid",
itr.thread_id);
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::memory_copy>::name,
track,
*_track,
itr.end_timestamp);
tracing_session->FlushBlocking();
}
for(auto ditr : kernel_dispatch_gen)
@@ -516,7 +593,7 @@ write_perfetto(
rocprofiler_agent_id_t,
std::unordered_map<
rocprofiler_queue_id_t,
std::vector<rocprofiler_buffer_tracing_kernel_dispatch_record_t*>>>{};
std::vector<tool_buffer_tracing_kernel_dispatch_with_stream_record_t*>>>{};
for(auto& itr : generator)
{
const auto& info = itr.dispatch_info;
@@ -544,8 +621,18 @@ write_perfetto(
CHECK(sym != nullptr);
auto name = std::string_view{sym->kernel_name};
auto& track = agent_queue_tracks.at(info.agent_id).at(info.queue_id);
auto name = std::string_view{sym->kernel_name};
::perfetto::Track* _track = nullptr;
if(ocfg.group_by_queue)
{
_track = &agent_queue_tracks.at(info.agent_id).at(info.queue_id);
}
else
{
_track =
&agent_stream_compute_tracks.at(info.agent_id).at((*it)->stream_id);
}
// Temporary fix until timestamp issues are resolved: Set timestamps to be
// halfway between ending timestamp and starting timestamp of overlapping
@@ -579,7 +666,7 @@ write_perfetto(
TRACE_EVENT_BEGIN(
sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
::perfetto::StaticString(demangled.at(name).c_str()),
track,
*_track,
current.start_timestamp,
::perfetto::Flow::ProcessScoped(current.correlation_id.internal),
"begin_ns",
@@ -613,7 +700,7 @@ write_perfetto(
info.grid_size.x * info.grid_size.y * info.grid_size.z);
TRACE_EVENT_END(
sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
track,
*_track,
current.end_timestamp);
tracing_session->FlushBlocking();
}
+11 -10
Просмотреть файл
@@ -26,6 +26,7 @@
#include "generator.hpp"
#include "metadata.hpp"
#include "output_config.hpp"
#include "stream_info.hpp"
#include <cstdint>
#include <deque>
@@ -36,16 +37,16 @@ namespace tool
{
void
write_perfetto(
const output_config& cfg,
const metadata& tool_metadata,
std::vector<agent_info> agent_data,
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& kernel_dispatch_gen,
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& memory_copy_gen,
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& scratch_memory_gen,
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen,
const output_config& cfg,
const metadata& tool_metadata,
std::vector<agent_info> agent_data,
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& kernel_dispatch_gen,
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& memory_copy_gen,
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& scratch_memory_gen,
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen,
const generator<rocprofiler_buffer_tracing_memory_allocation_record_t>& memory_allocation_gen,
const generator<rocprofiler_buffer_tracing_rocdecode_api_record_t>& rocdecode_api_gen,
const generator<rocprofiler_buffer_tracing_rocjpeg_api_record_t>& rocjpeg_api_gen);
+5 -5
Просмотреть файл
@@ -63,8 +63,8 @@ get_stats(const stats_map_t& data_v)
stats_entry_t
generate_stats(const output_config& /*cfg*/,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data)
const metadata& tool_metadata,
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data)
{
auto kernel_stats = stats_map_t{};
for(auto ditr : data)
@@ -72,7 +72,7 @@ generate_stats(const output_config& /*cfg*/,
for(auto record : data.get(ditr))
{
auto kernel_name = tool_metadata.get_kernel_name(record.dispatch_info.kernel_id,
record.correlation_id.external.value);
record.kernel_rename_val);
kernel_stats[kernel_name] += (record.end_timestamp - record.start_timestamp);
}
@@ -119,8 +119,8 @@ generate_stats(const output_config& /*cfg*/,
stats_entry_t
generate_stats(const output_config& /*cfg*/,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data)
const metadata& tool_metadata,
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data)
{
auto memory_copy_stats = stats_map_t{};
for(auto ditr : data)
+7 -6
Просмотреть файл
@@ -25,15 +25,16 @@
#include "generator.hpp"
#include "metadata.hpp"
#include "statistics.hpp"
#include "stream_info.hpp"
namespace rocprofiler
{
namespace tool
{
stats_entry_t
generate_stats(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data);
generate_stats(const output_config& cfg,
const metadata& tool_metadata,
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data);
stats_entry_t
generate_stats(const output_config& cfg,
@@ -46,9 +47,9 @@ generate_stats(const output_config& cfg
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& data);
stats_entry_t
generate_stats(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data);
generate_stats(const output_config& cfg,
const metadata& tool_metadata,
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data);
stats_entry_t
generate_stats(const output_config& cfg,
+6 -6
Просмотреть файл
@@ -58,12 +58,12 @@ output_config::parse_env()
common::get_env("ROCPROF_PERFETTO_SHMEM_SIZE_HINT_KB", perfetto_shmem_size_hint);
perfetto_buffer_size = common::get_env("ROCPROF_PERFETTO_BUFFER_SIZE_KB", perfetto_buffer_size);
output_path = common::get_env("ROCPROF_OUTPUT_PATH", output_path);
output_file = common::get_env("ROCPROF_OUTPUT_FILE_NAME", output_file);
tmp_directory = common::get_env("ROCPROF_TMPDIR", tmp_directory);
kernel_rename = common::get_env("ROCPROF_KERNEL_RENAME", false);
auto to_upper = [](std::string val) {
output_path = common::get_env("ROCPROF_OUTPUT_PATH", output_path);
output_file = common::get_env("ROCPROF_OUTPUT_FILE_NAME", output_file);
tmp_directory = common::get_env("ROCPROF_TMPDIR", tmp_directory);
kernel_rename = common::get_env("ROCPROF_KERNEL_RENAME", false);
group_by_queue = common::get_env("ROCPROF_GROUP_BY_QUEUE", false);
auto to_upper = [](std::string val) {
for(auto& vitr : val)
vitr = toupper(vitr);
return val;
+1
Просмотреть файл
@@ -69,6 +69,7 @@ struct output_config
bool otf2_output = false;
bool summary_output = false;
bool kernel_rename = false;
bool group_by_queue = false;
uint64_t stats_summary_unit_value = 1;
size_t perfetto_shmem_size_hint = defaults::perfetto_shmem_size_hint_kb;
size_t perfetto_buffer_size = defaults::perfetto_buffer_size_kb;
+118
Просмотреть файл
@@ -0,0 +1,118 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "lib/common/logging.hpp"
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/cxx/hash.hpp>
#include <rocprofiler-sdk/cxx/name_info.hpp>
#include <rocprofiler-sdk/cxx/operators.hpp>
#include <rocprofiler-sdk/cxx/serialization.hpp>
namespace rocprofiler
{
namespace tool
{
struct tool_buffer_tracing_kernel_dispatch_with_stream_record_t
: rocprofiler_buffer_tracing_kernel_dispatch_record_t
{
using base_type = rocprofiler_buffer_tracing_kernel_dispatch_record_t;
tool_buffer_tracing_kernel_dispatch_with_stream_record_t(
const base_type& _base,
const rocprofiler_stream_id_t& _stream_id,
const uint64_t& _kernel_rename_val)
: base_type{_base}
, stream_id{_stream_id}
, kernel_rename_val{_kernel_rename_val}
{}
tool_buffer_tracing_kernel_dispatch_with_stream_record_t();
~tool_buffer_tracing_kernel_dispatch_with_stream_record_t() = default;
tool_buffer_tracing_kernel_dispatch_with_stream_record_t(
const tool_buffer_tracing_kernel_dispatch_with_stream_record_t&) = default;
tool_buffer_tracing_kernel_dispatch_with_stream_record_t(
tool_buffer_tracing_kernel_dispatch_with_stream_record_t&&) noexcept = default;
tool_buffer_tracing_kernel_dispatch_with_stream_record_t& operator =(
const tool_buffer_tracing_kernel_dispatch_with_stream_record_t&) = default;
tool_buffer_tracing_kernel_dispatch_with_stream_record_t& operator =(
tool_buffer_tracing_kernel_dispatch_with_stream_record_t&&) noexcept = default;
rocprofiler_stream_id_t stream_id = {};
uint64_t kernel_rename_val = {};
};
struct tool_buffer_tracing_memory_copy_with_stream_record_t
: rocprofiler_buffer_tracing_memory_copy_record_t
{
using base_type = rocprofiler_buffer_tracing_memory_copy_record_t;
tool_buffer_tracing_memory_copy_with_stream_record_t(const base_type& _base,
const rocprofiler_stream_id_t& _stream_id)
: base_type{_base}
, stream_id{_stream_id}
{}
tool_buffer_tracing_memory_copy_with_stream_record_t();
~tool_buffer_tracing_memory_copy_with_stream_record_t() = default;
tool_buffer_tracing_memory_copy_with_stream_record_t(
const tool_buffer_tracing_memory_copy_with_stream_record_t&) = default;
tool_buffer_tracing_memory_copy_with_stream_record_t(
tool_buffer_tracing_memory_copy_with_stream_record_t&&) noexcept = default;
tool_buffer_tracing_memory_copy_with_stream_record_t& operator =(
const tool_buffer_tracing_memory_copy_with_stream_record_t&) = default;
tool_buffer_tracing_memory_copy_with_stream_record_t& operator =(
tool_buffer_tracing_memory_copy_with_stream_record_t&&) noexcept = default;
rocprofiler_stream_id_t stream_id = {};
};
} // namespace tool
} // namespace rocprofiler
namespace cereal
{
#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD))
template <typename ArchiveT>
void
save(ArchiveT& ar,
const ::rocprofiler::tool::tool_buffer_tracing_kernel_dispatch_with_stream_record_t& data)
{
cereal::save(ar, static_cast<const rocprofiler_buffer_tracing_kernel_dispatch_record_t&>(data));
SAVE_DATA_FIELD(stream_id);
SAVE_DATA_FIELD(kernel_rename_val);
}
template <typename ArchiveT>
void
save(ArchiveT& ar,
const ::rocprofiler::tool::tool_buffer_tracing_memory_copy_with_stream_record_t& data)
{
cereal::save(ar, static_cast<const rocprofiler_buffer_tracing_memory_copy_record_t&>(data));
SAVE_DATA_FIELD(stream_id);
}
#undef SAVE_DATA_FIELD
} // namespace cereal
+2 -2
Просмотреть файл
@@ -4,9 +4,9 @@
rocprofiler_activate_clang_tidy()
set(TOOL_HEADERS config.hpp helper.hpp)
set(TOOL_HEADERS config.hpp helper.hpp stream_stack.hpp)
set(TOOL_SOURCES config.cpp main.c tool.cpp)
set(TOOL_SOURCES config.cpp main.c tool.cpp stream_stack.cpp)
add_library(rocprofiler-sdk-tool SHARED)
target_sources(rocprofiler-sdk-tool PRIVATE ${TOOL_SOURCES} ${TOOL_HEADERS})
+74
Просмотреть файл
@@ -0,0 +1,74 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "stream_stack.hpp"
#include "lib/common/container/small_vector.hpp"
#include "lib/common/static_tl_object.hpp"
#include "lib/rocprofiler-sdk/hip/stream.hpp"
namespace rocprofiler
{
namespace tool
{
namespace stream
{
namespace
{
auto*
get_stream_stack()
{
static thread_local auto*& _v =
common::static_tl_object<common::container::small_vector<rocprofiler_stream_id_t>>::
construct(rocprofiler_stream_id_t{.handle = 0});
return _v;
}
} // namespace
void
push_stream_id(rocprofiler_stream_id_t id)
{
CHECK_NOTNULL(get_stream_stack())->emplace_back(id);
}
void
pop_stream_id()
{
CHECK_NOTNULL(get_stream_stack())->pop_back();
}
rocprofiler_stream_id_t
get_stream_id()
{
return CHECK_NOTNULL(get_stream_stack())->back();
}
bool
stream_stack_empty()
{
return CHECK_NOTNULL(get_stream_stack())->empty();
}
} // namespace stream
} // namespace tool
} // namespace rocprofiler
+46
Просмотреть файл
@@ -0,0 +1,46 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <rocprofiler-sdk/rocprofiler.h>
namespace rocprofiler
{
namespace tool
{
namespace stream
{
rocprofiler_stream_id_t
get_stream_id();
void
push_stream_id(rocprofiler_stream_id_t id);
void
pop_stream_id();
bool
stream_stack_empty();
} // namespace stream
} // namespace tool
} // namespace rocprofiler
+198 -34
Просмотреть файл
@@ -22,11 +22,13 @@
#include "config.hpp"
#include "helper.hpp"
#include "stream_stack.hpp"
#include "lib/common/environment.hpp"
#include "lib/common/filesystem.hpp"
#include "lib/common/logging.hpp"
#include "lib/common/scope_destructor.hpp"
#include "lib/common/static_object.hpp"
#include "lib/common/string_entry.hpp"
#include "lib/common/synchronized.hpp"
#include "lib/common/units.hpp"
@@ -43,6 +45,7 @@
#include "lib/output/generateStats.hpp"
#include "lib/output/output_stream.hpp"
#include "lib/output/statistics.hpp"
#include "lib/output/stream_info.hpp"
#include "lib/output/tmp_file.hpp"
#include "lib/output/tmp_file_buffer.hpp"
#include "lib/rocprofiler-sdk-att/att_lib_wrapper.hpp"
@@ -209,6 +212,32 @@ thread_local auto thread_dispatch_rename_dtor = common::scope_destructor{[]() {
thread_dispatch_rename = nullptr;
}};
// Stores stream_ids and kernel_rename_vals for kernel-rename service and hip stream display service
struct kernel_rename_and_stream_display_pair
{
uint64_t kernel_rename_val{0};
rocprofiler_stream_id_t stream_id{.handle = 0};
};
auto kernel_rename_and_stream_display_pair_dtors =
new std::vector<kernel_rename_and_stream_display_pair*>{};
auto
get_kernel_rename_and_stream_display_pair_lock()
{
static auto _mutex = std::mutex{};
return std::unique_lock<std::mutex>{_mutex};
}
void
add_kernel_rename_and_stream_display_pairs(kernel_rename_and_stream_display_pair* ptr)
{
auto lock = get_kernel_rename_and_stream_display_pair_lock();
if(ptr != nullptr && kernel_rename_and_stream_display_pair_dtors != nullptr)
{
kernel_rename_and_stream_display_pair_dtors->emplace_back(ptr);
}
}
bool
add_kernel_target(uint64_t _kern_id, const std::unordered_set<uint32_t>& range)
{
@@ -357,23 +386,49 @@ collection_period_cntrl(std::promise<void>&& _promise, rocprofiler_context_id_t
}
int
set_kernel_rename_correlation_id(rocprofiler_thread_id_t thr_id,
rocprofiler_context_id_t ctx_id,
rocprofiler_external_correlation_id_request_kind_t kind,
rocprofiler_tracing_operation_t op,
uint64_t internal_corr_id,
rocprofiler_user_data_t* external_corr_id,
void* user_data)
set_kernel_rename_and_stream_display_correlation_id(
rocprofiler_thread_id_t thr_id,
rocprofiler_context_id_t ctx_id,
rocprofiler_external_correlation_id_request_kind_t kind,
rocprofiler_tracing_operation_t op,
uint64_t internal_corr_id,
rocprofiler_user_data_t* external_corr_id,
void* user_data)
{
ROCP_FATAL_IF(kind != ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH)
ROCP_FATAL_IF(kind != ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH &&
kind != ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY)
<< "unexpected kind: " << kind;
// Check whether services are enabled
const bool kernel_rename_service_enabled =
tool::get_config().kernel_rename && thread_dispatch_rename != nullptr &&
!thread_dispatch_rename->empty() &&
kind == ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH;
const bool hip_stream_display_enabled =
!tool::get_config().group_by_queue &&
kernel_rename_and_stream_display_pair_dtors != nullptr &&
!rocprofiler::tool::stream::stream_stack_empty();
if(thread_dispatch_rename != nullptr && !thread_dispatch_rename->empty())
kernel_rename_and_stream_display_pair* kernel_rename_and_stream_display_vals = nullptr;
if(kernel_rename_service_enabled || hip_stream_display_enabled)
{
kernel_rename_and_stream_display_vals = new kernel_rename_and_stream_display_pair{};
}
// Get value for kernel rename service
if(kernel_rename_service_enabled && kernel_rename_and_stream_display_vals != nullptr)
{
auto val = thread_dispatch_rename->top();
if(tool_metadata) tool_metadata->add_external_correlation_id(val);
external_corr_id->value = val;
kernel_rename_and_stream_display_vals->kernel_rename_val = val;
}
// Get stream ID from stream HIP display service
if(hip_stream_display_enabled && kernel_rename_and_stream_display_vals != nullptr)
{
auto stream_id = rocprofiler::tool::stream::get_stream_id();
kernel_rename_and_stream_display_vals->stream_id = stream_id;
}
// Set the external correlation id service to point to struct
external_corr_id->ptr = kernel_rename_and_stream_display_vals;
add_kernel_rename_and_stream_display_pairs(kernel_rename_and_stream_display_vals);
common::consume_args(thr_id, ctx_id, kind, op, internal_corr_id, user_data);
@@ -461,6 +516,54 @@ kernel_rename_callback(rocprofiler_callback_tracing_record_t record,
common::consume_args(user_data, data);
}
// Stores stream IDs onto stack when callback is triggered
void
hip_stream_display_callback(rocprofiler_callback_tracing_record_t record,
rocprofiler_user_data_t* user_data,
void* data)
{
if(tool::get_config().group_by_queue ||
record.kind != ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API)
return;
// Extract stream ID from record
auto* stream_handle_data =
static_cast<rocprofiler_callback_tracing_stream_handle_data_t*>(record.payload);
auto stream_id = stream_handle_data->stream_id;
// STREAM_HANDLE_CREATE and DESTROY are no-ops
if(record.operation == ROCPROFILER_HIP_STREAM_CREATE)
{
ROCP_INFO
<< "Entered hip_stream_display_callback function for ROCPROFILER_HIP_STREAM_CREATE";
}
else if(record.operation == ROCPROFILER_HIP_STREAM_DESTROY)
{
ROCP_INFO
<< "Entered hip_stream_display_callback function for ROCPROFILER_HIP_STREAM_DESTROY";
}
else if(record.operation == ROCPROFILER_HIP_STREAM_SET)
{
// Push the stream ID onto the stream stack when before underlying HIP function is called
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
{
ROCP_INFO << "Entered hip_stream_display_callback function for "
"ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_ENTER";
rocprofiler::tool::stream::push_stream_id(stream_id);
}
// Pop stream ID off of stream stack after underlying HIP function is completed
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
{
ROCP_INFO << "Entered hip_stream_display_callback function for "
"ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_EXIT";
rocprofiler::tool::stream::pop_stream_id();
}
}
else
{
ROCP_FATAL << "Unsupported operation for ROCPROFILER_HIP_STREAM";
}
common::consume_args(user_data, data);
}
void
callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
rocprofiler_user_data_t* user_data,
@@ -759,7 +862,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
rocprofiler_buffer_id_t /*buffer_id*/,
rocprofiler_record_header_t** headers,
size_t num_headers,
void* /*user_data*/,
void* /* user_data*/,
uint64_t /*drop_count*/)
{
ROCP_INFO << "Executing buffered tracing callback for " << num_headers << " headers";
@@ -776,8 +879,21 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
{
auto* record = static_cast<rocprofiler_buffer_tracing_kernel_dispatch_record_t*>(
header->payload);
tool::write_ring_buffer(*record, domain_type::KERNEL_DISPATCH);
rocprofiler_stream_id_t stream_id{.handle = 0};
uint64_t kernel_rename_val = 0;
if((!tool::get_config().group_by_queue || tool::get_config().kernel_rename) &&
record->correlation_id.external.ptr != nullptr)
{
// Extract the stream id
auto* kernel_stream_pair_ptr =
static_cast<kernel_rename_and_stream_display_pair*>(
record->correlation_id.external.ptr);
stream_id = kernel_stream_pair_ptr->stream_id;
kernel_rename_val = kernel_stream_pair_ptr->kernel_rename_val;
}
rocprofiler::tool::tool_buffer_tracing_kernel_dispatch_with_stream_record_t
record_with_stream{*record, stream_id, kernel_rename_val};
tool::write_ring_buffer(record_with_stream, domain_type::KERNEL_DISPATCH);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API ||
@@ -794,8 +910,19 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
{
auto* record =
static_cast<rocprofiler_buffer_tracing_memory_copy_record_t*>(header->payload);
tool::write_ring_buffer(*record, domain_type::MEMORY_COPY);
rocprofiler_stream_id_t stream_id{.handle = 0};
if(!tool::get_config().group_by_queue &&
record->correlation_id.external.ptr != nullptr)
{
// Extract the stream id
auto* kernel_stream_pair_ptr =
static_cast<kernel_rename_and_stream_display_pair*>(
record->correlation_id.external.ptr);
stream_id = kernel_stream_pair_ptr->stream_id;
}
rocprofiler::tool::tool_buffer_tracing_memory_copy_with_stream_record_t
record_with_stream{*record, stream_id};
tool::write_ring_buffer(record_with_stream, domain_type::MEMORY_COPY);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION)
{
@@ -1171,6 +1298,13 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da
counter_record.dispatch_data = dispatch_data;
counter_record.thread_id = user_data.value;
if(dispatch_data.correlation_id.external.ptr != nullptr)
{
// Extract the kernel id
auto* kernel_stream_pair_ptr = static_cast<kernel_rename_and_stream_display_pair*>(
dispatch_data.correlation_id.external.ptr);
counter_record.kernel_rename_val = kernel_stream_pair_ptr->kernel_rename_val;
}
auto serialized_records = std::vector<tool::tool_counter_value_t>{};
serialized_records.reserve(record_count);
@@ -1552,7 +1686,6 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
get_buffers().rocjpeg_api_trace),
"buffer tracing service for ROCDecode api configure");
}
if(tool::get_config().kernel_rename)
{
auto rename_ctx = rocprofiler_context_id_t{0};
@@ -1573,16 +1706,36 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
"callback tracing service failed to configure");
ROCPROFILER_CALL(rocprofiler_start_context(rename_ctx), "start context failed");
}
if(!tool::get_config().group_by_queue)
{
auto hip_stream_display_ctx = rocprofiler_context_id_t{0};
ROCPROFILER_CALL(rocprofiler_create_context(&hip_stream_display_ctx),
"failed to create context");
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
hip_stream_display_ctx,
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
nullptr,
0,
hip_stream_display_callback,
nullptr),
"stream tracing configure failed");
ROCPROFILER_CALL(rocprofiler_start_context(hip_stream_display_ctx), "start context failed");
}
if(tool::get_config().kernel_rename || !tool::get_config().group_by_queue)
{
auto external_corr_id_request_kinds =
std::array<rocprofiler_external_correlation_id_request_kind_t, 1>{
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH};
std::array<rocprofiler_external_correlation_id_request_kind_t, 2>{
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH,
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY};
ROCPROFILER_CALL(rocprofiler_configure_external_correlation_id_request_service(
get_client_ctx(),
external_corr_id_request_kinds.data(),
external_corr_id_request_kinds.size(),
set_kernel_rename_correlation_id,
set_kernel_rename_and_stream_display_correlation_id,
nullptr),
"Could not configure external correlation id request service");
}
@@ -1715,16 +1868,17 @@ tool_fini(void* /*tool_data*/)
rocprofiler_stop_context(get_client_ctx());
flush();
auto kernel_dispatch_output =
tool::kernel_dispatch_buffered_output_t{tool::get_config().kernel_trace};
auto kernel_dispatch_with_stream_output =
rocprofiler::tool::kernel_dispatch_buffered_output_with_stream_t{
tool::get_config().kernel_trace};
auto hsa_output = tool::hsa_buffered_output_t{tool::get_config().hsa_core_api_trace ||
tool::get_config().hsa_amd_ext_api_trace ||
tool::get_config().hsa_image_ext_api_trace ||
tool::get_config().hsa_finalizer_ext_api_trace};
auto hip_output = tool::hip_buffered_output_t{tool::get_config().hip_runtime_api_trace ||
tool::get_config().hip_compiler_api_trace};
auto memory_copy_output =
tool::memory_copy_buffered_output_t{tool::get_config().memory_copy_trace};
auto memory_copy_output_with_stream_output =
tool::memory_copy_buffered_output_with_stream_t{tool::get_config().memory_copy_trace};
auto marker_output = tool::marker_buffered_output_t{tool::get_config().marker_api_trace};
auto counters_output =
tool::counter_collection_buffered_output_t{tool::get_config().counter_collection};
@@ -1748,10 +1902,10 @@ tool_fini(void* /*tool_data*/)
uint64_t num_output = 0;
auto contributions = domain_stats_vec_t{};
generate_output(kernel_dispatch_output, num_output, contributions);
generate_output(kernel_dispatch_with_stream_output, num_output, contributions);
generate_output(hsa_output, num_output, contributions);
generate_output(hip_output, num_output, contributions);
generate_output(memory_copy_output, num_output, contributions);
generate_output(memory_copy_output_with_stream_output, num_output, contributions);
generate_output(memory_allocation_output, num_output, contributions);
generate_output(marker_output, num_output, contributions);
generate_output(rccl_output, num_output, contributions);
@@ -1825,8 +1979,8 @@ tool_fini(void* /*tool_data*/)
contributions,
hip_output.get_generator(),
hsa_output.get_generator(),
kernel_dispatch_output.get_generator(),
memory_copy_output.get_generator(),
kernel_dispatch_with_stream_output.get_generator(),
memory_copy_output_with_stream_output.get_generator(),
counters_output.get_generator(),
marker_output.get_generator(),
scratch_memory_output.get_generator(),
@@ -1847,8 +2001,8 @@ tool_fini(void* /*tool_data*/)
agents_output,
hip_output.get_generator(),
hsa_output.get_generator(),
kernel_dispatch_output.get_generator(),
memory_copy_output.get_generator(),
kernel_dispatch_with_stream_output.get_generator(),
memory_copy_output_with_stream_output.get_generator(),
marker_output.get_generator(),
scratch_memory_output.get_generator(),
rccl_output.get_generator(),
@@ -1861,8 +2015,8 @@ tool_fini(void* /*tool_data*/)
{
auto hip_elem_data = hip_output.load_all();
auto hsa_elem_data = hsa_output.load_all();
auto kernel_dispatch_elem_data = kernel_dispatch_output.load_all();
auto memory_copy_elem_data = memory_copy_output.load_all();
auto kernel_dispatch_elem_data = kernel_dispatch_with_stream_output.load_all();
auto memory_copy_elem_data = memory_copy_output_with_stream_output.load_all();
auto marker_elem_data = marker_output.load_all();
auto scratch_memory_elem_data = scratch_memory_output.load_all();
auto rccl_elem_data = rccl_output.load_all();
@@ -1890,13 +2044,12 @@ tool_fini(void* /*tool_data*/)
{
tool::generate_stats(tool::get_config(), *tool_metadata, contributions);
}
auto destroy_output = [](auto& _buffered_output_v) { _buffered_output_v.destroy(); };
destroy_output(kernel_dispatch_output);
destroy_output(kernel_dispatch_with_stream_output);
destroy_output(hsa_output);
destroy_output(hip_output);
destroy_output(memory_copy_output);
destroy_output(memory_copy_output_with_stream_output);
destroy_output(memory_allocation_output);
destroy_output(marker_output);
destroy_output(counters_output);
@@ -1907,6 +2060,17 @@ tool_fini(void* /*tool_data*/)
destroy_output(rocdecode_output);
destroy_output(rocjpeg_output);
if(kernel_rename_and_stream_display_pair_dtors != nullptr)
{
for(auto& itr : *kernel_rename_and_stream_display_pair_dtors)
{
delete itr;
itr = nullptr;
}
delete kernel_rename_and_stream_display_pair_dtors;
kernel_rename_and_stream_display_pair_dtors = nullptr;
}
if(destructors)
{
for(const auto& itr : *destructors)
+12
Просмотреть файл
@@ -24,6 +24,7 @@
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/context/domain.hpp"
#include "lib/rocprofiler-sdk/hip/hip.hpp"
#include "lib/rocprofiler-sdk/hip/stream.hpp"
#include "lib/rocprofiler-sdk/hsa/async_copy.hpp"
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
#include "lib/rocprofiler-sdk/hsa/memory_allocation.hpp"
@@ -97,6 +98,7 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(OMPT)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(RUNTIME_INITIALIZATION)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCDECODE_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCJPEG_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_STREAM_API)
template <size_t Idx, size_t... Tail>
std::pair<const char*, size_t>
@@ -305,6 +307,11 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_
val = rocprofiler::rocjpeg::name_by_id<ROCPROFILER_ROCJPEG_TABLE_ID_CORE>(operation);
break;
}
case ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API:
{
val = rocprofiler::hip::stream::name_by_id(operation);
break;
}
};
if(!val)
@@ -446,6 +453,11 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
ops = rocprofiler::rocjpeg::get_ids<ROCPROFILER_ROCJPEG_TABLE_ID_CORE>();
break;
}
case ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API:
{
ops = rocprofiler::hip::stream::get_ids();
break;
}
}
for(const auto& itr : ops)
+13
Просмотреть файл
@@ -24,6 +24,7 @@
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/context/domain.hpp"
#include "lib/rocprofiler-sdk/hip/hip.hpp"
#include "lib/rocprofiler-sdk/hip/stream.hpp"
#include "lib/rocprofiler-sdk/hsa/async_copy.hpp"
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
#include "lib/rocprofiler-sdk/hsa/memory_allocation.hpp"
@@ -94,6 +95,7 @@ ROCPROFILER_CALLBACK_TRACING_KIND_STRING(OMPT)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(RUNTIME_INITIALIZATION)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(ROCDECODE_API)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(ROCJPEG_API)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(HIP_STREAM_API)
template <size_t Idx, size_t... Tail>
std::pair<const char*, size_t>
@@ -288,6 +290,11 @@ rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_trac
val = rocprofiler::rocjpeg::name_by_id<ROCPROFILER_ROCJPEG_TABLE_ID_CORE>(operation);
break;
}
case ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API:
{
val = rocprofiler::hip::stream::name_by_id(operation);
break;
}
};
if(!val)
@@ -427,6 +434,11 @@ rocprofiler_iterate_callback_tracing_kind_operations(
ops = rocprofiler::rocjpeg::get_ids<ROCPROFILER_ROCJPEG_TABLE_ID_CORE>();
break;
}
case ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API:
{
ops = rocprofiler::hip::stream::get_ids();
break;
}
};
for(const auto& itr : ops)
@@ -571,6 +583,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args(
case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION:
case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API:
case ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API:
case ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API:
{
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
+2 -2
Просмотреть файл
@@ -1,5 +1,5 @@
set(ROCPROFILER_LIB_HIP_SOURCES abi.cpp hip.cpp)
set(ROCPROFILER_LIB_HIP_HEADERS defines.hpp hip.hpp utils.hpp)
set(ROCPROFILER_LIB_HIP_SOURCES abi.cpp hip.cpp stream.cpp)
set(ROCPROFILER_LIB_HIP_HEADERS defines.hpp hip.hpp stream.hpp utils.hpp)
target_sources(rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_HIP_SOURCES}
${ROCPROFILER_LIB_HIP_HEADERS})
+8
Просмотреть файл
@@ -46,6 +46,8 @@
using domain_type::retval_type; \
using domain_type::callback_data_type; \
\
static constexpr auto get_args_type() { return common::mpl::type_list<>{}; } \
\
static constexpr auto offset() \
{ \
return offsetof(hip_table_lookup<table_idx>::type, HIP_FUNC_PTR); \
@@ -169,6 +171,12 @@
return &base_type::functor<RetT, Args...>; \
} \
\
static constexpr auto get_args_type() \
{ \
using func_t = decltype(get_table_func()); \
return common::mpl::function_args_t<func_t>{}; \
} \
\
static std::vector<void*> as_arg_addr(callback_data_type trace_data) \
{ \
return std::vector<void*>{ \
+512
Просмотреть файл
@@ -0,0 +1,512 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lib/rocprofiler-sdk/hip/stream.hpp"
#include "lib/common/container/small_vector.hpp"
#include "lib/common/defines.hpp"
#include "lib/common/scope_destructor.hpp"
#include "lib/common/static_object.hpp"
#include "lib/common/static_tl_object.hpp"
#include "lib/common/synchronized.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/hip/utils.hpp"
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include "lib/rocprofiler-sdk/tracing/tracing.hpp"
#include <rocprofiler-sdk/buffer.h>
#include <rocprofiler-sdk/callback_tracing.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hip/runtime_api_id.h>
#include <rocprofiler-sdk/hip/table_id.h>
#include <hip/driver_types.h>
#include <hip/hip_runtime_api.h>
// must be included after runtime api
#include <hip/hip_deprecated.h>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include <utility>
#define ROCPROFILER_LIB_ROCPROFILER_HIP_HIP_CPP_IMPL 1
// template specializations
#include "hip.def.cpp"
namespace rocprofiler
{
namespace hip
{
namespace stream
{
using stream_map_t = std::unordered_map<hipStream_t, rocprofiler_stream_id_t>;
namespace
{
auto*
get_stream_map()
{
static auto*& _v = common::static_object<common::Synchronized<stream_map_t>>::construct();
return _v;
}
auto
add_stream(hipStream_t stream)
{
return get_stream_map()->wlock(
[](stream_map_t& _data, hipStream_t _stream) {
if(_data.count(_stream) == 0)
{
auto idx = _data.size();
ROCP_INFO << fmt::format("hipStream_t={} :: id={}.handle={}{}",
static_cast<void*>(_stream),
'{',
idx,
'}');
_data.emplace(_stream, rocprofiler_stream_id_t{.handle = idx});
}
return _data.at(_stream);
},
stream);
}
auto
get_stream_id(hipStream_t stream)
{
return get_stream_map()->rlock(
[](const stream_map_t& _data, hipStream_t _stream) { return _data.at(_stream); }, stream);
}
// Map rocprofiler_hip_stream_operation_t to respective name
template <size_t OpIdx>
struct hip_stream_operation_name;
#define HIP_STREAM_OPERATION_NAME(ENUM) \
template <> \
struct hip_stream_operation_name<ROCPROFILER_HIP_STREAM_##ENUM> \
{ \
static constexpr auto name = "HIP_STREAM_" #ENUM; \
static constexpr auto operation_idx = ROCPROFILER_HIP_STREAM_##ENUM; \
};
HIP_STREAM_OPERATION_NAME(NONE)
HIP_STREAM_OPERATION_NAME(CREATE)
HIP_STREAM_OPERATION_NAME(DESTROY)
HIP_STREAM_OPERATION_NAME(SET)
#undef HIP_STREAM_OPERATION_NAME
template <size_t OpIdx, size_t... OpIdxTail>
const char*
name_by_id(const uint32_t id, std::index_sequence<OpIdx, OpIdxTail...>)
{
if(OpIdx == id) return hip_stream_operation_name<OpIdx>::name;
if constexpr(sizeof...(OpIdxTail) > 0)
return name_by_id(id, std::index_sequence<OpIdxTail...>{});
else
return nullptr;
}
template <size_t OpIdx, size_t... OpIdxTail>
void
get_ids(std::vector<uint32_t>& _id_list, std::index_sequence<OpIdx, OpIdxTail...>)
{
auto _idx = hip_stream_operation_name<OpIdx>::operation_idx;
if(_idx < ROCPROFILER_HIP_STREAM_LAST) _id_list.emplace_back(_idx);
if constexpr(sizeof...(OpIdxTail) > 0) get_ids(_id_list, std::index_sequence<OpIdxTail...>{});
}
} // namespace
template <size_t TableIdx,
size_t OpIdx,
typename RetT,
typename... Args,
typename FuncT = RetT (*)(Args...)>
FuncT create_write_functor(RetT (*func)(Args...))
{
using function_type = FuncT;
static function_type next_func = func;
return [](Args... args) -> RetT {
using function_args_type = common::mpl::type_list<Args...>;
using callback_api_data_t = rocprofiler_callback_tracing_stream_handle_data_t;
constexpr auto external_corr_id_domain_idx =
hip_domain_info<TableIdx>::external_correlation_id_domain_idx;
auto thr_id = common::get_tid();
auto callback_contexts = tracing::callback_context_data_vec_t{};
auto buffered_contexts = tracing::buffered_context_data_vec_t{};
auto external_corr_ids = tracing::external_correlation_id_map_t{};
tracing::populate_contexts(ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API,
callback_contexts,
buffered_contexts,
external_corr_ids);
assert(buffered_contexts.empty() && "Stream tracing should not have any buffered contexts");
auto tracer_data = common::init_public_api_struct(callback_api_data_t{});
auto internal_corr_id = 0;
constexpr auto stream_idx = common::mpl::index_of<hipStream_t*, function_args_type>::value;
auto stream = std::get<stream_idx>(std::make_tuple(std::forward<Args>(args)...));
tracing::update_external_correlation_ids(
external_corr_ids, thr_id, external_corr_id_domain_idx);
auto _ret = next_func(std::forward<Args>(args)...);
if(!callback_contexts.empty())
{
if(stream)
{
tracer_data.stream_id = add_stream(*stream);
}
tracing::execute_phase_none_callbacks(callback_contexts,
thr_id,
internal_corr_id,
external_corr_ids,
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
ROCPROFILER_HIP_STREAM_CREATE,
tracer_data);
}
if constexpr(!std::is_void<RetT>::value) return _ret;
};
}
template <size_t TableIdx,
size_t OpIdx,
typename RetT,
typename... Args,
typename FuncT = RetT (*)(Args...)>
FuncT create_destroy_functor(RetT (*func)(Args...))
{
using function_type = FuncT;
static function_type next_func = func;
return [](Args... args) -> RetT {
using function_args_type = common::mpl::type_list<Args...>;
constexpr auto stream_idx = common::mpl::index_of<hipStream_t, function_args_type>::value;
using callback_api_data_t = rocprofiler_callback_tracing_stream_handle_data_t;
constexpr auto external_corr_id_domain_idx =
hip_domain_info<TableIdx>::external_correlation_id_domain_idx;
auto thr_id = common::get_tid();
auto callback_contexts = tracing::callback_context_data_vec_t{};
auto buffered_contexts = tracing::buffered_context_data_vec_t{};
auto external_corr_ids = tracing::external_correlation_id_map_t{};
tracing::populate_contexts(ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API,
callback_contexts,
buffered_contexts,
external_corr_ids);
assert(buffered_contexts.empty() && "Stream tracing should not have any buffered contexts");
auto tracer_data = common::init_public_api_struct(callback_api_data_t{});
auto internal_corr_id = 0;
auto stream = std::get<stream_idx>(std::make_tuple(std::forward<Args>(args)...));
tracing::update_external_correlation_ids(
external_corr_ids, thr_id, external_corr_id_domain_idx);
auto _ret = next_func(std::forward<Args>(args)...);
if(!callback_contexts.empty())
{
tracer_data.stream_id = get_stream_id(stream);
tracing::execute_phase_none_callbacks(callback_contexts,
thr_id,
internal_corr_id,
external_corr_ids,
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
ROCPROFILER_HIP_STREAM_DESTROY,
tracer_data);
}
if constexpr(!std::is_void<RetT>::value) return _ret;
};
}
template <size_t TableIdx,
size_t OpIdx,
typename RetT,
typename... Args,
typename FuncT = RetT (*)(Args...)>
FuncT create_read_functor(RetT (*func)(Args...))
{
using function_type = FuncT;
static function_type next_func = func;
return [](Args... args) -> RetT {
using function_args_type = common::mpl::type_list<Args...>;
constexpr auto stream_idx = common::mpl::index_of<hipStream_t, function_args_type>::value;
using callback_api_data_t = rocprofiler_callback_tracing_stream_handle_data_t;
constexpr auto external_corr_id_domain_idx =
hip_domain_info<TableIdx>::external_correlation_id_domain_idx;
auto thr_id = common::get_tid();
auto callback_contexts = tracing::callback_context_data_vec_t{};
auto buffered_contexts = tracing::buffered_context_data_vec_t{};
auto external_corr_ids = tracing::external_correlation_id_map_t{};
tracing::populate_contexts(ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API,
callback_contexts,
buffered_contexts,
external_corr_ids);
assert(buffered_contexts.empty() && "Stream tracing should not have any buffered contexts");
auto tracer_data = common::init_public_api_struct(callback_api_data_t{});
auto internal_corr_id = 0;
auto stream = std::get<stream_idx>(std::make_tuple(std::forward<Args>(args)...));
if(!callback_contexts.empty())
{
tracer_data.stream_id = get_stream_id(stream);
tracing::execute_phase_enter_callbacks(callback_contexts,
thr_id,
internal_corr_id,
external_corr_ids,
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
ROCPROFILER_HIP_STREAM_SET,
tracer_data);
}
tracing::update_external_correlation_ids(
external_corr_ids, thr_id, external_corr_id_domain_idx);
auto _ret = next_func(std::forward<Args>(args)...);
if(!callback_contexts.empty())
{
tracing::execute_phase_exit_callbacks(callback_contexts,
external_corr_ids,
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
ROCPROFILER_HIP_STREAM_SET,
tracer_data);
}
if constexpr(!std::is_void<RetT>::value) return _ret;
};
}
} // namespace stream
} // namespace hip
} // namespace rocprofiler
namespace rocprofiler
{
namespace hip
{
namespace stream
{
namespace
{
bool
enable_stream_stack()
{
if(hsa::enable_queue_intercept()) return true;
for(const auto& itr : context::get_registered_contexts())
{
if(itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY) ||
itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API) ||
itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API))
return true;
}
return false;
}
bool
enable_compiler_stream_stack()
{
for(const auto& itr : context::get_registered_contexts())
{
if(itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API))
return true;
}
return false;
}
template <size_t TableIdx, typename Tp, size_t OpIdx>
void
update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
{
using table_type = typename hip_table_lookup<TableIdx>::type;
using info_type = hip_api_info<TableIdx, OpIdx>;
using function_args_type = decltype(info_type::get_args_type());
if constexpr(std::is_same<table_type, Tp>::value &&
(common::mpl::is_one_of<hipStream_t, function_args_type>::value ||
common::mpl::is_one_of<hipStream_t*, function_args_type>::value))
{
auto _info = info_type{};
// make sure we don't access a field that doesn't exist in input table
if(_info.offset() >= _orig->size) return;
ROCP_TRACE << "updating table entry for " << _info.name;
constexpr auto num_args = function_args_type::size();
if constexpr(common::mpl::is_one_of<hipStream_t, function_args_type>::value)
{
constexpr auto stream_idx =
common::mpl::index_of<hipStream_t, function_args_type>::value;
constexpr auto rstream_idx =
common::mpl::index_of<hipStream_t, common::mpl::reverse<function_args_type>>::value;
// index_of finds the first argument of that type. So find the first and last
// arg of the given type and make sure it resolves to the same distance
assert(stream_idx == (num_args - rstream_idx - 1) &&
"function has more than one stream argument");
// don't wrap the compiler API functions unless HIP compiler API tracing is enabled
if constexpr(TableIdx == ROCPROFILER_HIP_TABLE_ID_Compiler)
{
if(!enable_compiler_stream_stack()) return;
}
// 1. get the sub-table containing the function pointer in original table
// 2. get reference to function pointer in sub-table in original table
// 3. update function pointer with wrapper
auto& _table = _info.get_table(_orig);
auto& _func = _info.get_table_func(_table);
constexpr auto is_hip_destroy_func =
std::is_same<decltype(info_type::operation_idx),
rocprofiler_hip_runtime_api_id_t>::value &&
(static_cast<rocprofiler_hip_runtime_api_id_t>(info_type::operation_idx) ==
ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamDestroy);
if constexpr(is_hip_destroy_func)
{
_func = create_destroy_functor<TableIdx, OpIdx>(_func);
}
else
{
_func = create_read_functor<TableIdx, OpIdx>(_func);
}
}
else if constexpr(common::mpl::is_one_of<hipStream_t*, function_args_type>::value)
{
constexpr auto stream_idx =
common::mpl::index_of<hipStream_t*, function_args_type>::value;
constexpr auto rstream_idx =
common::mpl::index_of<hipStream_t*,
common::mpl::reverse<function_args_type>>::value;
// index_of finds the first argument of that type. So find the first and last
// arg of the given type and make sure it resolves to the same distance
assert(stream_idx == (num_args - rstream_idx - 1) &&
"function has more than one stream argument");
// don't wrap the compiler API functions unless HIP compiler API tracing is enabled
if constexpr(TableIdx == ROCPROFILER_HIP_TABLE_ID_Compiler)
{
if(!enable_compiler_stream_stack()) return;
}
// 1. get the sub-table containing the function pointer in original table
// 2. get reference to function pointer in sub-table in original table
// 3. update function pointer with wrapper
auto& _table = _info.get_table(_orig);
auto& _func = _info.get_table_func(_table);
_func = create_write_functor<TableIdx, OpIdx>(_func);
}
}
}
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
void
update_table(Tp* _orig, std::index_sequence<OpIdx, OpIdxTail...>)
{
update_table<TableIdx>(_orig, std::integral_constant<size_t, OpIdx>{});
if constexpr(sizeof...(OpIdxTail) > 0)
update_table<TableIdx>(_orig, std::index_sequence<OpIdxTail...>{});
}
} // namespace
const char*
name_by_id(uint32_t id)
{
return name_by_id(id, std::make_index_sequence<ROCPROFILER_HIP_STREAM_LAST>{});
}
std::vector<uint32_t>
get_ids()
{
constexpr auto last_id = ROCPROFILER_HIP_STREAM_LAST;
auto _data = std::vector<uint32_t>{};
_data.reserve(last_id);
get_ids(_data, std::make_index_sequence<ROCPROFILER_HIP_STREAM_LAST>{});
return _data;
}
template <typename TableT>
void
update_table(TableT* _orig)
{
add_stream(nullptr);
if(!enable_stream_stack()) return;
constexpr auto TableIdx = hip_table_id_lookup<TableT>::value;
if(_orig)
update_table<TableIdx>(_orig, std::make_index_sequence<hip_domain_info<TableIdx>::last>{});
}
using hip_api_data_t = rocprofiler_callback_tracing_hip_api_data_t;
using hip_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
#define INSTANTIATE_HIP_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \
template void update_table<TABLE_TYPE>(TABLE_TYPE * _tbl);
INSTANTIATE_HIP_TABLE_FUNC(hip_runtime_api_table_t, ROCPROFILER_HIP_TABLE_ID_Runtime)
INSTANTIATE_HIP_TABLE_FUNC(hip_compiler_api_table_t, ROCPROFILER_HIP_TABLE_ID_Compiler)
} // namespace stream
} // namespace hip
} // namespace rocprofiler
+56
Просмотреть файл
@@ -0,0 +1,56 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <rocprofiler-sdk/rocprofiler.h>
#include <hip/hip_version.h>
#include <hip/amd_detail/hip_api_trace.hpp>
#include <cstdint>
#include <vector>
namespace rocprofiler
{
namespace hip
{
namespace stream
{
using hip_compiler_api_table_t = HipCompilerDispatchTable;
using hip_runtime_api_table_t = HipDispatchTable;
rocprofiler_stream_id_t
get_stream_id();
const char*
name_by_id(uint32_t id);
std::vector<uint32_t>
get_ids();
template <typename TableT>
void
update_table(TableT* _orig);
} // namespace stream
} // namespace hip
} // namespace rocprofiler
+27 -25
Просмотреть файл
@@ -274,31 +274,7 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table)
}
}
auto enable_intercepter = false;
for(const auto& itr : context::get_registered_contexts())
{
constexpr auto expected_context_size = 216UL;
static_assert(
sizeof(context::context) == expected_context_size,
"If you added a new field to context struct, make sure there is a check here if it "
"requires queue interception. Once you have done so, increment expected_context_size");
bool has_kernel_tracing = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH);
bool has_scratch_reporting = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY);
if(itr->counter_collection || itr->pc_sampler || has_kernel_tracing ||
has_scratch_reporting || itr->device_counter_collection || itr->agent_thread_trace ||
itr->dispatch_thread_trace)
{
enable_intercepter = true;
break;
}
}
if(enable_intercepter)
if(enable_queue_intercept())
{
core_table.hsa_queue_create_fn = hsa::create_queue;
core_table.hsa_queue_destroy_fn = hsa::destroy_queue;
@@ -475,6 +451,32 @@ get_queue_controller()
return controller;
}
bool
enable_queue_intercept()
{
for(const auto& itr : context::get_registered_contexts())
{
constexpr auto expected_context_size = 216UL;
static_assert(
sizeof(context::context) == expected_context_size,
"If you added a new field to context struct, make sure there is a check here if it "
"requires queue interception. Once you have done so, increment expected_context_size");
bool has_kernel_tracing = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH);
bool has_scratch_reporting = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY);
if(itr->counter_collection || itr->pc_sampler || has_kernel_tracing ||
has_scratch_reporting || itr->device_counter_collection || itr->agent_thread_trace ||
itr->dispatch_thread_trace)
return true;
}
return false;
}
void
queue_controller_init(HsaApiTable* table)
{
+3 -1
Просмотреть файл
@@ -123,6 +123,9 @@ private:
QueueController*
get_queue_controller();
bool
enable_queue_intercept();
void
queue_controller_init(HsaApiTable* table);
@@ -134,6 +137,5 @@ queue_controller_sync();
void
profiler_serializer_kernel_completion_signal(hsa_signal_t queue_block_signal);
} // namespace hsa
} // namespace rocprofiler
+9
Просмотреть файл
@@ -32,6 +32,7 @@
#include "lib/rocprofiler-sdk/code_object/code_object.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/hip/hip.hpp"
#include "lib/rocprofiler-sdk/hip/stream.hpp"
#include "lib/rocprofiler-sdk/hsa/async_copy.hpp"
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
#include "lib/rocprofiler-sdk/hsa/memory_allocation.hpp"
@@ -754,6 +755,8 @@ rocprofiler_is_finalized(int* status)
rocprofiler_status_t
rocprofiler_force_configure(rocprofiler_configure_func_t configure_func)
{
rocprofiler::registration::init_logging();
ROCP_INFO << "forcing rocprofiler configuration";
auto& forced_config = rocprofiler::registration::get_forced_configure();
@@ -816,6 +819,9 @@ rocprofiler_set_api_table(const char* name,
rocprofiler::runtime_init::initialize(
ROCPROFILER_RUNTIME_INITIALIZATION_HIP, lib_version, lib_instance);
// install HIP stream deduction wrappers
rocprofiler::hip::stream::update_table(hip_runtime_api_table);
// allow tools to install API wrappers
rocprofiler::intercept_table::notify_intercept_table_registration(
ROCPROFILER_HIP_RUNTIME_TABLE,
@@ -841,6 +847,9 @@ rocprofiler_set_api_table(const char* name,
// install rocprofiler API wrappers
rocprofiler::hip::update_table(hip_compiler_api_table);
// install HIP stream deduction wrappers
rocprofiler::hip::stream::update_table(hip_compiler_api_table);
// allow tools to install API wrappers
rocprofiler::intercept_table::notify_intercept_table_registration(
ROCPROFILER_HIP_COMPILER_TABLE,
+1
Просмотреть файл
@@ -41,4 +41,5 @@ add_subdirectory(rocjpeg-trace)
if(TARGET att_decoder_testing)
add_subdirectory(advanced-thread-trace)
endif()
add_subdirectory(hip-stream-display)
add_subdirectory(agent-index)
+59
Просмотреть файл
@@ -0,0 +1,59 @@
#
#
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(
rocprofiler-tests-rocprofv3-hip-stream-display
LANGUAGES CXX
VERSION 0.0.0)
find_package(rocprofiler-sdk REQUIRED)
find_package(rocDecode)
rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py)
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
set(hip-stream-display-env "${PRELOAD_ENV}")
add_test(
NAME rocprofv3-test-hip-stream-display-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --kernel-rename -d
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace
--log-level env -- $<TARGET_FILE:transpose>)
set_tests_properties(
rocprofv3-test-hip-stream-display-execute
PROPERTIES TIMEOUT
45
LABELS
"integration-tests"
ENVIRONMENT
"${hip-stream-display-env}"
FAIL_REGULAR_EXPRESSION
"threw an exception"
DISABLED
$<NOT:$<TARGET_EXISTS:transpose>>)
add_test(
NAME rocprofv3-test-hip-stream-display-validate
COMMAND
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
${CMAKE_CURRENT_BINARY_DIR}/hip-stream-display/out_results.json --pftrace-input
${CMAKE_CURRENT_BINARY_DIR}/hip-stream-display/out_results.pftrace)
set_tests_properties(
rocprofv3-test-hip-stream-display-validate
PROPERTIES TIMEOUT
45
LABELS
"integration-tests"
DEPENDS
rocprofv3-test-hip-stream-display-execute
FAIL_REGULAR_EXPRESSION
"AssertionError"
DISABLED
$<NOT:$<TARGET_EXISTS:transpose>>)
+42
Просмотреть файл
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
import csv
import json
import os
import pytest
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
from rocprofiler_sdk.pytest_utils import collapse_dict_list
from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader
def pytest_addoption(parser):
parser.addoption(
"--json-input",
action="store",
default="hip-stream-display/out_results.json",
help="Input JSON",
)
parser.addoption(
"--pftrace-input",
action="store",
default="hip-stream-display/out_results.pftrace",
help="Input pftrace file",
)
@pytest.fixture
def json_data(request):
filename = request.config.getoption("--json-input")
if not os.path.isfile(filename):
return pytest.skip("stream tracing unavailable")
with open(filename, "r") as inp:
return dotdict(collapse_dict_list(json.load(inp)))
@pytest.fixture
def pftrace_data(request):
filename = request.config.getoption("--pftrace-input")
if not os.path.isfile(filename):
return pytest.skip("stream tracing unavailable")
return PerfettoReader(filename).read()[0]
+5
Просмотреть файл
@@ -0,0 +1,5 @@
[pytest]
addopts = --durations=20 -rA -s -vv
testpaths = validate.py
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
+75
Просмотреть файл
@@ -0,0 +1,75 @@
#!/usr/bin/env python3
import sys
import pytest
import json
from collections import defaultdict
# helper function
def node_exists(name, data, min_len=1):
assert name in data
assert data[name] is not None
if isinstance(data[name], (list, tuple, dict, set)):
assert len(data[name]) >= min_len
def get_operation(record, kind_name, op_name=None):
for idx, itr in enumerate(record["strings"]["buffer_records"]):
if kind_name == itr["kind"]:
if op_name is None:
return idx, itr["operations"]
else:
for oidx, oname in enumerate(itr["operations"]):
if op_name == oname:
return oidx
return None
def test_stream_trace(json_data):
data = json_data["rocprofiler-sdk-tool"]
buffer_records = data["buffer_records"]
kernel_dispatch_data = buffer_records["kernel_dispatch"]
memory_copies_data = buffer_records["memory_copies"]
assert len(kernel_dispatch_data) > 0
assert len(memory_copies_data) > 0
# Expect stream ids to be set to 1 or 2 for transpose executable
expected_stream_ids = set((1, 2))
# check buffering data
for titr in (kernel_dispatch_data, memory_copies_data):
for node in rocdecode_data:
assert "size" in node
assert "kind" in node
assert "operation" in node
assert "correlation_id" in node
assert "end_timestamp" in node
assert "start_timestamp" in node
assert "thread_id" in node
assert "_stream_id" in node
assert node.size > 0
assert node.thread_id > 0
assert node.start_timestamp > 0
assert node.end_timestamp > 0
assert node.start_timestamp < node.end_timestamp
assert node._stream_id.handle in expected_stream_ids
def test_perfetto_data(pftrace_data, json_data):
import rocprofiler_sdk.tests.rocprofv3 as rocprofv3
assert pftrace_data != None
rocprofv3.test_perfetto_data(
pftrace_data,
json_data,
("kernel", "memory_copy"),
)
if __name__ == "__main__":
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
sys.exit(exit_code)
+1 -1
Просмотреть файл
@@ -137,7 +137,7 @@ def test_kernel_trace(json_data):
external_corr_id = dispatch["correlation_id"]["external"]
assert external_corr_id > 0
kernel_rename = get_kernel_rename(external_corr_id)
kernel_rename = get_kernel_rename(dispatch.kernel_rename_val)
assert kernel_rename is not None, f"{dispatch}"
assert kernel_rename != kernel_name, f"{dispatch}"
assert (
+2 -1
Просмотреть файл
@@ -75,6 +75,7 @@ def test_kernel_trace(json_data):
def get_kernel_rename(corr_id):
for itr in data.strings.correlation_id.external:
print(itr)
if itr.key == corr_id:
return itr.value
return None
@@ -113,7 +114,7 @@ def test_kernel_trace(json_data):
external_corr_id = dispatch["correlation_id"]["external"]
assert external_corr_id > 0
kernel_rename = get_kernel_rename(external_corr_id)
kernel_rename = get_kernel_rename(dispatch.kernel_rename_val)
assert kernel_rename is not None, f"{dispatch}"
assert kernel_rename != kernel_name, f"{dispatch}"
assert (