diff --git a/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp b/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp index c73aa3fcd4..b7303b5885 100644 --- a/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp +++ b/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -222,19 +223,22 @@ tool_tracing_callback(rocprofiler_context_id_t context, auto info = std::stringstream{}; - info << "agent_id=" << record->agent_id.handle - << ", queue_id=" << record->queue_id.handle << ", kernel_id=" << record->kernel_id - << ", kernel=" << client_kernels.at(record->kernel_id).kernel_name + info << "agent_id=" << record->dispatch_info.agent_id.handle + << ", queue_id=" << record->dispatch_info.queue_id.handle + << ", kernel_id=" << record->dispatch_info.kernel_id + << ", kernel=" << client_kernels.at(record->dispatch_info.kernel_id).kernel_name << ", context=" << context.handle << ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.internal << ", extern_cid=" << record->correlation_id.external.value << ", kind=" << record->kind << ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp - << ", private_segment_size=" << record->private_segment_size - << ", group_segment_size=" << record->group_segment_size << ", workgroup_size=(" - << record->workgroup_size.x << "," << record->workgroup_size.y << "," - << record->workgroup_size.z << "), grid_size=(" << record->grid_size.x << "," - << record->grid_size.y << "," << record->grid_size.z << ")"; + << ", private_segment_size=" << record->dispatch_info.private_segment_size + << ", group_segment_size=" << record->dispatch_info.group_segment_size + << ", workgroup_size=(" << record->dispatch_info.workgroup_size.x << "," + << record->dispatch_info.workgroup_size.y << "," + << record->dispatch_info.workgroup_size.z << "), grid_size=(" + << record->dispatch_info.grid_size.x << "," << record->dispatch_info.grid_size.y + << "," << record->dispatch_info.grid_size.z << ")"; if(record->start_timestamp > record->end_timestamp) throw std::runtime_error("kernel dispatch: start > end"); @@ -267,7 +271,10 @@ tool_tracing_callback(rocprofiler_context_id_t context, } else { - throw std::runtime_error{"unexpected rocprofiler_record_header_t category + kind"}; + auto _msg = std::stringstream{}; + _msg << "unexpected rocprofiler_record_header_t category + kind: (" << header->category + << " + " << header->kind << ")"; + throw std::runtime_error{_msg.str()}; } } } diff --git a/projects/rocprofiler-sdk/samples/counter_collection/callback_client.cpp b/projects/rocprofiler-sdk/samples/counter_collection/callback_client.cpp index e86d90ac4d..93d72158b6 100644 --- a/projects/rocprofiler-sdk/samples/counter_collection/callback_client.cpp +++ b/projects/rocprofiler-sdk/samples/counter_collection/callback_client.cpp @@ -74,12 +74,13 @@ record_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, void* callback_data_args) { std::stringstream ss; - ss << "Kernel_id " << dispatch_data.kernel_id << ": "; + ss << "Dispatch_Id=" << dispatch_data.dispatch_info.dispatch_id + << ", Kernel_id=" << dispatch_data.dispatch_info.kernel_id + << ", Corr_Id=" << dispatch_data.correlation_id.internal << ": "; for(size_t i = 0; i < record_count; ++i) - { ss << "(Id: " << record_data[i].id << " Value [D]: " << record_data[i].counter_value - << " Corr_Id: " << record_data[i].correlation_id.internal << "),"; - } + << "),"; + auto* output_stream = static_cast(callback_data_args); if(!output_stream) throw std::runtime_error{"nullptr to output stream"}; *output_stream << "[" << __FUNCTION__ << "] " << ss.str() << "\n"; @@ -110,7 +111,8 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, static std::unordered_map profile_cache = {}; auto search_cache = [&]() { - if(auto pos = profile_cache.find(dispatch_data.agent_id.handle); pos != profile_cache.end()) + if(auto pos = profile_cache.find(dispatch_data.dispatch_info.agent_id.handle); + pos != profile_cache.end()) { *config = pos->second; return true; @@ -133,7 +135,7 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, // Iterate through the agents and get the counters available on that agent ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( - dispatch_data.agent_id, + dispatch_data.dispatch_info.agent_id, [](rocprofiler_agent_id_t, rocprofiler_counter_id_t* counters, size_t num_counters, @@ -167,12 +169,13 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, // Create a colleciton profile for the counters rocprofiler_profile_config_id_t profile; - ROCPROFILER_CALL( - rocprofiler_create_profile_config( - dispatch_data.agent_id, collect_counters.data(), collect_counters.size(), &profile), - "Could not construct profile cfg"); + ROCPROFILER_CALL(rocprofiler_create_profile_config(dispatch_data.dispatch_info.agent_id, + collect_counters.data(), + collect_counters.size(), + &profile), + "Could not construct profile cfg"); - profile_cache.emplace(dispatch_data.agent_id.handle, profile); + profile_cache.emplace(dispatch_data.dispatch_info.agent_id.handle, profile); // Return the profile to collect those counters for this dispatch *config = profile; } diff --git a/projects/rocprofiler-sdk/samples/counter_collection/client.cpp b/projects/rocprofiler-sdk/samples/counter_collection/client.cpp index 970c295a1f..c8b54cfd8f 100644 --- a/projects/rocprofiler-sdk/samples/counter_collection/client.cpp +++ b/projects/rocprofiler-sdk/samples/counter_collection/client.cpp @@ -95,12 +95,23 @@ buffered_callback(rocprofiler_context_id_t, for(size_t i = 0; i < num_headers; ++i) { auto* header = headers[i]; - if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && header->kind == 0) + if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && + header->kind == ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER) + { + // Print the returned counter data. + auto* record = + static_cast(header->payload); + ss << "[Dispatch_Id: " << record->dispatch_info.dispatch_id + << " Kernel_ID: " << record->dispatch_info.kernel_id + << " Corr_Id: " << record->correlation_id.internal << ")]\n"; + } + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && + header->kind == ROCPROFILER_COUNTER_RECORD_VALUE) { // Print the returned counter data. auto* record = static_cast(header->payload); - ss << "(Id: " << record->id << " Value [D]: " << record->counter_value - << " Corr_Id: " << record->correlation_id.internal << "),"; + ss << " (Dispatch_Id: " << record->dispatch_id << " Id: " << record->id + << " Value [D]: " << record->counter_value << "),"; } } @@ -133,7 +144,8 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, static std::unordered_map profile_cache = {}; auto search_cache = [&]() { - if(auto pos = profile_cache.find(dispatch_data.agent_id.handle); pos != profile_cache.end()) + if(auto pos = profile_cache.find(dispatch_data.dispatch_info.agent_id.handle); + pos != profile_cache.end()) { *config = pos->second; return true; @@ -156,7 +168,7 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, // Iterate through the agents and get the counters available on that agent ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( - dispatch_data.agent_id, + dispatch_data.dispatch_info.agent_id, [](rocprofiler_agent_id_t, rocprofiler_counter_id_t* counters, size_t num_counters, @@ -190,12 +202,13 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, // Create a colleciton profile for the counters rocprofiler_profile_config_id_t profile; - ROCPROFILER_CALL( - rocprofiler_create_profile_config( - dispatch_data.agent_id, collect_counters.data(), collect_counters.size(), &profile), - "Could not construct profile cfg"); + ROCPROFILER_CALL(rocprofiler_create_profile_config(dispatch_data.dispatch_info.agent_id, + collect_counters.data(), + collect_counters.size(), + &profile), + "Could not construct profile cfg"); - profile_cache.emplace(dispatch_data.agent_id.handle, profile); + profile_cache.emplace(dispatch_data.dispatch_info.agent_id.handle, profile); // Return the profile to collect those counters for this dispatch *config = profile; } diff --git a/projects/rocprofiler-sdk/samples/counter_collection/print_functional_counters.cpp b/projects/rocprofiler-sdk/samples/counter_collection/print_functional_counters.cpp index 872e53dadb..971955b0a9 100644 --- a/projects/rocprofiler-sdk/samples/counter_collection/print_functional_counters.cpp +++ b/projects/rocprofiler-sdk/samples/counter_collection/print_functional_counters.cpp @@ -170,7 +170,8 @@ buffered_callback(rocprofiler_context_id_t, for(size_t i = 0; i < num_headers; ++i) { auto* header = headers[i]; - if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && header->kind == 0) + if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && + header->kind == ROCPROFILER_COUNTER_RECORD_VALUE) { // Record the counters we have in the buffer and the number of instances of // the counter we have seen. @@ -242,7 +243,7 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, { std::vector counters_needed; ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( - dispatch_data.agent_id, + dispatch_data.dispatch_info.agent_id, [](rocprofiler_agent_id_t, rocprofiler_counter_id_t* counters, size_t num_counters, @@ -269,7 +270,7 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, cap.expected_counter_names.emplace(found_counter.handle, std::string(version.name)); size_t expected = 0; ROCPROFILER_CALL(rocprofiler_query_counter_instance_count( - dispatch_data.agent_id, found_counter, &expected), + dispatch_data.dispatch_info.agent_id, found_counter, &expected), "COULD NOT QUERY INSTANCES"); cap.remaining.push_back(found_counter); cap.expected.emplace(found_counter.handle, expected); @@ -297,8 +298,9 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, } if(cap.expected.empty()) { - std::clog << "No counters found for agent " << dispatch_data.agent_id.handle << " (" - << agents.at(dispatch_data.agent_id.handle)->name << ")"; + std::clog << "No counters found for agent " + << dispatch_data.dispatch_info.agent_id.handle << " (" + << agents.at(dispatch_data.dispatch_info.agent_id.handle)->name << ")"; } } if(cap.remaining.empty()) return; @@ -306,9 +308,10 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, rocprofiler_profile_config_id_t profile; // Select the next counter to collect. - ROCPROFILER_CALL(rocprofiler_create_profile_config( - dispatch_data.agent_id, &(cap.remaining.back()), 1, &profile), - "Could not construct profile cfg"); + ROCPROFILER_CALL( + rocprofiler_create_profile_config( + dispatch_data.dispatch_info.agent_id, &(cap.remaining.back()), 1, &profile), + "Could not construct profile cfg"); cap.remaining.pop_back(); *config = profile; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h index 90261bd66b..eaff458c9e 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h @@ -99,8 +99,9 @@ typedef struct /// ::ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API, or /// ::ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API /// @var operation - /// @brief ::rocprofiler_hsa_core_api_id_t, ::rocprofiler_hsa_amd_ext_api_id_t, - /// ::rocprofiler_hsa_image_ext_api_id_t, or ::rocprofiler_hsa_finalize_ext_api_id_t + /// @brief Specification of the API function, e.g., ::rocprofiler_hsa_core_api_id_t, + /// ::rocprofiler_hsa_amd_ext_api_id_t, ::rocprofiler_hsa_image_ext_api_id_t, or + /// ::rocprofiler_hsa_finalize_ext_api_id_t } rocprofiler_buffer_tracing_hsa_api_record_t; /** @@ -120,7 +121,8 @@ typedef struct /// @brief ::ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API or /// ::ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API /// @var operation - /// @brief ::rocprofiler_hip_runtime_api_id_t or ::rocprofiler_hip_compiler_api_id_t + /// @brief Specification of the API function, e.g., ::rocprofiler_hip_runtime_api_id_t or + /// ::rocprofiler_hip_compiler_api_id_t } rocprofiler_buffer_tracing_hip_api_record_t; /** @@ -140,7 +142,8 @@ typedef struct /// @brief ::ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API, /// ::ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API, or /// ::ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API - /// @brief ::rocprofiler_marker_core_api_id_t, ::rocprofiler_marker_control_api_id_t, or + /// @brief Specification of the API function, e.g., ::rocprofiler_marker_core_api_id_t, + /// ::rocprofiler_marker_control_api_id_t, or /// ::rocprofiler_marker_name_api_id_t } rocprofiler_buffer_tracing_marker_api_record_t; @@ -161,7 +164,8 @@ typedef struct /// @var kind /// @brief ::ROCPROFILER_BUFFER_TRACING_MEMORY_COPY /// @var operation - /// @brief memory copy direction (::rocprofiler_memory_copy_operation_t) + /// @brief Specification of the memory copy direction (@see + /// ::rocprofiler_memory_copy_operation_t) } rocprofiler_buffer_tracing_memory_copy_record_t; /** @@ -171,28 +175,17 @@ typedef struct rocprofiler_buffer_tracing_kernel_dispatch_record_t { uint64_t size; ///< size of this struct rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH - rocprofiler_kernel_dispatch_operation_t operation; ///< + rocprofiler_kernel_dispatch_operation_t operation; rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record rocprofiler_thread_id_t thread_id; ///< id for thread that launched kernel rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds - rocprofiler_agent_id_t agent_id; ///< agent kernel was dispatched on - rocprofiler_queue_id_t queue_id; ///< queue kernel was dispatched on - rocprofiler_kernel_id_t kernel_id; ///< identifier for kernel - rocprofiler_dispatch_id_t dispatch_id; ///< unique id for each dispatch - uint32_t private_segment_size; - uint32_t group_segment_size; - rocprofiler_dim3_t workgroup_size; - rocprofiler_dim3_t grid_size; + rocprofiler_kernel_dispatch_info_t dispatch_info; ///< Dispatch info - /// @var private_segment_size - /// @brief runtime private memory segment size - /// @var group_segment_size - /// @brief runtime group memory segment size - /// @var workgroup_size - /// @brief runtime workgroup size (grid * threads) - /// @var grid_size - /// @brief runtime grid size + /// @var operation + /// @brief Kernel dispatch buffer records only report the ::ROCPROFILER_KERNEL_DISPATCH_COMPLETE + /// operation because there are no "real" wrapper around the enqueuing of an individual kernel + /// dispatch } rocprofiler_buffer_tracing_kernel_dispatch_record_t; typedef struct @@ -232,7 +225,7 @@ typedef struct /** * @brief ROCProfiler Buffer Page Migration Tracer Record */ -typedef struct +typedef struct rocprofiler_buffer_tracing_page_migration_record_t { uint64_t size; ///< size of this struct rocprofiler_buffer_tracing_kind_t kind; ///< ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION @@ -259,17 +252,16 @@ typedef struct */ typedef struct { - uint64_t size; ///< size of this struct - rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY - rocprofiler_scratch_memory_operation_t - operation; ///< @see rocprofiler_scratch_memory_operation_t - rocprofiler_agent_id_t agent_id; ///< agent kernel was dispatched on - rocprofiler_queue_id_t queue_id; ///< queue kernel was dispatched on - rocprofiler_thread_id_t thread_id; ///< id for thread generating this record - rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds - rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds - rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record - rocprofiler_scratch_alloc_flag_t flags; + uint64_t size; ///< size of this struct + rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY + rocprofiler_scratch_memory_operation_t operation; ///< specification of the kind + rocprofiler_agent_id_t agent_id; ///< agent kernel was dispatched on + rocprofiler_queue_id_t queue_id; ///< queue kernel was dispatched on + rocprofiler_thread_id_t thread_id; ///< id for thread generating this record + rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds + rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds + rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record + rocprofiler_scratch_alloc_flag_t flags; } rocprofiler_buffer_tracing_scratch_memory_record_t; /** @@ -286,7 +278,9 @@ typedef struct /// @brief ::ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT /// @var timestamp /// @brief Timestamp (in nanosec) of when rocprofiler detected the correlation ID could be - /// retired + /// retired. Due to clock skew between the CPU and GPU, this may at times, *appear* to be before + /// the kernel or memory copy completed but the reality is that if this ever occurred, the API + /// would report a FATAL error /// @var internal_correlation_id /// @brief Only internal correlation ID is provided } rocprofiler_buffer_tracing_correlation_id_retirement_record_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h index eefb353d74..306524f908 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h @@ -162,26 +162,10 @@ typedef struct */ typedef struct rocprofiler_callback_tracing_kernel_dispatch_data_t { - uint64_t size; ///< size of this struct - rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds - rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds - rocprofiler_agent_id_t agent_id; ///< agent kernel was dispatched on - rocprofiler_queue_id_t queue_id; ///< queue kernel was dispatched on - rocprofiler_kernel_id_t kernel_id; ///< identifier for kernel - rocprofiler_dispatch_id_t dispatch_id; ///< unique id for each dispatch - uint32_t private_segment_size; - uint32_t group_segment_size; - rocprofiler_dim3_t workgroup_size; - rocprofiler_dim3_t grid_size; - - /// @var private_segment_size - /// @brief runtime private memory segment size - /// @var group_segment_size - /// @brief runtime group memory segment size - /// @var workgroup_size - /// @brief runtime workgroup size (grid * threads) - /// @var grid_size - /// @brief runtime grid size + uint64_t size; ///< size of this struct + rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds + rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds + rocprofiler_kernel_dispatch_info_t dispatch_info; ///< Dispatch info } rocprofiler_callback_tracing_kernel_dispatch_data_t; /** diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/defines.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/defines.h index be77594661..01117ab109 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/defines.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/defines.h @@ -39,10 +39,10 @@ */ /** - * @brief The function was introduced in version 10.0 of the interface and has the - * symbol version string of ``"ROCPROFILER_10.0"``. + * @brief The function was introduced in version 0.0 of the interface and has the + * symbol version string of ``"ROCPROFILER_SDK_0.0"``. */ -#define ROCPROFILER_VERSION_10_0 +#define ROCPROFILER_SDK_VERSION_0_0 /** @} */ @@ -123,7 +123,9 @@ #ifdef __cplusplus # define ROCPROFILER_EXTERN_C_INIT extern "C" { # define ROCPROFILER_EXTERN_C_FINI } +# define ROCPROFILER_CXX_CODE(...) __VA_ARGS__ #else # define ROCPROFILER_EXTERN_C_INIT # define ROCPROFILER_EXTERN_C_FINI +# define ROCPROFILER_CXX_CODE(...) #endif diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/dispatch_profile.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/dispatch_profile.h index b6aa204900..0cdd25d840 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/dispatch_profile.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/dispatch_profile.h @@ -38,23 +38,29 @@ ROCPROFILER_EXTERN_C_INIT */ /** - * @brief Kernel dispatch data for profile counting callbacks + * @brief Kernel dispatch data for profile counting callbacks. * */ typedef struct rocprofiler_profile_counting_dispatch_data_t { - uint64_t size; ///< Size of this struct - rocprofiler_agent_id_t agent_id; ///< Agent ID where kernel is launched - rocprofiler_queue_id_t queue_id; ///< Queue ID where kernel packet is enqueued - rocprofiler_kernel_id_t kernel_id; ///< Kernel identifier - rocprofiler_dispatch_id_t dispatch_id; ///< unique id for each dispatch - rocprofiler_correlation_id_t correlation_id; ///< Correlation ID for this dispatch - uint32_t private_segment_size; ///< runtime private memory segment size - uint32_t group_segment_size; ///< runtime group memory segment size - rocprofiler_dim3_t workgroup_size; ///< runtime workgroup size (grid * threads) - rocprofiler_dim3_t grid_size; ///< runtime grid size + uint64_t size; ///< Size of this struct + rocprofiler_correlation_id_t correlation_id; ///< Correlation ID for this dispatch + rocprofiler_kernel_dispatch_info_t dispatch_info; ///< Dispatch info } rocprofiler_profile_counting_dispatch_data_t; +/** + * @brief ROCProfiler Profile Counting Counter Record Header Information + * + * This is buffer equivalent of ::rocprofiler_profile_counting_dispatch_data_t + */ +typedef struct rocprofiler_profile_counting_dispatch_record_t +{ + uint64_t size; ///< Size of this struct + uint64_t num_records; ///< number of ::rocprofiler_record_counter_t records + rocprofiler_correlation_id_t correlation_id; ///< Correlation ID for this dispatch + rocprofiler_kernel_dispatch_info_t dispatch_info; ///< Contains the `dispatch_id` +} rocprofiler_profile_counting_dispatch_record_t; + /** * @brief Kernel Dispatch Callback. This is a callback that is invoked before the kernel * is enqueued into the HSA queue. What counters to collect for a kernel are set diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h index 00e6976050..aad7ebf70d 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h @@ -364,6 +364,22 @@ typedef enum ROCPROFILER_COUNTER_INFO_VERSION_LAST, } rocprofiler_counter_info_version_id_t; +/** + * @brief Enumeration for distinguishing different buffer record kinds within the + * ::ROCPROFILER_BUFFER_CATEGORY_COUNTERS category + */ +typedef enum +{ + ROCPROFILER_COUNTER_RECORD_NONE = 0, + ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER, ///< ::rocprofiler_profile_counting_dispatch_record_t + ROCPROFILER_COUNTER_RECORD_VALUE, + ROCPROFILER_COUNTER_RECORD_LAST, + + /// @var ROCPROFILER_COUNTER_RECORD_KIND_DISPATCH_PROFILE_HEADER + /// @brief Indicates the payload type is of type + /// ::rocprofiler_profile_counting_dispatch_record_t +} rocprofiler_counter_record_kind_t; + //--------------------------------------------------------------------------------------// // // ALIASES @@ -591,6 +607,24 @@ rocprofiler_record_header_compute_hash(uint32_t category, uint32_t kind) return value; } +/** + * @brief ROCProfiler kernel dispatch information + * + */ +typedef struct rocprofiler_kernel_dispatch_info_t +{ + uint64_t size; ///< Size of this struct + rocprofiler_agent_id_t agent_id; ///< Agent ID where kernel is launched + rocprofiler_queue_id_t queue_id; ///< Queue ID where kernel packet is enqueued + rocprofiler_kernel_id_t kernel_id; ///< Kernel identifier + rocprofiler_dispatch_id_t dispatch_id; ///< unique id for each dispatch + uint32_t private_segment_size; ///< runtime private memory segment size + uint32_t group_segment_size; ///< runtime group memory segment size + rocprofiler_dim3_t workgroup_size; ///< runtime workgroup size (grid * threads) + rocprofiler_dim3_t grid_size; ///< runtime grid size + uint8_t reserved_padding[56]; // reserved for extensions w/o ABI break +} rocprofiler_kernel_dispatch_info_t; + /** * @brief Details for the dimension, including its size, for a counter record. */ @@ -611,10 +645,17 @@ typedef struct { rocprofiler_counter_instance_id_t id; ///< counter identifier double counter_value; ///< counter value - rocprofiler_correlation_id_t correlation_id; + rocprofiler_dispatch_id_t dispatch_id; - /// @var correlation_id - /// @brief Used to correlate the kernel data to an API call + /// @var dispatch_id + /// @brief A value greater than zero indicates that this counter record is associated with a + /// specific dispatch. + /// + /// This value can be mapped to a dispatch via the `dispatch_info` field (@see + /// ::rocprofiler_kernel_dispatch_info_t) of a ::rocprofiler_profile_counting_dispatch_data_t + /// instance (provided during callback for profile config) or a + /// ::rocprofiler_profile_counting_dispatch_record_t records (which will be insert into the + /// buffer prior to the associated ::rocprofiler_record_counter_t records). } rocprofiler_record_counter_t; /** @@ -622,11 +663,13 @@ typedef struct */ typedef struct { - int is_derived; ///< If this counter is a derived counter - const char* name; ///< Name of the counter - const char* description; ///< Description of the counter - const char* block; ///< Block of the counter (non-derived only) - const char* expression; ///< Counter expression (derived counters only) + rocprofiler_counter_id_t id; ///< Id of this counter + const char* name; ///< Name of the counter + const char* description; ///< Description of the counter + const char* block; ///< Block of the counter (non-derived only) + const char* expression; ///< Counter expression (derived counters only) + uint8_t is_constant : 1; ///< If this counter is HW constant + uint8_t is_derived : 1; ///< If this counter is a derived counter } rocprofiler_counter_info_v0_t; /** @@ -646,3 +689,7 @@ typedef struct /** @} */ ROCPROFILER_EXTERN_C_FINI + +ROCPROFILER_CXX_CODE( + static_assert(sizeof(rocprofiler_kernel_dispatch_info_t) == 128, + "Increasing the size of the kernel dispatch info is not permitted");) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/generateCSV.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/generateCSV.cpp index dd4266bfca..d26d1e4ffc 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/generateCSV.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/generateCSV.cpp @@ -154,25 +154,26 @@ generate_csv(tool_table* tool_functions, std::vectortool_get_kernel_name_fn(record->kernel_id); + auto kernel_name = + tool_functions->tool_get_kernel_name_fn(record->dispatch_info.kernel_id); rocprofiler::tool::csv::kernel_trace_csv_encoder::write_row( kernel_trace_ss, tool_functions->tool_get_domain_name_fn(record->kind), - tool_functions->tool_get_agent_node_id_fn(record->agent_id), - record->queue_id.handle, - record->kernel_id, + tool_functions->tool_get_agent_node_id_fn(record->dispatch_info.agent_id), + record->dispatch_info.queue_id.handle, + record->dispatch_info.kernel_id, kernel_name, record->correlation_id.internal, record->start_timestamp, record->end_timestamp, - record->private_segment_size, - record->group_segment_size, - record->workgroup_size.x, - record->workgroup_size.y, - record->workgroup_size.z, - record->grid_size.x, - record->grid_size.y, - record->grid_size.z); + record->dispatch_info.private_segment_size, + record->dispatch_info.group_segment_size, + record->dispatch_info.workgroup_size.x, + record->dispatch_info.workgroup_size.y, + record->dispatch_info.workgroup_size.z, + record->dispatch_info.grid_size.x, + record->dispatch_info.grid_size.y, + record->dispatch_info.grid_size.z); if(tool::get_config().stats) kernel_stats[kernel_name] += (record->end_timestamp - record->start_timestamp); @@ -355,7 +356,7 @@ generate_csv(tool_table* tool_functions, std::vectordispatch_data.kernel_id; + auto kernel_id = record->dispatch_data.dispatch_info.kernel_id; auto counter_name_value = std::map{}; for(const auto& count : record->profiler_record) { @@ -379,13 +380,14 @@ generate_csv(tool_table* tool_functions, std::vectordispatch_index, - tool_functions->tool_get_agent_node_id_fn(record->dispatch_data.agent_id), - record->dispatch_data.queue_id.handle, + tool_functions->tool_get_agent_node_id_fn( + record->dispatch_data.dispatch_info.agent_id), + record->dispatch_data.dispatch_info.queue_id.handle, record->pid, record->thread_id, - magnitude(record->dispatch_data.grid_size), + magnitude(record->dispatch_data.dispatch_info.grid_size), tool_functions->tool_get_kernel_name_fn(kernel_id), - magnitude(record->dispatch_data.workgroup_size), + magnitude(record->dispatch_data.dispatch_info.workgroup_size), record->lds_block_size_v, record->private_segment_size, record->arch_vgpr_count, diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp index 0c4d2eb900..58773d5c00 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -1246,8 +1246,8 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, rocprofiler_user_data_t* user_data, void* /*callback_data_args*/) { - auto kernel_id = dispatch_data.kernel_id; - auto agent_id = dispatch_data.agent_id; + auto kernel_id = dispatch_data.dispatch_info.kernel_id; + auto agent_id = dispatch_data.dispatch_info.agent_id; if(!is_targeted_kernel(kernel_id)) { @@ -1283,7 +1283,7 @@ counter_record_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_da rocprofiler_user_data_t user_data, void* /*callback_data_args*/) { - auto kernel_id = dispatch_data.kernel_id; + auto kernel_id = dispatch_data.dispatch_info.kernel_id; const auto* cnt_dispatch_data_v = static_cast(user_data.ptr); rocprofiler_tool_counter_collection_record_t counter_record; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp index 10f96df097..4d28c490b0 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp @@ -51,7 +51,7 @@ extern "C" { * @retval ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND if counter not found * @retval ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI Version is not supported */ -rocprofiler_status_t ROCPROFILER_API +rocprofiler_status_t rocprofiler_query_counter_info(rocprofiler_counter_id_t counter_id, rocprofiler_counter_info_version_id_t version, void* info) @@ -64,6 +64,8 @@ rocprofiler_query_counter_info(rocprofiler_counter_id_t counter_id, if(const auto* metric_ptr = rocprofiler::common::get_val(id_map, counter_id.handle)) { + out_struct.id = counter_id; + out_struct.is_constant = (metric_ptr->special().empty()) ? 0 : 1; out_struct.is_derived = (metric_ptr->expression().empty()) ? 0 : 1; out_struct.name = metric_ptr->name().c_str(); out_struct.description = metric_ptr->description().c_str(); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/core.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/core.cpp index 262c8838ea..439db1bd50 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/core.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/core.cpp @@ -326,19 +326,23 @@ queue_cb(const context::context* ctx, auto dispatch_data = common::init_public_api_struct(rocprofiler_profile_counting_dispatch_data_t{}); - dispatch_data.kernel_id = kernel_id; - dispatch_data.dispatch_id = dispatch_id; - dispatch_data.agent_id = CHECK_NOTNULL(queue.get_agent().get_rocp_agent())->id; - dispatch_data.queue_id = queue.get_id(); - dispatch_data.correlation_id = _corr_id_v; - dispatch_data.private_segment_size = pkt.kernel_dispatch.private_segment_size; - dispatch_data.group_segment_size = pkt.kernel_dispatch.group_segment_size; - dispatch_data.workgroup_size = {pkt.kernel_dispatch.workgroup_size_x, - pkt.kernel_dispatch.workgroup_size_y, - pkt.kernel_dispatch.workgroup_size_z}; - dispatch_data.grid_size = {pkt.kernel_dispatch.grid_size_x, - pkt.kernel_dispatch.grid_size_y, - pkt.kernel_dispatch.grid_size_z}; + dispatch_data.correlation_id = _corr_id_v; + { + auto dispatch_info = common::init_public_api_struct(rocprofiler_kernel_dispatch_info_t{}); + dispatch_info.kernel_id = kernel_id; + dispatch_info.dispatch_id = dispatch_id; + dispatch_info.agent_id = CHECK_NOTNULL(queue.get_agent().get_rocp_agent())->id; + dispatch_info.queue_id = queue.get_id(); + dispatch_info.private_segment_size = pkt.kernel_dispatch.private_segment_size; + dispatch_info.group_segment_size = pkt.kernel_dispatch.group_segment_size; + dispatch_info.workgroup_size = {pkt.kernel_dispatch.workgroup_size_x, + pkt.kernel_dispatch.workgroup_size_y, + pkt.kernel_dispatch.workgroup_size_z}; + dispatch_info.grid_size = {pkt.kernel_dispatch.grid_size_x, + pkt.kernel_dispatch.grid_size_y, + pkt.kernel_dispatch.grid_size_z}; + dispatch_data.dispatch_info = dispatch_info; + } info->user_cb(dispatch_data, &req_profile, user_data, info->callback_args); @@ -441,6 +445,7 @@ completed_cb(const context::context* ctx, } } + auto _dispatch_id = session.callback_record.dispatch_info.dispatch_id; for(auto& ast : prof_config->asts) { std::vector>> cache; @@ -448,35 +453,47 @@ completed_cb(const context::context* ctx, CHECK(ret); ast.set_out_id(*ret); + out.reserve(out.size() + ret->size()); for(auto& val : *ret) { - val.correlation_id = _corr_id_v; - if(buf) - buf->emplace(ROCPROFILER_BUFFER_CATEGORY_COUNTERS, 0, val); - else - out.push_back(val); + val.dispatch_id = _dispatch_id; + out.emplace_back(val); } } if(!out.empty()) { - CHECK(info->record_callback); + if(buf) + { + auto _header = + common::init_public_api_struct(rocprofiler_profile_counting_dispatch_record_t{}); + _header.num_records = out.size(); + _header.correlation_id = _corr_id_v; + _header.dispatch_info = session.callback_record.dispatch_info; + buf->emplace(ROCPROFILER_BUFFER_CATEGORY_COUNTERS, + ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER, + _header); - auto dispatch_data = - common::init_public_api_struct(rocprofiler_profile_counting_dispatch_data_t{}); + for(auto itr : out) + buf->emplace( + ROCPROFILER_BUFFER_CATEGORY_COUNTERS, ROCPROFILER_COUNTER_RECORD_VALUE, itr); + } + else + { + CHECK(info->record_callback); - dispatch_data.kernel_id = session.callback_record.kernel_id; - dispatch_data.dispatch_id = session.callback_record.dispatch_id; - dispatch_data.agent_id = session.callback_record.agent_id; - dispatch_data.queue_id = session.callback_record.queue_id; - dispatch_data.correlation_id = _corr_id_v; - dispatch_data.private_segment_size = session.callback_record.private_segment_size; - dispatch_data.group_segment_size = session.callback_record.group_segment_size; - dispatch_data.workgroup_size = session.callback_record.workgroup_size; - dispatch_data.grid_size = session.callback_record.grid_size; + auto dispatch_data = + common::init_public_api_struct(rocprofiler_profile_counting_dispatch_data_t{}); - info->record_callback( - dispatch_data, out.data(), out.size(), session.user_data, info->record_callback_args); + dispatch_data.dispatch_info = session.callback_record.dispatch_info; + dispatch_data.correlation_id = _corr_id_v; + + info->record_callback(dispatch_data, + out.data(), + out.size(), + session.user_data, + info->record_callback_args); + } } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/core.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/core.cpp index ef6bd1721d..17d37ec3ca 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/core.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/core.cpp @@ -180,7 +180,8 @@ buffered_callback(rocprofiler_context_id_t, for(size_t i = 0; i < num_headers; ++i) { auto* header = headers[i]; - if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && header->kind == 0) + if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && + header->kind == ROCPROFILER_COUNTER_RECORD_VALUE) { // Print the returned counter data. auto* record = static_cast(header->payload); @@ -380,11 +381,11 @@ user_dispatch_cb(rocprofiler_profile_counting_dispatch_data_t dispatch_data, { expected_dispatch& expected = *static_cast(callback_data_args); - auto agent_id = dispatch_data.agent_id; - auto queue_id = dispatch_data.queue_id; + auto agent_id = dispatch_data.dispatch_info.agent_id; + auto queue_id = dispatch_data.dispatch_info.queue_id; auto correlation_id = dispatch_data.correlation_id; - auto kernel_id = dispatch_data.kernel_id; - auto dispatch_id = dispatch_data.dispatch_id; + auto kernel_id = dispatch_data.dispatch_info.kernel_id; + auto dispatch_id = dispatch_data.dispatch_info.dispatch_id; EXPECT_EQ(sizeof(rocprofiler_profile_counting_dispatch_data_t), dispatch_data.size); EXPECT_EQ(expected.kernel_id, kernel_id); @@ -394,8 +395,8 @@ user_dispatch_cb(rocprofiler_profile_counting_dispatch_data_t dispatch_data, EXPECT_EQ(expected.correlation_id.internal, correlation_id.internal); EXPECT_EQ(expected.correlation_id.external.ptr, correlation_id.external.ptr); EXPECT_EQ(expected.correlation_id.external.value, correlation_id.external.value); - EXPECT_EQ(expected.workgroup_size, dispatch_data.workgroup_size); - EXPECT_EQ(expected.grid_size, dispatch_data.grid_size); + EXPECT_EQ(expected.workgroup_size, dispatch_data.dispatch_info.workgroup_size); + EXPECT_EQ(expected.grid_size, dispatch_data.dispatch_info.grid_size); ASSERT_NE(config, nullptr); config->handle = expected.id.handle; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/xml/derived_counters.xml b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/xml/derived_counters.xml index da14d3eab6..ec3af7a570 100755 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/xml/derived_counters.xml +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/xml/derived_counters.xml @@ -210,7 +210,7 @@ - + @@ -410,7 +410,7 @@ - + diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp index c8fdc7f4a9..59da156854 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp @@ -275,23 +275,32 @@ WriteInterceptor(const void* packets, corr_id->add_kern_count(); } - auto dispatch_id = ++sequence_counter; - auto callback_record = - callback_record_t{sizeof(callback_record_t), - rocprofiler_timestamp_t{0}, - rocprofiler_timestamp_t{0}, - queue.get_agent().get_rocp_agent()->id, - queue.get_id(), - kernel_id, - dispatch_id, - kernel_pkt.kernel_dispatch.private_segment_size, - kernel_pkt.kernel_dispatch.group_segment_size, - rocprofiler_dim3_t{kernel_pkt.kernel_dispatch.workgroup_size_x, - kernel_pkt.kernel_dispatch.workgroup_size_y, - kernel_pkt.kernel_dispatch.workgroup_size_z}, - rocprofiler_dim3_t{kernel_pkt.kernel_dispatch.grid_size_x, - kernel_pkt.kernel_dispatch.grid_size_y, - kernel_pkt.kernel_dispatch.grid_size_z}}; + // computes the "size" based on the offset of reserved_padding field + constexpr auto kernel_dispatch_info_rt_size = + common::compute_runtime_sizeof(); + + static_assert(kernel_dispatch_info_rt_size < sizeof(rocprofiler_kernel_dispatch_info_t), + "failed to compute size field based on offset of reserved_padding field"); + + auto dispatch_id = ++sequence_counter; + auto callback_record = callback_record_t{ + sizeof(callback_record_t), + rocprofiler_timestamp_t{0}, + rocprofiler_timestamp_t{0}, + rocprofiler_kernel_dispatch_info_t{ + .size = kernel_dispatch_info_rt_size, + .agent_id = queue.get_agent().get_rocp_agent()->id, + .queue_id = queue.get_id(), + .kernel_id = kernel_id, + .dispatch_id = dispatch_id, + .private_segment_size = kernel_pkt.kernel_dispatch.private_segment_size, + .group_segment_size = kernel_pkt.kernel_dispatch.group_segment_size, + .workgroup_size = rocprofiler_dim3_t{kernel_pkt.kernel_dispatch.workgroup_size_x, + kernel_pkt.kernel_dispatch.workgroup_size_y, + kernel_pkt.kernel_dispatch.workgroup_size_z}, + .grid_size = rocprofiler_dim3_t{kernel_pkt.kernel_dispatch.grid_size_x, + kernel_pkt.kernel_dispatch.grid_size_y, + kernel_pkt.kernel_dispatch.grid_size_z}}}; { auto tracer_data = callback_record; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/tracing.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/tracing.cpp index e443177210..8824014e21 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/tracing.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/kernel_dispatch/tracing.cpp @@ -66,9 +66,9 @@ dispatch_complete(queue_info_session_t& session) // only do the following work if there are contexts that require this info auto& callback_record = session.callback_record; const auto& _extern_corr_ids = session.tracing_data.external_correlation_ids; - const auto* _rocp_agent = agent::get_agent(callback_record.agent_id); + const auto* _rocp_agent = agent::get_agent(callback_record.dispatch_info.agent_id); auto _hsa_agent = agent::get_hsa_agent(_rocp_agent); - auto _kern_id = callback_record.kernel_id; + auto _kern_id = callback_record.dispatch_info.kernel_id; auto _signal = session.kernel_pkt.kernel_dispatch.completion_signal; auto _tid = session.tid; @@ -122,14 +122,7 @@ dispatch_complete(queue_info_session_t& session) _tid, callback_record.start_timestamp, callback_record.end_timestamp, - callback_record.agent_id, - callback_record.queue_id, - callback_record.kernel_id, - callback_record.dispatch_id, - callback_record.private_segment_size, - callback_record.group_segment_size, - callback_record.workgroup_size, - callback_record.grid_size}; + callback_record.dispatch_info}; tracing::execute_buffer_record_emplace(tracing_data_v.buffered_contexts, _tid, diff --git a/projects/rocprofiler-sdk/tests/async-copy-tracing/validate.py b/projects/rocprofiler-sdk/tests/async-copy-tracing/validate.py index 7368ad7de2..1742722964 100644 --- a/projects/rocprofiler-sdk/tests/async-copy-tracing/validate.py +++ b/projects/rocprofiler-sdk/tests/async-copy-tracing/validate.py @@ -194,10 +194,10 @@ def test_kernel_ids(input_data): assert payload["kernel_name"] == symbol_info[kern_id]["kernel_name"] for itr in sdk_data["buffer_records"]["kernel_dispatches"]: - assert itr["kernel_id"] in symbol_info.keys() + assert itr["dispatch_info"]["kernel_id"] in symbol_info.keys() for itr in sdk_data["callback_records"]["kernel_dispatches"]: - assert itr["payload"]["kernel_id"] in symbol_info.keys() + assert itr["payload"]["dispatch_info"]["kernel_id"] in symbol_info.keys() def test_kernel_dispatch_ids(input_data): @@ -211,11 +211,11 @@ def test_kernel_dispatch_ids(input_data): bf_seq_ids = [] for itr in sdk_data["buffer_records"]["kernel_dispatches"]: - bf_seq_ids.append(itr["dispatch_id"]) + bf_seq_ids.append(itr["dispatch_info"]["dispatch_id"]) cb_seq_ids = [] for itr in sdk_data["callback_records"]["kernel_dispatches"]: - cb_seq_ids.append(itr["payload"]["dispatch_id"]) + cb_seq_ids.append(itr["payload"]["dispatch_info"]["dispatch_id"]) bf_seq_ids = sorted(bf_seq_ids) cb_seq_ids = sorted(cb_seq_ids) diff --git a/projects/rocprofiler-sdk/tests/common/serialization.hpp b/projects/rocprofiler-sdk/tests/common/serialization.hpp index 10d3aab622..643568c07a 100644 --- a/projects/rocprofiler-sdk/tests/common/serialization.hpp +++ b/projects/rocprofiler-sdk/tests/common/serialization.hpp @@ -97,6 +97,13 @@ save(ArchiveT& ar, rocprofiler_queue_id_t data) SAVE_DATA_FIELD(handle); } +template +void +save(ArchiveT& ar, rocprofiler_counter_id_t data) +{ + SAVE_DATA_FIELD(handle); +} + template void save(ArchiveT& ar, rocprofiler_correlation_id_t data) @@ -270,11 +277,9 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_scratch_memory_data_t data) template void -save(ArchiveT& ar, rocprofiler_callback_tracing_kernel_dispatch_data_t data) +save(ArchiveT& ar, rocprofiler_kernel_dispatch_info_t data) { SAVE_DATA_FIELD(size); - SAVE_DATA_FIELD(start_timestamp); - SAVE_DATA_FIELD(end_timestamp); SAVE_DATA_FIELD(agent_id); SAVE_DATA_FIELD(queue_id); SAVE_DATA_FIELD(kernel_id); @@ -285,20 +290,33 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_kernel_dispatch_data_t data) SAVE_DATA_FIELD(group_segment_size); } +template +void +save(ArchiveT& ar, rocprofiler_callback_tracing_kernel_dispatch_data_t data) +{ + SAVE_DATA_FIELD(size); + SAVE_DATA_FIELD(start_timestamp); + SAVE_DATA_FIELD(end_timestamp); + SAVE_DATA_FIELD(dispatch_info); +} + template void save(ArchiveT& ar, rocprofiler_profile_counting_dispatch_data_t data) { SAVE_DATA_FIELD(size); - SAVE_DATA_FIELD(agent_id); - SAVE_DATA_FIELD(queue_id); - SAVE_DATA_FIELD(kernel_id); - SAVE_DATA_FIELD(dispatch_id); SAVE_DATA_FIELD(correlation_id); - SAVE_DATA_FIELD(private_segment_size); - SAVE_DATA_FIELD(group_segment_size); - SAVE_DATA_FIELD(workgroup_size); - SAVE_DATA_FIELD(grid_size); + SAVE_DATA_FIELD(dispatch_info); +} + +template +void +save(ArchiveT& ar, rocprofiler_profile_counting_dispatch_record_t data) +{ + SAVE_DATA_FIELD(size); + SAVE_DATA_FIELD(num_records); + SAVE_DATA_FIELD(correlation_id); + SAVE_DATA_FIELD(dispatch_info); } template @@ -339,7 +357,7 @@ save(ArchiveT& ar, rocprofiler_record_counter_t data) { SAVE_DATA_FIELD(id); SAVE_DATA_FIELD(counter_value); - SAVE_DATA_FIELD(correlation_id); + SAVE_DATA_FIELD(dispatch_id); } template @@ -367,14 +385,7 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_kernel_dispatch_record_t data) SAVE_DATA_FIELD(correlation_id); SAVE_DATA_FIELD(start_timestamp); SAVE_DATA_FIELD(end_timestamp); - SAVE_DATA_FIELD(agent_id); - SAVE_DATA_FIELD(queue_id); - SAVE_DATA_FIELD(kernel_id); - SAVE_DATA_FIELD(dispatch_id); - SAVE_DATA_FIELD(private_segment_size); - SAVE_DATA_FIELD(group_segment_size); - SAVE_DATA_FIELD(workgroup_size); - SAVE_DATA_FIELD(grid_size); + SAVE_DATA_FIELD(dispatch_info); } template @@ -705,6 +716,19 @@ save(ArchiveT& ar, const rocprofiler_agent_t& data) generate("caches", data.caches, data.caches_count); generate("io_links", data.io_links, data.io_links_count); } + +template +void +save(ArchiveT& ar, rocprofiler_counter_info_v0_t data) +{ + SAVE_DATA_FIELD(id); + SAVE_DATA_BITFIELD("is_constant", is_constant); + SAVE_DATA_BITFIELD("is_derived", is_derived); + SAVE_DATA_CSTR(name); + SAVE_DATA_CSTR(description); + SAVE_DATA_CSTR(block); + SAVE_DATA_CSTR(expression); +} } // namespace cereal #undef SAVE_DATA_FIELD diff --git a/projects/rocprofiler-sdk/tests/counter-collection/validate.py b/projects/rocprofiler-sdk/tests/counter-collection/validate.py index db40c4d9f4..9d187c49f8 100644 --- a/projects/rocprofiler-sdk/tests/counter-collection/validate.py +++ b/projects/rocprofiler-sdk/tests/counter-collection/validate.py @@ -22,24 +22,50 @@ def test_data_structure(input_data): def test_counter_values(input_data): - data = input_data - agent_data = data["rocprofiler-sdk-json-tool"]["agents"] - counter_data = data["rocprofiler-sdk-json-tool"]["buffer_records"][ - "counter_collection" - ] + data = input_data["rocprofiler-sdk-json-tool"] + agent_data = data["agents"] + counter_info = data["counter_info"] + counter_data = data["buffer_records"]["counter_collection"] - scaling_factor = 1 - for itr in agent_data: - if itr["type"] == 2 and itr["wave_front_size"] > 0: - scaling_factor = 64 / itr["wave_front_size"] - break + for itr in counter_info: + if itr["is_constant"] == 1 and itr["name"] == "size": + continue + assert itr["id"]["handle"] > 0, f"{itr}" + assert itr["is_constant"] in (0, 1), f"{itr}" + assert itr["is_derived"] in (0, 1), f"{itr}" + assert len(itr["name"]) >= 4, f"{itr}" + assert len(itr["description"]) >= 4, f"{itr}" + if itr["is_constant"] == 0: + if itr["is_derived"] == 0: + assert len(itr["block"]) > 0, f"{itr}" + if itr["is_derived"] == 1: + assert len(itr["expression"]) > 0, f"{itr}" + + def get_agent(agent_id): + for itr in agent_data: + if itr["id"]["handle"] == agent_id["handle"]: + return itr + return None + + def get_scaling_factor(agent_id): + agent = get_agent(agent_id) + assert agent is not None, f"id={agent_id}" + if agent["type"] == 2 and agent["wave_front_size"] > 0: + return 64 / agent["wave_front_size"] + return 0 for itr in counter_data: - value = itr["counter_value"] - if int(round(value, 0)) > 0: - assert int(round(value, 0)) == int( - round(1 * scaling_factor, 0) - ), f"agent_data:\n{agent_data}\n\ncounter_data:\n{counter_data}" + assert itr["num_records"] == len(itr["records"]), f"itr={itr}" + agent_id = itr["dispatch_info"]["agent_id"] + agent = get_agent(agent_id) + scaling_factor = get_scaling_factor(agent_id) + assert agent is not None, f"itr={itr}\nagent={agent}" + for ritr in itr["records"]: + value = ritr["counter_value"] + if int(round(value, 0)) > 0: + assert int(round(value, 0)) == int( + round(1 * scaling_factor, 0) + ), f"itr={itr}\nagent={agent}" if __name__ == "__main__": diff --git a/projects/rocprofiler-sdk/tests/kernel-tracing/validate.py b/projects/rocprofiler-sdk/tests/kernel-tracing/validate.py index 02f20aa3b9..686c473c34 100644 --- a/projects/rocprofiler-sdk/tests/kernel-tracing/validate.py +++ b/projects/rocprofiler-sdk/tests/kernel-tracing/validate.py @@ -254,10 +254,10 @@ def test_kernel_ids(input_data): assert payload["kernel_name"] == symbol_info[kern_id]["kernel_name"] for itr in sdk_data["buffer_records"]["kernel_dispatches"]: - assert itr["kernel_id"] in symbol_info.keys() + assert itr["dispatch_info"]["kernel_id"] in symbol_info.keys() for itr in sdk_data["callback_records"]["kernel_dispatches"]: - assert itr["payload"]["kernel_id"] in symbol_info.keys() + assert itr["payload"]["dispatch_info"]["kernel_id"] in symbol_info.keys() def test_kernel_dispatch_ids(input_data): @@ -271,11 +271,11 @@ def test_kernel_dispatch_ids(input_data): bf_seq_ids = [] for itr in sdk_data["buffer_records"]["kernel_dispatches"]: - bf_seq_ids.append(itr["dispatch_id"]) + bf_seq_ids.append(itr["dispatch_info"]["dispatch_id"]) cb_seq_ids = [] for itr in sdk_data["callback_records"]["kernel_dispatches"]: - cb_seq_ids.append(itr["payload"]["dispatch_id"]) + cb_seq_ids.append(itr["payload"]["dispatch_info"]["dispatch_id"]) bf_seq_ids = sorted(bf_seq_ids) cb_seq_ids = sorted(cb_seq_ids) diff --git a/projects/rocprofiler-sdk/tests/page-migration/validate.py b/projects/rocprofiler-sdk/tests/page-migration/validate.py index 1f831ea313..2443f2fa08 100644 --- a/projects/rocprofiler-sdk/tests/page-migration/validate.py +++ b/projects/rocprofiler-sdk/tests/page-migration/validate.py @@ -209,7 +209,10 @@ def test_kernel_ids(input_data): assert payload["kernel_name"] == symbol_info[kern_id]["kernel_name"] for itr in sdk_data["buffer_records"]["kernel_dispatches"]: - assert itr["kernel_id"] in symbol_info.keys() + assert itr["dispatch_info"]["kernel_id"] in symbol_info.keys() + + for itr in sdk_data["callback_records"]["kernel_dispatches"]: + assert itr["payload"]["dispatch_info"]["kernel_id"] in symbol_info.keys() def test_retired_correlation_ids(input_data): diff --git a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp index 9cdd65dca7..8267524fba 100644 --- a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp +++ b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include #include @@ -62,6 +64,7 @@ #include #include #include +#include #include #include #include @@ -549,11 +552,59 @@ struct scratch_memory_callback_record_t } }; +struct profile_counting_record +{ + profile_counting_record(rocprofiler_profile_counting_dispatch_record_t hdr) + : header{hdr} + {} + + rocprofiler_profile_counting_dispatch_record_t header = {}; + std::vector data = {}; + + profile_counting_record() = default; + ~profile_counting_record() = default; + profile_counting_record(const profile_counting_record&) = default; + profile_counting_record(profile_counting_record&&) noexcept = default; + profile_counting_record& operator=(const profile_counting_record&) = default; + profile_counting_record& operator=(profile_counting_record&&) noexcept = default; + + template + void save(ArchiveT& ar) const + { + cereal::save(ar, header); + auto _data = data; + for(auto& itr : _data) + { + auto _counter_id = rocprofiler_counter_id_t{}; + ROCPROFILER_CALL(rocprofiler_query_record_counter_id(itr.id, &_counter_id), + "failed to query counter id"); + itr.id = _counter_id.handle; + } + + ar(cereal::make_nvp("records", _data)); + } + + void emplace_back(rocprofiler_record_counter_t val) + { + if(*this != val) + throw std::runtime_error{"invalid profile_counting_record::emplace_back(...)"}; + data.emplace_back(val); + } + + bool operator==(rocprofiler_record_counter_t rhs) const + { + return (header.dispatch_info.dispatch_id == rhs.dispatch_id); + } + + bool operator!=(rocprofiler_record_counter_t rhs) const { return !(*this == rhs); } +}; + +auto counter_info = std::deque{}; auto code_object_records = std::deque{}; auto kernel_symbol_records = std::deque{}; auto hsa_api_cb_records = std::deque{}; auto marker_api_cb_records = std::deque{}; -auto counter_collection_bf_records = std::deque{}; +auto counter_collection_bf_records = std::deque{}; auto hip_api_cb_records = std::deque{}; auto scratch_memory_cb_records = std::deque{}; auto kernel_dispatch_cb_records = std::deque{}; @@ -583,7 +634,8 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, static std::unordered_map profile_cache = {}; auto search_cache = [&]() { - if(auto pos = profile_cache.find(dispatch_data.agent_id.handle); pos != profile_cache.end()) + if(auto pos = profile_cache.find(dispatch_data.dispatch_info.agent_id.handle); + pos != profile_cache.end()) { *config = pos->second; return true; @@ -610,7 +662,7 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, // Iterate through the agents and get the counters available on that agent ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( - dispatch_data.agent_id, + dispatch_data.dispatch_info.agent_id, []([[maybe_unused]] rocprofiler_agent_id_t id, rocprofiler_counter_id_t* counters, size_t num_counters, @@ -626,18 +678,30 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, static_cast(&gpu_counters)), "Could not fetch supported counters"); + for(auto& counter : gpu_counters) + { + auto info = rocprofiler_counter_info_v0_t{}; + + ROCPROFILER_CALL( + rocprofiler_query_counter_info( + counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast(&info)), + "Could not query counter_id"); + + counter_info.emplace_back(info); + } + std::vector collect_counters; // Look for the counters contained in counters_to_collect in gpu_counters for(auto& counter : gpu_counters) { - rocprofiler_counter_info_v0_t version; + rocprofiler_counter_info_v0_t info; ROCPROFILER_CALL( rocprofiler_query_counter_info( - counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast(&version)), + counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast(&info)), "Could not query counter_id"); - if(counters_to_collect.count(std::string(version.name)) > 0) + if(counters_to_collect.count(std::string(info.name)) > 0) { collect_counters.push_back(counter); } @@ -645,12 +709,13 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, // Create a colleciton profile for the counters rocprofiler_profile_config_id_t profile; - ROCPROFILER_CALL( - rocprofiler_create_profile_config( - dispatch_data.agent_id, collect_counters.data(), collect_counters.size(), &profile), - "Could not construct profile cfg"); + ROCPROFILER_CALL(rocprofiler_create_profile_config(dispatch_data.dispatch_info.agent_id, + collect_counters.data(), + collect_counters.size(), + &profile), + "Could not construct profile cfg"); - profile_cache.emplace(dispatch_data.agent_id.handle, profile); + profile_cache.emplace(dispatch_data.dispatch_info.agent_id.handle, profile); // Return the profile to collect those counters for this dispatch *config = profile; } @@ -865,10 +930,21 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/, "unexpected rocprofiler_record_header_t tracing category kind"}; } } - else if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && header->kind == 0) + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && + header->kind == ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER) + { + auto* profiler_record = + static_cast(header->payload); + counter_collection_bf_records.emplace_back(*profiler_record); + } + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && + header->kind == ROCPROFILER_COUNTER_RECORD_VALUE) { auto* profiler_record = static_cast(header->payload); - counter_collection_bf_records.emplace_back(*profiler_record); + if(counter_collection_bf_records.empty()) + throw std::runtime_error{ + "missing rocprofiler_profile_counting_dispatch_record_t (header)"}; + counter_collection_bf_records.back().emplace_back(*profiler_record); } else { @@ -1427,7 +1503,8 @@ tool_fini(void* tool_data) << ", hip_api_bf_records=" << hip_api_bf_records.size() << ", marker_api_bf_records=" << marker_api_bf_records.size() << ", corr_id_retire_records=" << corr_id_retire_records.size() - << ", counter_collection_records=" << counter_collection_bf_records.size() << "...\n" + << ", counter_collection_value_records=" << counter_collection_bf_records.size() + << "...\n" << std::flush; auto* _call_stack = static_cast(tool_data); @@ -1501,6 +1578,7 @@ write_json(call_stack_t* _call_stack) json_ar.finishNode(); json_ar(cereal::make_nvp("agents", agents)); + json_ar(cereal::make_nvp("counter_info", counter_info)); if(_call_stack) json_ar(cereal::make_nvp("call_stack", *_call_stack)); json_ar.setNextName("callback_records"); @@ -1611,7 +1689,8 @@ write_perfetto() } for(auto itr : kernel_dispatch_bf_records) - agent_queue_ids[itr.agent_id.handle].emplace(itr.queue_id.handle); + agent_queue_ids[itr.dispatch_info.agent_id.handle].emplace( + itr.dispatch_info.queue_id.handle); } auto thread_tracks = std::unordered_map{}; @@ -1804,10 +1883,11 @@ write_perfetto() auto demangled = std::unordered_map{}; for(auto itr : kernel_dispatch_bf_records) { - const kernel_symbol_callback_record_t* sym = nullptr; + const auto& info = itr.dispatch_info; + const kernel_symbol_callback_record_t* sym = nullptr; for(const auto& kitr : kernel_symbol_records) { - if(kitr.payload.kernel_id == itr.kernel_id) + if(kitr.payload.kernel_id == info.kernel_id) { sym = &kitr; break; @@ -1815,7 +1895,7 @@ write_perfetto() } auto name = std::string_view{sym->payload.kernel_name}; - auto& track = agent_queue_tracks.at(itr.agent_id.handle).at(itr.queue_id.handle); + auto& track = agent_queue_tracks.at(info.agent_id.handle).at(info.queue_id.handle); if(demangled.find(name) == demangled.end()) { @@ -1833,21 +1913,21 @@ write_perfetto() "kind", itr.kind, "agent", - itr.agent_id.handle, + info.agent_id.handle, "corr_id", itr.correlation_id.internal, "queue", - itr.queue_id.handle, + info.queue_id.handle, "kernel_id", - itr.kernel_id, + info.kernel_id, "private_segment_size", - itr.private_segment_size, + info.private_segment_size, "group_segment_size", - itr.group_segment_size, + info.group_segment_size, "workgroup_size", - itr.workgroup_size.x * itr.workgroup_size.y * itr.workgroup_size.z, + info.workgroup_size.x * info.workgroup_size.y * info.workgroup_size.z, "grid_size", - itr.grid_size.x * itr.grid_size.y * itr.grid_size.z); + info.grid_size.x * info.grid_size.y * info.grid_size.z); TRACE_EVENT_END(rocprofiler::trait::name::value, track,