Add support for scratch reporting (#523)
* Add ToolsApiTable Add ToolsApiTable wrapping for scratch memory tracking * Add initial support for scratch memory tracking Buffering is implemented * cmake formatting (cmake-format) (#525) Co-authored-by: MythreyaK <MythreyaK@users.noreply.github.com> * source formatting (clang-format v11) (#524) Co-authored-by: MythreyaK <MythreyaK@users.noreply.github.com> * Add callback tracing for scratch Fixed the error where scratch tracking init was called irrespective of whether any client requested it * Apply suggestions from code review Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com> * Fix tools api copy/update Tables were saved/updated incorrectly in the previous commit. Also adds passing user data through the callback * Fix OpKind sequence for scratch tracking Previously scratch was using OpKind from rocprofiler-sdk, but templates were instantiated using API ID. These differ by 1 * Integration tests for scratch reporting Added buffer and callback integration tests for scratch reporting * source formatting (clang-format v11) (#550) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * cmake formatting (cmake-format) (#551) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * python formatting (black) (#549) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * CI fixes * source formatting (clang-format v11) (#554) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * Update api Rebase on main and updates based on PR feedback * Update scratch reporting and address PR comments - Added agent id to buffer records - Updated `test_internal_correlation_ids` - Is almost identical to one in async-copy - Updated scratch test to check for agent id - Updated queue id serialization in callback records (prints handle as nested key) - Remove `marker_api_traces` from scratch `test_internal_correlation_ids` validation test - Rename `amd_tools_api` to `scratch_memory` - Added doxygen 
comments - Remove scratch callback from `tool.cpp` - Replace assert with `LOG_IF` in `scratch_memory.cpp` * Update tools table Changed to match up with changes to hsa tables in main branch * Rework scratch memory structure * Update tests - Added suggestions from PR review, and updated tests accordingly * Misc cleanup * Update scratch test As of Apr 4th, `hsa_amd_agent_set_async_scratch_limit` is disabled. Note, > This API: `hsa_amd_agent_set_async_scratch_limit` is currently > disabled. We need some changes in CP firmware to be able to do this > and these changes are not ready yet. > With the current code, you will also not get notifications for > alternate-scratch allocations because this feature has been disabled > while CP firmware is making additional changes > We are hoping to have that feature enabled by ROCm-6.3 * Minor update to lib/rocprofiler-sdk/internal_threading.* - delay destruction of shared_ptrs of the tasks to prevent a rare (but possible) data race on the destruction of the shared_ptr --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: MythreyaK <MythreyaK@users.noreply.github.com> Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com> Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
Tento commit je obsažen v:
@@ -152,16 +152,21 @@ typedef struct
|
||||
} rocprofiler_buffer_tracing_page_migration_record_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Buffer Scratch Memory Tracer Record. Not implemented.
|
||||
* @brief ROCProfiler Buffer Scratch Memory Tracer Record
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint64_t size; ///< size of this struct
|
||||
rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY
|
||||
rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record
|
||||
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
|
||||
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
|
||||
// Not Sure What is the info needed here?
|
||||
rocprofiler_scratch_memory_operation_t
|
||||
operation; ///< @see rocprofiler_scratch_memory_operation_t
|
||||
rocprofiler_agent_id_t agent_id; ///< agent kernel was dispatched on
|
||||
rocprofiler_queue_id_t queue_id; ///< queue kernel was dispatched on
|
||||
rocprofiler_thread_id_t thread_id; ///< id for thread generating this record
|
||||
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
|
||||
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
|
||||
rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record
|
||||
rocprofiler_scratch_alloc_flag_t flags; ///< allocation flags, @see rocprofiler_scratch_alloc_flag_t
|
||||
} rocprofiler_buffer_tracing_scratch_memory_record_t;
|
||||
|
||||
/**
|
||||
|
||||
@@ -28,6 +28,9 @@
|
||||
#include <rocprofiler-sdk/hsa.h>
|
||||
#include <rocprofiler-sdk/marker.h>
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_amd_tool.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
|
||||
#include <stdint.h>
|
||||
@@ -153,6 +156,19 @@ typedef struct
|
||||
|
||||
} rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Scratch Memory Callback Data.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint64_t size; ///< size of this struct
|
||||
rocprofiler_agent_id_t agent_id; ///< agent identifier reported with this scratch memory event
|
||||
rocprofiler_queue_id_t queue_id; ///< queue identifier reported with this scratch memory event
|
||||
rocprofiler_scratch_alloc_flag_t flags; ///< allocation flags, @see rocprofiler_scratch_alloc_flag_t
|
||||
hsa_amd_tool_event_kind_t args_kind; ///< HSA tool event kind; NOTE(review): presumably selects the valid member of args - confirm
|
||||
rocprofiler_scratch_memory_args_t args; ///< event arguments, @see rocprofiler_scratch_memory_args_t
|
||||
} rocprofiler_callback_tracing_scratch_memory_data_t;
|
||||
|
||||
/**
|
||||
* @brief API Tracing callback function. This function is invoked twice per API function: once
|
||||
* before the function is invoked and once after the function is invoked. The external correlation
|
||||
|
||||
@@ -24,6 +24,8 @@
|
||||
|
||||
#include <rocprofiler-sdk/defines.h>
|
||||
|
||||
#include <hsa/hsa_amd_tool.h>
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
@@ -146,7 +148,8 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API, ///< @see
|
||||
///< ::rocprofiler_marker_control_api_id_t
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API, ///< @see ::rocprofiler_marker_name_api_id_t
|
||||
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, ///< @see ::rocprofiler_code_object_operation_t
|
||||
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, ///< @see ::rocprofiler_code_object_operation_t
|
||||
ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY, ///< @see ::rocprofiler_scratch_memory_operation_t
|
||||
ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH, ///< Callbacks for kernel dispatches
|
||||
ROCPROFILER_CALLBACK_TRACING_LAST,
|
||||
} rocprofiler_callback_tracing_kind_t;
|
||||
@@ -233,6 +236,31 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
ROCPROFILER_BUFFER_POLICY_LAST,
|
||||
} rocprofiler_buffer_policy_t;
|
||||
|
||||
/**
|
||||
* @brief Scratch event kind
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
ROCPROFILER_SCRATCH_MEMORY_NONE = 0, ///< Unknown scratch operation
|
||||
ROCPROFILER_SCRATCH_MEMORY_ALLOC, ///< Scratch memory allocation event
|
||||
ROCPROFILER_SCRATCH_MEMORY_FREE, ///< Scratch memory free event
|
||||
ROCPROFILER_SCRATCH_MEMORY_ASYNC_RECLAIM, ///< Scratch memory asynchronously reclaimed
|
||||
ROCPROFILER_SCRATCH_MEMORY_LAST, ///< NOTE(review): appears to be the end-of-enum sentinel, not an operation - confirm
|
||||
} rocprofiler_scratch_memory_operation_t;
|
||||
|
||||
/**
|
||||
* @brief Allocation flags for @see rocprofiler_buffer_tracing_scratch_memory_record_t
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
ROCPROFILER_SCRATCH_ALLOC_FLAG_NONE = 0, ///< No allocation flag set
|
||||
ROCPROFILER_SCRATCH_ALLOC_FLAG_USE_ONCE =
|
||||
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE, ///< This scratch allocation is only valid for 1
|
||||
///< dispatch.
|
||||
ROCPROFILER_SCRATCH_ALLOC_FLAG_ALT =
|
||||
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT, ///< Used alternate scratch instead of main scratch
|
||||
} rocprofiler_scratch_alloc_flag_t;
|
||||
|
||||
/**
|
||||
* @brief Enumeration for specifying runtime libraries supported by rocprofiler. This enumeration is
|
||||
* used for thread creation callbacks. @see INTERNAL_THREADING.
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
|
||||
#include <rocprofiler-sdk/hsa/api_args.h>
|
||||
#include <rocprofiler-sdk/hsa/api_id.h>
|
||||
#include <rocprofiler-sdk/hsa/scratch_memory_args.h>
|
||||
#include <rocprofiler-sdk/hsa/table_id.h>
|
||||
|
||||
#if defined(ROCPROFILER_DEFINED_AMD_INTERNAL_BUILD) && ROCPROFILER_DEFINED_AMD_INTERNAL_BUILD > 0
|
||||
|
||||
@@ -4,8 +4,15 @@
|
||||
#
|
||||
#
|
||||
set(ROCPROFILER_HSA_HEADER_FILES
|
||||
amd_ext_api_id.h api_args.h api_id.h api_trace_version.h core_api_id.h
|
||||
finalize_ext_api_id.h image_ext_api_id.h table_id.h)
|
||||
amd_ext_api_id.h
|
||||
api_args.h
|
||||
api_id.h
|
||||
api_trace_version.h
|
||||
core_api_id.h
|
||||
finalize_ext_api_id.h
|
||||
image_ext_api_id.h
|
||||
scratch_memory_args.h
|
||||
table_id.h)
|
||||
|
||||
install(
|
||||
FILES ${ROCPROFILER_HSA_HEADER_FILES}
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/defines.h>
|
||||
#include <rocprofiler-sdk/hsa/api_trace_version.h>
|
||||
#include <rocprofiler-sdk/version.h>
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_amd_tool.h>
|
||||
|
||||
ROCPROFILER_EXTERN_C_INIT
|
||||
|
||||
// Empty struct has a size of 0 in C but size of 1 in C++.
|
||||
// This struct is added to the union members which represent
|
||||
// functions with no arguments to ensure ABI compatibility
|
||||
typedef struct rocprofiler_scratch_memory_no_args
|
||||
{
|
||||
char empty; ///< placeholder member so the struct is not empty (keeps the same size in C and C++)
|
||||
} rocprofiler_scratch_memory_no_args;
|
||||
|
||||
/**
 * @brief Argument payloads for scratch memory events, one member per event and phase
 * (start/end). Members for events that carry no arguments hold only a
 * ::rocprofiler_scratch_memory_no_args placeholder.
 * NOTE(review): which member is valid is presumably indicated by the accompanying event kind - confirm.
 */
typedef union rocprofiler_scratch_memory_args_t
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint64_t dispatch_id; ///< dispatch id; NOTE(review): presumably the dispatch that triggered the allocation - confirm
|
||||
} alloc_start;
|
||||
struct
|
||||
{
|
||||
uint64_t dispatch_id; ///< dispatch id; NOTE(review): presumably the dispatch that triggered the allocation - confirm
|
||||
size_t size; ///< allocation size; NOTE(review): units (presumably bytes) to be confirmed
|
||||
size_t num_slots; ///< number of slots; NOTE(review): slot semantics come from the HSA runtime - confirm
|
||||
} alloc_end;
|
||||
struct
|
||||
{
|
||||
rocprofiler_scratch_memory_no_args no_args; ///< placeholder, event has no arguments
|
||||
} free_start;
|
||||
struct
|
||||
{
|
||||
rocprofiler_scratch_memory_no_args no_args; ///< placeholder, event has no arguments
|
||||
} free_end;
|
||||
struct
|
||||
{
|
||||
rocprofiler_scratch_memory_no_args no_args; ///< placeholder, event has no arguments
|
||||
} async_reclaim_start;
|
||||
struct
|
||||
{
|
||||
rocprofiler_scratch_memory_no_args no_args; ///< placeholder, event has no arguments
|
||||
} async_reclaim_end;
|
||||
} rocprofiler_scratch_memory_args_t;
|
||||
|
||||
ROCPROFILER_EXTERN_C_FINI
|
||||
@@ -30,5 +30,6 @@ typedef enum
|
||||
ROCPROFILER_HSA_TABLE_ID_AmdExt,
|
||||
ROCPROFILER_HSA_TABLE_ID_ImageExt,
|
||||
ROCPROFILER_HSA_TABLE_ID_FinalizeExt,
|
||||
ROCPROFILER_HSA_TABLE_ID_AmdTool,
|
||||
ROCPROFILER_HSA_TABLE_ID_LAST,
|
||||
} rocprofiler_hsa_table_id_t;
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele