From cafeaadb8cb1d2b53dd1622cce31bfbdc1e267a5 Mon Sep 17 00:00:00 2001 From: Ammar ELWazir Date: Thu, 24 Aug 2023 15:39:56 -0700 Subject: [PATCH] Updated rocprofiler.h for v2 (#18) * Update and rename rocprofiler.h to rocprofiler.h.in - Removing Service IDs - Fixing agent_id to be agent * [0/N] New rocprofiler headers - created rocprofiler/defines.h - ppdef macros - created rocprofiler/hip.h - HIP specific types - created rocprofiler/hsa.h - HSA specific types - created rocprofiler/marker.h - Marker (ROCTx) specific types - create version.h.in - file containing version info - updated source/lib/rocprofiler/CMakeLists.txt - set DEFINE_SYMBOL - compile defs provided by rocprofiler::rocprofiler-headers * [1/N] Update rocprofiler.h - pragma once - removed some ppdefs (in version.h.in and defines.h) - extern "C" after includes - added *_NONE and *_LAST enum values to all enums - provided some rocprofiler_status_t enums - tweaked rocprofiler_agent_type_t enum fields - tweaked rocprofiler_agent_info_t enum fields - provided rocprofiler_tracer_activity_domain_t - added missing rocprofiler_counter_instance_id_t typedef - may not be correct - provided rocprofiler_record_header_t struct - provided rocprofiler_record_tracer_t struct - add ROCPROFILER_NONNULL attribute where appropriate - CMakeLists.txt: add subdirectories for hsa, hip, and marker - defines.h: remove ROCPROFILER_CALL ppdef - rocprofiler.h - ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED - extend rocprofiler_agent_t - modify rocprofiler_query_available_agents signature to callback - rocprofiler_pc_sampling_config_array_t - update rocprofiler_buffer_callback_t to include context id - update rocprofiler_create_buffer to accept context - rocprofiler_plugin.h - non-const rocprofiler_record_header_t** * [2/N] Update include/rocprofiler/rocprofiler_plugin.h - change prototype of rocprofiler_plugin_write_buffer_records to resemble rocprofiler_buffer_callback_t * [3/N] Update include/rocprofiler/hsa - Update hsa.h - Details in hsa subfolder * [4/N] Update include/rocprofiler/hip - Update hip.h - Details in hip subfolder * [5/N] Update include/rocprofiler/marker - Update marker.h - Details in marker subfolder * [6/N] Update samples/pc_sampling - fix issues with macros - fix API changes --------- Co-authored-by: Jonathan Madsen [ROCm/rocprofiler-sdk commit: 39b209c2a743c3606430b694f44953a1a6d2d39e] --- .../rocprofiler-sdk/samples/CMakeLists.txt | 6 + .../samples/pc_sampling/CMakeLists.txt | 16 + .../samples/pc_sampling/common.h | 124 + ...ost-trap-retries-service-instantiation.cpp | 165 + .../pc_sampling/single-user-host-trap.cpp | 61 + .../source/include/rocprofiler/CMakeLists.txt | 13 +- .../source/include/rocprofiler/defines.h | 97 + .../source/include/rocprofiler/hip.h | 51 + .../include/rocprofiler/hip/CMakeLists.txt | 9 + .../source/include/rocprofiler/hip/api_args.h | 2167 +++++++++++ .../source/include/rocprofiler/hip/api_id.h | 432 +++ .../source/include/rocprofiler/hsa.h | 62 + .../include/rocprofiler/hsa/CMakeLists.txt | 9 + .../source/include/rocprofiler/hsa/api_args.h | 1224 +++++++ .../source/include/rocprofiler/hsa/api_id.h | 227 ++ .../include/rocprofiler/hsa/table_api_id.h | 31 + .../source/include/rocprofiler/marker.h | 35 + .../include/rocprofiler/marker/CMakeLists.txt | 9 + .../include/rocprofiler/marker/api_args.h | 51 + .../include/rocprofiler/marker/api_id.h | 33 + .../source/include/rocprofiler/rocprofiler.h | 3161 ++++++----------- .../include/rocprofiler/rocprofiler_plugin.h | 59 +- .../source/include/rocprofiler/version.h.in | 61 + .../source/lib/rocprofiler/CMakeLists.txt | 3 +- 24 files changed, 5988 insertions(+), 2118 deletions(-) create mode 100644 projects/rocprofiler-sdk/samples/pc_sampling/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/samples/pc_sampling/common.h create mode 100644 projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp create mode 100644 projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap.cpp create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/defines.h create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/hip.h create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/hip/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/hip/api_args.h create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/hip/api_id.h create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/hsa.h create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/hsa/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/hsa/api_args.h create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/hsa/api_id.h create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/hsa/table_api_id.h create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/marker.h create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/marker/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/marker/api_args.h create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/marker/api_id.h create mode 100644 projects/rocprofiler-sdk/source/include/rocprofiler/version.h.in diff --git a/projects/rocprofiler-sdk/samples/CMakeLists.txt b/projects/rocprofiler-sdk/samples/CMakeLists.txt index 8b13789179..a481c79707 100644 --- a/projects/rocprofiler-sdk/samples/CMakeLists.txt +++ b/projects/rocprofiler-sdk/samples/CMakeLists.txt @@ -1 +1,7 @@ +# +# +# +project(rocprofiler-samples LANGUAGES C CXX) +# add_subdirectory(api_tracing) +add_subdirectory(pc_sampling) diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/CMakeLists.txt b/projects/rocprofiler-sdk/samples/pc_sampling/CMakeLists.txt new file mode 100644 index 0000000000..86ae370891 --- /dev/null +++ b/projects/rocprofiler-sdk/samples/pc_sampling/CMakeLists.txt @@ -0,0 +1,16 @@ +# +# +# +project(rocprofiler-samples-pc-sampling LANGUAGES C CXX) + +add_executable(pc_sampling_single-user-host-trap) +target_sources(pc_sampling_single-user-host-trap PRIVATE common.h + single-user-host-trap.cpp) +target_link_libraries(pc_sampling_single-user-host-trap + PRIVATE rocprofiler::rocprofiler-library) + +add_executable(pc_sampling_single-user-host-trap-retry) +target_sources(pc_sampling_single-user-host-trap-retry + PRIVATE common.h single-user-host-trap-retries-service-instantiation.cpp) +target_link_libraries(pc_sampling_single-user-host-trap-retry + PRIVATE rocprofiler::rocprofiler-library) diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/common.h b/projects/rocprofiler-sdk/samples/pc_sampling/common.h new file mode 100644 index 0000000000..2a67ad19b9 --- /dev/null +++ b/projects/rocprofiler-sdk/samples/pc_sampling/common.h @@ -0,0 +1,124 @@ +#ifndef PC_SAMPLING_COMMON_H +#define PC_SAMPLING_COMMON_H + +#include + +#include +#include +#include +#include + +constexpr size_t BUFFER_SIZE_BYTES = 4096; +constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 2); + +#define ROCPROFILER_CALL(result, msg) \ + { \ + rocprofiler_status_t status = result; \ + if(status != ROCPROFILER_STATUS_SUCCESS) \ + { \ + puts(#result " failed"); \ + } \ + } + +// We might want to test the calls that fails +// e.g. calling `rocprofiler_configure_pc_sampling_service ` +// after previous initialization. +#define ROCPROFILER_CALL_FAILS(result, msg) \ + { \ + rocprofiler_status_t status = result; \ + if(status == ROCPROFILER_STATUS_SUCCESS) \ + { \ + puts(#result " succeeded"); \ + } \ + } + +static rocprofiler_status_t +find_first_gpu_agent_impl(rocprofiler_agent_t** agents, size_t num_agents, void* data) +{ + // data is required + if(!data) return ROCPROFILER_STATUS_ERROR; + + auto* _out_agent = static_cast(data); + // find the first GPU agent + for(size_t i = 0; i < num_agents; i++) + { + if(agents[i]->type == ROCPROFILER_AGENT_TYPE_GPU) + { + *_out_agent = *agents[i]; + printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n", + __FUNCTION__, + _out_agent->name, + _out_agent->id.handle, + _out_agent->type, + _out_agent->pc_sampling_configs.size); + return ROCPROFILER_STATUS_SUCCESS; + } + else + { + printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n", + __FUNCTION__, + agents[i]->name, + agents[i]->id.handle, + agents[i]->type, + agents[i]->pc_sampling_configs.size); + } + } + return ROCPROFILER_STATUS_ERROR; +} + +static rocprofiler_agent_t +find_first_gpu_agent() +{ + // This function returns the first gpu agent it encounters. + // TODO: write the better function querying information about the agent, + // and return if the agent is MI200. + rocprofiler_agent_t gpu_agent; + + ROCPROFILER_CALL(rocprofiler_query_available_agents(&find_first_gpu_agent_impl, + sizeof(rocprofiler_agent_t), + static_cast(&gpu_agent)), + "Failed to find GPU agents"); + + return gpu_agent; +} + +static void +rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/, + rocprofiler_buffer_id_t /*buffer_id*/, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* /*data*/, + uint64_t drop_count) +{ + // Vladimir: I am not sure if this is the right way of iterating over PC sampling records. + printf( + "The number of delivered samples is: %zu, while the number of dropped samples is: %lu.\n", + num_headers, + drop_count); + + for(size_t i = 0; i < num_headers; i++) + { + auto* cur_header = headers[i]; + if(cur_header->kind == 0) + { + auto* pc_sample = static_cast(cur_header->payload); + printf("--- pc: %lx, dispatch_id: %lx, timestamp: %lu, hardware_id: %lu\n", + pc_sample->pc, + pc_sample->dispatch_id, + pc_sample->timestamp, + pc_sample->hardware_id); + // Vladimir: How to parse the remaining part of the `rocprofiler_pc_sampling_record_t` + // struct? + } + } + // Vladimr: We might want to add somewhere in the documentation that headars actually contain PC + // samples. +} + +static void +run_HIP_app() +{ + // TODO: provide the simple HIP app +} + +#endif diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp new file mode 100644 index 0000000000..320d07ebaa --- /dev/null +++ b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp @@ -0,0 +1,165 @@ +// Vladimir: The example of using Host-trap PC sampling on a system with single MI200/300 by two +// users. The first user initiates Host-Trap sampling with the configuration A. The second user +// tries initiaiting stochastic sampling with configuration B and fails. Then it queries available +// configurations and observes only the configuration A. It accepts it and starts PC sampling. +// Vladimir: Currently, this example is written as a single-threaded program. +// Decide whether to move the second user to a separate thread or process + +#include + +#include "common.h" + +#include + +#define HOST_TRAP_INTERVAL 1000 + +rocprofiler_pc_sampling_method_t host_trap_sampling_method; +rocprofiler_pc_sampling_unit_t host_trap_sampling_unit_time; +uint64_t host_trap_interval; + +void +second_user() +{ + // creating a context + rocprofiler_context_id_t context_id2; + ROCPROFILER_CALL(rocprofiler_create_context(&context_id2), + "Cannot create context for the second user\n"); + + rocprofiler_agent_t gpu_agent = find_first_gpu_agent(); + + // creating a buffer that will hold pc sampling information + rocprofiler_buffer_policy_t lossless_buffer_action = ROCPROFILER_BUFFER_POLICY_LOSSLESS; + rocprofiler_buffer_id_t buffer_id2; + ROCPROFILER_CALL(rocprofiler_create_buffer(context_id2, + BUFFER_SIZE_BYTES, + WATERMARK, + lossless_buffer_action, + rocprofiler_pc_sampling_callback, + nullptr, + &buffer_id2), + "Cannot create pc sampling buffer for the second user"); + + // The second user tries to create another pc sampling service with different configuration, + // but the rocprofiler rejects it. + rocprofiler_pc_sampling_method_t sampling_method2 = ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC; + rocprofiler_pc_sampling_unit_t sampling_unit2 = ROCPROFILER_PC_SAMPLING_UNIT_CYCLES; + uint64_t interval2 = 2048; // I assumed micro secs, so this should be 1ms + // The following function returns an error code indicating the PC sampling has already been + // configured. + ROCPROFILER_CALL_FAILS( + rocprofiler_configure_pc_sampling_service( + context_id2, gpu_agent, sampling_method2, sampling_unit2, interval2, buffer_id2), + "Instantiation of the PC sampling service should fail"); + + // After failure, the second user queries available configuration and observes the one chosen by + // the first user. + rocprofiler_pc_sampling_configuration_t* configs; + size_t config_count; + ROCPROFILER_CALL( + rocprofiler_query_pc_sampling_agent_configurations(gpu_agent, configs, &config_count), + "The second user cannot query available configurations"); + + // Only one configuration should be listed, and its parameters should match the parameters set + // by the first user. Vladimir: Is it ok to use assertions? In the release mode, they might be + // ignored. + assert(config_count == 1); + rocprofiler_pc_sampling_configuration_t first_user_config = configs[0]; + assert(first_user_config.method == host_trap_sampling_method); + assert(first_user_config.unit == host_trap_sampling_unit_time); + // Vladimir: Should the min_interval and max_interval have the same value at this point (the PC + // sampling is alredy configured)?? + assert(first_user_config.min_interval == host_trap_interval && + first_user_config.min_interval == first_user_config.max_interval); + + // Vladimir: Do we need to explicitly free queried configurations? + free(configs); + + // Reuse the same configuration set by the first user. + // The second user is satisfied with the configuration chosen by the first user, so it + // starts PC sampling. + ROCPROFILER_CALL(rocprofiler_configure_pc_sampling_service(context_id2, + gpu_agent, + first_user_config.method, + first_user_config.unit, + first_user_config.min_interval, + buffer_id2), + "The second user cannot share already created PC sampling configuration"); + + // Starting the context that should trigger PC sampling? + ROCPROFILER_CALL(rocprofiler_start_context(context_id2), + "Cannot start PC sampling context for the second user"); + + // Running the applicaiton + run_HIP_app(); + + // Stop the context that should stop PC sampling? + ROCPROFILER_CALL(rocprofiler_stop_context(context_id2), + "Cannot start PC sampling context for the second user"); + + // Explicit buffer flush, before destroying it + ROCPROFILER_CALL(rocprofiler_flush_buffer(buffer_id2), + "Cannot destroy the second user's buffer"); + // Destroying the buffer + ROCPROFILER_CALL(rocprofiler_destroy_buffer(buffer_id2), "Cannot destroy the second user's"); +} + +int +main(int /*argc*/, char** /*argv*/) +{ + rocprofiler_status_t status; + + // creating a context + rocprofiler_context_id_t context_id; + ROCPROFILER_CALL(rocprofiler_create_context(&context_id), "Cannot create context\n"); + + rocprofiler_agent_t gpu_agent = find_first_gpu_agent(); + + // creating a buffer that will hold pc sampling information + rocprofiler_buffer_policy_t drop_buffer_action = ROCPROFILER_BUFFER_POLICY_DISCARD; + rocprofiler_buffer_id_t buffer_id; + ROCPROFILER_CALL(rocprofiler_create_buffer(context_id, + BUFFER_SIZE_BYTES, + WATERMARK, + drop_buffer_action, + rocprofiler_pc_sampling_callback, + nullptr, + &buffer_id), + "Cannot create pc sampling buffer"); + + // PC sampling service configuration + rocprofiler_pc_sampling_method_t host_trap_sampling_method = + ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP; + rocprofiler_pc_sampling_unit_t host_trap_sampling_unit_time = ROCPROFILER_PC_SAMPLING_UNIT_TIME; + // Vladimir: What units are we using for time? ms, micro secs, ns? + uint64_t host_trap_interval = HOST_TRAP_INTERVAL; + // Instantiating the first PC sampling service succeeds. + ROCPROFILER_CALL(rocprofiler_configure_pc_sampling_service(context_id, + gpu_agent, + host_trap_sampling_method, + host_trap_sampling_unit_time, + host_trap_interval, + buffer_id), + "Cannot create PC sampling service"); + + // Trigger the second user code. + // Vladimir: Discuss whether this should be put in a separate thread/process. + second_user(); + + // Starting the context that should trigger PC sampling? + ROCPROFILER_CALL(rocprofiler_start_context(context_id), "Cannot start PC sampling context"); + + // Running the applicaiton + run_HIP_app(); + + // Stop the context that should stop PC sampling? + ROCPROFILER_CALL(rocprofiler_stop_context(context_id), "Cannot start PC sampling context"); + + // Explicit buffer flush, before destroying it + ROCPROFILER_CALL(rocprofiler_flush_buffer(buffer_id), "Cannot destroy buffer"); + // Destroying the buffer + ROCPROFILER_CALL(rocprofiler_destroy_buffer(buffer_id), "Cannot destroy buffer"); + + // Vladimir: Do we need to destroy context or a service? + + return 0; +} diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap.cpp b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap.cpp new file mode 100644 index 0000000000..cc1fa5a7d7 --- /dev/null +++ b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap.cpp @@ -0,0 +1,61 @@ +// Vladimir: The example of using Host-trap PC sampling exclusively on the system with single MI200. +// If any of the rocprofiler calls returns status fail, we simply stop the application. + +#include +#include "common.h" + +int +main(int /*argc*/, char** /*argv*/) +{ + rocprofiler_status_t status; + + // creating a context + rocprofiler_context_id_t context_id; + ROCPROFILER_CALL(rocprofiler_create_context(&context_id), "Cannot create context\n"); + + rocprofiler_agent_t gpu_agent = find_first_gpu_agent(); + + // creating a buffer that will hold pc sampling information + rocprofiler_buffer_policy_t drop_buffer_action = ROCPROFILER_BUFFER_POLICY_DISCARD; + rocprofiler_buffer_id_t buffer_id; + ROCPROFILER_CALL(rocprofiler_create_buffer(context_id, + BUFFER_SIZE_BYTES, + WATERMARK, + drop_buffer_action, + rocprofiler_pc_sampling_callback, + nullptr, + &buffer_id), + "Cannot create pc sampling buffer"); + + // PC sampling service configuration + rocprofiler_pc_sampling_method_t sampling_method = ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP; + rocprofiler_pc_sampling_unit_t sampling_unit = ROCPROFILER_PC_SAMPLING_UNIT_TIME; + // What units are we using for time? ms, micro secs, ns? + uint64_t interval = 1000; // I assumed micro secs, so this should be 1ms + // Instantiating the PC sampling service + ROCPROFILER_CALL( + rocprofiler_configure_pc_sampling_service( + context_id, gpu_agent, sampling_method, sampling_unit, interval, buffer_id), + "Cannot create PC sampling service"); + + // Vladimir: Is this the place of retrying if someone already created the + // configuration and the previous call fails? + + // Starting the context that should trigger PC sampling? + ROCPROFILER_CALL(rocprofiler_start_context(context_id), "Cannot start PC sampling context"); + + // Running the applicaiton + run_HIP_app(); + + // Stop the context that should stop PC sampling? + ROCPROFILER_CALL(rocprofiler_stop_context(context_id), "Cannot start PC sampling context"); + + // Explicit buffer flush, before destroying it + ROCPROFILER_CALL(rocprofiler_flush_buffer(buffer_id), "Cannot destroy buffer"); + // Destroying the buffer + ROCPROFILER_CALL(rocprofiler_destroy_buffer(buffer_id), "Cannot destroy buffer"); + + // Vladimir: Do we need to destroy context or a service? + + return 0; +} diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler/CMakeLists.txt index ce489644af..151b3616e2 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/CMakeLists.txt @@ -3,6 +3,15 @@ # Installation of public headers # # -set(ROCPROFILER_INCLUDE_FILES config.h rocprofiler.h rocprofiler_plugin.h) -install(FILES ${ROCPROFILER_INCLUDE_FILES} +configure_file(${CMAKE_CURRENT_LIST_DIR}/version.h.in + ${CMAKE_CURRENT_BINARY_DIR}/version.h @ONLY) + +set(ROCPROFILER_HEADER_FILES config.h defines.h hip.h hsa.h marker.h rocprofiler.h + rocprofiler_plugin.h ${CMAKE_CURRENT_BINARY_DIR}/version.h) + +install(FILES ${ROCPROFILER_HEADER_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler) + +add_subdirectory(hip) +add_subdirectory(hsa) +add_subdirectory(marker) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/defines.h b/projects/rocprofiler-sdk/source/include/rocprofiler/defines.h new file mode 100644 index 0000000000..41da65489a --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/defines.h @@ -0,0 +1,97 @@ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#if !defined(ROCPROFILER_ATTRIBUTE) +# if defined(_MSC_VER) +# define ROCPROFILER_ATTRIBUTE(...) __declspec(__VA_ARGS__) +# else +# define ROCPROFILER_ATTRIBUTE(...) __attribute__((__VA_ARGS__)) +# endif +#endif + +#if !defined(ROCPROFILER_PUBLIC_API) +# if defined(_MSC_VER) +# define ROCPROFILER_PUBLIC_API ROCPROFILER_ATTRIBUTE(dllexport) +# else +# define ROCPROFILER_PUBLIC_API ROCPROFILER_ATTRIBUTE(visibility("default")) +# endif +#endif + +#if !defined(ROCPROFILER_HIDDEN_API) +# if defined(_MSC_VER) +# define ROCPROFILER_HIDDEN_API +# else +# define ROCPROFILER_HIDDEN_API ROCPROFILER_ATTRIBUTE(visibility("hidden")) +# endif +#endif + +#if !defined(ROCPROFILER_EXPORT_DECORATOR) +# define ROCPROFILER_EXPORT_DECORATOR ROCPROFILER_PUBLIC_API +#endif + +#if !defined(ROCPROFILER_IMPORT_DECORATOR) +# if defined(_MSC_VER) +# define ROCPROFILER_IMPORT_DECORATOR ROCPROFILER_ATTRIBUTE(dllimport) +# else +# define ROCPROFILER_IMPORT_DECORATOR +# endif +#endif + +#define ROCPROFILER_EXPORT ROCPROFILER_EXPORT_DECORATOR +#define ROCPROFILER_IMPORT ROCPROFILER_IMPORT_DECORATOR + +#if !defined(ROCPROFILER_API) +# if defined(rocprofiler_EXPORTS) +# define ROCPROFILER_API ROCPROFILER_EXPORT +# else +# define ROCPROFILER_API ROCPROFILER_IMPORT +# endif +#endif + +#if defined(__has_attribute) +# if __has_attribute(nonnull) +# define ROCPROFILER_NONNULL(...) __attribute__((nonnull(__VA_ARGS__))) +# else +# define ROCPROFILER_NONNULL(...) +# endif +#else +# if defined(__GNUC__) +# define ROCPROFILER_NONNULL(...) __attribute__((nonnull(__VA_ARGS__))) +# else +# define ROCPROFILER_NONNULL(...) +# endif +#endif + +#if __cplusplus >= 201103L // C++11 +/* c++11 allows extended initializer lists. */ +# define ROCPROFILER_HANDLE_LITERAL(type, value) (type{value}) +#elif __STDC_VERSION__ >= 199901L +/* c99 allows compound literals. */ +# define ROCPROFILER_HANDLE_LITERAL(type, value) ((type){value}) +#else +# define ROCPROFILER_HANDLE_LITERAL(type, value) \ + { \ + value \ + } +#endif diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/hip.h b/projects/rocprofiler-sdk/source/include/rocprofiler/hip.h new file mode 100644 index 0000000000..45d56a1431 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/hip.h @@ -0,0 +1,51 @@ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +#include + +typedef uint32_t rocprofiler_trace_record_hip_operation_kind_t; +typedef struct rocprofiler_hip_trace_data_s rocprofiler_hip_trace_data_t; +typedef struct rocprofiler_hip_api_data_s rocprofiler_hip_api_data_t; + +struct rocprofiler_hip_api_data_s +{ + uint64_t correlation_id; + uint32_t phase; + rocprofiler_hip_api_args_t args; + uint64_t* phase_data; +}; + +struct rocprofiler_hip_trace_data_s +{ + rocprofiler_hip_api_data_t api_data; + uint64_t phase_enter_timestamp; + uint64_t phase_exit_timestamp; + uint64_t phase_data; + + void (*phase_enter)(rocprofiler_hip_api_id_t operation_id, rocprofiler_hip_trace_data_t* data); + void (*phase_exit)(rocprofiler_hip_api_id_t operation_id, rocprofiler_hip_trace_data_t* data); +}; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/hip/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler/hip/CMakeLists.txt new file mode 100644 index 0000000000..095e42ac9d --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/hip/CMakeLists.txt @@ -0,0 +1,9 @@ +# +# +# Installation of public HIP headers +# +# +set(ROCPROFILER_HIP_HEADER_FILES api_args.h api_id.h) + +install(FILES ${ROCPROFILER_HIP_HEADER_FILES} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler/hip) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/hip/api_args.h b/projects/rocprofiler-sdk/source/include/rocprofiler/hip/api_args.h new file mode 100644 index 0000000000..79e62d5aa5 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/hip/api_args.h @@ -0,0 +1,2167 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +typedef union rocprofiler_hip_api_args_u +{ + struct + { + dim3* gridDim; + dim3* blockDim; + size_t* sharedMem; + hipStream_t* stream; + } __hipPopCallConfiguration; + struct + { + dim3 gridDim; + dim3 blockDim; + size_t sharedMem; + hipStream_t stream; + } __hipPushCallConfiguration; + struct + { + hipArray** array; + const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray; + } hipArray3DCreate; + struct + { + HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor; + hipArray* array; + } hipArray3DGetDescriptor; + struct + { + hipArray** pHandle; + const HIP_ARRAY_DESCRIPTOR* pAllocateArray; + } hipArrayCreate; + struct + { + hipArray* array; + } hipArrayDestroy; + struct + { + HIP_ARRAY_DESCRIPTOR* pArrayDescriptor; + hipArray* array; + } hipArrayGetDescriptor; + struct + { + hipChannelFormatDesc* desc; + hipExtent* extent; + unsigned int* flags; + hipArray* array; + } hipArrayGetInfo; + struct + { + int* device; + const hipDeviceProp_t* prop; + } hipChooseDevice; + struct + { + dim3 gridDim; + dim3 blockDim; + size_t sharedMem; + hipStream_t stream; + } hipConfigureCall; + struct + { + hipSurfaceObject_t* pSurfObject; + const hipResourceDesc* pResDesc; + } hipCreateSurfaceObject; + struct + { + hipCtx_t* ctx; + unsigned int flags; + hipDevice_t device; + } hipCtxCreate; + struct + { + hipCtx_t ctx; + } hipCtxDestroy; + struct + { + hipCtx_t peerCtx; + } hipCtxDisablePeerAccess; + struct + { + hipCtx_t peerCtx; + unsigned int flags; + } hipCtxEnablePeerAccess; + struct + { + hipCtx_t ctx; + int* apiVersion; + } hipCtxGetApiVersion; + struct + { + hipFuncCache_t* cacheConfig; + } hipCtxGetCacheConfig; + struct + { + hipCtx_t* ctx; + } hipCtxGetCurrent; + struct + { + hipDevice_t* device; + } hipCtxGetDevice; + struct + { + unsigned int* flags; + } hipCtxGetFlags; + struct + { + hipSharedMemConfig* pConfig; + } hipCtxGetSharedMemConfig; + struct + { + hipCtx_t* ctx; + } hipCtxPopCurrent; + struct + { + hipCtx_t ctx; + } hipCtxPushCurrent; + struct + { + hipFuncCache_t cacheConfig; + } hipCtxSetCacheConfig; + struct + { + hipCtx_t ctx; + } hipCtxSetCurrent; + struct + { + hipSharedMemConfig config; + } hipCtxSetSharedMemConfig; + struct + { + hipExternalMemory_t extMem; + } hipDestroyExternalMemory; + struct + { + hipExternalSemaphore_t extSem; + } hipDestroyExternalSemaphore; + struct + { + hipSurfaceObject_t surfaceObject; + } hipDestroySurfaceObject; + struct + { + int* canAccessPeer; + int deviceId; + int peerDeviceId; + } hipDeviceCanAccessPeer; + struct + { + int* major; + int* minor; + hipDevice_t device; + } hipDeviceComputeCapability; + struct + { + int peerDeviceId; + } hipDeviceDisablePeerAccess; + struct + { + int peerDeviceId; + unsigned int flags; + } hipDeviceEnablePeerAccess; + struct + { + hipDevice_t* device; + int ordinal; + } hipDeviceGet; + struct + { + int* pi; + hipDeviceAttribute_t attr; + int deviceId; + } hipDeviceGetAttribute; + struct + { + int* device; + const char* pciBusId; + } hipDeviceGetByPCIBusId; + struct + { + hipFuncCache_t* cacheConfig; + } hipDeviceGetCacheConfig; + struct + { + hipMemPool_t* mem_pool; + int device; + } hipDeviceGetDefaultMemPool; + struct + { + int device; + hipGraphMemAttributeType attr; + void* value; + } hipDeviceGetGraphMemAttribute; + struct + { + size_t* pValue; + enum hipLimit_t limit; + } hipDeviceGetLimit; + struct + { + hipMemPool_t* mem_pool; + int device; + } hipDeviceGetMemPool; + struct + { + char* name; + int len; + hipDevice_t device; + } hipDeviceGetName; + struct + { + int* value; + hipDeviceP2PAttr attr; + int srcDevice; + int dstDevice; + } hipDeviceGetP2PAttribute; + struct + { + char* pciBusId; + int len; + int device; + } hipDeviceGetPCIBusId; + struct + { + hipSharedMemConfig* pConfig; + } hipDeviceGetSharedMemConfig; + struct + { + int* leastPriority; + int* greatestPriority; + } hipDeviceGetStreamPriorityRange; + struct + { + hipUUID* uuid; + hipDevice_t device; + } hipDeviceGetUuid; + struct + { + int device; + } hipDeviceGraphMemTrim; + struct + { + hipDevice_t dev; + unsigned int* flags; + int* active; + } hipDevicePrimaryCtxGetState; + struct + { + hipDevice_t dev; + } hipDevicePrimaryCtxRelease; + struct + { + hipDevice_t dev; + } hipDevicePrimaryCtxReset; + struct + { + hipCtx_t* pctx; + hipDevice_t dev; + } hipDevicePrimaryCtxRetain; + struct + { + hipDevice_t dev; + unsigned int flags; + } hipDevicePrimaryCtxSetFlags; + struct + { + hipFuncCache_t cacheConfig; + } hipDeviceSetCacheConfig; + struct + { + int device; + hipGraphMemAttributeType attr; + void* value; + } hipDeviceSetGraphMemAttribute; + struct + { + enum hipLimit_t limit; + size_t value; + } hipDeviceSetLimit; + struct + { + int device; + hipMemPool_t mem_pool; + } hipDeviceSetMemPool; + struct + { + hipSharedMemConfig config; + } hipDeviceSetSharedMemConfig; + struct + { + size_t* bytes; + hipDevice_t device; + } hipDeviceTotalMem; + struct + { + int* driverVersion; + } hipDriverGetVersion; + struct + { + const hip_Memcpy2D* pCopy; + } hipDrvMemcpy2DUnaligned; + struct + { + const HIP_MEMCPY3D* pCopy; + } hipDrvMemcpy3D; + struct + { + const HIP_MEMCPY3D* pCopy; + hipStream_t stream; + } hipDrvMemcpy3DAsync; + struct + { + unsigned int numAttributes; + hipPointer_attribute* attributes; + void** data; + hipDeviceptr_t ptr; + } hipDrvPointerGetAttributes; + struct + { + hipEvent_t* event; + } hipEventCreate; + struct + { + hipEvent_t* event; + unsigned int flags; + } hipEventCreateWithFlags; + struct + { + hipEvent_t event; + } hipEventDestroy; + struct + { + float* ms; + hipEvent_t start; + hipEvent_t stop; + } hipEventElapsedTime; + struct + { + hipEvent_t event; + } hipEventQuery; + struct + { + hipEvent_t event; + hipStream_t stream; + } hipEventRecord; + struct + { + hipEvent_t event; + } hipEventSynchronize; + struct + { + int device1; + int device2; + unsigned int* linktype; + unsigned int* hopcount; + } hipExtGetLinkTypeAndHopCount; + struct + { + const void* function_address; + dim3 numBlocks; + dim3 dimBlocks; + void** args; + size_t sharedMemBytes; + hipStream_t stream; + hipEvent_t startEvent; + hipEvent_t stopEvent; + int flags; + } hipExtLaunchKernel; + struct + { + hipLaunchParams* launchParamsList; + int numDevices; + unsigned int flags; + } hipExtLaunchMultiKernelMultiDevice; + struct + { + void** ptr; + size_t sizeBytes; + unsigned int flags; + } hipExtMallocWithFlags; + struct + { + hipFunction_t f; + unsigned int globalWorkSizeX; + unsigned int globalWorkSizeY; + unsigned int globalWorkSizeZ; + unsigned int localWorkSizeX; + unsigned int localWorkSizeY; + unsigned int localWorkSizeZ; + size_t sharedMemBytes; + hipStream_t hStream; + void** kernelParams; + void** extra; + hipEvent_t startEvent; + hipEvent_t stopEvent; + unsigned int flags; + } hipExtModuleLaunchKernel; + struct + { + hipStream_t* stream; + unsigned int cuMaskSize; + const unsigned int* cuMask; + } hipExtStreamCreateWithCUMask; + struct + { + hipStream_t stream; + unsigned int cuMaskSize; + unsigned int* cuMask; + } hipExtStreamGetCUMask; + struct + { + void** devPtr; + hipExternalMemory_t extMem; + const hipExternalMemoryBufferDesc* bufferDesc; + } hipExternalMemoryGetMappedBuffer; + struct + { + void* ptr; + } hipFree; + struct + { + hipArray* array; + } hipFreeArray; + struct + { + void* dev_ptr; + hipStream_t stream; + } hipFreeAsync; + struct + { + void* ptr; + } hipFreeHost; + struct + { + hipMipmappedArray_t mipmappedArray; + } hipFreeMipmappedArray; + struct + { + int* value; + hipFunction_attribute attrib; + hipFunction_t hfunc; + } hipFuncGetAttribute; + struct + { + hipFuncAttributes* attr; + const void* func; + } hipFuncGetAttributes; + struct + { + const void* func; + hipFuncAttribute attr; + int value; + } hipFuncSetAttribute; + struct + { + const void* func; + hipFuncCache_t config; + } hipFuncSetCacheConfig; + struct + { + const void* func; + hipSharedMemConfig config; + } hipFuncSetSharedMemConfig; + struct + { + unsigned int* pHipDeviceCount; + int* pHipDevices; + unsigned int hipDeviceCount; + hipGLDeviceList deviceList; + } hipGLGetDevices; + struct + { + hipChannelFormatDesc* desc; + hipArray_const_t array; + } hipGetChannelDesc; + struct + { + int* deviceId; + } hipGetDevice; + struct + { + int* count; + } hipGetDeviceCount; + struct + { + unsigned int* flags; + } hipGetDeviceFlags; + struct + { + hipDeviceProp_t* props; + hipDevice_t device; + } hipGetDeviceProperties; + struct + { + hipArray_t* levelArray; + hipMipmappedArray_const_t mipmappedArray; + unsigned int level; + } hipGetMipmappedArrayLevel; + struct + { + void** devPtr; + const void* symbol; + } hipGetSymbolAddress; + struct + { + size_t* size; + const void* symbol; + } hipGetSymbolSize; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + hipGraph_t childGraph; + } hipGraphAddChildGraphNode; + struct + { + hipGraph_t graph; + const hipGraphNode_t* from; + const hipGraphNode_t* to; + size_t numDependencies; + } hipGraphAddDependencies; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + } hipGraphAddEmptyNode; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + hipEvent_t event; + } hipGraphAddEventRecordNode; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + hipEvent_t event; + } hipGraphAddEventWaitNode; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + const hipHostNodeParams* pNodeParams; + } hipGraphAddHostNode; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + const hipKernelNodeParams* pNodeParams; + } hipGraphAddKernelNode; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + hipMemAllocNodeParams* pNodeParams; + } hipGraphAddMemAllocNode; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + void* dev_ptr; + } hipGraphAddMemFreeNode; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + const hipMemcpy3DParms* pCopyParams; + } hipGraphAddMemcpyNode; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + void* dst; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipGraphAddMemcpyNode1D; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + void* dst; + const void* symbol; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphAddMemcpyNodeFromSymbol; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + const void* symbol; + const void* src; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphAddMemcpyNodeToSymbol; + struct + { + hipGraphNode_t* pGraphNode; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + size_t numDependencies; + const hipMemsetParams* pMemsetParams; + } hipGraphAddMemsetNode; + struct + { + hipGraphNode_t node; + hipGraph_t* pGraph; + } hipGraphChildGraphNodeGetGraph; + struct + { + hipGraph_t* pGraphClone; + hipGraph_t originalGraph; + } hipGraphClone; + struct + { + hipGraph_t* pGraph; + unsigned int flags; + } hipGraphCreate; + struct + { + hipGraph_t graph; + const char* path; + unsigned int flags; + } hipGraphDebugDotPrint; + struct + { + hipGraph_t graph; + } hipGraphDestroy; + struct + { + hipGraphNode_t node; + } hipGraphDestroyNode; + struct + { + hipGraphNode_t node; + hipEvent_t* event_out; + } hipGraphEventRecordNodeGetEvent; + struct + { + hipGraphNode_t node; + hipEvent_t event; + } hipGraphEventRecordNodeSetEvent; + struct + { + hipGraphNode_t node; + hipEvent_t* event_out; + } hipGraphEventWaitNodeGetEvent; + struct + { + hipGraphNode_t node; + hipEvent_t event; + } hipGraphEventWaitNodeSetEvent; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + hipGraph_t childGraph; + } hipGraphExecChildGraphNodeSetParams; + struct + { + hipGraphExec_t graphExec; + } hipGraphExecDestroy; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + hipEvent_t event; + } hipGraphExecEventRecordNodeSetEvent; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + hipEvent_t event; + } hipGraphExecEventWaitNodeSetEvent; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const hipHostNodeParams* pNodeParams; + } hipGraphExecHostNodeSetParams; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const hipKernelNodeParams* pNodeParams; + } hipGraphExecKernelNodeSetParams; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + hipMemcpy3DParms* pNodeParams; + } hipGraphExecMemcpyNodeSetParams; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + void* dst; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipGraphExecMemcpyNodeSetParams1D; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + void* dst; + const void* symbol; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphExecMemcpyNodeSetParamsFromSymbol; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const void* symbol; + const void* src; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphExecMemcpyNodeSetParamsToSymbol; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const hipMemsetParams* pNodeParams; + } hipGraphExecMemsetNodeSetParams; + struct + { + hipGraphExec_t hGraphExec; + hipGraph_t hGraph; + hipGraphNode_t* hErrorNode_out; + hipGraphExecUpdateResult* updateResult_out; + } hipGraphExecUpdate; + struct + { + hipGraph_t graph; + hipGraphNode_t* from; + hipGraphNode_t* to; + size_t* numEdges; + } hipGraphGetEdges; + struct + { + hipGraph_t graph; + hipGraphNode_t* nodes; + size_t* numNodes; + } hipGraphGetNodes; + struct + { + hipGraph_t graph; + hipGraphNode_t* pRootNodes; + size_t* pNumRootNodes; + } hipGraphGetRootNodes; + struct + { + hipGraphNode_t node; + hipHostNodeParams* pNodeParams; + } hipGraphHostNodeGetParams; + struct + { + hipGraphNode_t node; + const hipHostNodeParams* pNodeParams; + } hipGraphHostNodeSetParams; + struct + { + hipGraphExec_t* pGraphExec; + hipGraph_t graph; + hipGraphNode_t* pErrorNode; + char* pLogBuffer; + size_t bufferSize; + } hipGraphInstantiate; + struct + { + hipGraphExec_t* pGraphExec; + hipGraph_t graph; + unsigned long long flags; + } hipGraphInstantiateWithFlags; + struct + { + hipGraphNode_t hSrc; + hipGraphNode_t hDst; + } hipGraphKernelNodeCopyAttributes; + struct + { + hipGraphNode_t hNode; + hipKernelNodeAttrID attr; + hipKernelNodeAttrValue* value; + } hipGraphKernelNodeGetAttribute; + struct + { + hipGraphNode_t node; + hipKernelNodeParams* pNodeParams; + } hipGraphKernelNodeGetParams; + struct + { + hipGraphNode_t hNode; + hipKernelNodeAttrID attr; + const hipKernelNodeAttrValue* value; + } hipGraphKernelNodeSetAttribute; + struct + { + hipGraphNode_t node; + const hipKernelNodeParams* pNodeParams; + } hipGraphKernelNodeSetParams; + struct + { + hipGraphExec_t graphExec; + hipStream_t stream; + } hipGraphLaunch; + struct + { + hipGraphNode_t node; + hipMemAllocNodeParams* pNodeParams; + } hipGraphMemAllocNodeGetParams; + struct + { + hipGraphNode_t node; + void* dev_ptr; + } hipGraphMemFreeNodeGetParams; + struct + { + hipGraphNode_t node; + hipMemcpy3DParms* pNodeParams; + } hipGraphMemcpyNodeGetParams; + struct + { + hipGraphNode_t node; + const hipMemcpy3DParms* pNodeParams; + } hipGraphMemcpyNodeSetParams; + struct + { + hipGraphNode_t node; + void* dst; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipGraphMemcpyNodeSetParams1D; + struct + { + hipGraphNode_t node; + void* dst; + const void* symbol; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphMemcpyNodeSetParamsFromSymbol; + struct + { + hipGraphNode_t node; + const void* symbol; + const void* src; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphMemcpyNodeSetParamsToSymbol; + struct + { + hipGraphNode_t node; + hipMemsetParams* pNodeParams; + } hipGraphMemsetNodeGetParams; + struct + { + hipGraphNode_t node; + const hipMemsetParams* pNodeParams; + } hipGraphMemsetNodeSetParams; + struct + { + hipGraphNode_t* pNode; + hipGraphNode_t originalNode; + hipGraph_t clonedGraph; + } hipGraphNodeFindInClone; + struct + { + hipGraphNode_t node; + hipGraphNode_t* pDependencies; + size_t* pNumDependencies; + } hipGraphNodeGetDependencies; + struct + { + hipGraphNode_t node; + hipGraphNode_t* pDependentNodes; + size_t* pNumDependentNodes; + } hipGraphNodeGetDependentNodes; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + unsigned int* isEnabled; + } hipGraphNodeGetEnabled; + struct + { + hipGraphNode_t node; + hipGraphNodeType* pType; + } hipGraphNodeGetType; + struct + { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + unsigned int isEnabled; + } hipGraphNodeSetEnabled; + struct + { + hipGraph_t graph; + hipUserObject_t object; + unsigned int count; + } hipGraphReleaseUserObject; + struct + { + hipGraph_t graph; + const hipGraphNode_t* from; + const hipGraphNode_t* to; + size_t numDependencies; + } hipGraphRemoveDependencies; + struct + { + hipGraph_t graph; + hipUserObject_t object; + unsigned int count; + unsigned int flags; + } hipGraphRetainUserObject; + struct + { + hipGraphExec_t graphExec; + hipStream_t stream; + } hipGraphUpload; + struct + { + hipGraphicsResource** resource; + GLuint buffer; + unsigned int flags; + } hipGraphicsGLRegisterBuffer; + struct + { + hipGraphicsResource** resource; + GLuint image; + GLenum target; + unsigned int flags; + } hipGraphicsGLRegisterImage; + struct + { + int count; + hipGraphicsResource_t* resources; + hipStream_t stream; + } hipGraphicsMapResources; + struct + { + void** devPtr; + size_t* size; + hipGraphicsResource_t resource; + } hipGraphicsResourceGetMappedPointer; + struct + { + hipArray_t* array; + hipGraphicsResource_t resource; + unsigned int arrayIndex; + unsigned int mipLevel; + } hipGraphicsSubResourceGetMappedArray; + struct + { + int count; + hipGraphicsResource_t* resources; + hipStream_t stream; + } hipGraphicsUnmapResources; + struct + { + hipGraphicsResource_t resource; + } hipGraphicsUnregisterResource; + struct + { + hipFunction_t f; + unsigned int globalWorkSizeX; + unsigned int globalWorkSizeY; + unsigned int globalWorkSizeZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + size_t sharedMemBytes; + hipStream_t hStream; + void** kernelParams; + void** extra; + hipEvent_t startEvent; + hipEvent_t stopEvent; + } hipHccModuleLaunchKernel; + struct + { + void** ptr; + size_t size; + unsigned int flags; + } hipHostAlloc; + struct + { + void* ptr; + } hipHostFree; + struct + { + void** devPtr; + void* hstPtr; + unsigned int flags; + } hipHostGetDevicePointer; + struct + { + unsigned int* flagsPtr; + void* hostPtr; + } hipHostGetFlags; + struct + { + void** ptr; + size_t size; + unsigned int flags; + } hipHostMalloc; + struct + { + void* hostPtr; + size_t sizeBytes; + unsigned int flags; + } hipHostRegister; + struct + { + void* hostPtr; + } hipHostUnregister; + struct + { + hipExternalMemory_t* extMem_out; + const hipExternalMemoryHandleDesc* memHandleDesc; + } hipImportExternalMemory; + struct + { + hipExternalSemaphore_t* extSem_out; + const hipExternalSemaphoreHandleDesc* semHandleDesc; + } hipImportExternalSemaphore; + struct + { + unsigned int flags; + } hipInit; + struct + { + void* devPtr; + } hipIpcCloseMemHandle; + struct + { + hipIpcEventHandle_t* handle; + hipEvent_t event; + } hipIpcGetEventHandle; + struct + { + hipIpcMemHandle_t* handle; + void* devPtr; + } hipIpcGetMemHandle; + struct + { + hipEvent_t* event; + hipIpcEventHandle_t handle; + } hipIpcOpenEventHandle; + struct + { + void** devPtr; + hipIpcMemHandle_t handle; + unsigned int flags; + } hipIpcOpenMemHandle; + struct + { + const void* hostFunction; + } hipLaunchByPtr; + struct + { + const void* f; + dim3 gridDim; + dim3 blockDimX; + void** kernelParams; + unsigned int sharedMemBytes; + hipStream_t stream; + } hipLaunchCooperativeKernel; + struct + { + hipLaunchParams* launchParamsList; + int numDevices; + unsigned int flags; + } hipLaunchCooperativeKernelMultiDevice; + struct + { + hipStream_t stream; + hipHostFn_t fn; + void* userData; + } hipLaunchHostFunc; + struct + { + const void* function_address; + dim3 numBlocks; + dim3 dimBlocks; + void** args; + size_t sharedMemBytes; + hipStream_t stream; + } hipLaunchKernel; + struct + { + void** ptr; + size_t size; + } hipMalloc; + struct + { + hipPitchedPtr* pitchedDevPtr; + hipExtent extent; + } hipMalloc3D; + struct + { + hipArray_t* array; + const hipChannelFormatDesc* desc; + hipExtent extent; + unsigned int flags; + } hipMalloc3DArray; + struct + { + hipArray** array; + const hipChannelFormatDesc* desc; + size_t width; + size_t height; + unsigned int flags; + } hipMallocArray; + struct + { + void** dev_ptr; + size_t size; + hipStream_t stream; + } hipMallocAsync; + struct + { + void** dev_ptr; + size_t size; + hipMemPool_t mem_pool; + hipStream_t stream; + } hipMallocFromPoolAsync; + struct + { + void** ptr; + size_t size; + } hipMallocHost; + struct + { + void** dev_ptr; + size_t size; + unsigned int flags; + } hipMallocManaged; + struct + { + hipMipmappedArray_t* mipmappedArray; + const hipChannelFormatDesc* desc; + hipExtent extent; + unsigned int numLevels; + unsigned int flags; + } hipMallocMipmappedArray; + struct + { + void** ptr; + size_t* pitch; + size_t width; + size_t height; + } hipMallocPitch; + struct + { + void* devPtr; + size_t size; + } hipMemAddressFree; + struct + { + void** ptr; + size_t size; + size_t alignment; + void* addr; + unsigned long long flags; + } hipMemAddressReserve; + struct + { + const void* dev_ptr; + size_t count; + hipMemoryAdvise advice; + int device; + } hipMemAdvise; + struct + { + void** ptr; + size_t size; + } hipMemAllocHost; + struct + { + hipDeviceptr_t* dptr; + size_t* pitch; + size_t widthInBytes; + size_t height; + unsigned int elementSizeBytes; + } hipMemAllocPitch; + struct + { + hipMemGenericAllocationHandle_t* handle; + size_t size; + const hipMemAllocationProp* prop; + unsigned long long flags; + } hipMemCreate; + struct + { + void* shareableHandle; + hipMemGenericAllocationHandle_t handle; + hipMemAllocationHandleType handleType; + unsigned long long flags; + } hipMemExportToShareableHandle; + struct + { + unsigned long long* flags; + const hipMemLocation* location; + void* ptr; + } hipMemGetAccess; + struct + { + hipDeviceptr_t* pbase; + size_t* psize; + hipDeviceptr_t dptr; + } hipMemGetAddressRange; + struct + { + size_t* granularity; + const hipMemAllocationProp* prop; + hipMemAllocationGranularity_flags option; + } hipMemGetAllocationGranularity; + struct + { + hipMemAllocationProp* prop; + hipMemGenericAllocationHandle_t handle; + } hipMemGetAllocationPropertiesFromHandle; + struct + { + size_t* free; + size_t* total; + } hipMemGetInfo; + struct + { + hipMemGenericAllocationHandle_t* handle; + void* osHandle; + hipMemAllocationHandleType shHandleType; + } hipMemImportFromShareableHandle; + struct + { + void* ptr; + size_t size; + size_t offset; + hipMemGenericAllocationHandle_t handle; + unsigned long long flags; + } hipMemMap; + struct + { + hipArrayMapInfo* mapInfoList; + unsigned int count; + hipStream_t stream; + } hipMemMapArrayAsync; + struct + { + hipMemPool_t* mem_pool; + const hipMemPoolProps* pool_props; + } hipMemPoolCreate; + struct + { + hipMemPool_t mem_pool; + } hipMemPoolDestroy; + struct + { + hipMemPoolPtrExportData* export_data; + void* dev_ptr; + } hipMemPoolExportPointer; + struct + { + void* shared_handle; + hipMemPool_t mem_pool; + hipMemAllocationHandleType handle_type; + unsigned int flags; + } hipMemPoolExportToShareableHandle; + struct + { + hipMemAccessFlags* flags; + hipMemPool_t mem_pool; + hipMemLocation* location; + } hipMemPoolGetAccess; + struct + { + hipMemPool_t mem_pool; + hipMemPoolAttr attr; + void* value; + } hipMemPoolGetAttribute; + struct + { + hipMemPool_t* mem_pool; + void* shared_handle; + hipMemAllocationHandleType handle_type; + unsigned int flags; + } hipMemPoolImportFromShareableHandle; + struct + { + void** dev_ptr; + hipMemPool_t mem_pool; + hipMemPoolPtrExportData* export_data; + } hipMemPoolImportPointer; + struct + { + hipMemPool_t mem_pool; + const hipMemAccessDesc* desc_list; + size_t count; + } hipMemPoolSetAccess; + struct + { + hipMemPool_t mem_pool; + hipMemPoolAttr attr; + void* value; + } hipMemPoolSetAttribute; + struct + { + hipMemPool_t mem_pool; + size_t min_bytes_to_hold; + } hipMemPoolTrimTo; + struct + { + const void* dev_ptr; + size_t count; + int device; + hipStream_t stream; + } hipMemPrefetchAsync; + struct + { + void* ptr; + size_t* size; + } hipMemPtrGetInfo; + struct + { + void* data; + size_t data_size; + hipMemRangeAttribute attribute; + const void* dev_ptr; + size_t count; + } hipMemRangeGetAttribute; + struct + { + void** data; + size_t* data_sizes; + hipMemRangeAttribute* attributes; + size_t num_attributes; + const void* dev_ptr; + size_t count; + } hipMemRangeGetAttributes; + struct + { + hipMemGenericAllocationHandle_t handle; + } hipMemRelease; + struct + { + hipMemGenericAllocationHandle_t* handle; + void* addr; + } hipMemRetainAllocationHandle; + struct + { + void* ptr; + size_t size; + const hipMemAccessDesc* desc; + size_t count; + } hipMemSetAccess; + struct + { + void* ptr; + size_t size; + } hipMemUnmap; + struct + { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + } hipMemcpy; + struct + { + void* dst; + size_t dpitch; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2D; + struct + { + void* dst; + size_t dpitch; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DAsync; + struct + { + void* dst; + size_t dpitch; + hipArray_const_t src; + size_t wOffset; + size_t hOffset; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2DFromArray; + struct + { + void* dst; + size_t dpitch; + hipArray_const_t src; + size_t wOffset; + size_t hOffset; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DFromArrayAsync; + struct + { + hipArray* dst; + size_t wOffset; + size_t hOffset; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2DToArray; + struct + { + hipArray* dst; + size_t wOffset; + size_t hOffset; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DToArrayAsync; + struct + { + const hipMemcpy3DParms* p; + } hipMemcpy3D; + struct + { + const hipMemcpy3DParms* p; + hipStream_t stream; + } hipMemcpy3DAsync; + struct + { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyAsync; + struct + { + void* dst; + hipArray* srcArray; + size_t srcOffset; + size_t count; + } hipMemcpyAtoH; + struct + { + hipDeviceptr_t dst; + hipDeviceptr_t src; + size_t sizeBytes; + } hipMemcpyDtoD; + struct + { + hipDeviceptr_t dst; + hipDeviceptr_t src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyDtoDAsync; + struct + { + void* dst; + hipDeviceptr_t src; + size_t sizeBytes; + } hipMemcpyDtoH; + struct + { + void* dst; + hipDeviceptr_t src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyDtoHAsync; + struct + { + void* dst; + hipArray_const_t srcArray; + size_t wOffset; + size_t hOffset; + size_t count; + hipMemcpyKind kind; + } hipMemcpyFromArray; + struct + { + void* dst; + const void* symbol; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + } hipMemcpyFromSymbol; + struct + { + void* dst; + const void* symbol; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyFromSymbolAsync; + struct + { + hipArray* dstArray; + size_t dstOffset; + const void* srcHost; + size_t count; + } hipMemcpyHtoA; + struct + { + hipDeviceptr_t dst; + void* src; + size_t sizeBytes; + } hipMemcpyHtoD; + struct + { + hipDeviceptr_t dst; + void* src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyHtoDAsync; + struct + { + const hip_Memcpy2D* pCopy; + } hipMemcpyParam2D; + struct + { + const hip_Memcpy2D* pCopy; + hipStream_t stream; + } hipMemcpyParam2DAsync; + struct + { + void* dst; + int dstDeviceId; + const void* src; + int srcDeviceId; + size_t sizeBytes; + } hipMemcpyPeer; + struct + { + void* dst; + int dstDeviceId; + const void* src; + int srcDevice; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyPeerAsync; + struct + { + hipArray* dst; + size_t wOffset; + size_t hOffset; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipMemcpyToArray; + struct + { + const void* symbol; + const void* src; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + } hipMemcpyToSymbol; + struct + { + const void* symbol; + const void* src; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyToSymbolAsync; + struct + { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyWithStream; + struct + { + void* dst; + int value; + size_t sizeBytes; + } hipMemset; + struct + { + void* dst; + size_t pitch; + int value; + size_t width; + size_t height; + } hipMemset2D; + struct + { + void* dst; + size_t pitch; + int value; + size_t width; + size_t height; + hipStream_t stream; + } hipMemset2DAsync; + struct + { + hipPitchedPtr pitchedDevPtr; + int value; + hipExtent extent; + } hipMemset3D; + struct + { + hipPitchedPtr pitchedDevPtr; + int value; + hipExtent extent; + hipStream_t stream; + } hipMemset3DAsync; + struct + { + void* dst; + int value; + size_t sizeBytes; + hipStream_t stream; + } hipMemsetAsync; + struct + { + hipDeviceptr_t dest; + unsigned short value; + size_t count; + } hipMemsetD16; + struct + { + hipDeviceptr_t dest; + unsigned short value; + size_t count; + hipStream_t stream; + } hipMemsetD16Async; + struct + { + hipDeviceptr_t dest; + int value; + size_t count; + } hipMemsetD32; + struct + { + hipDeviceptr_t dst; + int value; + size_t count; + hipStream_t stream; + } hipMemsetD32Async; + struct + { + hipDeviceptr_t dest; + unsigned char value; + size_t count; + } hipMemsetD8; + struct + { + hipDeviceptr_t dest; + unsigned char value; + size_t count; + hipStream_t stream; + } hipMemsetD8Async; + struct + { + hipMipmappedArray_t* pHandle; + HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc; + unsigned int numMipmapLevels; + } hipMipmappedArrayCreate; + struct + { + hipMipmappedArray_t hMipmappedArray; + } hipMipmappedArrayDestroy; + struct + { + hipArray_t* pLevelArray; + hipMipmappedArray_t hMipMappedArray; + unsigned int level; + } hipMipmappedArrayGetLevel; + struct + { + hipFunction_t* function; + hipModule_t module; + const char* kname; + } hipModuleGetFunction; + struct + { + hipDeviceptr_t* dptr; + size_t* bytes; + hipModule_t hmod; + const char* name; + } hipModuleGetGlobal; + struct + { + textureReference** texRef; + hipModule_t hmod; + const char* name; + } hipModuleGetTexRef; + struct + { + hipFunction_t f; + unsigned int gridDimX; + unsigned int gridDimY; + unsigned int gridDimZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + unsigned int sharedMemBytes; + hipStream_t stream; + void** kernelParams; + } hipModuleLaunchCooperativeKernel; + struct + { + hipFunctionLaunchParams* launchParamsList; + unsigned int numDevices; + unsigned int flags; + } hipModuleLaunchCooperativeKernelMultiDevice; + struct + { + hipFunction_t f; + unsigned int gridDimX; + unsigned int gridDimY; + unsigned int gridDimZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + unsigned int sharedMemBytes; + hipStream_t stream; + void** kernelParams; + void** extra; + } hipModuleLaunchKernel; + struct + { + hipModule_t* module; + const char* fname; + } hipModuleLoad; + struct + { + hipModule_t* module; + const void* image; + } hipModuleLoadData; + struct + { + hipModule_t* module; + const void* image; + unsigned int numOptions; + hipJitOption* options; + void** optionsValues; + } hipModuleLoadDataEx; + struct + { + int* numBlocks; + hipFunction_t f; + int blockSize; + size_t dynSharedMemPerBlk; + } hipModuleOccupancyMaxActiveBlocksPerMultiprocessor; + struct + { + int* numBlocks; + hipFunction_t f; + int blockSize; + size_t dynSharedMemPerBlk; + unsigned int flags; + } hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + struct + { + int* gridSize; + int* blockSize; + hipFunction_t f; + size_t dynSharedMemPerBlk; + int blockSizeLimit; + } hipModuleOccupancyMaxPotentialBlockSize; + struct + { + int* gridSize; + int* blockSize; + hipFunction_t f; + size_t dynSharedMemPerBlk; + int blockSizeLimit; + unsigned int flags; + } hipModuleOccupancyMaxPotentialBlockSizeWithFlags; + struct + { + hipModule_t module; + } hipModuleUnload; + struct + { + int* numBlocks; + const void* f; + int blockSize; + size_t dynamicSMemSize; + } hipOccupancyMaxActiveBlocksPerMultiprocessor; + struct + { + int* numBlocks; + const void* f; + int blockSize; + size_t dynamicSMemSize; + unsigned int flags; + } hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + struct + { + int* gridSize; + int* blockSize; + const void* f; + size_t dynSharedMemPerBlk; + int blockSizeLimit; + } hipOccupancyMaxPotentialBlockSize; + struct + { + void* data; + hipPointer_attribute attribute; + hipDeviceptr_t ptr; + } hipPointerGetAttribute; + struct + { + hipPointerAttribute_t* attributes; + const void* ptr; + } hipPointerGetAttributes; + struct + { + const void* value; + hipPointer_attribute attribute; + hipDeviceptr_t ptr; + } hipPointerSetAttribute; + struct + { + int* runtimeVersion; + } hipRuntimeGetVersion; + struct + { + int deviceId; + } hipSetDevice; + struct + { + unsigned int flags; + } hipSetDeviceFlags; + struct + { + const void* arg; + size_t size; + size_t offset; + } hipSetupArgument; + struct + { + const hipExternalSemaphore_t* extSemArray; + const hipExternalSemaphoreSignalParams* paramsArray; + unsigned int numExtSems; + hipStream_t stream; + } hipSignalExternalSemaphoresAsync; + struct + { + hipStream_t stream; + hipStreamCallback_t callback; + void* userData; + unsigned int flags; + } hipStreamAddCallback; + struct + { + hipStream_t stream; + void* dev_ptr; + size_t length; + unsigned int flags; + } hipStreamAttachMemAsync; + struct + { + hipStream_t stream; + hipStreamCaptureMode mode; + } hipStreamBeginCapture; + struct + { + hipStream_t* stream; + } hipStreamCreate; + struct + { + hipStream_t* stream; + unsigned int flags; + } hipStreamCreateWithFlags; + struct + { + hipStream_t* stream; + unsigned int flags; + int priority; + } hipStreamCreateWithPriority; + struct + { + hipStream_t stream; + } hipStreamDestroy; + struct + { + hipStream_t stream; + hipGraph_t* pGraph; + } hipStreamEndCapture; + struct + { + hipStream_t stream; + hipStreamCaptureStatus* pCaptureStatus; + unsigned long long* pId; + } hipStreamGetCaptureInfo; + struct + { + hipStream_t stream; + hipStreamCaptureStatus* captureStatus_out; + unsigned long long* id_out; + hipGraph_t* graph_out; + const hipGraphNode_t** dependencies_out; + size_t* numDependencies_out; + } hipStreamGetCaptureInfo_v2; + struct + { + hipStream_t stream; + hipDevice_t* device; + } hipStreamGetDevice; + struct + { + hipStream_t stream; + unsigned int* flags; + } hipStreamGetFlags; + struct + { + hipStream_t stream; + int* priority; + } hipStreamGetPriority; + struct + { + hipStream_t stream; + hipStreamCaptureStatus* pCaptureStatus; + } hipStreamIsCapturing; + struct + { + hipStream_t stream; + } hipStreamQuery; + struct + { + hipStream_t stream; + } hipStreamSynchronize; + struct + { + hipStream_t stream; + hipGraphNode_t* dependencies; + size_t numDependencies; + unsigned int flags; + } hipStreamUpdateCaptureDependencies; + struct + { + hipStream_t stream; + hipEvent_t event; + unsigned int flags; + } hipStreamWaitEvent; + struct + { + hipStream_t stream; + void* ptr; + unsigned int value; + unsigned int flags; + unsigned int mask; + } hipStreamWaitValue32; + struct + { + hipStream_t stream; + void* ptr; + uint64_t value; + unsigned int flags; + uint64_t mask; + } hipStreamWaitValue64; + struct + { + hipStream_t stream; + void* ptr; + unsigned int value; + unsigned int flags; + } hipStreamWriteValue32; + struct + { + hipStream_t stream; + void* ptr; + uint64_t value; + unsigned int flags; + } hipStreamWriteValue64; + struct + { + hipDeviceptr_t* dev_ptr; + const textureReference* texRef; + } hipTexRefGetAddress; + struct + { + unsigned int* pFlags; + const textureReference* texRef; + } hipTexRefGetFlags; + struct + { + hipArray_Format* pFormat; + int* pNumChannels; + const textureReference* texRef; + } hipTexRefGetFormat; + struct + { + int* pmaxAnsio; + const textureReference* texRef; + } hipTexRefGetMaxAnisotropy; + struct + { + hipMipmappedArray_t* pArray; + const textureReference* texRef; + } hipTexRefGetMipMappedArray; + struct + { + float* pbias; + const textureReference* texRef; + } hipTexRefGetMipmapLevelBias; + struct + { + float* pminMipmapLevelClamp; + float* pmaxMipmapLevelClamp; + const textureReference* texRef; + } hipTexRefGetMipmapLevelClamp; + struct + { + size_t* ByteOffset; + textureReference* texRef; + hipDeviceptr_t dptr; + size_t bytes; + } hipTexRefSetAddress; + struct + { + textureReference* texRef; + const HIP_ARRAY_DESCRIPTOR* desc; + hipDeviceptr_t dptr; + size_t Pitch; + } hipTexRefSetAddress2D; + struct + { + textureReference* tex; + hipArray_const_t array; + unsigned int flags; + } hipTexRefSetArray; + struct + { + textureReference* texRef; + float* pBorderColor; + } hipTexRefSetBorderColor; + struct + { + textureReference* texRef; + unsigned int Flags; + } hipTexRefSetFlags; + struct + { + textureReference* texRef; + hipArray_Format fmt; + int NumPackedComponents; + } hipTexRefSetFormat; + struct + { + textureReference* texRef; + unsigned int maxAniso; + } hipTexRefSetMaxAnisotropy; + struct + { + textureReference* texRef; + float bias; + } hipTexRefSetMipmapLevelBias; + struct + { + textureReference* texRef; + float minMipMapLevelClamp; + float maxMipMapLevelClamp; + } hipTexRefSetMipmapLevelClamp; + struct + { + textureReference* texRef; + hipMipmappedArray* mipmappedArray; + unsigned int Flags; + } hipTexRefSetMipmappedArray; + struct + { + hipStreamCaptureMode* mode; + } hipThreadExchangeStreamCaptureMode; + struct + { + hipUserObject_t* object_out; + void* ptr; + hipHostFn_t destroy; + unsigned int initialRefcount; + unsigned int flags; + } hipUserObjectCreate; + struct + { + hipUserObject_t object; + unsigned int count; + } hipUserObjectRelease; + struct + { + hipUserObject_t object; + unsigned int count; + } hipUserObjectRetain; + struct + { + const hipExternalSemaphore_t* extSemArray; + const hipExternalSemaphoreWaitParams* paramsArray; + unsigned int numExtSems; + hipStream_t stream; + } hipWaitExternalSemaphoresAsync; +} rocprofiler_hip_api_args_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/hip/api_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler/hip/api_id.h new file mode 100644 index 0000000000..e0b572135a --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/hip/api_id.h @@ -0,0 +1,432 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +// NOLINTNEXTLINE(performance-enum-size) +typedef enum +{ + ROCPROFILER_HIP_API_ID_NONE = -1, + ROCPROFILER_HIP_API_ID___hipPopCallConfiguration = 0, + ROCPROFILER_HIP_API_ID___hipPushCallConfiguration, + ROCPROFILER_HIP_API_ID_hipArray3DCreate, + ROCPROFILER_HIP_API_ID_hipArrayCreate, + ROCPROFILER_HIP_API_ID_hipArrayDestroy, + ROCPROFILER_HIP_API_ID_hipChooseDevice, + ROCPROFILER_HIP_API_ID_hipConfigureCall, + ROCPROFILER_HIP_API_ID_hipCtxCreate, + ROCPROFILER_HIP_API_ID_hipCtxDestroy, + ROCPROFILER_HIP_API_ID_hipCtxDisablePeerAccess, + ROCPROFILER_HIP_API_ID_hipCtxEnablePeerAccess, + ROCPROFILER_HIP_API_ID_hipCtxGetApiVersion, + ROCPROFILER_HIP_API_ID_hipCtxGetCacheConfig, + ROCPROFILER_HIP_API_ID_hipCtxGetCurrent, + ROCPROFILER_HIP_API_ID_hipCtxGetDevice, + ROCPROFILER_HIP_API_ID_hipCtxGetFlags, + ROCPROFILER_HIP_API_ID_hipCtxGetSharedMemConfig, + ROCPROFILER_HIP_API_ID_hipCtxPopCurrent, + ROCPROFILER_HIP_API_ID_hipCtxPushCurrent, + ROCPROFILER_HIP_API_ID_hipCtxSetCacheConfig, + ROCPROFILER_HIP_API_ID_hipCtxSetCurrent, + ROCPROFILER_HIP_API_ID_hipCtxSetSharedMemConfig, + ROCPROFILER_HIP_API_ID_hipCtxSynchronize, + ROCPROFILER_HIP_API_ID_hipDestroyExternalMemory, + ROCPROFILER_HIP_API_ID_hipDestroyExternalSemaphore, + ROCPROFILER_HIP_API_ID_hipDeviceCanAccessPeer, + ROCPROFILER_HIP_API_ID_hipDeviceComputeCapability, + ROCPROFILER_HIP_API_ID_hipDeviceDisablePeerAccess, + ROCPROFILER_HIP_API_ID_hipDeviceEnablePeerAccess, + ROCPROFILER_HIP_API_ID_hipDeviceGet, + ROCPROFILER_HIP_API_ID_hipDeviceGetAttribute, + ROCPROFILER_HIP_API_ID_hipDeviceGetByPCIBusId, + ROCPROFILER_HIP_API_ID_hipDeviceGetCacheConfig, + ROCPROFILER_HIP_API_ID_hipDeviceGetLimit, + ROCPROFILER_HIP_API_ID_hipDeviceGetName, + ROCPROFILER_HIP_API_ID_hipDeviceGetP2PAttribute, + ROCPROFILER_HIP_API_ID_hipDeviceGetPCIBusId, + ROCPROFILER_HIP_API_ID_hipDeviceGetSharedMemConfig, + ROCPROFILER_HIP_API_ID_hipDeviceGetStreamPriorityRange, + ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxGetState, + ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxRelease, + ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxReset, + ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxRetain, + ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxSetFlags, + ROCPROFILER_HIP_API_ID_hipDeviceReset, + ROCPROFILER_HIP_API_ID_hipDeviceSetCacheConfig, + ROCPROFILER_HIP_API_ID_hipDeviceSetSharedMemConfig, + ROCPROFILER_HIP_API_ID_hipDeviceSynchronize, + ROCPROFILER_HIP_API_ID_hipDeviceTotalMem, + ROCPROFILER_HIP_API_ID_RESERVED_50, + ROCPROFILER_HIP_API_ID_hipDrvMemcpy2DUnaligned, + ROCPROFILER_HIP_API_ID_hipDrvMemcpy3D, + ROCPROFILER_HIP_API_ID_hipDrvMemcpy3DAsync, + ROCPROFILER_HIP_API_ID_hipEventCreate, + ROCPROFILER_HIP_API_ID_hipEventCreateWithFlags, + ROCPROFILER_HIP_API_ID_hipEventDestroy, + ROCPROFILER_HIP_API_ID_hipEventElapsedTime, + ROCPROFILER_HIP_API_ID_hipEventQuery, + ROCPROFILER_HIP_API_ID_hipEventRecord, + ROCPROFILER_HIP_API_ID_hipEventSynchronize, + ROCPROFILER_HIP_API_ID_hipExtGetLinkTypeAndHopCount, + ROCPROFILER_HIP_API_ID_hipExtLaunchKernel, + ROCPROFILER_HIP_API_ID_hipExtLaunchMultiKernelMultiDevice, + ROCPROFILER_HIP_API_ID_hipExtMallocWithFlags, + ROCPROFILER_HIP_API_ID_hipExtModuleLaunchKernel, + ROCPROFILER_HIP_API_ID_hipExtStreamCreateWithCUMask, + ROCPROFILER_HIP_API_ID_hipExtStreamGetCUMask, + ROCPROFILER_HIP_API_ID_hipExternalMemoryGetMappedBuffer, + ROCPROFILER_HIP_API_ID_hipFree, + ROCPROFILER_HIP_API_ID_hipFreeArray, + ROCPROFILER_HIP_API_ID_hipFreeHost, + ROCPROFILER_HIP_API_ID_hipFreeMipmappedArray, + ROCPROFILER_HIP_API_ID_hipFuncGetAttribute, + ROCPROFILER_HIP_API_ID_hipFuncGetAttributes, + ROCPROFILER_HIP_API_ID_hipFuncSetAttribute, + ROCPROFILER_HIP_API_ID_hipFuncSetCacheConfig, + ROCPROFILER_HIP_API_ID_hipFuncSetSharedMemConfig, + ROCPROFILER_HIP_API_ID_hipGetDevice, + ROCPROFILER_HIP_API_ID_hipGetDeviceCount, + ROCPROFILER_HIP_API_ID_hipGetDeviceFlags, + ROCPROFILER_HIP_API_ID_hipGetDeviceProperties, + ROCPROFILER_HIP_API_ID_RESERVED_82, + ROCPROFILER_HIP_API_ID_hipGetErrorString, + ROCPROFILER_HIP_API_ID_hipGetLastError, + ROCPROFILER_HIP_API_ID_hipGetMipmappedArrayLevel, + ROCPROFILER_HIP_API_ID_hipGetSymbolAddress, + ROCPROFILER_HIP_API_ID_hipGetSymbolSize, + ROCPROFILER_HIP_API_ID_hipHccModuleLaunchKernel, + ROCPROFILER_HIP_API_ID_hipHostAlloc, + ROCPROFILER_HIP_API_ID_hipHostFree, + ROCPROFILER_HIP_API_ID_hipHostGetDevicePointer, + ROCPROFILER_HIP_API_ID_hipHostGetFlags, + ROCPROFILER_HIP_API_ID_hipHostMalloc, + ROCPROFILER_HIP_API_ID_hipHostRegister, + ROCPROFILER_HIP_API_ID_hipHostUnregister, + ROCPROFILER_HIP_API_ID_hipImportExternalMemory, + ROCPROFILER_HIP_API_ID_hipImportExternalSemaphore, + ROCPROFILER_HIP_API_ID_hipInit, + ROCPROFILER_HIP_API_ID_hipIpcCloseMemHandle, + ROCPROFILER_HIP_API_ID_hipIpcGetEventHandle, + ROCPROFILER_HIP_API_ID_hipIpcGetMemHandle, + ROCPROFILER_HIP_API_ID_hipIpcOpenEventHandle, + ROCPROFILER_HIP_API_ID_hipIpcOpenMemHandle, + ROCPROFILER_HIP_API_ID_hipLaunchByPtr, + ROCPROFILER_HIP_API_ID_hipLaunchCooperativeKernel, + ROCPROFILER_HIP_API_ID_hipLaunchCooperativeKernelMultiDevice, + ROCPROFILER_HIP_API_ID_hipLaunchKernel, + ROCPROFILER_HIP_API_ID_hipMalloc, + ROCPROFILER_HIP_API_ID_hipMalloc3D, + ROCPROFILER_HIP_API_ID_hipMalloc3DArray, + ROCPROFILER_HIP_API_ID_hipMallocArray, + ROCPROFILER_HIP_API_ID_hipMallocHost, + ROCPROFILER_HIP_API_ID_hipMallocManaged, + ROCPROFILER_HIP_API_ID_hipMallocMipmappedArray, + ROCPROFILER_HIP_API_ID_hipMallocPitch, + ROCPROFILER_HIP_API_ID_hipMemAdvise, + ROCPROFILER_HIP_API_ID_hipMemAllocHost, + ROCPROFILER_HIP_API_ID_hipMemAllocPitch, + ROCPROFILER_HIP_API_ID_hipMemGetAddressRange, + ROCPROFILER_HIP_API_ID_hipMemGetInfo, + ROCPROFILER_HIP_API_ID_hipMemPrefetchAsync, + ROCPROFILER_HIP_API_ID_hipMemPtrGetInfo, + ROCPROFILER_HIP_API_ID_hipMemRangeGetAttribute, + ROCPROFILER_HIP_API_ID_hipMemRangeGetAttributes, + ROCPROFILER_HIP_API_ID_hipMemcpy, + ROCPROFILER_HIP_API_ID_hipMemcpy2D, + ROCPROFILER_HIP_API_ID_hipMemcpy2DAsync, + ROCPROFILER_HIP_API_ID_hipMemcpy2DFromArray, + ROCPROFILER_HIP_API_ID_hipMemcpy2DFromArrayAsync, + ROCPROFILER_HIP_API_ID_hipMemcpy2DToArray, + ROCPROFILER_HIP_API_ID_hipMemcpy2DToArrayAsync, + ROCPROFILER_HIP_API_ID_hipMemcpy3D, + ROCPROFILER_HIP_API_ID_hipMemcpy3DAsync, + ROCPROFILER_HIP_API_ID_hipMemcpyAsync, + ROCPROFILER_HIP_API_ID_hipMemcpyAtoH, + ROCPROFILER_HIP_API_ID_hipMemcpyDtoD, + ROCPROFILER_HIP_API_ID_hipMemcpyDtoDAsync, + ROCPROFILER_HIP_API_ID_hipMemcpyDtoH, + ROCPROFILER_HIP_API_ID_hipMemcpyDtoHAsync, + ROCPROFILER_HIP_API_ID_hipMemcpyFromArray, + ROCPROFILER_HIP_API_ID_hipMemcpyFromSymbol, + ROCPROFILER_HIP_API_ID_hipMemcpyFromSymbolAsync, + ROCPROFILER_HIP_API_ID_hipMemcpyHtoA, + ROCPROFILER_HIP_API_ID_hipMemcpyHtoD, + ROCPROFILER_HIP_API_ID_hipMemcpyHtoDAsync, + ROCPROFILER_HIP_API_ID_hipMemcpyParam2D, + ROCPROFILER_HIP_API_ID_hipMemcpyParam2DAsync, + ROCPROFILER_HIP_API_ID_hipMemcpyPeer, + ROCPROFILER_HIP_API_ID_hipMemcpyPeerAsync, + ROCPROFILER_HIP_API_ID_hipMemcpyToArray, + ROCPROFILER_HIP_API_ID_hipMemcpyToSymbol, + ROCPROFILER_HIP_API_ID_hipMemcpyToSymbolAsync, + ROCPROFILER_HIP_API_ID_hipMemcpyWithStream, + ROCPROFILER_HIP_API_ID_hipMemset, + ROCPROFILER_HIP_API_ID_hipMemset2D, + ROCPROFILER_HIP_API_ID_hipMemset2DAsync, + ROCPROFILER_HIP_API_ID_hipMemset3D, + ROCPROFILER_HIP_API_ID_hipMemset3DAsync, + ROCPROFILER_HIP_API_ID_hipMemsetAsync, + ROCPROFILER_HIP_API_ID_hipMemsetD16, + ROCPROFILER_HIP_API_ID_hipMemsetD16Async, + ROCPROFILER_HIP_API_ID_hipMemsetD32, + ROCPROFILER_HIP_API_ID_hipMemsetD32Async, + ROCPROFILER_HIP_API_ID_hipMemsetD8, + ROCPROFILER_HIP_API_ID_hipMemsetD8Async, + ROCPROFILER_HIP_API_ID_hipModuleGetFunction, + ROCPROFILER_HIP_API_ID_hipModuleGetGlobal, + ROCPROFILER_HIP_API_ID_hipModuleGetTexRef, + ROCPROFILER_HIP_API_ID_hipModuleLaunchKernel, + ROCPROFILER_HIP_API_ID_hipModuleLoad, + ROCPROFILER_HIP_API_ID_hipModuleLoadData, + ROCPROFILER_HIP_API_ID_hipModuleLoadDataEx, + ROCPROFILER_HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, + ROCPROFILER_HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, + ROCPROFILER_HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize, + ROCPROFILER_HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags, + ROCPROFILER_HIP_API_ID_hipModuleUnload, + ROCPROFILER_HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor, + ROCPROFILER_HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, + ROCPROFILER_HIP_API_ID_hipOccupancyMaxPotentialBlockSize, + ROCPROFILER_HIP_API_ID_hipPeekAtLastError, + ROCPROFILER_HIP_API_ID_hipPointerGetAttributes, + ROCPROFILER_HIP_API_ID_hipProfilerStart, + ROCPROFILER_HIP_API_ID_hipProfilerStop, + ROCPROFILER_HIP_API_ID_RESERVED_185, + ROCPROFILER_HIP_API_ID_hipSetDevice, + ROCPROFILER_HIP_API_ID_hipSetDeviceFlags, + ROCPROFILER_HIP_API_ID_hipSetupArgument, + ROCPROFILER_HIP_API_ID_hipSignalExternalSemaphoresAsync, + ROCPROFILER_HIP_API_ID_hipStreamAddCallback, + ROCPROFILER_HIP_API_ID_hipStreamAttachMemAsync, + ROCPROFILER_HIP_API_ID_hipStreamCreate, + ROCPROFILER_HIP_API_ID_hipStreamCreateWithFlags, + ROCPROFILER_HIP_API_ID_hipStreamCreateWithPriority, + ROCPROFILER_HIP_API_ID_hipStreamDestroy, + ROCPROFILER_HIP_API_ID_hipStreamGetFlags, + ROCPROFILER_HIP_API_ID_hipStreamGetPriority, + ROCPROFILER_HIP_API_ID_hipStreamQuery, + ROCPROFILER_HIP_API_ID_hipStreamSynchronize, + ROCPROFILER_HIP_API_ID_hipStreamWaitEvent, + ROCPROFILER_HIP_API_ID_hipStreamWaitValue32, + ROCPROFILER_HIP_API_ID_hipStreamWaitValue64, + ROCPROFILER_HIP_API_ID_hipStreamWriteValue32, + ROCPROFILER_HIP_API_ID_hipStreamWriteValue64, + ROCPROFILER_HIP_API_ID_hipWaitExternalSemaphoresAsync, + ROCPROFILER_HIP_API_ID_hipCreateSurfaceObject, + ROCPROFILER_HIP_API_ID_hipDestroySurfaceObject, + ROCPROFILER_HIP_API_ID_hipGraphAddKernelNode, + ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNode, + ROCPROFILER_HIP_API_ID_hipGraphAddMemsetNode, + ROCPROFILER_HIP_API_ID_hipGraphCreate, + ROCPROFILER_HIP_API_ID_hipGraphDestroy, + ROCPROFILER_HIP_API_ID_hipGraphExecDestroy, + ROCPROFILER_HIP_API_ID_hipGraphInstantiate, + ROCPROFILER_HIP_API_ID_hipGraphLaunch, + ROCPROFILER_HIP_API_ID_hipMipmappedArrayCreate, + ROCPROFILER_HIP_API_ID_hipMipmappedArrayDestroy, + ROCPROFILER_HIP_API_ID_hipMipmappedArrayGetLevel, + ROCPROFILER_HIP_API_ID_hipStreamBeginCapture, + ROCPROFILER_HIP_API_ID_hipStreamEndCapture, + ROCPROFILER_HIP_API_ID_hipTexRefGetAddress, + ROCPROFILER_HIP_API_ID_hipTexRefGetFlags, + ROCPROFILER_HIP_API_ID_hipTexRefGetFormat, + ROCPROFILER_HIP_API_ID_hipTexRefGetMaxAnisotropy, + ROCPROFILER_HIP_API_ID_hipTexRefGetMipMappedArray, + ROCPROFILER_HIP_API_ID_hipTexRefGetMipmapLevelBias, + ROCPROFILER_HIP_API_ID_hipTexRefGetMipmapLevelClamp, + ROCPROFILER_HIP_API_ID_hipTexRefSetAddress, + ROCPROFILER_HIP_API_ID_hipTexRefSetAddress2D, + ROCPROFILER_HIP_API_ID_hipTexRefSetBorderColor, + ROCPROFILER_HIP_API_ID_hipTexRefSetFormat, + ROCPROFILER_HIP_API_ID_hipTexRefSetMaxAnisotropy, + ROCPROFILER_HIP_API_ID_hipTexRefSetMipmapLevelClamp, + ROCPROFILER_HIP_API_ID_hipTexRefSetMipmappedArray, + ROCPROFILER_HIP_API_ID_hipGLGetDevices, + ROCPROFILER_HIP_API_ID_hipGraphAddDependencies, + ROCPROFILER_HIP_API_ID_hipGraphAddEmptyNode, + ROCPROFILER_HIP_API_ID_hipGraphExecKernelNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphGetNodes, + ROCPROFILER_HIP_API_ID_hipGraphGetRootNodes, + ROCPROFILER_HIP_API_ID_hipGraphKernelNodeGetParams, + ROCPROFILER_HIP_API_ID_hipGraphKernelNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeGetParams, + ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphMemsetNodeGetParams, + ROCPROFILER_HIP_API_ID_hipGraphMemsetNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphicsGLRegisterBuffer, + ROCPROFILER_HIP_API_ID_hipGraphicsMapResources, + ROCPROFILER_HIP_API_ID_hipGraphicsResourceGetMappedPointer, + ROCPROFILER_HIP_API_ID_hipGraphicsUnmapResources, + ROCPROFILER_HIP_API_ID_hipGraphicsUnregisterResource, + ROCPROFILER_HIP_API_ID_hipGraphAddChildGraphNode, + ROCPROFILER_HIP_API_ID_hipGraphAddEventRecordNode, + ROCPROFILER_HIP_API_ID_hipGraphAddEventWaitNode, + ROCPROFILER_HIP_API_ID_hipGraphAddHostNode, + ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNode1D, + ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol, + ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNodeToSymbol, + ROCPROFILER_HIP_API_ID_hipGraphChildGraphNodeGetGraph, + ROCPROFILER_HIP_API_ID_hipGraphClone, + ROCPROFILER_HIP_API_ID_hipGraphDestroyNode, + ROCPROFILER_HIP_API_ID_hipGraphEventRecordNodeGetEvent, + ROCPROFILER_HIP_API_ID_hipGraphEventRecordNodeSetEvent, + ROCPROFILER_HIP_API_ID_hipGraphEventWaitNodeGetEvent, + ROCPROFILER_HIP_API_ID_hipGraphEventWaitNodeSetEvent, + ROCPROFILER_HIP_API_ID_hipGraphExecChildGraphNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphExecEventRecordNodeSetEvent, + ROCPROFILER_HIP_API_ID_hipGraphExecEventWaitNodeSetEvent, + ROCPROFILER_HIP_API_ID_hipGraphExecHostNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D, + ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol, + ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol, + ROCPROFILER_HIP_API_ID_hipGraphExecMemsetNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphExecUpdate, + ROCPROFILER_HIP_API_ID_hipGraphGetEdges, + ROCPROFILER_HIP_API_ID_hipGraphHostNodeGetParams, + ROCPROFILER_HIP_API_ID_hipGraphHostNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphInstantiateWithFlags, + ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParams1D, + ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol, + ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol, + ROCPROFILER_HIP_API_ID_hipGraphNodeFindInClone, + ROCPROFILER_HIP_API_ID_hipGraphNodeGetDependencies, + ROCPROFILER_HIP_API_ID_hipGraphNodeGetDependentNodes, + ROCPROFILER_HIP_API_ID_hipGraphNodeGetType, + ROCPROFILER_HIP_API_ID_hipGraphRemoveDependencies, + ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo, + ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo_v2, + ROCPROFILER_HIP_API_ID_hipStreamIsCapturing, + ROCPROFILER_HIP_API_ID_hipStreamUpdateCaptureDependencies, + ROCPROFILER_HIP_API_ID_hipDrvPointerGetAttributes, + ROCPROFILER_HIP_API_ID_hipGraphicsGLRegisterImage, + ROCPROFILER_HIP_API_ID_hipGraphicsSubResourceGetMappedArray, + ROCPROFILER_HIP_API_ID_hipPointerGetAttribute, + ROCPROFILER_HIP_API_ID_RESERVED_296, + ROCPROFILER_HIP_API_ID_hipThreadExchangeStreamCaptureMode, + ROCPROFILER_HIP_API_ID_hipDeviceGetUuid, + ROCPROFILER_HIP_API_ID_hipGetChannelDesc, + ROCPROFILER_HIP_API_ID_hipGraphKernelNodeGetAttribute, + ROCPROFILER_HIP_API_ID_hipGraphKernelNodeSetAttribute, + ROCPROFILER_HIP_API_ID_hipLaunchHostFunc, + ROCPROFILER_HIP_API_ID_hipDeviceGetDefaultMemPool, + ROCPROFILER_HIP_API_ID_hipDeviceGetMemPool, + ROCPROFILER_HIP_API_ID_hipDeviceSetMemPool, + ROCPROFILER_HIP_API_ID_hipFreeAsync, + ROCPROFILER_HIP_API_ID_hipMallocAsync, + ROCPROFILER_HIP_API_ID_hipMallocFromPoolAsync, + ROCPROFILER_HIP_API_ID_hipMemPoolCreate, + ROCPROFILER_HIP_API_ID_hipMemPoolDestroy, + ROCPROFILER_HIP_API_ID_hipMemPoolExportPointer, + ROCPROFILER_HIP_API_ID_hipMemPoolExportToShareableHandle, + ROCPROFILER_HIP_API_ID_hipMemPoolGetAccess, + ROCPROFILER_HIP_API_ID_hipMemPoolGetAttribute, + ROCPROFILER_HIP_API_ID_hipMemPoolImportFromShareableHandle, + ROCPROFILER_HIP_API_ID_hipMemPoolImportPointer, + ROCPROFILER_HIP_API_ID_hipMemPoolSetAccess, + ROCPROFILER_HIP_API_ID_hipMemPoolSetAttribute, + ROCPROFILER_HIP_API_ID_hipMemPoolTrimTo, + ROCPROFILER_HIP_API_ID_hipMemAddressFree, + ROCPROFILER_HIP_API_ID_hipMemAddressReserve, + ROCPROFILER_HIP_API_ID_hipMemCreate, + ROCPROFILER_HIP_API_ID_hipMemExportToShareableHandle, + ROCPROFILER_HIP_API_ID_hipMemGetAccess, + ROCPROFILER_HIP_API_ID_hipMemGetAllocationGranularity, + ROCPROFILER_HIP_API_ID_hipMemGetAllocationPropertiesFromHandle, + ROCPROFILER_HIP_API_ID_hipMemImportFromShareableHandle, + ROCPROFILER_HIP_API_ID_hipMemMap, + ROCPROFILER_HIP_API_ID_hipMemMapArrayAsync, + ROCPROFILER_HIP_API_ID_hipMemRelease, + ROCPROFILER_HIP_API_ID_hipMemRetainAllocationHandle, + ROCPROFILER_HIP_API_ID_hipMemSetAccess, + ROCPROFILER_HIP_API_ID_hipMemUnmap, + ROCPROFILER_HIP_API_ID_hipDeviceSetGraphMemAttribute, + ROCPROFILER_HIP_API_ID_hipDeviceGetGraphMemAttribute, + ROCPROFILER_HIP_API_ID_hipDeviceGraphMemTrim, + ROCPROFILER_HIP_API_ID_hipDeviceSetLimit, + ROCPROFILER_HIP_API_ID_hipTexRefSetArray, + ROCPROFILER_HIP_API_ID_hipTexRefSetFlags, + ROCPROFILER_HIP_API_ID_hipTexRefSetMipmapLevelBias, + ROCPROFILER_HIP_API_ID_hipDriverGetVersion, + ROCPROFILER_HIP_API_ID_hipGraphUpload, + ROCPROFILER_HIP_API_ID_hipRuntimeGetVersion, + ROCPROFILER_HIP_API_ID_hipUserObjectCreate, + ROCPROFILER_HIP_API_ID_hipUserObjectRelease, + ROCPROFILER_HIP_API_ID_hipUserObjectRetain, + ROCPROFILER_HIP_API_ID_hipGraphRetainUserObject, + ROCPROFILER_HIP_API_ID_hipGraphReleaseUserObject, + ROCPROFILER_HIP_API_ID_hipGraphDebugDotPrint, + ROCPROFILER_HIP_API_ID_hipGraphKernelNodeCopyAttributes, + ROCPROFILER_HIP_API_ID_hipGraphNodeGetEnabled, + ROCPROFILER_HIP_API_ID_hipGraphNodeSetEnabled, + ROCPROFILER_HIP_API_ID_hipPointerSetAttribute, + ROCPROFILER_HIP_API_ID_hipGraphAddMemAllocNode, + ROCPROFILER_HIP_API_ID_hipGraphAddMemFreeNode, + ROCPROFILER_HIP_API_ID_hipGraphMemAllocNodeGetParams, + ROCPROFILER_HIP_API_ID_hipGraphMemFreeNodeGetParams, + ROCPROFILER_HIP_API_ID_hipModuleLaunchCooperativeKernel, + ROCPROFILER_HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice, + ROCPROFILER_HIP_API_ID_hipArray3DGetDescriptor, + ROCPROFILER_HIP_API_ID_hipArrayGetDescriptor, + ROCPROFILER_HIP_API_ID_hipArrayGetInfo, + ROCPROFILER_HIP_API_ID_hipStreamGetDevice, + ROCPROFILER_HIP_API_ID_LAST, + // + // Deprecated or removed + // + ROCPROFILER_HIP_API_ID_hipBindTexture = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipBindTexture2D = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipBindTextureToArray = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipBindTextureToMipmappedArray = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipCreateTextureObject = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipDestroyTextureObject = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipDeviceGetCount = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipGetTextureAlignmentOffset = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipGetTextureObjectResourceDesc = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipGetTextureObjectResourceViewDesc = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipGetTextureObjectTextureDesc = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipGetTextureReference = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipMemcpy2DArrayToArray = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipMemcpyArrayToArray = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipMemcpyAtoA = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipMemcpyAtoD = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipMemcpyAtoHAsync = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipMemcpyDtoA = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipMemcpyFromArrayAsync = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipMemcpyHtoAAsync = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipMemcpyToArrayAsync = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipModuleLaunchKernelExt = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipSetValidDevices = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexObjectCreate = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexObjectDestroy = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexObjectGetResourceDesc = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexObjectGetResourceViewDesc = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexObjectGetTextureDesc = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexRefGetAddressMode = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexRefGetArray = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexRefGetBorderColor = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexRefGetFilterMode = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexRefGetMipmapFilterMode = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexRefGetMipmappedArray = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexRefSetAddressMode = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexRefSetFilterMode = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipTexRefSetMipmapFilterMode = ROCPROFILER_HIP_API_ID_NONE, + ROCPROFILER_HIP_API_ID_hipUnbindTexture = ROCPROFILER_HIP_API_ID_NONE, +} rocprofiler_hip_api_id_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/hsa.h b/projects/rocprofiler-sdk/source/include/rocprofiler/hsa.h new file mode 100644 index 0000000000..c0abfd7fbc --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/hsa.h @@ -0,0 +1,62 @@ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include + +#include + +#include + +typedef uint32_t rocprofiler_trace_record_hsa_operation_kind_t; +typedef struct hsa_kernel_dispatch_packet_s hsa_kernel_dispatch_packet_t; +typedef struct rocprofiler_hsa_trace_data_s rocprofiler_hsa_trace_data_t; +typedef struct rocprofiler_hsa_api_data_s rocprofiler_hsa_api_data_t; + +struct rocprofiler_hsa_api_data_s +{ + uint64_t correlation_id; + uint32_t phase; + union + { + uint64_t uint64_t_retval; + uint32_t uint32_t_retval; + hsa_signal_value_t hsa_signal_value_t_retval; + hsa_status_t hsa_status_t_retval; + }; + rocprofiler_hsa_api_args_t args; + uint64_t* phase_data; +}; + +struct rocprofiler_hsa_trace_data_s +{ + rocprofiler_hsa_api_data_t api_data; + uint64_t phase_enter_timestamp; + uint64_t phase_exit_timestamp; + uint64_t phase_data; + + void (*phase_enter)(rocprofiler_hsa_api_id_t operation_id, rocprofiler_hsa_trace_data_t* data); + void (*phase_exit)(rocprofiler_hsa_api_id_t operation_id, rocprofiler_hsa_trace_data_t* data); +}; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/CMakeLists.txt new file mode 100644 index 0000000000..4998e24a59 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/CMakeLists.txt @@ -0,0 +1,9 @@ +# +# +# Installation of public HSA headers +# +# +set(ROCPROFILER_HSA_HEADER_FILES api_args.h api_id.h table_api_id.h) + +install(FILES ${ROCPROFILER_HSA_HEADER_FILES} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler/hsa) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/api_args.h b/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/api_args.h new file mode 100644 index 0000000000..8668e7cfd6 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/api_args.h @@ -0,0 +1,1224 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include +#include +#include +#include + +typedef union rocprofiler_hsa_api_args_u +{ + // block: CoreApi API + struct + { + } hsa_init; + struct + { + } hsa_shut_down; + struct + { + hsa_system_info_t attribute; + void* value; + } hsa_system_get_info; + struct + { + uint16_t extension; + uint16_t version_major; + uint16_t version_minor; + bool* result; + } hsa_system_extension_supported; + struct + { + uint16_t extension; + uint16_t version_major; + uint16_t version_minor; + void* table; + } hsa_system_get_extension_table; + struct + { + hsa_status_t (*callback)(hsa_agent_t agent, void* data); + void* data; + } hsa_iterate_agents; + struct + { + hsa_agent_t agent; + hsa_agent_info_t attribute; + void* value; + } hsa_agent_get_info; + struct + { + hsa_agent_t agent; + uint32_t size; + hsa_queue_type32_t type; + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data); + void* data; + uint32_t private_segment_size; + uint32_t group_segment_size; + hsa_queue_t** queue; + } hsa_queue_create; + struct + { + hsa_region_t region; + uint32_t size; + hsa_queue_type32_t type; + uint32_t features; + hsa_signal_t doorbell_signal; + hsa_queue_t** queue; + } hsa_soft_queue_create; + struct + { + hsa_queue_t* queue; + } hsa_queue_destroy; + struct + { + hsa_queue_t* queue; + } hsa_queue_inactivate; + struct + { + const hsa_queue_t* queue; + } hsa_queue_load_read_index_scacquire; + struct + { + const hsa_queue_t* queue; + } hsa_queue_load_read_index_relaxed; + struct + { + const hsa_queue_t* queue; + } hsa_queue_load_write_index_scacquire; + struct + { + const hsa_queue_t* queue; + } hsa_queue_load_write_index_relaxed; + struct + { + const hsa_queue_t* queue; + uint64_t value; + } hsa_queue_store_write_index_relaxed; + struct + { + const hsa_queue_t* queue; + uint64_t value; + } hsa_queue_store_write_index_screlease; + struct + { + const hsa_queue_t* queue; + uint64_t expected; + uint64_t value; + } hsa_queue_cas_write_index_scacq_screl; + struct + { + const hsa_queue_t* queue; + uint64_t expected; + uint64_t value; + } hsa_queue_cas_write_index_scacquire; + struct + { + const hsa_queue_t* queue; + uint64_t expected; + uint64_t value; + } hsa_queue_cas_write_index_relaxed; + struct + { + const hsa_queue_t* queue; + uint64_t expected; + uint64_t value; + } hsa_queue_cas_write_index_screlease; + struct + { + const hsa_queue_t* queue; + uint64_t value; + } hsa_queue_add_write_index_scacq_screl; + struct + { + const hsa_queue_t* queue; + uint64_t value; + } hsa_queue_add_write_index_scacquire; + struct + { + const hsa_queue_t* queue; + uint64_t value; + } hsa_queue_add_write_index_relaxed; + struct + { + const hsa_queue_t* queue; + uint64_t value; + } hsa_queue_add_write_index_screlease; + struct + { + const hsa_queue_t* queue; + uint64_t value; + } hsa_queue_store_read_index_relaxed; + struct + { + const hsa_queue_t* queue; + uint64_t value; + } hsa_queue_store_read_index_screlease; + struct + { + hsa_agent_t agent; + hsa_status_t (*callback)(hsa_region_t region, void* data); + void* data; + } hsa_agent_iterate_regions; + struct + { + hsa_region_t region; + hsa_region_info_t attribute; + void* value; + } hsa_region_get_info; + struct + { + hsa_agent_t agent; + hsa_profile_t profile; + uint16_t* mask; + } hsa_agent_get_exception_policies; + struct + { + uint16_t extension; + hsa_agent_t agent; + uint16_t version_major; + uint16_t version_minor; + bool* result; + } hsa_agent_extension_supported; + struct + { + void* ptr; + size_t size; + } hsa_memory_register; + struct + { + void* ptr; + size_t size; + } hsa_memory_deregister; + struct + { + hsa_region_t region; + size_t size; + void** ptr; + } hsa_memory_allocate; + struct + { + void* ptr; + } hsa_memory_free; + struct + { + void* dst; + const void* src; + size_t size; + } hsa_memory_copy; + struct + { + void* ptr; + hsa_agent_t agent; + hsa_access_permission_t access; + } hsa_memory_assign_agent; + struct + { + hsa_signal_value_t initial_value; + uint32_t num_consumers; + const hsa_agent_t* consumers; + hsa_signal_t* signal; + } hsa_signal_create; + struct + { + hsa_signal_t signal; + } hsa_signal_destroy; + struct + { + hsa_signal_t signal; + } hsa_signal_load_relaxed; + struct + { + hsa_signal_t signal; + } hsa_signal_load_scacquire; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_store_relaxed; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_store_screlease; + struct + { + hsa_signal_t signal; + hsa_signal_condition_t condition; + hsa_signal_value_t compare_value; + uint64_t timeout_hint; + hsa_wait_state_t wait_state_hint; + } hsa_signal_wait_relaxed; + struct + { + hsa_signal_t signal; + hsa_signal_condition_t condition; + hsa_signal_value_t compare_value; + uint64_t timeout_hint; + hsa_wait_state_t wait_state_hint; + } hsa_signal_wait_scacquire; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_and_relaxed; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_and_scacquire; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_and_screlease; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_and_scacq_screl; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_or_relaxed; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_or_scacquire; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_or_screlease; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_or_scacq_screl; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_xor_relaxed; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_xor_scacquire; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_xor_screlease; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_xor_scacq_screl; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_exchange_relaxed; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_exchange_scacquire; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_exchange_screlease; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_exchange_scacq_screl; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_add_relaxed; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_add_scacquire; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_add_screlease; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_add_scacq_screl; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_subtract_relaxed; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_subtract_scacquire; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_subtract_screlease; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_subtract_scacq_screl; + struct + { + hsa_signal_t signal; + hsa_signal_value_t expected; + hsa_signal_value_t value; + } hsa_signal_cas_relaxed; + struct + { + hsa_signal_t signal; + hsa_signal_value_t expected; + hsa_signal_value_t value; + } hsa_signal_cas_scacquire; + struct + { + hsa_signal_t signal; + hsa_signal_value_t expected; + hsa_signal_value_t value; + } hsa_signal_cas_screlease; + struct + { + hsa_signal_t signal; + hsa_signal_value_t expected; + hsa_signal_value_t value; + } hsa_signal_cas_scacq_screl; + struct + { + const char* name; + hsa_isa_t* isa; + } hsa_isa_from_name; + struct + { + hsa_isa_t isa; + hsa_isa_info_t attribute; + uint32_t index; + void* value; + } hsa_isa_get_info; + struct + { + hsa_isa_t code_object_isa; + hsa_isa_t agent_isa; + bool* result; + } hsa_isa_compatible; + struct + { + hsa_code_object_t code_object; + hsa_status_t (*alloc_callback)(size_t size, hsa_callback_data_t data, void** address); + hsa_callback_data_t callback_data; + const char* options; + void** serialized_code_object; + size_t* serialized_code_object_size; + } hsa_code_object_serialize; + struct + { + void* serialized_code_object; + size_t serialized_code_object_size; + const char* options; + hsa_code_object_t* code_object; + } hsa_code_object_deserialize; + struct + { + hsa_code_object_t code_object; + } hsa_code_object_destroy; + struct + { + hsa_code_object_t code_object; + hsa_code_object_info_t attribute; + void* value; + } hsa_code_object_get_info; + struct + { + hsa_code_object_t code_object; + const char* symbol_name; + hsa_code_symbol_t* symbol; + } hsa_code_object_get_symbol; + struct + { + hsa_code_symbol_t code_symbol; + hsa_code_symbol_info_t attribute; + void* value; + } hsa_code_symbol_get_info; + struct + { + hsa_code_object_t code_object; + hsa_status_t (*callback)(hsa_code_object_t code_object, + hsa_code_symbol_t symbol, + void* data); + void* data; + } hsa_code_object_iterate_symbols; + struct + { + hsa_profile_t profile; + hsa_executable_state_t executable_state; + const char* options; + hsa_executable_t* executable; + } hsa_executable_create; + struct + { + hsa_executable_t executable; + } hsa_executable_destroy; + struct + { + hsa_executable_t executable; + hsa_agent_t agent; + hsa_code_object_t code_object; + const char* options; + } hsa_executable_load_code_object; + struct + { + hsa_executable_t executable; + const char* options; + } hsa_executable_freeze; + struct + { + hsa_executable_t executable; + hsa_executable_info_t attribute; + void* value; + } hsa_executable_get_info; + struct + { + hsa_executable_t executable; + const char* variable_name; + void* address; + } hsa_executable_global_variable_define; + struct + { + hsa_executable_t executable; + hsa_agent_t agent; + const char* variable_name; + void* address; + } hsa_executable_agent_global_variable_define; + struct + { + hsa_executable_t executable; + hsa_agent_t agent; + const char* variable_name; + void* address; + } hsa_executable_readonly_variable_define; + struct + { + hsa_executable_t executable; + uint32_t* result; + } hsa_executable_validate; + struct + { + hsa_executable_t executable; + const char* module_name; + const char* symbol_name; + hsa_agent_t agent; + int32_t call_convention; + hsa_executable_symbol_t* symbol; + } hsa_executable_get_symbol; + struct + { + hsa_executable_symbol_t executable_symbol; + hsa_executable_symbol_info_t attribute; + void* value; + } hsa_executable_symbol_get_info; + struct + { + hsa_executable_t executable; + hsa_status_t (*callback)(hsa_executable_t exec, hsa_executable_symbol_t symbol, void* data); + void* data; + } hsa_executable_iterate_symbols; + struct + { + hsa_status_t status; + const char** status_string; + } hsa_status_string; + struct + { + uint16_t extension; + const char** name; + } hsa_extension_get_name; + struct + { + uint16_t extension; + uint16_t version_major; + uint16_t* version_minor; + bool* result; + } hsa_system_major_extension_supported; + struct + { + uint16_t extension; + uint16_t version_major; + size_t table_length; + void* table; + } hsa_system_get_major_extension_table; + struct + { + uint16_t extension; + hsa_agent_t agent; + uint16_t version_major; + uint16_t* version_minor; + bool* result; + } hsa_agent_major_extension_supported; + struct + { + hsa_cache_t cache; + hsa_cache_info_t attribute; + void* value; + } hsa_cache_get_info; + struct + { + hsa_agent_t agent; + hsa_status_t (*callback)(hsa_cache_t cache, void* data); + void* data; + } hsa_agent_iterate_caches; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_silent_store_relaxed; + struct + { + hsa_signal_t signal; + hsa_signal_value_t value; + } hsa_signal_silent_store_screlease; + struct + { + uint32_t num_signals; + const hsa_signal_t* signals; + uint32_t num_consumers; + const hsa_agent_t* consumers; + hsa_signal_group_t* signal_group; + } hsa_signal_group_create; + struct + { + hsa_signal_group_t signal_group; + } hsa_signal_group_destroy; + struct + { + hsa_signal_group_t signal_group; + const hsa_signal_condition_t* conditions; + const hsa_signal_value_t* compare_values; + hsa_wait_state_t wait_state_hint; + hsa_signal_t* signal; + hsa_signal_value_t* value; + } hsa_signal_group_wait_any_scacquire; + struct + { + hsa_signal_group_t signal_group; + const hsa_signal_condition_t* conditions; + const hsa_signal_value_t* compare_values; + hsa_wait_state_t wait_state_hint; + hsa_signal_t* signal; + hsa_signal_value_t* value; + } hsa_signal_group_wait_any_relaxed; + struct + { + hsa_agent_t agent; + hsa_status_t (*callback)(hsa_isa_t isa, void* data); + void* data; + } hsa_agent_iterate_isas; + struct + { + hsa_isa_t isa; + hsa_isa_info_t attribute; + void* value; + } hsa_isa_get_info_alt; + struct + { + hsa_isa_t isa; + hsa_profile_t profile; + uint16_t* mask; + } hsa_isa_get_exception_policies; + struct + { + hsa_isa_t isa; + hsa_fp_type_t fp_type; + hsa_flush_mode_t flush_mode; + hsa_round_method_t* round_method; + } hsa_isa_get_round_method; + struct + { + hsa_wavefront_t wavefront; + hsa_wavefront_info_t attribute; + void* value; + } hsa_wavefront_get_info; + struct + { + hsa_isa_t isa; + hsa_status_t (*callback)(hsa_wavefront_t wavefront, void* data); + void* data; + } hsa_isa_iterate_wavefronts; + struct + { + hsa_code_object_t code_object; + const char* module_name; + const char* symbol_name; + hsa_code_symbol_t* symbol; + } hsa_code_object_get_symbol_from_name; + struct + { + hsa_file_t file; + hsa_code_object_reader_t* code_object_reader; + } hsa_code_object_reader_create_from_file; + struct + { + const void* code_object; + size_t size; + hsa_code_object_reader_t* code_object_reader; + } hsa_code_object_reader_create_from_memory; + struct + { + hsa_code_object_reader_t code_object_reader; + } hsa_code_object_reader_destroy; + struct + { + hsa_profile_t profile; + hsa_default_float_rounding_mode_t default_float_rounding_mode; + const char* options; + hsa_executable_t* executable; + } hsa_executable_create_alt; + struct + { + hsa_executable_t executable; + hsa_code_object_reader_t code_object_reader; + const char* options; + hsa_loaded_code_object_t* loaded_code_object; + } hsa_executable_load_program_code_object; + struct + { + hsa_executable_t executable; + hsa_agent_t agent; + hsa_code_object_reader_t code_object_reader; + const char* options; + hsa_loaded_code_object_t* loaded_code_object; + } hsa_executable_load_agent_code_object; + struct + { + hsa_executable_t executable; + const char* options; + uint32_t* result; + } hsa_executable_validate_alt; + struct + { + hsa_executable_t executable; + const char* symbol_name; + const hsa_agent_t* agent; + hsa_executable_symbol_t* symbol; + } hsa_executable_get_symbol_by_name; + struct + { + hsa_executable_t executable; + hsa_agent_t agent; + hsa_status_t (*callback)(hsa_executable_t exec, + hsa_agent_t agent, + hsa_executable_symbol_t symbol, + void* data); + void* data; + } hsa_executable_iterate_agent_symbols; + struct + { + hsa_executable_t executable; + hsa_status_t (*callback)(hsa_executable_t exec, hsa_executable_symbol_t symbol, void* data); + void* data; + } hsa_executable_iterate_program_symbols; + + // block: AmdExt API + struct + { + hsa_agent_t agent; + hsa_amd_coherency_type_t* type; + } hsa_amd_coherency_get_type; + struct + { + hsa_agent_t agent; + hsa_amd_coherency_type_t type; + } hsa_amd_coherency_set_type; + struct + { + hsa_queue_t* queue; + int enable; + } hsa_amd_profiling_set_profiler_enabled; + struct + { + bool enable; + } hsa_amd_profiling_async_copy_enable; + struct + { + hsa_agent_t agent; + hsa_signal_t signal; + hsa_amd_profiling_dispatch_time_t* time; + } hsa_amd_profiling_get_dispatch_time; + struct + { + hsa_signal_t signal; + hsa_amd_profiling_async_copy_time_t* time; + } hsa_amd_profiling_get_async_copy_time; + struct + { + hsa_agent_t agent; + uint64_t agent_tick; + uint64_t* system_tick; + } hsa_amd_profiling_convert_tick_to_system_domain; + struct + { + hsa_signal_t signal; + hsa_signal_condition_t cond; + hsa_signal_value_t value; + hsa_amd_signal_handler handler; + void* arg; + } hsa_amd_signal_async_handler; + struct + { + void (*callback)(void* arg); + void* arg; + } hsa_amd_async_function; + struct + { + uint32_t signal_count; + hsa_signal_t* signals; + hsa_signal_condition_t* conds; + hsa_signal_value_t* values; + uint64_t timeout_hint; + hsa_wait_state_t wait_hint; + hsa_signal_value_t* satisfying_value; + } hsa_amd_signal_wait_any; + struct + { + const hsa_queue_t* queue; + uint32_t num_cu_mask_count; + const uint32_t* cu_mask; + } hsa_amd_queue_cu_set_mask; + struct + { + hsa_amd_memory_pool_t memory_pool; + hsa_amd_memory_pool_info_t attribute; + void* value; + } hsa_amd_memory_pool_get_info; + struct + { + hsa_agent_t agent; + hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data); + void* data; + } hsa_amd_agent_iterate_memory_pools; + struct + { + hsa_amd_memory_pool_t memory_pool; + size_t size; + uint32_t flags; + void** ptr; + } hsa_amd_memory_pool_allocate; + struct + { + void* ptr; + } hsa_amd_memory_pool_free; + struct + { + void* dst; + hsa_agent_t dst_agent; + const void* src; + hsa_agent_t src_agent; + size_t size; + uint32_t num_dep_signals; + const hsa_signal_t* dep_signals; + hsa_signal_t completion_signal; + } hsa_amd_memory_async_copy; + struct + { + void* dst; + hsa_agent_t dst_agent; + const void* src; + hsa_agent_t src_agent; + size_t size; + uint32_t num_dep_signals; + const hsa_signal_t* dep_signals; + hsa_signal_t completion_signal; + hsa_amd_sdma_engine_id_t engine_id; + bool force_copy_on_sdma; + } hsa_amd_memory_async_copy_on_engine; + struct + { + hsa_agent_t dst_agent; + hsa_agent_t src_agent; + uint32_t* engine_ids_mask; + } hsa_amd_memory_copy_engine_status; + struct + { + hsa_agent_t agent; + hsa_amd_memory_pool_t memory_pool; + hsa_amd_agent_memory_pool_info_t attribute; + void* value; + } hsa_amd_agent_memory_pool_get_info; + struct + { + uint32_t num_agents; + const hsa_agent_t* agents; + const uint32_t* flags; + const void* ptr; + } hsa_amd_agents_allow_access; + struct + { + hsa_amd_memory_pool_t src_memory_pool; + hsa_amd_memory_pool_t dst_memory_pool; + bool* result; + } hsa_amd_memory_pool_can_migrate; + struct + { + const void* ptr; + hsa_amd_memory_pool_t memory_pool; + uint32_t flags; + } hsa_amd_memory_migrate; + struct + { + void* host_ptr; + size_t size; + hsa_agent_t* agents; + int num_agent; + void** agent_ptr; + } hsa_amd_memory_lock; + struct + { + void* host_ptr; + } hsa_amd_memory_unlock; + struct + { + void* ptr; + uint32_t value; + size_t count; + } hsa_amd_memory_fill; + struct + { + uint32_t num_agents; + hsa_agent_t* agents; + int interop_handle; + uint32_t flags; + size_t* size; + void** ptr; + size_t* metadata_size; + const void** metadata; + } hsa_amd_interop_map_buffer; + struct + { + void* ptr; + } hsa_amd_interop_unmap_buffer; + struct + { + hsa_agent_t agent; + const hsa_ext_image_descriptor_t* image_descriptor; + const hsa_amd_image_descriptor_t* image_layout; + const void* image_data; + hsa_access_permission_t access_permission; + hsa_ext_image_t* image; + } hsa_amd_image_create; + struct + { + const void* ptr; + hsa_amd_pointer_info_t* info; + void* (*alloc)(size_t); + uint32_t* num_agents_accessible; + hsa_agent_t** accessible; + } hsa_amd_pointer_info; + struct + { + const void* ptr; + void* userdata; + } hsa_amd_pointer_info_set_userdata; + struct + { + void* ptr; + size_t len; + hsa_amd_ipc_memory_t* handle; + } hsa_amd_ipc_memory_create; + struct + { + const hsa_amd_ipc_memory_t* handle; + size_t len; + uint32_t num_agents; + const hsa_agent_t* mapping_agents; + void** mapped_ptr; + } hsa_amd_ipc_memory_attach; + struct + { + void* mapped_ptr; + } hsa_amd_ipc_memory_detach; + struct + { + hsa_signal_value_t initial_value; + uint32_t num_consumers; + const hsa_agent_t* consumers; + uint64_t attributes; + hsa_signal_t* signal; + } hsa_amd_signal_create; + struct + { + hsa_signal_t signal; + hsa_amd_ipc_signal_t* handle; + } hsa_amd_ipc_signal_create; + struct + { + const hsa_amd_ipc_signal_t* handle; + hsa_signal_t* signal; + } hsa_amd_ipc_signal_attach; + struct + { + hsa_amd_system_event_callback_t callback; + void* data; + } hsa_amd_register_system_event_handler; + struct + { + hsa_agent_t agent_handle; + uint32_t size; + hsa_queue_type32_t type; + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data); + void* data; + uint32_t private_segment_size; + uint32_t group_segment_size; + hsa_queue_t** queue; + } hsa_amd_queue_intercept_create; + struct + { + hsa_queue_t* queue; + hsa_amd_queue_intercept_handler callback; + void* user_data; + } hsa_amd_queue_intercept_register; + struct + { + hsa_queue_t* queue; + hsa_amd_queue_priority_t priority; + } hsa_amd_queue_set_priority; + struct + { + const hsa_pitched_ptr_t* dst; + const hsa_dim3_t* dst_offset; + const hsa_pitched_ptr_t* src; + const hsa_dim3_t* src_offset; + const hsa_dim3_t* range; + hsa_dim3_t range__val; + hsa_agent_t copy_agent; + hsa_amd_copy_direction_t dir; + uint32_t num_dep_signals; + const hsa_signal_t* dep_signals; + hsa_signal_t completion_signal; + } hsa_amd_memory_async_copy_rect; + struct + { + hsa_amd_runtime_queue_notifier callback; + void* user_data; + } hsa_amd_runtime_queue_create_register; + struct + { + void* host_ptr; + size_t size; + hsa_agent_t* agents; + int num_agent; + hsa_amd_memory_pool_t pool; + uint32_t flags; + void** agent_ptr; + } hsa_amd_memory_lock_to_pool; + struct + { + void* ptr; + hsa_amd_deallocation_callback_t callback; + void* user_data; + } hsa_amd_register_deallocation_callback; + struct + { + void* ptr; + hsa_amd_deallocation_callback_t callback; + } hsa_amd_deregister_deallocation_callback; + struct + { + hsa_signal_t signal; + volatile hsa_signal_value_t** value_ptr; + } hsa_amd_signal_value_pointer; + struct + { + void* ptr; + size_t size; + hsa_amd_svm_attribute_pair_t* attribute_list; + size_t attribute_count; + } hsa_amd_svm_attributes_set; + struct + { + void* ptr; + size_t size; + hsa_amd_svm_attribute_pair_t* attribute_list; + size_t attribute_count; + } hsa_amd_svm_attributes_get; + struct + { + void* ptr; + size_t size; + hsa_agent_t agent; + uint32_t num_dep_signals; + const hsa_signal_t* dep_signals; + hsa_signal_t completion_signal; + } hsa_amd_svm_prefetch_async; + struct + { + hsa_agent_t preferred_agent; + } hsa_amd_spm_acquire; + struct + { + hsa_agent_t preferred_agent; + } hsa_amd_spm_release; + struct + { + hsa_agent_t preferred_agent; + size_t size_in_bytes; + uint32_t* timeout; + uint32_t* size_copied; + void* dest; + bool* is_data_loss; + } hsa_amd_spm_set_dest_buffer; + struct + { + const hsa_queue_t* queue; + uint32_t num_cu_mask_count; + uint32_t* cu_mask; + } hsa_amd_queue_cu_get_mask; + struct + { + const void* ptr; + size_t size; + int* dmabuf; + uint64_t* offset; + } hsa_amd_portable_export_dmabuf; + struct + { + int dmabuf; + } hsa_amd_portable_close_dmabuf; + + // block: ImageExt API + struct + { + hsa_agent_t agent; + hsa_ext_image_geometry_t geometry; + const hsa_ext_image_format_t* image_format; + uint32_t* capability_mask; + } hsa_ext_image_get_capability; + struct + { + hsa_agent_t agent; + const hsa_ext_image_descriptor_t* image_descriptor; + hsa_access_permission_t access_permission; + hsa_ext_image_data_info_t* image_data_info; + } hsa_ext_image_data_get_info; + struct + { + hsa_agent_t agent; + const hsa_ext_image_descriptor_t* image_descriptor; + const void* image_data; + hsa_access_permission_t access_permission; + hsa_ext_image_t* image; + } hsa_ext_image_create; + struct + { + hsa_agent_t agent; + const void* src_memory; + size_t src_row_pitch; + size_t src_slice_pitch; + hsa_ext_image_t dst_image; + const hsa_ext_image_region_t* image_region; + } hsa_ext_image_import; + struct + { + hsa_agent_t agent; + hsa_ext_image_t src_image; + void* dst_memory; + size_t dst_row_pitch; + size_t dst_slice_pitch; + const hsa_ext_image_region_t* image_region; + } hsa_ext_image_export; + struct + { + hsa_agent_t agent; + hsa_ext_image_t src_image; + const hsa_dim3_t* src_offset; + hsa_ext_image_t dst_image; + const hsa_dim3_t* dst_offset; + const hsa_dim3_t* range; + } hsa_ext_image_copy; + struct + { + hsa_agent_t agent; + hsa_ext_image_t image; + const void* data; + const hsa_ext_image_region_t* image_region; + } hsa_ext_image_clear; + struct + { + hsa_agent_t agent; + hsa_ext_image_t image; + } hsa_ext_image_destroy; + struct + { + hsa_agent_t agent; + const hsa_ext_sampler_descriptor_t* sampler_descriptor; + hsa_ext_sampler_t* sampler; + } hsa_ext_sampler_create; + struct + { + hsa_agent_t agent; + hsa_ext_sampler_t sampler; + } hsa_ext_sampler_destroy; + struct + { + hsa_agent_t agent; + hsa_ext_image_geometry_t geometry; + const hsa_ext_image_format_t* image_format; + hsa_ext_image_data_layout_t image_data_layout; + uint32_t* capability_mask; + } hsa_ext_image_get_capability_with_layout; + struct + { + hsa_agent_t agent; + const hsa_ext_image_descriptor_t* image_descriptor; + hsa_access_permission_t access_permission; + hsa_ext_image_data_layout_t image_data_layout; + size_t image_data_row_pitch; + size_t image_data_slice_pitch; + hsa_ext_image_data_info_t* image_data_info; + } hsa_ext_image_data_get_info_with_layout; + struct + { + hsa_agent_t agent; + const hsa_ext_image_descriptor_t* image_descriptor; + const void* image_data; + hsa_access_permission_t access_permission; + hsa_ext_image_data_layout_t image_data_layout; + size_t image_data_row_pitch; + size_t image_data_slice_pitch; + hsa_ext_image_t* image; + } hsa_ext_image_create_with_layout; +} rocprofiler_hsa_api_args_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/api_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/api_id.h new file mode 100644 index 0000000000..c833964fa6 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/api_id.h @@ -0,0 +1,227 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +// NOLINTNEXTLINE(performance-enum-size) +typedef enum +{ + ROCPROFILER_HSA_API_ID_NONE = -1, + // block: CoreApi API + ROCPROFILER_HSA_API_ID_hsa_init = 0, + ROCPROFILER_HSA_API_ID_hsa_shut_down, + ROCPROFILER_HSA_API_ID_hsa_system_get_info, + ROCPROFILER_HSA_API_ID_hsa_system_extension_supported, + ROCPROFILER_HSA_API_ID_hsa_system_get_extension_table, + ROCPROFILER_HSA_API_ID_hsa_iterate_agents, + ROCPROFILER_HSA_API_ID_hsa_agent_get_info, + ROCPROFILER_HSA_API_ID_hsa_queue_create, + ROCPROFILER_HSA_API_ID_hsa_soft_queue_create, + ROCPROFILER_HSA_API_ID_hsa_queue_destroy, + ROCPROFILER_HSA_API_ID_hsa_queue_inactivate, + ROCPROFILER_HSA_API_ID_hsa_queue_load_read_index_scacquire, + ROCPROFILER_HSA_API_ID_hsa_queue_load_read_index_relaxed, + ROCPROFILER_HSA_API_ID_hsa_queue_load_write_index_scacquire, + ROCPROFILER_HSA_API_ID_hsa_queue_load_write_index_relaxed, + ROCPROFILER_HSA_API_ID_hsa_queue_store_write_index_relaxed, + ROCPROFILER_HSA_API_ID_hsa_queue_store_write_index_screlease, + ROCPROFILER_HSA_API_ID_hsa_queue_cas_write_index_scacq_screl, + ROCPROFILER_HSA_API_ID_hsa_queue_cas_write_index_scacquire, + ROCPROFILER_HSA_API_ID_hsa_queue_cas_write_index_relaxed, + ROCPROFILER_HSA_API_ID_hsa_queue_cas_write_index_screlease, + ROCPROFILER_HSA_API_ID_hsa_queue_add_write_index_scacq_screl, + ROCPROFILER_HSA_API_ID_hsa_queue_add_write_index_scacquire, + ROCPROFILER_HSA_API_ID_hsa_queue_add_write_index_relaxed, + ROCPROFILER_HSA_API_ID_hsa_queue_add_write_index_screlease, + ROCPROFILER_HSA_API_ID_hsa_queue_store_read_index_relaxed, + ROCPROFILER_HSA_API_ID_hsa_queue_store_read_index_screlease, + ROCPROFILER_HSA_API_ID_hsa_agent_iterate_regions, + ROCPROFILER_HSA_API_ID_hsa_region_get_info, + ROCPROFILER_HSA_API_ID_hsa_agent_get_exception_policies, + ROCPROFILER_HSA_API_ID_hsa_agent_extension_supported, + ROCPROFILER_HSA_API_ID_hsa_memory_register, + ROCPROFILER_HSA_API_ID_hsa_memory_deregister, + ROCPROFILER_HSA_API_ID_hsa_memory_allocate, + ROCPROFILER_HSA_API_ID_hsa_memory_free, + ROCPROFILER_HSA_API_ID_hsa_memory_copy, + ROCPROFILER_HSA_API_ID_hsa_memory_assign_agent, + ROCPROFILER_HSA_API_ID_hsa_signal_create, + ROCPROFILER_HSA_API_ID_hsa_signal_destroy, + ROCPROFILER_HSA_API_ID_hsa_signal_load_relaxed, + ROCPROFILER_HSA_API_ID_hsa_signal_load_scacquire, + ROCPROFILER_HSA_API_ID_hsa_signal_store_relaxed, + ROCPROFILER_HSA_API_ID_hsa_signal_store_screlease, + ROCPROFILER_HSA_API_ID_hsa_signal_wait_relaxed, + ROCPROFILER_HSA_API_ID_hsa_signal_wait_scacquire, + ROCPROFILER_HSA_API_ID_hsa_signal_and_relaxed, + ROCPROFILER_HSA_API_ID_hsa_signal_and_scacquire, + ROCPROFILER_HSA_API_ID_hsa_signal_and_screlease, + ROCPROFILER_HSA_API_ID_hsa_signal_and_scacq_screl, + ROCPROFILER_HSA_API_ID_hsa_signal_or_relaxed, + ROCPROFILER_HSA_API_ID_hsa_signal_or_scacquire, + ROCPROFILER_HSA_API_ID_hsa_signal_or_screlease, + ROCPROFILER_HSA_API_ID_hsa_signal_or_scacq_screl, + ROCPROFILER_HSA_API_ID_hsa_signal_xor_relaxed, + ROCPROFILER_HSA_API_ID_hsa_signal_xor_scacquire, + ROCPROFILER_HSA_API_ID_hsa_signal_xor_screlease, + ROCPROFILER_HSA_API_ID_hsa_signal_xor_scacq_screl, + ROCPROFILER_HSA_API_ID_hsa_signal_exchange_relaxed, + ROCPROFILER_HSA_API_ID_hsa_signal_exchange_scacquire, + ROCPROFILER_HSA_API_ID_hsa_signal_exchange_screlease, + ROCPROFILER_HSA_API_ID_hsa_signal_exchange_scacq_screl, + ROCPROFILER_HSA_API_ID_hsa_signal_add_relaxed, + ROCPROFILER_HSA_API_ID_hsa_signal_add_scacquire, + ROCPROFILER_HSA_API_ID_hsa_signal_add_screlease, + ROCPROFILER_HSA_API_ID_hsa_signal_add_scacq_screl, + ROCPROFILER_HSA_API_ID_hsa_signal_subtract_relaxed, + ROCPROFILER_HSA_API_ID_hsa_signal_subtract_scacquire, + ROCPROFILER_HSA_API_ID_hsa_signal_subtract_screlease, + ROCPROFILER_HSA_API_ID_hsa_signal_subtract_scacq_screl, + ROCPROFILER_HSA_API_ID_hsa_signal_cas_relaxed, + ROCPROFILER_HSA_API_ID_hsa_signal_cas_scacquire, + ROCPROFILER_HSA_API_ID_hsa_signal_cas_screlease, + ROCPROFILER_HSA_API_ID_hsa_signal_cas_scacq_screl, + ROCPROFILER_HSA_API_ID_hsa_isa_from_name, + ROCPROFILER_HSA_API_ID_hsa_isa_get_info, + ROCPROFILER_HSA_API_ID_hsa_isa_compatible, + ROCPROFILER_HSA_API_ID_hsa_code_object_serialize, + ROCPROFILER_HSA_API_ID_hsa_code_object_deserialize, + ROCPROFILER_HSA_API_ID_hsa_code_object_destroy, + ROCPROFILER_HSA_API_ID_hsa_code_object_get_info, + ROCPROFILER_HSA_API_ID_hsa_code_object_get_symbol, + ROCPROFILER_HSA_API_ID_hsa_code_symbol_get_info, + ROCPROFILER_HSA_API_ID_hsa_code_object_iterate_symbols, + ROCPROFILER_HSA_API_ID_hsa_executable_create, + ROCPROFILER_HSA_API_ID_hsa_executable_destroy, + ROCPROFILER_HSA_API_ID_hsa_executable_load_code_object, + ROCPROFILER_HSA_API_ID_hsa_executable_freeze, + ROCPROFILER_HSA_API_ID_hsa_executable_get_info, + ROCPROFILER_HSA_API_ID_hsa_executable_global_variable_define, + ROCPROFILER_HSA_API_ID_hsa_executable_agent_global_variable_define, + ROCPROFILER_HSA_API_ID_hsa_executable_readonly_variable_define, + ROCPROFILER_HSA_API_ID_hsa_executable_validate, + ROCPROFILER_HSA_API_ID_hsa_executable_get_symbol, + ROCPROFILER_HSA_API_ID_hsa_executable_symbol_get_info, + ROCPROFILER_HSA_API_ID_hsa_executable_iterate_symbols, + ROCPROFILER_HSA_API_ID_hsa_status_string, + ROCPROFILER_HSA_API_ID_hsa_extension_get_name, + ROCPROFILER_HSA_API_ID_hsa_system_major_extension_supported, + ROCPROFILER_HSA_API_ID_hsa_system_get_major_extension_table, + ROCPROFILER_HSA_API_ID_hsa_agent_major_extension_supported, + ROCPROFILER_HSA_API_ID_hsa_cache_get_info, + ROCPROFILER_HSA_API_ID_hsa_agent_iterate_caches, + ROCPROFILER_HSA_API_ID_hsa_signal_silent_store_relaxed, + ROCPROFILER_HSA_API_ID_hsa_signal_silent_store_screlease, + ROCPROFILER_HSA_API_ID_hsa_signal_group_create, + ROCPROFILER_HSA_API_ID_hsa_signal_group_destroy, + ROCPROFILER_HSA_API_ID_hsa_signal_group_wait_any_scacquire, + ROCPROFILER_HSA_API_ID_hsa_signal_group_wait_any_relaxed, + ROCPROFILER_HSA_API_ID_hsa_agent_iterate_isas, + ROCPROFILER_HSA_API_ID_hsa_isa_get_info_alt, + ROCPROFILER_HSA_API_ID_hsa_isa_get_exception_policies, + ROCPROFILER_HSA_API_ID_hsa_isa_get_round_method, + ROCPROFILER_HSA_API_ID_hsa_wavefront_get_info, + ROCPROFILER_HSA_API_ID_hsa_isa_iterate_wavefronts, + ROCPROFILER_HSA_API_ID_hsa_code_object_get_symbol_from_name, + ROCPROFILER_HSA_API_ID_hsa_code_object_reader_create_from_file, + ROCPROFILER_HSA_API_ID_hsa_code_object_reader_create_from_memory, + ROCPROFILER_HSA_API_ID_hsa_code_object_reader_destroy, + ROCPROFILER_HSA_API_ID_hsa_executable_create_alt, + ROCPROFILER_HSA_API_ID_hsa_executable_load_program_code_object, + ROCPROFILER_HSA_API_ID_hsa_executable_load_agent_code_object, + ROCPROFILER_HSA_API_ID_hsa_executable_validate_alt, + ROCPROFILER_HSA_API_ID_hsa_executable_get_symbol_by_name, + ROCPROFILER_HSA_API_ID_hsa_executable_iterate_agent_symbols, + ROCPROFILER_HSA_API_ID_hsa_executable_iterate_program_symbols, + + // block: AmdExt API + ROCPROFILER_HSA_API_ID_hsa_amd_coherency_get_type, + ROCPROFILER_HSA_API_ID_hsa_amd_coherency_set_type, + ROCPROFILER_HSA_API_ID_hsa_amd_profiling_set_profiler_enabled, + ROCPROFILER_HSA_API_ID_hsa_amd_profiling_async_copy_enable, + ROCPROFILER_HSA_API_ID_hsa_amd_profiling_get_dispatch_time, + ROCPROFILER_HSA_API_ID_hsa_amd_profiling_get_async_copy_time, + ROCPROFILER_HSA_API_ID_hsa_amd_profiling_convert_tick_to_system_domain, + ROCPROFILER_HSA_API_ID_hsa_amd_signal_async_handler, + ROCPROFILER_HSA_API_ID_hsa_amd_async_function, + ROCPROFILER_HSA_API_ID_hsa_amd_signal_wait_any, + ROCPROFILER_HSA_API_ID_hsa_amd_queue_cu_set_mask, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_pool_get_info, + ROCPROFILER_HSA_API_ID_hsa_amd_agent_iterate_memory_pools, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_pool_allocate, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_pool_free, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_async_copy, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_async_copy_on_engine, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_copy_engine_status, + ROCPROFILER_HSA_API_ID_hsa_amd_agent_memory_pool_get_info, + ROCPROFILER_HSA_API_ID_hsa_amd_agents_allow_access, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_pool_can_migrate, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_migrate, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_lock, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_unlock, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_fill, + ROCPROFILER_HSA_API_ID_hsa_amd_interop_map_buffer, + ROCPROFILER_HSA_API_ID_hsa_amd_interop_unmap_buffer, + ROCPROFILER_HSA_API_ID_hsa_amd_image_create, + ROCPROFILER_HSA_API_ID_hsa_amd_pointer_info, + ROCPROFILER_HSA_API_ID_hsa_amd_pointer_info_set_userdata, + ROCPROFILER_HSA_API_ID_hsa_amd_ipc_memory_create, + ROCPROFILER_HSA_API_ID_hsa_amd_ipc_memory_attach, + ROCPROFILER_HSA_API_ID_hsa_amd_ipc_memory_detach, + ROCPROFILER_HSA_API_ID_hsa_amd_signal_create, + ROCPROFILER_HSA_API_ID_hsa_amd_ipc_signal_create, + ROCPROFILER_HSA_API_ID_hsa_amd_ipc_signal_attach, + ROCPROFILER_HSA_API_ID_hsa_amd_register_system_event_handler, + ROCPROFILER_HSA_API_ID_hsa_amd_queue_intercept_create, + ROCPROFILER_HSA_API_ID_hsa_amd_queue_intercept_register, + ROCPROFILER_HSA_API_ID_hsa_amd_queue_set_priority, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_async_copy_rect, + ROCPROFILER_HSA_API_ID_hsa_amd_runtime_queue_create_register, + ROCPROFILER_HSA_API_ID_hsa_amd_memory_lock_to_pool, + ROCPROFILER_HSA_API_ID_hsa_amd_register_deallocation_callback, + ROCPROFILER_HSA_API_ID_hsa_amd_deregister_deallocation_callback, + ROCPROFILER_HSA_API_ID_hsa_amd_signal_value_pointer, + ROCPROFILER_HSA_API_ID_hsa_amd_svm_attributes_set, + ROCPROFILER_HSA_API_ID_hsa_amd_svm_attributes_get, + ROCPROFILER_HSA_API_ID_hsa_amd_svm_prefetch_async, + ROCPROFILER_HSA_API_ID_hsa_amd_spm_acquire, + ROCPROFILER_HSA_API_ID_hsa_amd_spm_release, + ROCPROFILER_HSA_API_ID_hsa_amd_spm_set_dest_buffer, + ROCPROFILER_HSA_API_ID_hsa_amd_queue_cu_get_mask, + ROCPROFILER_HSA_API_ID_hsa_amd_portable_export_dmabuf, + ROCPROFILER_HSA_API_ID_hsa_amd_portable_close_dmabuf, + + // block: ImageExt API + ROCPROFILER_HSA_API_ID_hsa_ext_image_get_capability, + ROCPROFILER_HSA_API_ID_hsa_ext_image_data_get_info, + ROCPROFILER_HSA_API_ID_hsa_ext_image_create, + ROCPROFILER_HSA_API_ID_hsa_ext_image_import, + ROCPROFILER_HSA_API_ID_hsa_ext_image_export, + ROCPROFILER_HSA_API_ID_hsa_ext_image_copy, + ROCPROFILER_HSA_API_ID_hsa_ext_image_clear, + ROCPROFILER_HSA_API_ID_hsa_ext_image_destroy, + ROCPROFILER_HSA_API_ID_hsa_ext_sampler_create, + ROCPROFILER_HSA_API_ID_hsa_ext_sampler_destroy, + ROCPROFILER_HSA_API_ID_hsa_ext_image_get_capability_with_layout, + ROCPROFILER_HSA_API_ID_hsa_ext_image_data_get_info_with_layout, + ROCPROFILER_HSA_API_ID_hsa_ext_image_create_with_layout, + + ROCPROFILER_HSA_API_ID_LAST, +} rocprofiler_hsa_api_id_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/table_api_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/table_api_id.h new file mode 100644 index 0000000000..c3ce1cc1c1 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/hsa/table_api_id.h @@ -0,0 +1,31 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +// NOLINTNEXTLINE(performance-enum-size) +typedef enum +{ + ROCPROFILER_HSA_API_TABLE_ID_NONE = -1, + ROCPROFILER_HSA_API_TABLE_ID_CoreApi = 0, + ROCPROFILER_HSA_API_TABLE_ID_AmdExt, + ROCPROFILER_HSA_API_TABLE_ID_ImageExt, + ROCPROFILER_HSA_API_TABLE_ID_LAST, +} rocprofiler_hsa_table_api_id_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/marker.h b/projects/rocprofiler-sdk/source/include/rocprofiler/marker.h new file mode 100644 index 0000000000..432bfca574 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/marker.h @@ -0,0 +1,35 @@ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include + +#include + +typedef uint32_t rocprofiler_trace_record_marker_operation_kind_t; +typedef struct rocprofiler_roctx_api_data_s rocprofiler_roctx_api_data_t; + +struct rocprofiler_roctx_api_data_s +{ + rocprofiler_roctx_api_args_t args; +}; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/marker/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler/marker/CMakeLists.txt new file mode 100644 index 0000000000..457d7e7c3e --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/marker/CMakeLists.txt @@ -0,0 +1,9 @@ +# +# +# Installation of public HSA headers +# +# +set(ROCPROFILER_MARKER_HEADER_FILES api_args.h api_id.h) + +install(FILES ${ROCPROFILER_MARKER_HEADER_FILES} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler/marker) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/marker/api_args.h b/projects/rocprofiler-sdk/source/include/rocprofiler/marker/api_args.h new file mode 100644 index 0000000000..e3db2ed01e --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/marker/api_args.h @@ -0,0 +1,51 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +typedef uint64_t roctx_range_id_t; + +typedef union rocprofiler_roctx_api_args_u +{ + struct + { + const char* message; + } roctxMarkA; + struct + { + const char* message; + } roctxRangePushA; + struct + { + const char* message; + } roctxRangePop; + struct + { + const char* message; + roctx_range_id_t id; + } roctxRangeStartA; + struct + { + const char* message; + roctx_range_id_t id; + } roctxRangeStop; +} rocprofiler_roctx_api_args_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/marker/api_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler/marker/api_id.h new file mode 100644 index 0000000000..95cc8c00ba --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/marker/api_id.h @@ -0,0 +1,33 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +// NOLINTNEXTLINE(performance-enum-size) +typedef enum +{ + ROCPROFILER_ROCTX_API_ID_NONE = -1, + ROCPROFILER_ROCTX_API_ID_roctxMarkA = 0, + ROCPROFILER_ROCTX_API_ID_roctxRangePushA, + ROCPROFILER_ROCTX_API_ID_roctxRangePop, + ROCPROFILER_ROCTX_API_ID_roctxRangeStartA, + ROCPROFILER_ROCTX_API_ID_roctxRangeStop, + ROCPROFILER_ROCTX_API_ID_LAST, +} rocprofiler_roctx_api_id_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/rocprofiler.h b/projects/rocprofiler-sdk/source/include/rocprofiler/rocprofiler.h index 6b02e031e6..834f8e965f 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler/rocprofiler.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/rocprofiler.h @@ -1,220 +1,39 @@ -/****************************************************************************** -Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*******************************************************************************/ - -//////////////////////////////////////////////////////////////////////////////// +// MIT License // -// ROC Profiler API +// Copyright (c) 2023 ROCm Developer Tools // -// The goal of the implementation is to provide a HW specific low-level -// performance analysis interface for profiling of GPU compute applications. -// The profiling includes HW performance counters (PMC) with complex -// performance metrics and traces. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: // -// The library can be used by a tool library loaded by HSA runtime or by -// higher level HW independent performance analysis API like PAPI. +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. // -// The library is written on C and will be based on AQLprofile AMD specific -// HSA extension. The library implementation requires HSA API intercepting and -// a profiling queue supporting a submit callback interface. -// -// -/** \mainpage ROCProfiler API Specification - * - * \section introduction Introduction - * - * The ROCProfiler library provides GPU Applications Profiling/Tracing APIs. - * The API offers functionality for profiling GPU applications in kernel, - * application and user mode. It also supports no replay mode and provides - * the records pool support through a simple sequence of calls. This enables - * users to profile and trace in easy small steps. Our samples code provides - * good examples on how to use the API calls for both profiling and - * tracing. - * - * \section supported_amd_gpu_architectures Supported AMD GPU Architectures - * - * The following AMD GPU architectures are supported: - * - * - gfx900 (AMD Vega 10) - * - gfx906 (AMD Vega 7nm also referred to as AMD Vega 20) - * - gfx908 (AMD Instinct™ MI100 accelerator) - * - gfx90a (Aldebaran) - * - gfx940 (AMD Instinct™ MI300) - * - gfx1010 (Navi10) - * - gfx1011 (Navi12) - * - gfx1012 (Navi14) - * - gfx1030 (Sienna Cichlid) - * - gfx1031 (Navy Flounder) - * - gfx1032 (Dimgrey Cavefish) - * - gfx1100 (Navi31) - * For more information about the AMD ROCm ecosystem, please refer to: - * - * - https://docs.amd.com/ - * -* - * \section known_limitations Known Limitations and Restrictions - * - * The AMD Profiler API library implementation currently has the following - * restrictions. Future releases aim to address these restrictions. - * - * 1. The following profiling modes are not yet implemented: - * - * - ::ROCPROFILER_APPLICATION_REPLAY_MODE - * - ::ROCPROFILER_USER_REPLAY_MODE - * - * 2. While setting filters, properties can mix up and may produce - * undesirable results. - * - * \section references References - * - * 1. Advanced Micro Devices: [www.amd.com] (https://www.amd.com/) - * 2. AMD ROCm Ecosystem: [docs.amd.com] (https://docs.amd.com/) - * - * \section disclaimer Legal Disclaimer and Copyright Information - * - * AMD ROCm software is made available by Advanced Micro Devices, Inc. under - * the open source license identified in the top-level directory for the - * library in the repository on [Github.com](https://github.com/) (Portions of - * AMD ROCm software are licensed under MITx11 and UIL/NCSA. For more - * information on the license, review the \p license.txt in the top-level - * directory for the library on [Github.com](https://github.com/)). The - * additional terms and conditions below apply to your use of AMD ROCm - * technical documentation. - * - * ©2019-2023 Advanced Micro Devices, Inc. All rights reserved. - * - * The information presented in this document is for informational purposes - * only and may contain technical inaccuracies, omissions, and typographical - * errors. The information contained herein is subject to change and may be - * rendered inaccurate for many reasons, including but not limited to product - * and roadmap changes, component and motherboard version changes, new model - * and/or product releases, product differences between differing - * manufacturers, software changes, BIOS flashes, firmware upgrades, or the - * like. Any computer system has risks of security vulnerabilities that cannot - * be completely prevented or mitigated. AMD assumes no obligation to update - * or otherwise correct or revise this information. However, AMD reserves the - * right to revise this information and to make changes from time to time to - * the content hereof without obligation of AMD to notify any person of such - * revisions or changes. - * - * THIS INFORMATION IS PROVIDED "AS IS." AMD MAKES NO REPRESENTATIONS OR - * WARRANTIES WITH RESPECT TO THE CONTENTS HEREOF AND ASSUMES NO RESPONSIBILITY - * FOR ANY INACCURACIES, ERRORS, OR OMISSIONS THAT MAY APPEAR IN THIS - * INFORMATION. AMD SPECIFICALLY DISCLAIMS ANY IMPLIED WARRANTIES OF - * NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR ANY PARTICULAR PURPOSE. - * IN NO EVENT WILL AMD BE LIABLE TO ANY PERSON FOR ANY RELIANCE, DIRECT, - * INDIRECT, SPECIAL, OR OTHER CONSEQUENTIAL DAMAGES ARISING FROM THE USE OF - * ANY INFORMATION CONTAINED HEREIN, EVEN IF AMD IS EXPRESSLY ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGES. - * - * AMD, the AMD Arrow logo, AMD Instinct™, Radeon™, AMD ROCm™, and combinations - * thereof are trademarks of Advanced Micro Devices, Inc. Linux® is the - * registered trademark of Linus Torvalds in the U.S. and other countries. - * PCIe® is a registered trademark of PCI-SIG Corporation. Other product names - * used in this publication are for identification purposes only and may be - * trademarks of their respective companies. - * +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. - * This document is going to discuss the following: - * 1. @ref symbol_versions_group - * 2. @ref versioning_group - * 3. @ref status_codes_group - * 4. @ref rocprofiler_general_group - * 5. @ref timestamp_group - * 6. @ref generic_record_group - * - @ref record_agents_group - * - @ref record_queues_group - * - @ref record_kernels_group - * 7. @ref profiling_api_group - * - @ref profiling_api_counters_group - * 8. @ref tracing_api_group - * - @ref roctx_tracer_api_data_group - * - @ref hsa_tracer_api_data_group - * - @ref hip_tracer_api_data_group - * 9. @ref memory_storage_buffer_group - * 10. @ref sessions_handling_group - * - @ref session_filter_group - * - @ref session_range_group - * - @ref session_user_replay_pass_group - * 11. @ref device_profiling - * 12. @ref rocprofiler_plugins - */ -// -/** - * \file - * ROCPROFILER API interface. - */ -//////////////////////////////////////////////////////////////////////////////// - -#ifndef INC_ROCPROFILER_H_ -#define INC_ROCPROFILER_H_ - -/* Placeholder for calling convention and import/export macros */ -#if !defined(ROCPROFILER_CALL) -# define ROCPROFILER_CALL -#endif /* !defined (ROCPROFILER_CALL) */ - -#if !defined(ROCPROFILER_EXPORT_DECORATOR) -# if defined(__GNUC__) -# define ROCPROFILER_EXPORT_DECORATOR __attribute__((visibility("default"))) -# elif defined(_MSC_VER) -# define ROCPROFILER_EXPORT_DECORATOR __declspec(dllexport) -# endif /* defined (_MSC_VER) */ -#endif /* !defined (ROCPROFILER_EXPORT_DECORATOR) */ - -#if !defined(ROCPROFILER_IMPORT_DECORATOR) -# if defined(__GNUC__) -# define ROCPROFILER_IMPORT_DECORATOR -# elif defined(_MSC_VER) -# define ROCPROFILER_IMPORT_DECORATOR __declspec(dllimport) -# endif /* defined (_MSC_VER) */ -#endif /* !defined (ROCPROFILER_IMPORT_DECORATOR) */ - -#define ROCPROFILER_EXPORT ROCPROFILER_EXPORT_DECORATOR ROCPROFILER_CALL -#define ROCPROFILER_IMPORT ROCPROFILER_IMPORT_DECORATOR ROCPROFILER_CALL - -#if !defined(ROCPROFILER) -# if defined(ROCPROFILER_EXPORTS) -# define ROCPROFILER_API ROCPROFILER_EXPORT -# else /* !defined (ROCPROFILER_EXPORTS) */ -# define ROCPROFILER_API ROCPROFILER_IMPORT -# endif /* !defined (ROCPROFILER_EXPORTS) */ -#endif /* !defined (ROCPROFILER) */ +#pragma once #include #include -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** \defgroup symbol_versions_group Symbol Versions +/** @defgroup SYMBOL_VERSIONING_GROUP Symbol Versions * * The names used for the shared library versioned symbols. * * Every function is annotated with one of the version macros defined in this * section. Each macro specifies a corresponding symbol version string. After - * dynamically loading the shared library with \p dlopen, the address of each - * function can be obtained using \p dlsym with the name of the function and - * its corresponding symbol version string. An error will be reported by \p + * dynamically loading the shared library with @p dlopen, the address of each + * function can be obtained using @p dlsym with the name of the function and + * its corresponding symbol version string. An error will be reported by @p * dlvsym if the installed library does not support the version for the * function specified in this version of the interface. * @@ -222,2119 +41,1291 @@ extern "C" { */ /** - * The function was introduced in version 9.0 of the interface and has the - * symbol version string of ``"ROCPROFILER_9.0"``. + * The function was introduced in version 10.0 of the interface and has the + * symbol version string of ``"ROCPROFILER_10.0"``. */ -#define ROCPROFILER_VERSION_9_0 +#define ROCPROFILER_VERSION_10_0 /** @} */ -/** \defgroup versioning_group Library Versioning +/** @defgroup VERSIONING_GROUP Library Versioning * * Version information about the interface and the associated installed * library. * - * The semantic version of the interface following rules. A client + * The semantic version of the interface following semver.org rules. A context * that uses this interface is only compatible with the installed library if * the major version numbers match and the interface minor version number is * less than or equal to the installed library minor version number. + */ + +#include "rocprofiler/defines.h" +#include "rocprofiler/hip.h" +#include "rocprofiler/hsa.h" +#include "rocprofiler/marker.h" +#include "rocprofiler/version.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @fn void rocprofiler_get_version(uint32_t* major, uint32_t* minor, uint32_t* patch) + * @param [out] major The major version number is stored if non-NULL. + * @param [out] minor The minor version number is stored if non-NULL. + * @param [out] patch The patch version number is stored if non-NULL. + * @addtogroup VERSIONING_GROUP * + * @brief Query the version of the installed library. + * + * Return the version of the installed library. This can be used to check if + * it is compatible with this interface version. This function can be used + * even when the library is not initialized. + */ +void ROCPROFILER_API +rocprofiler_get_version(uint32_t* major, uint32_t* minor, uint32_t* patch) + ROCPROFILER_NONNULL(1, 2, 3); + +/** + * @defgroup STATUS_CODES Status codes * @{ */ /** - * The major version of the interface as a macro so it can be used by the - * preprocessor. - */ -#define ROCPROFILER_VERSION_MAJOR 9 - -/** - * The minor version of the interface as a macro so it can be used by the - * preprocessor. - */ -#define ROCPROFILER_VERSION_MINOR 0 - -/** - * Query the major version of the installed library. + * @brief Status codes. * - * Return the major version of the installed library. This can be used to - * check if it is compatible with this interface version. This function can be - * used even when the library is not initialized. - */ -ROCPROFILER_API uint32_t -rocprofiler_version_major(); - -/** - * Query the minor version of the installed library. - * - * Return the minor version of the installed library. This can be used to - * check if it is compatible with this interface version. This function can be - * used even when the library is not initialized. - */ -ROCPROFILER_API uint32_t -rocprofiler_version_minor(); - -/** @} */ - -// TODO(aelwazir): Fix them to use the new Error codes -/** \defgroup status_codes_group Status Codes - * - * Most operations return a status code to indicate success or error. - * - * @{ - */ - -/** - * ROCProfiler API status codes. */ typedef enum { - /** - * The function has executed successfully. - */ ROCPROFILER_STATUS_SUCCESS = 0, - /** - * A generic error has occurred. - */ - ROCPROFILER_STATUS_ERROR = -1, - /** - * ROCProfiler is already initialized. - */ - ROCPROFILER_STATUS_ERROR_ALREADY_INITIALIZED = -2, - /** - * ROCProfiler is not initialized. - */ - ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED = -3, - /** - * Missing Buffer for a session. - */ - ROCPROFILER_STATUS_ERROR_SESSION_MISSING_BUFFER = -4, - /** - * Timestamps can't be collected - */ - ROCPROFILER_STATUS_ERROR_TIMESTAMP_NOT_APPLICABLE = -5, - /** - * Agent is not found with given identifier. - */ - ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND = -6, - /** - * Agent information is missing for the given identifier - */ - ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING = -7, - /** - * Queue is not found for the given identifier. - */ - ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND = -8, - /** - * The requested information about the queue is not found. - */ - ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING = -9, - /** - * Kernel is not found with given identifier. - */ - ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND = -10, - /** - * The requested information about the kernel is not found. - */ - ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING = -11, - /** - * Counter is not found with the given identifier. - */ - ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND = -12, - /** - * The requested Counter information for the given kernel is missing. - */ - ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING = -13, - /** - * The requested Tracing API Data for the given data identifier is missing. - */ - ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND = -14, - /** - * The requested information for the tracing API Data is missing. - */ - ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING = -15, - /** - * The given Domain is incorrect. - */ - ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN = -16, - /** - * The requested Session given the session identifier is not found. - */ - ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND = -17, - /** - * The requested Session Buffer given the session identifier is corrupted or - * deleted. - */ - ROCPROFILER_STATUS_ERROR_CORRUPTED_SESSION_BUFFER = -18, - /** - * The requested record given the record identifier is corrupted or deleted. - */ - ROCPROFILER_STATUS_ERROR_RECORD_CORRUPTED = -19, - /** - * Incorrect Replay mode. - */ - ROCPROFILER_STATUS_ERROR_INCORRECT_REPLAY_MODE = -20, - /** - * Missing Filter for a session. - */ - ROCPROFILER_STATUS_ERROR_SESSION_MISSING_FILTER = -21, - /** - * The size given for the buffer is not applicable. - */ - ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE = -22, - /** - * Incorrect Flush interval. - */ - ROCPROFILER_STATUS_ERROR_INCORRECT_FLUSH_INTERVAL = -23, - /** - * The session filter can't accept the given data. - */ - ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH = -24, - /** - * The given filter data is corrupted. - */ - ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED = -25, - /** - * The given label is corrupted. - */ - ROCPROFILER_STATUS_ERROR_CORRUPTED_LABEL_DATA = -26, - /** - * There is no label in the labels stack to be popped. - */ - ROCPROFILER_STATUS_ERROR_RANGE_STACK_IS_EMPTY = -27, - /** - * There is no pass that started. - */ - ROCPROFILER_STATUS_ERROR_PASS_NOT_STARTED = -28, - /** - * There is already Active session, Can't activate two session at the same - * time - */ - ROCPROFILER_STATUS_ERROR_HAS_ACTIVE_SESSION = -29, - /** - * Can't terminate a non active session - */ - ROCPROFILER_STATUS_ERROR_SESSION_NOT_ACTIVE = -30, - /** - * The required filter is not found for the given session - */ - ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND = -31, - /** - * The required buffer is not found for the given session - */ - ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND = -32, - /** - * The required Filter is not supported - */ - ROCPROFILER_STATUS_ERROR_FILTER_NOT_SUPPORTED = -33, - /** - * Invalid Arguments were given to the function - */ - ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENTS = -34, - /** - * The given operation id is not valid. - */ - ROCPROFILER_STATUS_ERROR_INVALID_OPERATION_ID = -35, - /** - * The given domain id is not valid. - */ - ROCPROFILER_STATUS_ERROR_INVALID_DOMAIN_ID = -36, - /** - * The feature requested is not implemented. - */ - ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED = -37, - /** - * External Correlation id pop called without matching push. - */ - ROCPROFILER_STATUS_ERROR_MISMATCHED_EXTERNAL_CORRELATION_ID = -38, + ROCPROFILER_STATUS_ERROR, + ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND, + ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND, + ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN, + ROCPROFILER_STATUS_ERROR_INVALID_DOMAIN_ID, + ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND, + ROCPROFILER_STATUS_ERROR_HAS_ACTIVE_CONTEXT, + ROCPROFILER_STATUS_ERROR_INVALID_OPERATION_ID, + ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_ACTIVE, + ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED, + ROCPROFILER_STATUS_LAST, } rocprofiler_status_t; -/** - * Query the textual description of the given error for the current thread. - * - * Returns a NULL terminated string describing the error of the given ROCProfiler - * API call by the calling thread that did not return success. - * - * \retval Return the error string. - */ -ROCPROFILER_API const char* -rocprofiler_error_str(rocprofiler_status_t status) ROCPROFILER_VERSION_9_0; - /** @} */ -/** \defgroup rocprofiler_general_group General ROCProfiler Requirements +/** + * @defgroup CONTEXT_OPERATIONS Context * @{ */ -// TODO(aelwazir): More clear description, (think about nested!!??) - /** - * Initialize the API Tools + * @brief Context ID. * - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_API_ALREADY_INITIALIZED If initialize - * wasn't called or finalized called twice - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_initialize() ROCPROFILER_VERSION_9_0; - -/** - * Finalize the API Tools - * - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_API_NOT_INITIALIZED If initialize wasn't - * called or finalized called twice - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_finalize() ROCPROFILER_VERSION_9_0; - -/** - * \addtogroup sessions_handling_group - * @{ - * ROCProfiler Session Modes. - */ - -/** - * Session Identifier */ typedef struct { - /** - * Session Identifier to get the session or to be used to call any API that - * needs to deal with a specific session - */ uint64_t handle; -} rocprofiler_session_id_t; +} rocprofiler_context_id_t; + +/** + * The NULL Context handle. + */ +#define ROCPROFILER_CONTEXT_NONE ROCPROFILER_HANDLE_LITERAL(rocprofiler_context_id_t, 0) + +/** + * @brief Create context. + * + * @param context_id [out] Context identifier + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_create_context(rocprofiler_context_id_t* context_id) ROCPROFILER_NONNULL(1); + +/** + * @brief Start context. + * + * @param [in] context_id + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_start_context(rocprofiler_context_id_t context_id); + +/** + * @brief Stop context. + * + * @param [in] context_id + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_stop_context(rocprofiler_context_id_t context_id); /** @} */ -/** @} */ - -/** \defgroup timestamp_group Timestamp Operations - * - * For this group we are focusing on timestamps collection and timestamp - * definition - * +/** + * @defgroup RECORDS ROCProfiler Records * @{ */ -/** - * ROCProfiling Timestamp Type. - */ -typedef struct -{ - uint64_t value; -} rocprofiler_timestamp_t; - -/** - * Get the system clock timestamp. - * - * \param[out] timestamp The system clock timestamp in nano seconds. - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_TIMESTAMP_NOT_APPLICABLE
- * The function failed to get the timestamp using HSA Function. - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_get_timestamp(rocprofiler_timestamp_t* timestamp) ROCPROFILER_VERSION_9_0; - -/** - * Timestamps (start & end), it will be used for kernel dispatch tracing as - * well as API Tracing - */ -typedef struct -{ - rocprofiler_timestamp_t begin; - rocprofiler_timestamp_t end; -} rocprofiler_record_header_timestamp_t; - /** @} */ -/** \defgroup generic_record_group General Records Type - * @{ - */ - /** - * A unique identifier for every record + * @brief Buffer ID. + * @addtogroup BUFFER_HANDLING */ typedef struct { - /** - * Record ID handle - */ uint64_t handle; -} rocprofiler_record_id_t; +} rocprofiler_buffer_id_t; + +/** @defgroup SERVICE_OPERATIONS Services + * @{ + */ /** - * Record kind + * @brief Agent type. */ typedef enum { - /** - * Represents records that have profiling data (ex. counter collection - * records) - */ - ROCPROFILER_PROFILER_RECORD = 0, - /** - * Represents records that have tracing data (ex. hip api tracing records) - */ - ROCPROFILER_TRACER_RECORD = 1, - /** - * Represents a ATT tracing record (Not available yet) - */ - ROCPROFILER_ATT_TRACER_RECORD = 2, - /** - * Represents a PC sampling record - */ - ROCPROFILER_PC_SAMPLING_RECORD = 3, - /** - * Represents SPM records - */ - ROCPROFILER_SPM_RECORD = 4, - /** - * Represents Counters sampler records - */ - ROCPROFILER_COUNTERS_SAMPLER_RECORD = 5 -} rocprofiler_record_kind_t; - -/** - * Generic ROCProfiler record header. - */ -typedef struct -{ - /** - * Represents the kind of the record using ::rocprofiler_record_kind_t - */ - rocprofiler_record_kind_t kind; - /** - * Represents the id of the record - */ - rocprofiler_record_id_t id; -} rocprofiler_record_header_t; - -/** \defgroup record_agents_group Agents(AMD CPU/GPU) Handling - * \ingroup generic_record_group - * @{ - */ - -/** - * Agent ID handle, which represents a unique id to the agent reported as it - * can be used to retrieve Agent information using - * ::rocprofiler_query_agent_info, Agents can be CPUs or GPUs - */ -typedef struct -{ - /** - * a unique id to represent every agent on the system, this handle should be - * unique across all nodes in multi-node system - */ - uint64_t handle; // Topology folder serial number -} rocprofiler_agent_id_t; - -/** - * Using ::rocprofiler_query_agent_info, user can determine the type of the agent - * the following struct will be the output in case of retrieving - * ::ROCPROFILER_AGENT_TYPE agent info - */ -typedef enum -{ - /** - * CPU Agent - */ - ROCPROFILER_CPU_AGENT = 0, - /** - * GPU Agent - */ - ROCPROFILER_GPU_AGENT = 1 + ROCPROFILER_AGENT_TYPE_NONE = 0, /// agent is unknown type + ROCPROFILER_AGENT_TYPE_CPU, /// agent is CPU + ROCPROFILER_AGENT_TYPE_GPU, /// agent is GPU + ROCPROFILER_AGENT_TYPE_LAST, } rocprofiler_agent_type_t; -// TODO(aelwazir): check if we need to report the family name as well!!?? OR -// return the agent itself so that they can use HSA API /** - * Types of information that can be requested about the Agents - */ -typedef enum -{ - /** - * GPU Agent Name - */ - ROCPROFILER_AGENT_NAME = 0, - /** - * GPU Agent Type - */ - ROCPROFILER_AGENT_TYPE = 1 -} rocprofiler_agent_info_kind_t; - -/** - * Query Agent Information size to allow the user to allocate the right size - * for the information data requested, the information will be collected using - * ::rocprofiler_agent_id_t to identify one type of information available in - * ::rocprofiler_agent_info_t - * - * \param[in] kind Information kind requested by the user - * \param[in] agent_id Agent ID - * \param[out] data_size Size of the information data output - * \retval ::ROCPROFILER_STATUS_SUCCESS if the information was found - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND
, if the agent was not found - * in the saved agents - * \retval ::ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING \n if the agent - * was found in the saved agents but the required information is missing - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_query_agent_info_size(rocprofiler_agent_info_kind_t kind, - rocprofiler_agent_id_t agent_id, - size_t* data_size) ROCPROFILER_VERSION_9_0; - -/** - * Query Agent Information Data using an allocated data pointer by the user, - * user can get the size of the data using ::rocprofiler_query_agent_info_size, - * the user can get the data using ::rocprofiler_agent_id_t and the user need to - * identify one type of information available in ::rocprofiler_agent_info_t - * - * \param[in] kind Information kind requested by the user - * \param[in] agent_id Agent ID - * \param[out] data_size Size of the information data output - * \retval ::ROCPROFILER_STATUS_SUCCESS, if the information was found - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED
if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND
if the agent was not found - * in the saved agents - * \retval ::ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING \n if the agent - * was found in the saved agents but the required information is missing - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_query_agent_info(rocprofiler_agent_info_kind_t kind, - rocprofiler_agent_id_t descriptor, - const char** name) ROCPROFILER_VERSION_9_0; - -/** @} */ - -/** \defgroup record_queues_group Queues(AMD HSA QUEUES) Handling - * \ingroup generic_record_group - * @{ - */ - -/** - * Unique ID handle to represent an HSA Queue of type \p hsa_queue_t, this id - * can be used by the user to get queue information using - * ::rocprofiler_query_queue_info + * @brief Agent Identifier + */ +typedef struct +{ + uint64_t handle; +} rocprofiler_agent_id_t; + +typedef struct rocprofiler_pc_sampling_configuration_s rocprofiler_pc_sampling_configuration_t; + +typedef struct rocprofiler_pc_sampling_config_array_s +{ + rocprofiler_pc_sampling_configuration_t* data; + size_t size; +} rocprofiler_pc_sampling_config_array_t; + +/** + * @brief Agent. + */ +typedef struct +{ + rocprofiler_agent_id_t id; + rocprofiler_agent_type_t type; + const char* name; + rocprofiler_pc_sampling_config_array_t pc_sampling_configs; +} rocprofiler_agent_t; + +/** + * @brief Callback function type for querying the available agents + * + * @param [in] agents Array of pointers to agents + * @param [in] num_agents Number of agents in array + * @param [in] user_data Data pointer passback + * @return ::rocprofiler_status_t + */ +typedef rocprofiler_status_t (*rocprofiler_available_agents_cb_t)(rocprofiler_agent_t** agents, + size_t num_agents, + void* user_data); + +/** + * @brief Receive synchronous callback with an array of available agents at moment of invocation + * + * @param [in] callback Callback function accepting list of agents + * @param [in] agent_size Should be set to sizeof(rocprofiler_agent_t) + * @param [in] user_data Data pointer provided to callback + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback, + size_t agent_size, + void* user_data) ROCPROFILER_NONNULL(1); + +/** + * @brief Queue ID. */ typedef struct { - /** - * Unique Id for every queue for one agent for one system - */ uint64_t handle; } rocprofiler_queue_id_t; -// TODO(aelwazir): Check if there is anymore Queue Information needed /** - * Types of information that can be requested about the Queues + * @brief Thread ID */ -typedef enum -{ - /** - * AMD HSA Queue Size. - */ - ROCPROFILER_QUEUE_SIZE = 0 -} rocprofiler_queue_info_kind_t; +typedef uint64_t rocprofiler_thread_id_t; /** - * Query Queue Information size to allow the user to allocate the right size - * for the information data requested, the information will be collected using - * ::rocprofiler_queue_id_t by using ::rocprofiler_query_queue_info and the user - * need to identify one type of information available in - * ::rocprofiler_queue_info_t - * - * \param[in] kind Information kind requested by the user - * \param[in] agent_id Queue ID - * \param[out] data_size Size of the information data output - * \retval ::ROCPROFILER_STATUS_SUCCESS if the information was found - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND \n if the queue was not found - * in the saved agents - * \retval ::ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING \n - * if the queue was found in the saved queues but the required information is missing - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_query_queue_info_size(rocprofiler_queue_info_kind_t kind, - rocprofiler_queue_id_t agent_id, - size_t* data_size) ROCPROFILER_VERSION_9_0; - -/** - * Query Queue Information Data using an allocated data pointer by the user, - * user can get the size of the data using ::rocprofiler_query_queue_info_size, - * the user can get the data using ::rocprofiler_queue_id_t and the user need to - * identify one type of information available in ::rocprofiler_queue_info_t - * - * \param[in] kind Information kind requested by the user - * \param[in] agent_id Queue ID - * \param[out] data_size Size of the information data output - * \retval ::ROCPROFILER_STATUS_SUCCESS, if the information was found - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND \n if the queue was not found - * in the saved agents - * \retval ::ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING \n if the queue - * was found in the saved agents but the required information is missing - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_query_queue_info(rocprofiler_queue_info_kind_t kind, - rocprofiler_queue_id_t descriptor, - const char** name) ROCPROFILER_VERSION_9_0; - -/** @} */ - -/** \defgroup record_kernels_group Kernels Handling - * \ingroup generic_record_group - * @{ - */ - -/** - * Kernel identifier that represent a unique id for every kernel + * @brief ROCProfiler Record Correlation ID. + * To be reviewed? */ typedef struct { - /** - * Kernel object identifier - */ uint64_t handle; -} rocprofiler_kernel_id_t; - -/** - * Kernel Information Types, can be used by ::rocprofiler_query_kernel_info - */ -typedef enum -{ - /** - * Kernel Name Information Type - */ - ROCPROFILER_KERNEL_NAME = 0 -} rocprofiler_kernel_info_kind_t; - -/** - * Query Kernel Information Data size to allow the user to allocate the right - * size for the information data requested, the information will be collected - * using - * ::rocprofiler_kernel_id_t by using ::rocprofiler_query_kernel_info and the - * user need to identify one type of information available in - * ::rocprofiler_kernel_info_t - * - * \param[in] kernel_info_type The tyoe of information needed - * \param[in] kernel_id Kernel ID - * \param[out] data_size Kernel Information Data size - * \retval ::ROCPROFILER_STATUS_SUCCESS, if the information was found - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND \n if the kernel was not - * found in the saved kernels - * \retval ::ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING \n if the kernel - * was found in the saved counters but the required information is missing - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_query_kernel_info_size(rocprofiler_kernel_info_kind_t kind, - rocprofiler_kernel_id_t kernel_id, - size_t* data_size) ROCPROFILER_VERSION_9_0; - -/** - * Query Kernel Information Data using an allocated data pointer by the user, - * user can get the size of the data using ::rocprofiler_query_kernel_info_size, - * the user can get the data using ::rocprofiler_kernel_id_t and the user need - * to identify one type of information available in ::rocprofiler_kernel_info_t - * - * \param[in] kind Information kind requested by the user - * \param[in] kernel_id Kernel ID - * \param[out] data Information Data - * \retval ::ROCPROFILER_STATUS_SUCCESS, if the information was found - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND \n if the kernel was not - * found in the saved kernels - * \retval ::ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING \n if the kernel - * was found in the saved kernels but the required information is missing - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_query_kernel_info(rocprofiler_kernel_info_kind_t kind, - rocprofiler_kernel_id_t kernel_id, - const char** data) ROCPROFILER_VERSION_9_0; - -/** @} */ - -/** - * Holds the thread id - */ -typedef struct -{ - /** - * Thread ID - */ - uint32_t value; -} rocprofiler_thread_id_t; - -/** @} */ - -/** \defgroup profiling_api_group Profiling Part Handling - * - * The profiling records are asynchronously logged to the pool and can be - * associated with the respective GPU kernels. - * Profiling API can be used to enable collecting of the records with or - * without timestamping data for the GPU Application in continuous mode or - * kernel mode. - * - * @{ - */ - -/** \defgroup profiling_api_counters_group Counter Collection Handling - * records - * \ingroup profiling_api_group - * @{ - */ - -typedef struct -{ - const char* name; - const char* description; - const char* expression; - uint32_t instances_count; - const char* block_name; - uint32_t block_counters; -} rocprofiler_counter_info_t; - -typedef int (*rocprofiler_counters_info_callback_t)(rocprofiler_counter_info_t counter, - const char* gpu_name, - uint32_t gpu_index) ROCPROFILER_VERSION_9_0; - -ROCPROFILER_API rocprofiler_status_t -rocprofiler_iterate_counters(rocprofiler_counters_info_callback_t counters_info_callback) - ROCPROFILER_VERSION_9_0; - -/** - * Counter ID to be used to query counter information using - * ::rocprofiler_query_counter_info - */ -typedef struct -{ - /** - * A unique id generated for every counter requested by the user - */ - uint64_t handle; -} rocprofiler_counter_id_t; - -/** - * Counter Information Types, can be used by ::rocprofiler_query_counter_info - */ -typedef enum -{ - /** - * Can be used to get the counter name - */ - ROCPROFILER_COUNTER_NAME = 0, - /** - * Can be used to get the block id of a counter - */ - ROCPROFILER_COUNTER_BLOCK_ID = 2, - /** - * This is the level of hierarchy from the GFX_IP where the counter value - * should be collected - */ - ROCPROFILER_COUNTER_HIERARCHY_LEVEL = 3 -} rocprofiler_counter_info_kind_t; - -/** - * Query Counter Information Data size to allow the user to allocate the right - * size for the information data requested, the information will be collected - * using - * ::rocprofiler_counter_id_t by using ::rocprofiler_query_counter_info and the - * user need to identify one type of information available in - * ::rocprofiler_counter_info_t - * - * \param[in] session_id Session id where this data was collected - * \param[in] counter_info_type The tyoe of information needed - * \param[in] counter_id Counter ID - * \param[out] data_size Counter Information Data size - * \retval ::ROCPROFILER_STATUS_SUCCESS, if the information was found - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED \n if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND \n if the counter was not - * found in the saved counters - * \retval ::ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING \n if the counter - * was found in the saved counters but the required information is missing - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_query_counter_info_size(rocprofiler_session_id_t session_id, - rocprofiler_counter_info_kind_t counter_info_type, - rocprofiler_counter_id_t counter_id, - size_t* data_size) ROCPROFILER_VERSION_9_0; - -/** - * Query Counter Information Data using an allocated data pointer by the user, - * user can get the size of the data using ::rocprofiler_query_counter_info_size, - * the user can get the data using ::rocprofiler_counter_id_t and the user need - * to identify one type of information available in ::rocprofiler_counter_info_t - * - * \param[in] session_id Session id where this data was collected - * \param[in] kind Information kind requested by the user - * \param[in] counter_id Counter ID - * \param[out] data Information Data - * \retval ::ROCPROFILER_STATUS_SUCCESS, if the information was found - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED \n if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND \n if the counter was not - * found in the saved counters - * \retval ::ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING \n if the counter - * was found in the saved counters but the required information is missing - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_query_counter_info(rocprofiler_session_id_t session_id, - rocprofiler_counter_info_kind_t kind, - rocprofiler_counter_id_t counter_id, - const char** data) ROCPROFILER_VERSION_9_0; - -typedef struct -{ - /** - * queue index value - */ - uint64_t value; -} rocprofiler_queue_index_t; - -// TODO(aelwazir): add more types to the values should we use unions??!! -/** - * Counter Value Structure - */ -typedef struct -{ - /** - * Counter value - */ - double value; -} rocprofiler_record_counter_value_t; - -/** - * Counter Instance Structure, it will represent every counter reported in the - * array of counters reported by every profiler record if counters were needed - * to be collected - */ -typedef struct -{ - /** - * Counter Instance Identifier - */ - rocprofiler_counter_id_t counter_handler; // Counter Handler - /** - * Counter Instance Value - */ - rocprofiler_record_counter_value_t value; // Counter Value -} rocprofiler_record_counter_instance_t; - -/** - * Counters Instances Count Structure, every profiling record has this - * structure included to report the number of counters collected for this - * kernel dispatch - */ -typedef struct -{ - /** - * Counters Instances Count for every record - */ - uint64_t value; -} rocprofiler_record_counters_instances_count_t; - -/** - * Kernel properties, this will represent the kernel properties - * such as its grid size, workgroup size, wave_size - */ - -typedef struct -{ - /** - * Grid Size - */ - uint64_t grid_size; - /** - * workgroup size - */ - uint64_t workgroup_size; - /** - * lds_size - */ - uint64_t lds_size; - /** - * scratch_size - */ - uint64_t scratch_size; - /** - * arch vgpr count - */ - uint64_t arch_vgpr_count; - /** - * accum vgpr count - */ - uint64_t accum_vgpr_count; - /** - * sgpr_count - */ - uint64_t sgpr_count; - /** - * wave size - */ - uint64_t wave_size; - /** - * Dispatch completion signal handle - */ - uint64_t signal_handle; - -} rocprofiler_kernel_properties_t; - -/** - * Correlation ID - */ -typedef struct -{ - uint64_t value; } rocprofiler_correlation_id_t; /** - * Profiling record, this will represent all the information reported by the - * profiler regarding kernel dispatches and their counters that were collected - * by the profiler and requested by the user, this can be used as the type of - * the flushed records that is reported to the user using - * ::rocprofiler_buffer_callback_t + * @brief ROCProfiler Timestamp. + * */ -typedef struct -{ - /** - * ROCProfiler General Record base header to identify the id and kind of every - * record - */ - rocprofiler_record_header_t header; - /** - * Kernel Identifier to be used by the user to get the kernel info using - * ::rocprofiler_query_kernel_info - */ - rocprofiler_kernel_id_t kernel_id; - /** - * Agent Identifier to be used by the user to get the Agent Information using - * ::rocprofiler_query_agent_info - */ - rocprofiler_agent_id_t gpu_id; - /** - * Queue Identifier to be used by the user to get the Queue Information using - * ::rocprofiler_query_agent_info - */ - rocprofiler_queue_id_t queue_id; - /** - * Timestamps, start and end timestamps of the record data (ex. Kernel - * Dispatches) - */ - rocprofiler_record_header_timestamp_t timestamps; - /** - * Counters, including identifiers to get counter information and Counters - * values - */ - const rocprofiler_record_counter_instance_t* counters; - /** - * The count of the counters that were collected by the profiler - */ - rocprofiler_record_counters_instances_count_t counters_count; /* Counters Count */ - /** - * kernel properties, including the grid size, work group size, - * registers count, wave size and completion signal - */ - rocprofiler_kernel_properties_t kernel_properties; - /** - * Thread id - */ - rocprofiler_thread_id_t thread_id; - /** - * Queue Index - packet index in the queue - */ - rocprofiler_queue_index_t queue_idx; - /** - * Correlation id - */ - rocprofiler_correlation_id_t correlation_id; -} rocprofiler_record_profiler_t; - -typedef struct -{ - uint32_t value; - -} rocprofiler_event_id_t; - -typedef struct -{ - uint16_t value; // Counter Value - -} rocprofiler_record_spm_counters_instances_count_t; +typedef uint64_t rocprofiler_timestamp_t; /** - * Counters, including identifiers to get counter information and Counters - * values + * @brief ROCProfiler Address. */ -typedef struct -{ - rocprofiler_record_spm_counters_instances_count_t counters_data[32]; +typedef uint64_t rocprofiler_address_t; -} rocprofiler_record_se_spm_data_t; - -/** - * SPM record, this will represent all the information reported by the - * SPM regarding counters and their timestamps this can be used as the type of - * the flushed records that is reported to the user using - * ::rocprofiler_buffer_callback_t - */ -typedef struct -{ - /** - * ROCProfiler General Record base header to identify the id and kind of every - * record - */ - rocprofiler_record_header_t header; - - /** - * Timestamps at which the counters were sampled. - */ - rocprofiler_record_header_timestamp_t timestamps; - /** - * Counter values per shader engine - */ - rocprofiler_record_se_spm_data_t shader_engine_data[4]; - -} rocprofiler_record_spm_t; - -/** - * struct to store the trace data from a shader engine. - */ -typedef struct -{ - void* buffer_ptr; - uint32_t buffer_size; -} rocprofiler_record_se_att_data_t; - -/** - * ATT tracing record structure. - * This will represent all the information reported by the - * ATT tracer such as the kernel and its thread trace data. - * This record can be flushed to the user using - * ::rocprofiler_buffer_callback_t - */ -typedef struct -{ - /** - * ROCProfiler General Record base header to identify the id and kind of every - * record - */ - rocprofiler_record_header_t header; - /** - * Kernel Identifier to be used by the user to get the kernel info using - * ::rocprofiler_query_kernel_info - */ - rocprofiler_kernel_id_t kernel_id; - /** - * Agent Identifier to be used by the user to get the Agent Information using - * ::rocprofiler_query_agent_info - */ - rocprofiler_agent_id_t gpu_id; - /** - * Queue Identifier to be used by the user to get the Queue Information using - * ::rocprofiler_query_agent_info - */ - rocprofiler_queue_id_t queue_id; - /** - * kernel properties, including the grid size, work group size, - * registers count, wave size and completion signal - */ - rocprofiler_kernel_properties_t kernel_properties; - /** - * Thread id - */ - rocprofiler_thread_id_t thread_id; - /** - * Queue Index - packet index in the queue - */ - rocprofiler_queue_index_t queue_idx; - /** - * ATT data output from each shader engine. - */ - rocprofiler_record_se_att_data_t* shader_engine_data; - /** - * The count of the shader engine ATT data - */ - uint64_t shader_engine_data_count; -} rocprofiler_record_att_tracer_t; - -/** @} */ - -/** \defgroup tracing_api_group Tracer Part Handling +/** @defgroup TRACING_SERVICES Tracing Services * @{ */ /** - * Traced API domains + * @brief Tracing Domain ID. + * + * Domains for tracing + * + * if the value is equal to zero that means all operations will be considered + * for tracing. + * */ typedef enum { - ACTIVITY_DOMAIN_NONE = -1, - /** - * HSA API domain - */ - ACTIVITY_DOMAIN_HSA_API = 0, - /** - * HSA async activity domain - */ - ACTIVITY_DOMAIN_HSA_OPS = 1, - /** - * HIP async activity domain - */ - ACTIVITY_DOMAIN_HIP_OPS = 2, - /** - * HIP API domain - */ - ACTIVITY_DOMAIN_HIP_API = 3, - /** - * KFD API domain - */ - ACTIVITY_DOMAIN_KFD_API = 4, - /** - * External ID domain - */ - ACTIVITY_DOMAIN_EXT_API = 5, - /** - * ROCTX domain - */ - ACTIVITY_DOMAIN_ROCTX = 6, - // TODO(aelwazir): Used in kernel Info, memcpy, ..etc, refer to hsa_support - // TODO(aelwazir): Move HSA Events to hsa_support - /** - * HSA events (Device Activity) - */ - ACTIVITY_DOMAIN_HSA_EVT = 7, - ACTIVITY_DOMAIN_NUMBER + ROCPROFILER_TRACER_ACTIVITY_DOMAIN_NONE = 0, + ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API, + ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HIP_API, + ROCPROFILER_TRACER_ACTIVITY_DOMAIN_MARKER_API, + ROCPROFILER_TRACER_ACTIVITY_DOMAIN_ROCTX = ROCPROFILER_TRACER_ACTIVITY_DOMAIN_MARKER_API, + ROCPROFILER_TRACER_ACTIVITY_DOMAIN_KFD_API, + ROCPROFILER_TRACER_ACTIVITY_DOMAIN_EXT_API, + ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_OPS, + ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HIP_OPS, + ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_EVT, + ROCPROFILER_TRACER_ACTIVITY_DOMAIN_LAST } rocprofiler_tracer_activity_domain_t; /** - * Tracing Operation ID for HIP/HSA + * @brief Tracing Operation ID. + * + * Depending on the kind, operations can be determined + * + * if the value is equal to zero that means all operations will be considered + * for tracing. + * + */ +typedef uint32_t rocprofiler_trace_operation_t; + +/** @defgroup CALLBACK_TRACING_SERVICE Callback Tracing Service + * @{ */ -typedef struct -{ - uint32_t id; -} rocprofiler_tracer_operation_id_t; /** - * Correlation identifier + * @brief Service Callback Tracing Kind. + */ +typedef enum +{ + ROCPROFILER_SERVICE_CALLBACK_TRACING_NONE = 0, + ROCPROFILER_SERVICE_CALLBACK_TRACING_HSA_API = 1, + ROCPROFILER_SERVICE_CALLBACK_TRACING_HIP_API = 2, + ROCPROFILER_SERVICE_CALLBACK_TRACING_MARKER = 3, + ROCPROFILER_SERVICE_CALLBACK_TRACING_CODE_OBJECT = 4, + ROCPROFILER_SERVICE_CALLBACK_TRACING_KERNEL_DISPATCH = 5, + ROCPROFILER_SERVICE_CALLBACK_TRACING_HELPER_THREAD = 6, + // TODO: Is tracing runtime threads possible? + // ROCPROFILER_SERVICE_CALLBACK_TRACING_RUNTIME_THREAD = 7, + ROCPROFILER_SERVICE_CALLBACK_TRACING_LAST, +} rocprofiler_service_callback_tracing_kind_t; + +/** + * @defgroup HSA_API_CALLBACK_TRACING_RECORDS HSA API Callback Tracing Records + * @{ + */ + +/** + * @brief ROCProfiler HSA API Callback Data. + * + * Depending on the operation kind, the data can be casted to the corresponding + * structure. + * + */ +typedef void* rocprofiler_hsa_api_callback_api_data_t; + +/** + * @brief ROCProfiler HSA API Callback Data. */ typedef struct { + rocprofiler_correlation_id_t correlation_id; + rocprofiler_hsa_api_callback_api_data_t data; // Arguments or api_data? +} rocprofiler_hsa_api_callback_tracer_data_t; + +/** + * @brief ROCProfiler HIP API Callback Data. + * + * Depending on the operation kind, the data can be casted to the corresponding + * structure. + * + */ +typedef void* rocprofiler_hip_api_callback_api_data_t; + +/** + * @brief ROCProfiler HIP API Tracer Callback Data. + */ +typedef struct +{ + rocprofiler_correlation_id_t correlation_id; + rocprofiler_address_t host_kernel_address; + rocprofiler_hip_api_callback_api_data_t data; // Arguments or api_data? +} rocprofiler_hip_api_callback_tracer_data_t; + +/** + * @brief ROCProfiler Marker Callback Data. + * + * Depending on the operation kind, the data can be casted to the corresponding + * structure. + * + */ +typedef void* rocprofiler_marker_callback_api_data_t; + +/** + * @brief ROCProfiler Marker Tracer Callback Data. + */ +typedef struct +{ + rocprofiler_correlation_id_t correlation_id; + rocprofiler_marker_callback_api_data_t data; // Arguments or api_data? +} rocprofiler_marker_callback_tracer_data_t; + +/** + * @brief ROCProfiler Tracing Helper Thread. + * + */ +typedef enum +{ + + ROCPROFILER_TRACING_HELPER_THREAD_START = 0, + ROCPROFILER_TRACING_HELPER_THREAD_COMPLETE = 1, + ROCPROFILER_TRACING_HELPER_THREAD_LAST, +} rocprofiler_tracing_helper_thread_operation_t; + +/** + * @brief ROCProfiler Helper Thread Callback Data. + * + */ +typedef struct +{ + rocprofiler_tracing_helper_thread_operation_t id; +} rocprofiler_helper_thread_callback_tracer_data_t; + +/** + * @brief ROCProfiler Code Object Tracer Operation. + */ +typedef enum +{ + ROCPROFILER_TRACING_CODE_OBJECT_NONE = 0, + ROCPROFILER_TRACING_CODE_OBJECT_LOAD = 1, + ROCPROFILER_TRACING_CODE_OBJECT_UNLOAD = 2, + ROCPROFILER_TRACING_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER = 3, + ROCPROFILER_TRACING_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_UNREGISTER = 4, + // Should we remove these as they will be part of hipRegisterFunction API + // tracing? ROCPROFILER_TRACING_CODE_OBJECT_REGISTER_HOST_KERNEL_SYMBOL = 5, + // (?) ROCPROFILER_TRACING_CODE_OBJECT_UNREGISTER_HOST_KERNEL_SYMBOL = 6, (?) + ROCPROFILER_TRACING_CODE_OBJECT_LAST, +} rocprofiler_tracing_code_object_operation_t; + +/** + * @brief ROCProfiler Code Object Load Tracer Callback Record. + */ +typedef struct +{ + uint64_t load_base; // code object load base + uint64_t load_size; // code object load size + const char* uri; // URI string (NULL terminated) + // uint32_t storage_type; // code object storage type (Need Review?) + // int storage_file; // origin file descriptor (Need Review?) + // uint64_t memory_base; // origin memory base (Need Review?) + // uint64_t memory_size; // origin memory size (Need Review?) + // uint64_t load_delta; // code object load delta (Need Review?) +} rocprofiler_callback_tracer_code_object_load_data_t; + +/** + * @brief ROCProfiler Code Object UnLoad Tracer Callback Record. + * + */ +typedef struct +{ + uint64_t load_base; // code object load base +} rocprofiler_callback_tracer_code_object_unload_data_t; + +/** + * @brief ROCProfiler Code Object Device Kernel Symbol Tracer Callback Record. + * + */ +typedef struct +{ + const char* kernel_name; // kernel name string (NULL terminated) + rocprofiler_address_t kernel_descriptor; // kernel descriptor +} rocprofiler_callback_tracer_code_object_device_kernel_symbol_data_t; + +/** + * @brief ROCProfiler Code Object Register Host Kernel Symbol Tracer Callback + * Record. + * + */ +typedef struct +{ + rocprofiler_address_t host_address; // host address + // Should this be nullptr if it is unregister? + const char* kernel_name; // kernel name string (NULL terminated) + rocprofiler_address_t kernel_descriptor; // kernel descriptor +} rocprofiler_callback_tracer_code_object_register_host_kernel_symbol_data_t; + +/** @} */ + +/** + * @brief API Tracing callback data. + * + * This can be casted to: + * ::rocprofiler_hsa_callback_data_t if the record kind is + * ROCPROFILER_SERVICE_CALLBACK_TRACING_HSA_API + * ::rocprofiler_hip_callback_data_t if the record kind is + * ROCPROFILER_SERVICE_CALLBACK_TRACING_HIP_API + * ::rocprofiler_marker_callback_data_t if the record kind is + * ROCPROFILER_SERVICE_CALLBACK_TRACING_MARKER + * + */ +typedef void* rocprofiler_tracer_callback_data_t; + +/** + * @brief API Tracing callback operation kind. + * + * Depending on the ::rocprofiler_service_callback_tracing_kind_t + * the operation kind can be determined from the following: + * ::rocprofiler_marker_trace_record_operation_t for Markers + * ::rocprofiler_hsa_trace_record_operation_t for HSA API + * ::rocprofiler_hip_trace_record_operation_t for HIP API + * ::rocprofiler_code_object_record_operation_t for Code object tracing + * + */ +typedef uint32_t rocprofiler_tracer_callback_operation_t; + +/** + * @brief API Tracing callback function. + */ +typedef void (*rocprofiler_tracer_callback_t)(rocprofiler_service_callback_tracing_kind_t kind, + rocprofiler_tracer_callback_operation_t operation, + rocprofiler_tracer_callback_data_t data, + void* callback_args); + +/** + * @brief Configure Callback Tracing Service. + * + * @param [in] context_id + * @param [in] kind + * @param [in] operations + * @param [in] operations_count + * @param [in] callback + * @param [in] callback_args + * @return ::rocprofiler_status_t + * + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_configure_callback_tracing_service(rocprofiler_context_id_t context_id, + rocprofiler_service_callback_tracing_kind_t kind, + rocprofiler_trace_operation_t* operations, + size_t operations_count, + rocprofiler_tracer_callback_t callback, + void* callback_args); + +/** + * @brief Query Callback Trace Kind Name. + * + * @param [in] kind + * @param [out] name if nullptr, size will be returned + * @param [out] size + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_query_callback_trace_kind_name(rocprofiler_service_callback_tracing_kind_t kind, + const char* name, + size_t* size) ROCPROFILER_NONNULL(3); + +/** + * @brief General Operation kind + * + * That can be used to represent one of the following: + * - ::rocprofiler_trace_record_hsa_operation_kind_t + * - ::rocprofiler_trace_record_hip_operation_kind_t + * - ::rocprofiler_trace_record_marker_operation_kind_t + * + */ +typedef uint32_t rocprofiler_trace_record_operation_kind_t; + +/** + * @brief Query callback kind operation name. + * + * @param [in] kind + * @param [in] api_trace_operation_id + * @param [out] name if nullptr, size will be returned + * @param [out] size + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_query_callback_kind_operation_name( + rocprofiler_service_callback_tracing_kind_t kind, + rocprofiler_trace_record_operation_kind_t api_trace_operation, + const char* name, + size_t* size) ROCPROFILER_NONNULL(4); + +/** @} */ + +/** @defgroup BUFFER_TRACING_SERVICE Buffer Tracing Service + * @{ + */ + +/** + * @brief Service Buffer Tracing Kind. + */ +typedef enum +{ + ROCPROFILER_SERVICE_BUFFER_TRACING_NONE = 0, + ROCPROFILER_SERVICE_BUFFER_TRACING_HSA_API = 1, + ROCPROFILER_SERVICE_BUFFER_TRACING_HIP_API = 2, + ROCPROFILER_SERVICE_BUFFER_TRACING_MARKER = 3, + ROCPROFILER_SERVICE_BUFFER_TRACING_MEMORY_COPY = 4, + ROCPROFILER_SERVICE_BUFFER_TRACING_KERNEL_DISPATCH = 5, + ROCPROFILER_SERVICE_BUFFER_TRACING_PAGE_MIGRATION = 6, + ROCPROFILER_SERVICE_BUFFER_TRACING_SCRATCH_MEMORY = 7, + ROCPROFILER_SERVICE_BUFFER_TRACING_EXTERNAL_CORRELATION = 8, + // To determine if this is possible to implement? + // ROCPROFILER_SERVICE_BUFFER_TRACING_QUEUE_SCHEDULING = 9, + // Do we need to keep it in buffer tracing? + // ROCPROFILER_SERVICE_BUFFER_TRACING_CODE_OBJECT = 10, + ROCPROFILER_SERVICE_BUFFER_TRACING_LAST, +} rocprofiler_service_buffer_tracing_kind_t; + +/** + * @brief ROCProfiler Buffer Tracing Record Header. + */ +typedef struct +{ + rocprofiler_service_buffer_tracing_kind_t kind; + rocprofiler_correlation_id_t correlation_id; +} rocprofiler_buffer_tracing_record_header_t; + +/** + * @defgroup HSA_API_CALLBACK_TRACING_RECORDS HSA API Callback Tracing Records + * @{ + */ + +/** + * @brief ROCProfiler Buffer HSA API Tracer Record. + */ +typedef struct +{ + rocprofiler_buffer_tracing_record_header_t header; + rocprofiler_trace_record_hsa_operation_kind_t operation; // rocprofiler/hsa.h + rocprofiler_timestamp_t start_timestamp; + rocprofiler_timestamp_t end_timestamp; + rocprofiler_thread_id_t thread_id; +} rocprofiler_buffer_tracing_hsa_api_record_t; + +/** + * @brief ROCProfiler Buffer HIP API Tracer Record. + */ +typedef struct +{ + rocprofiler_buffer_tracing_record_header_t header; + rocprofiler_trace_record_hip_operation_kind_t operation; // rocprofiler/hip.h + rocprofiler_timestamp_t start_timestamp; + rocprofiler_timestamp_t end_timestamp; + rocprofiler_thread_id_t thread_id; +} rocprofiler_buffer_tracing_hip_api_record_t; + +/** + * @brief ROCProfiler Buffer Marker Tracer Record. + */ +typedef struct +{ + rocprofiler_buffer_tracing_record_header_t header; + rocprofiler_trace_record_marker_operation_kind_t operation; // rocprofiler/marker.h + rocprofiler_timestamp_t timestamp; + rocprofiler_thread_id_t thread_id; + uint64_t marker_id; // rocprofiler_marker_id_t + // const char* message; // (Need Review?) +} rocprofiler_buffer_tracing_marker_record_t; + +/** + * @brief Memory Copy Operation. + */ +typedef enum +{ + ROCPROFILER_TRACER_MEMORY_NONE = 0, + ROCPROFILER_TRACER_MEMORY_COPY_DEVICE_TO_HOST = 1, + ROCPROFILER_TRACER_MEMORY_HOST_TO_DEVICE = 2, + ROCPROFILER_TRACER_MEMORY_DEVICE_TO_DEVICE = 3, + ROCPROFILER_TRACER_MEMORY_LAST, +} rocprofiler_trace_memory_copy_operation_t; + +/** + * @brief ROCProfiler Buffer Memory Copy Tracer Record. + */ +typedef struct +{ + rocprofiler_buffer_tracing_record_header_t header; /** - * Correlation ID Value + * Memory copy operation that can be derived from + * ::rocprofiler_trace_record_operation_kind_t */ - uint64_t value; -} rocprofiler_tracer_activity_correlation_id_t; + uint32_t operation; + rocprofiler_timestamp_t start_timestamp; + rocprofiler_timestamp_t end_timestamp; + rocprofiler_queue_id_t queue_id; +} rocprofiler_buffer_tracing_memory_copy_record_t; /** - * Tracer API Calls Data Handler + * @brief ROCProfiler Buffer Kernel Dispatch Tracer Record. */ typedef struct { - union - { - const struct hip_api_data_s* hip; - const struct hsa_api_data_s* hsa; - const struct roctx_api_data_s* roctx; - }; -} rocprofiler_tracer_api_data_t; + rocprofiler_buffer_tracing_record_header_t header; + rocprofiler_timestamp_t start_timestamp; + rocprofiler_timestamp_t end_timestamp; + rocprofiler_queue_id_t queue_id; + const char* kernel_name; +} rocprofiler_buffer_tracing_kernel_dispatch_record_t; /** - * @brief Get Tracer API Function Name - * - * Return NULL if the name is not found for given domain and operation_id. - * - * Note: The returned string is NULL terminated. - * - * @param[in] domain - * @param[in] operation_id - * @param[out] name - * @return ::rocprofiler_status_t + * @brief ROCProfiler Buffer Page Migration Tracer Record. */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_query_tracer_operation_name(rocprofiler_tracer_activity_domain_t domain, - rocprofiler_tracer_operation_id_t operation_id, - const char** name); +typedef struct +{ + rocprofiler_buffer_tracing_record_header_t header; + rocprofiler_timestamp_t start_timestamp; + rocprofiler_timestamp_t end_timestamp; + rocprofiler_queue_id_t queue_id; + // Not Sure What is the info needed here? +} rocprofiler_buffer_tracing_page_migration_record_t; /** - * @brief Get Tracer API Operation ID - * - * @param [in] domain - * @param [in] name - * @param [out] operation_id - * @return ::rocprofiler_status_t + * @brief ROCProfiler Buffer Scratch Memory Tracer Record. */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_tracer_operation_id(rocprofiler_tracer_activity_domain_t domain, - const char* name, - rocprofiler_tracer_operation_id_t* operation_id); +typedef struct +{ + rocprofiler_buffer_tracing_record_header_t header; + rocprofiler_timestamp_t start_timestamp; + rocprofiler_timestamp_t end_timestamp; + rocprofiler_queue_id_t queue_id; + // Not Sure What is the info needed here? +} rocprofiler_buffer_tracing_scratch_memory_record_t; /** - * Tracing external ID + * @brief ROCProfiler Buffer Queue Scheduling Tracer Record. + */ +typedef struct +{ + rocprofiler_buffer_tracing_record_header_t header; + rocprofiler_timestamp_t start_timestamp; + rocprofiler_timestamp_t end_timestamp; + rocprofiler_queue_id_t queue_id; + // Not Sure What is the info needed here? +} rocprofiler_buffer_tracing_queue_scheduling_record_t; + +/** + * @brief ROCProfiler Code Object Tracer Buffer Record. + * + * We need to guarantee that these records are in the buffer before the + * corresponding Exit Phase API calls are called. + */ +// typedef struct { +// rocprofiler_buffer_tracing_record_header_t header; +// rocprofiler_tracing_code_object_kind_id_t kind; +// } rocprofiler_buffer_tracing_code_object_header_t; + +/** + * @brief ROCProfiler Code Object Load Tracer Buffer Record. + * + */ +// typedef struct { +// rocprofiler_buffer_tracing_code_object_header_t header; +// uint64_t load_base; // code object load base +// uint64_t load_size; // code object load size +// const char *uri; // URI string (NULL terminated) +// rocprofiler_timestamp_t timestamp; +// // uint32_t storage_type; // code object storage type (Need Review?) +// // int storage_file; // origin file descriptor (Need Review?) +// // uint64_t memory_base; // origin memory base (Need Review?) +// // uint64_t memory_size; // origin memory size (Need Review?) +// // uint64_t load_delta; // code object load delta (Need Review?) +// } rocprofiler_buffer_tracing_code_object_load_record_t; + +/** + * @brief ROCProfiler Code Object UnLoad Tracer Buffer Record. + * + */ +// typedef struct { +// rocprofiler_buffer_tracing_code_object_header_t header; +// uint64_t load_base; // code object load base +// rocprofiler_timestamp_t timestamp; +// } rocprofiler_buffer_tracing_code_object_unload_record_t; + +/** + * @brief ROCProfiler Code Object Kernel Symbol Tracer Buffer Record. + * + */ +// typedef struct { +// rocprofiler_buffer_tracing_code_object_header_t header; +// const char *kernel_name; // kernel name string (NULL terminated) +// uint64_t kernel_descriptor; // kernel descriptor (Need to be changed from +// // uint64_t to ::rocprofiler_address_t) +// // rocprofiler_timestamp_t timestamp; // (Need Review?) +// } rocprofiler_buffer_tracing_code_object_kernel_symbol_record_t; + +/** + * @brief ROCProfiler External Correlation ID. + * */ typedef struct { uint64_t id; -} rocprofiler_tracer_external_id_t; - -typedef enum -{ - /** - * No phase, it is an activity record or asynchronous output data - */ - ROCPROFILER_PHASE_NONE = 0, - /** - * Enter phase for API calls - */ - ROCPROFILER_PHASE_ENTER = 1, - /** - * Exit phase for API calls - */ - ROCPROFILER_PHASE_EXIT = 2 -} rocprofiler_api_tracing_phase_t; +} rocprofiler_external_correlation_id_t; /** - * Tracing record, this will represent all the information reported by the - * tracer regarding APIs and their data that were traced and collected - * by the tracer and requested by the user, this can be used as the type of - * the flushed records that is reported to the user using - * ::rocprofiler_buffer_async_callback_t + * @brief ROCProfiler Buffer External Correlation Tracer Record. */ typedef struct { - /** - * ROCProfiler General Record base header to identify the id and kind of every - * record - */ - rocprofiler_record_header_t header; - /** - * Tracing external ID, and ROCTX ID if domain is ::ACTIVITY_DOMAIN_ROCTX - */ - rocprofiler_tracer_external_id_t external_id; - /** - * Activity domain id, represents the type of the APIs that are being traced - */ - rocprofiler_tracer_activity_domain_t domain; - /** - * Tracing Operation ID for HIP/HSA - */ - rocprofiler_tracer_operation_id_t operation_id; - /** - * API Data - */ - rocprofiler_tracer_api_data_t api_data; - /** - * Activity correlation ID - */ - rocprofiler_tracer_activity_correlation_id_t correlation_id; - /** - * Timestamps - */ - rocprofiler_record_header_timestamp_t timestamps; - /** - * Agent identifier that can be used as a handler in - * ::rocprofiler_query_agent_info - */ - rocprofiler_agent_id_t agent_id; - /** - * Queue identifier that can be used as a handler in - * ::rocprofiler_query_queue_info - */ - rocprofiler_queue_id_t queue_id; - /** - * Thread id - */ - rocprofiler_thread_id_t thread_id; - /** - * API Tracing phase (Enter/Exit/None(Activity Records/Asynchronous Output Records)) - */ - rocprofiler_api_tracing_phase_t phase; - /** - * Kernel Name for HIP API calls that launches kernels or ROCTx message for ROCTx api calls - */ - const char* name; -} rocprofiler_record_tracer_t; - -/** - * Kernel dispatch correlation ID, unique across all dispatches - */ -typedef struct -{ - uint64_t value; -} rocprofiler_kernel_dispatch_id_t; - -/** - * An individual PC sample - */ -typedef struct -{ - /** - * Kernel dispatch ID. This is used by PC sampling to associate samples with - * individual dispatches and is unrelated to any user-supplied correlation ID - */ - rocprofiler_kernel_dispatch_id_t dispatch_id; - union - { - /** - * Host timestamp - */ - rocprofiler_timestamp_t timestamp; - /** - * GPU clock counter (not currently used) - */ - uint64_t cycle; - }; - /** - * Sampled program counter - */ - uint64_t pc; - /** - * Sampled shader element - */ - uint32_t se; - /** - * Sampled GPU agent - */ - rocprofiler_agent_id_t gpu_id; -} rocprofiler_pc_sample_t; - -/** - * PC sample record: contains the program counter/instruction pointer observed - * during periodic sampling of a kernel - */ -typedef struct -{ - /** - * ROCProfiler General Record base header to identify the id and kind of every - * record - */ - rocprofiler_record_header_t header; - /** - * PC sample data - */ - rocprofiler_pc_sample_t pc_sample; -} rocprofiler_record_pc_sample_t; + rocprofiler_buffer_tracing_record_header_t header; + rocprofiler_external_correlation_id_t external_correlation_id; +} rocprofiler_buffer_tracing_external_correlation_record_t; /** @} */ -/** \defgroup memory_storage_buffer_group Memory Storage Buffer - * Sessions +/** + * @brief ROCProfiler Push External Correlation ID. * - * In this group, Memory Pools and their types will be discussed. - * @{ + * @param external_correlation_id + * @return rocprofiler_status_t */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_push_external_correlation_id( + rocprofiler_external_correlation_id_t external_correlation_id); /** - * Buffer Property Options + * @brief ROCProfiler Push External Correlation ID. + * + * @param external_correlation_id + * @return rocprofiler_status_t */ -typedef enum -{ - /** - * Flush interval - */ - ROCPROFILER_BUFFER_PROPERTY_KIND_INTERVAL_FLUSH = 0, - // Periodic Flush - // Size - // Think of using the kind as an end of the array!!?? -} rocprofiler_buffer_property_kind_t; - -typedef struct -{ - rocprofiler_buffer_property_kind_t kind; - uint64_t value; -} rocprofiler_buffer_property_t; - -typedef struct -{ - uint64_t value; -} rocprofiler_buffer_id_t; - -typedef struct -{ - uint64_t value; -} rocprofiler_filter_id_t; +rocprofiler_status_t ROCPROFILER_API +rocprofiler_pop_external_correlation_id( + rocprofiler_external_correlation_id_t* external_correlation_id); /** - * Memory pool buffer callback. - * The callback that will be invoked when a memory pool buffer becomes full or - * is flushed by the user or using flush thread that was initiated using the - * flush interval set by the user ::rocprofiler_create_session. - * The user needs to read the record header to identify the record kind and - * depending on the kind they can reinterpret_cast to either - * ::rocprofiler_record_profiler_t if the kind was ::ROCPROFILER_PROFILER_RECORD or - * ::rocprofiler_record_tracer_t if the kind is ::ROCPROFILER_TRACER_RECORD + * @brief Configure Buffer Tracing Service. + * + * @param [in] context_id + * @param [in] kind + * @param [in] operations + * @param [in] operations_count + * @param [in] buffer_id + * @return ::rocprofiler_status_t * - * \param[in] begin pointer to first entry in the buffer. - * \param[in] end pointer to one past the end entry in the buffer. - * \param[in] session_id The session id associated with that record - * \param[in] buffer_id The buffer id associated with that record */ -typedef void (*rocprofiler_buffer_callback_t)(const rocprofiler_record_header_t* begin, - const rocprofiler_record_header_t* end, - rocprofiler_session_id_t session_id, - rocprofiler_buffer_id_t buffer_id); +rocprofiler_status_t ROCPROFILER_API +rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t context_id, + rocprofiler_service_buffer_tracing_kind_t kind, + rocprofiler_trace_operation_t* operations, + size_t operations_count, + rocprofiler_buffer_id_t buffer_id); /** - * Flush specific Buffer + * @brief Query Buffer Trace Kind Name. * - * \param[in] session_id The created session id - * \param[in] buffer_id The buffer ID of the created filter group - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND \n may return if - * the session is not found - * \retval ::ROCPROFILER_STATUS_ERROR_CORRUPTED_SESSION_BUFFER \n may return if - * the session buffer is corrupted + * @param [in] kind + * @param [out] name if nullptr, size will be returned + * @param [out] size + * @return ::rocprofiler_status_t */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_flush_data(rocprofiler_session_id_t session_id, - rocprofiler_buffer_id_t buffer_id) ROCPROFILER_VERSION_9_0; +rocprofiler_status_t ROCPROFILER_API +rocprofiler_query_buffer_trace_kind_name(rocprofiler_service_buffer_tracing_kind_t kind, + const char* name, + size_t* size) ROCPROFILER_NONNULL(3); /** - * Get a pointer to the next profiling record. - * A memory pool generates buffers that contain multiple profiling records. - * This function steps to the next profiling record. + * @brief Query buffer kind operation name. * - * \param[in] record Pointer to the current profiling record in a memory pool - * buffer. - * \param[out] next Pointer to the following profiling record in the memory - * pool buffer. - * \param[in] session_id Session ID - * \param[in] buffer_id Buffer ID - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_RECORD_CORRUPTED \n if the function couldn't - * get the next record because of corrupted data reported by the previous - * record + * @param [in] kind + * @param [in] api_trace_operation_id + * @param [out] name if nullptr, size will be returned + * @param [out] size + * @return ::rocprofiler_status_t */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_next_record(const rocprofiler_record_header_t* record, - const rocprofiler_record_header_t** next, - rocprofiler_session_id_t session_id, - rocprofiler_buffer_id_t buffer_id) ROCPROFILER_VERSION_9_0; +rocprofiler_status_t ROCPROFILER_API +rocprofiler_query_buffer_kind_operation_name( + rocprofiler_service_buffer_tracing_kind_t kind, + rocprofiler_trace_record_operation_kind_t api_trace_operation_id, + const char* name, + size_t* size) ROCPROFILER_NONNULL(4); /** @} */ -/** \defgroup sessions_handling_group ROCProfiler Sessions +/** @} */ + +/** @defgroup PROFILE_CONFIG Profile Configurations * @{ */ /** - * Replay Profiling Modes. + * @brief Counter ID. + * */ -typedef enum +typedef struct { - /** - * No Replay to be done, Mostly for tracing tool or if the user wants to make - * sure that no replays will be done - */ - ROCPROFILER_NONE_REPLAY_MODE = -1, -} rocprofiler_replay_mode_t; + uint64_t handle; +} rocprofiler_counter_id_t; /** - * Create Session - * A ROCProfiler Session is having enough information about what needs to be - * collected or traced and it allows the user to start/stop profiling/tracing - * whenever required. - * Session will hold multiple mode, that can be added using - * ::rocprofiler_add_session_mode, it is required to add at least one session - * mode, if it is tracing or profiling and using ::rocprofiler_session_set_filter - * can set specific data that is required for the profiler or the tracer such - * as the counters for profiling or the APIs for tracing before calling - * ::rocprofiler_start_session, also - * ::rocprofiler_session_set_filter can be used to set optional filters like - * specific GPUs/Kernel Names/API Names and more. Session can be started using - * ::rocprofiler_start_session and can be stopped using - * ::rocprofiler_terminate_session + * @brief Profile Configurations * - * \param[in] replay_mode The Replay strategy that should be used if replay is - * needed - * \param[out] session_id Pointer to the created session id, the session is - * alive up till ::rocprofiler_destroy_session being called, however, the session - * id can be - * used while the session is active which can be activated using - * ::rocprofiler_start_session and deactivated using - * ::rocprofiler_terminate_session but ::rocprofiler_flush_data can use session_id - * even if it is deactivated for flushing the saved records - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_create_session(rocprofiler_replay_mode_t replay_mode, - rocprofiler_session_id_t* session_id) ROCPROFILER_VERSION_9_0; +typedef struct +{ + uint64_t handle; +} rocprofiler_profile_config_id_t; /** - * Destroy Session - * Destroy session created by ::rocprofiler_create_session, please refer to - * the samples for how to use. - * This marks the end of session and its own id life and none of the session - * related functions will be available after this call. + * @brief Create Profile Configuration. * - * \param[in] session_id The created session id - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND \n may return if - * the session is not found + * @param [in] agent Agent identifier + * @param [in] counters_list List of GPU counters + * @param [in] counters_count Size of counters list + * @param [out] config_id Identifier for GPU counters group + * @return ::rocprofiler_status_t */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_destroy_session(rocprofiler_session_id_t session_id) ROCPROFILER_VERSION_9_0; +rocprofiler_status_t ROCPROFILER_API +rocprofiler_create_profile_config(rocprofiler_agent_t agent, + rocprofiler_counter_id_t* counters_list, + size_t counters_count, + rocprofiler_profile_config_id_t* config_id) + ROCPROFILER_NONNULL(4); -/** \defgroup session_filter_group Session Filters Handling - * \ingroup sessions_handling_group +/** + * @brief Destroy Profile Configuration. + * + * @param [in] config_id + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_destroy_profile_config(rocprofiler_profile_config_id_t config_id); + +/** @} */ + +/** @defgroup PROFILE_COUNTING Profile Counting * @{ */ -typedef enum -{ - /** - * Kernel Dispatch Timestamp collection. - */ - ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION = 1, - /** - * GPU Application counter collection. - */ - ROCPROFILER_COUNTERS_COLLECTION = 2, - /** - * PC Sampling collection. (Not Yet Supported) - */ - ROCPROFILER_PC_SAMPLING_COLLECTION = 3, - /** - * ATT Tracing. (Not Yet Supported) - */ - ROCPROFILER_ATT_TRACE_COLLECTION = 4, - /** - * SPM collection. (Not Yet Supported) - */ - ROCPROFILER_SPM_COLLECTION = 5, - /** - * HIP/HSA/ROCTX/SYS Trace. - */ - ROCPROFILER_API_TRACE = 6, - /** - * Sampled Counters - */ - ROCPROFILER_COUNTERS_SAMPLER = 7 -} rocprofiler_filter_kind_t; - /** - * Data Filter Types to be used by ::rocprofiler_session_set_filter to add - * filters to a specific session + * @brief Needs non-typedef specification? */ -typedef enum -{ - /** - * Add HSA API calls that will be only traced (ex. hsa_amd_memory_async_copy) - */ - ROCPROFILER_FILTER_HSA_TRACER_API_FUNCTIONS = 1, - /** - * Add HIP API calls that will be only traced (ex. hipLaunchKernel) - */ - ROCPROFILER_FILTER_HIP_TRACER_API_FUNCTIONS = 2, - /** - * Add GPU names that will be only profiled or traced - */ - ROCPROFILER_FILTER_GPU_NAME = 3, - // TODO(aelwazir): Add more clear description on how to use? - /** - * Add Range of calls to be traced or kernels to be profiled - */ - ROCPROFILER_FILTER_RANGE = 4, - /** - * Add Kernel names that will be profiled or traced - */ - ROCPROFILER_FILTER_KERNEL_NAMES = 5, - /** - * Add Kernel correlation ids that will be profiled or traced for ATT - */ - ROCPROFILER_FILTER_DISPATCH_IDS = 6 -} rocprofiler_filter_property_kind_t; - -// TODO(aelwazir): Another way to define this as needed -typedef const char* rocprofiler_hip_function_name_t; -typedef const char* rocprofiler_hsa_function_name_t; +typedef uint32_t rocprofiler_counter_instance_id_t; /** - * ATT parameters to be used by for collection - */ -typedef enum -{ - ROCPROFILER_ATT_COMPUTE_UNIT_TARGET = 0, - ROCPROFILER_ATT_VM_ID_MASK = 1, - ROCPROFILER_ATT_MASK = 2, - ROCPROFILER_ATT_TOKEN_MASK = 3, - ROCPROFILER_ATT_TOKEN_MASK2 = 4, - ROCPROFILER_ATT_SE_MASK = 5, - ROCPROFILER_ATT_SAMPLE_RATE = 6, - ROCPROFILER_ATT_BUFFER_SIZE = 7, //! ATT collection max data size. - ROCPROFILER_ATT_PERF_MASK = 240, - ROCPROFILER_ATT_PERF_CTRL = 241, - ROCPROFILER_ATT_PERFCOUNTER = 242, - ROCPROFILER_ATT_PERFCOUNTER_NAME = 243, - ROCPROFILER_ATT_MAXVALUE -} rocprofiler_att_parameter_name_t; - -// att tracing parameters object -typedef struct -{ - rocprofiler_att_parameter_name_t parameter_name; - union - { - uint32_t value; - const char* counter_name; - }; -} rocprofiler_att_parameter_t; - -/** - * Filter Data Type - * filter data will be used to report required and optional filters for the - * sessions using ::rocprofiler_session_add_filters + * @brief ROCProfiler Profile Counting Counter per instance. */ typedef struct { - /** - * Filter Property kind - */ - rocprofiler_filter_property_kind_t kind; - // TODO(aelwazir): get HIP or HSA or counters as enums - /** - * Array of data required for the filter type chosen - */ - union - { - const char** name_regex; - rocprofiler_hip_function_name_t* hip_functions_names; - rocprofiler_hsa_function_name_t* hsa_functions_names; - uint32_t range[2]; - uint64_t* dispatch_ids; - }; - /** - * Data array count - */ - uint64_t data_count; -} rocprofiler_filter_property_t; + rocprofiler_counter_id_t counter_id; + rocprofiler_counter_instance_id_t instance_id; + double counter_value; +} rocprofiler_record_counter_t; +/** @defgroup DISPATCH_PROFILE_COUNTING_SERVICE Dispatch Profile Counting + * Service + * @{ + */ + +/** + * @brief ROCProfiler Profile Counting Data. + * + */ typedef struct { + rocprofiler_timestamp_t start_timestamp; + rocprofiler_timestamp_t end_timestamp; /** - * Counters to profile + * Counters, including identifiers to get counter information and Counters + * values + * + * Should it be a record per counter? */ - const char** counters_names; - /** - * Counters count - */ - int counters_count; - /** - * Sampling rate - */ - uint32_t sampling_rate; - /** - * Preferred agents to collect SPM on - */ - rocprofiler_agent_id_t* gpu_agent_id; + rocprofiler_record_counter_t* counters; + uint64_t counters_count; + rocprofiler_correlation_id_t correlation_id; +} rocprofiler_dispatch_profile_counting_record_t; -} rocprofiler_spm_parameter_t; +/** + * @brief Kernel Dispatch Callback + * + * @param [out] queue_id + * @param [out] agent_id + * @param [out] correlation_id + * @param [out] dispatch_packet + * @param [out] callback_data_args + * @param [in] config + */ +typedef void (*rocprofiler_profile_counting_dispatch_callback_t)( + rocprofiler_queue_id_t queue_id, + rocprofiler_agent_t agent_id, + rocprofiler_correlation_id_t correlation_id, + /** + * @brief Kernel Dispatch Packet + * + * It can be used to get the kernel descriptor and then using code_object + * tracing, we can get the kernel name. + * + * dispatch_packet->reserved2 is the correlation_id used to correlate the + * dispatch packet with the corresponding API call. + * + */ + const hsa_kernel_dispatch_packet_t* dispatch_packet, + void* callback_data_args, + rocprofiler_profile_config_id_t* config); -typedef enum -{ - ROCPROFILER_COUNTERS_SAMPLER_PCIE_COUNTERS = 0, - ROCPROFILER_COUNTERS_SAMPLER_XGMI_COUNTERS = 1 -} rocprofiler_counters_sampler_counter_type_t; +/** + * @brief Configure Dispatch Profile Counting Service. + * + * @param [in] context_id + * @param [in] buffer_id + * @param [in] callback + * @param [in] callback_data_args + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_configure_dispatch_profile_counting_service( + rocprofiler_context_id_t context_id, + rocprofiler_agent_t agent_id, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_profile_counting_dispatch_callback_t callback, + void* callback_data_args); +/** @} */ + +/** @defgroup AGENT_PROFILE_COUNTING_SERVICE Agent Profile Counting Service + * @{ + */ + +/** + * @brief ROCProfiler Agent Profile Counting Data. + * + */ typedef struct { - char* name; - rocprofiler_counters_sampler_counter_type_t type; -} rocprofiler_counters_sampler_counter_input_t; - -typedef struct -{ - rocprofiler_counters_sampler_counter_type_t type; - rocprofiler_record_counter_value_t value; -} rocprofiler_counters_sampler_counter_output_t; - -typedef struct -{ - /** - * Counters to profile - */ - rocprofiler_counters_sampler_counter_input_t* counters; - /** - * Counters count - */ - int counters_num; - /** - * Sampling rate (ms) - */ - uint32_t sampling_rate; - /** - * Total sampling duration (ms); time between sampling start/stop - */ - uint32_t sampling_duration; - /** - * Initial delay (ms) - */ - uint32_t initial_delay; - /** - * Preferred agents to collect counters from - */ - int gpu_agent_index; -} rocprofiler_counters_sampler_parameters_t; - -typedef struct -{ - /** - * ROCProfiler General Record base header to identify the id and kind of every - * record - */ - rocprofiler_record_header_t header; - /** - * Agent Identifier to be used by the user to get the Agent Information using - * ::rocprofiler_query_agent_info - */ - rocprofiler_agent_id_t gpu_id; /** * Counters, including identifiers to get counter information and Counters * values */ - rocprofiler_counters_sampler_counter_output_t* counters; - /** - * Number of counter values - */ - uint32_t num_counters; -} rocprofiler_record_counters_sampler_t; + rocprofiler_record_counter_t* counters; + uint64_t counters_count; +} rocprofiler_agent_profile_counting_data_t; /** - * Filter Kind Data + * @brief Configure Profile Counting Service for agent. + * + * @param [in] buffer_id + * @param [in] profile_config_id + * @return ::rocprofiler_status_t */ -typedef union -{ - /** - * APIs to trace - */ - rocprofiler_tracer_activity_domain_t* trace_apis; - /** - * Counters to profile - */ - const char** counters_names; - /** - * att parameters - */ - rocprofiler_att_parameter_t* att_parameters; - /** - * spm counters parameters - */ - rocprofiler_spm_parameter_t* spm_parameters; - /** - * sampled counters parameters - */ - rocprofiler_counters_sampler_parameters_t counters_sampler_parameters; -} rocprofiler_filter_data_t; +rocprofiler_status_t ROCPROFILER_API +rocprofiler_configure_agent_profile_counting_service( + rocprofiler_buffer_id_t buffer_id, + rocprofiler_profile_config_id_t profile_config_id); /** - * Create Session Filter - * This function will create filter and associate it with a specific session - * For every kind, one filter only is allowed per session + * @brief Sample Profile Counting Service for agent. * - * \param[in] session_id Session id where these filters will applied to - * \param[in] filter_kind Filter kind associated with these filters - * \param[in] data Pointer to the filter data - * \param[in] data_count Count of data in the data array given in ::data - * \param[out] filter_id The id of the filter created - * \param[in] property property needed for more filteration requests by the - * user (Only one property is allowed per filter) (Optional) - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_SESSION_NOT_FOUND Couldn't find session - * associated with the given session identifier - * \retval ::ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH \n The session - * filter can't accept the given data - * \retval ::ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED \n Data can't be read or - * corrupted + * @param [out] data // It is always a size of one + * @return ::rocprofiler_status_t */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_create_filter(rocprofiler_session_id_t session_id, - rocprofiler_filter_kind_t filter_kind, - rocprofiler_filter_data_t data, - uint64_t data_count, - rocprofiler_filter_id_t* filter_id, - rocprofiler_filter_property_t property) ROCPROFILER_VERSION_9_0; - -/** - * Set Session Filter Buffer - * This function will associate buffer to a specific filter - * - * if the user wants to get the API traces for the api calls synchronously then - * the user is required to call ::rocprofiler_set_api_trace_sync_callback - * - * \param[in] session_id Session id where these filters will applied to - * \param[in] filter_id The id of the filter - * \param[in] buffer_id The id of the buffer - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_SESSION_NOT_FOUND Couldn't find session - * associated with the given session identifier - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_set_filter_buffer(rocprofiler_session_id_t session_id, - rocprofiler_filter_id_t filter_id, - rocprofiler_buffer_id_t buffer_id) ROCPROFILER_VERSION_9_0; - -/** - * Synchronous Callback - * To be only used by ::rocprofiler_set_api_trace_sync_callback, please refer to - * ::rocprofiler_set_api_trace_sync_callback for more details - * - * \param[in] record pointer to the record. - * \param[in] session_id The session id associated with that record - */ -typedef void (*rocprofiler_sync_callback_t)(rocprofiler_record_tracer_t record, - rocprofiler_session_id_t session_id); - -/** - * Set Session API Tracing Filter Synchronous Callback - * This function will associate buffer to a specific filter - * - * Currently Synchronous callbacks are only available to API Tracing filters - * for the api calls tracing and not available for the api activities or any - * other filter type, the user is responsible to create and set buffer for the - * other types - * - * \param[in] session_id Session id where these filters will applied to - * \param[in] filter_id The id of the filter - * \param[in] callback Synchronous callback - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_SESSION_NOT_FOUND, Couldn't find session - * associated with the given session identifier - * \retval ::ROCPROFILER_STATUS_ERROR_FILTER_NOT_SUPPORTED \n if the filter is not - * related to API tracing - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_set_api_trace_sync_callback(rocprofiler_session_id_t session_id, - rocprofiler_filter_id_t filter_id, - rocprofiler_sync_callback_t callback) - ROCPROFILER_VERSION_9_0; - -/** - * Destroy Session Filter - * This function will destroy a specific filter - * - * \param[in] session_id Session id where these filters will applied to - * \param[in] filter_id The id of the filter - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_SESSION_NOT_FOUND Couldn't find session - * associated with the given session identifier - * \retval ::ROCPROFILER_STATUS_FILTER_NOT_FOUND Couldn't find session filter - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_destroy_filter(rocprofiler_session_id_t session_id, - rocprofiler_filter_id_t filter_id) ROCPROFILER_VERSION_9_0; - -/** - * Create Buffer - * This function will create a buffer that can be associated with a filter - * - * \param[in] session_id Session id where these filters will applied to - * \param[in] buffer_callback Providing a callback for the buffer specialized - * for that filters - * \param[in] buffer_size Providing size for the buffer that will be created - * \param[in] buffer_properties Array of Flush Properties provided by the user - * \param[in] buffer_properties_count The count of the flush properties in the - * array - * \param[out] buffer_id Buffer id that was created - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_SESSION_NOT_FOUND Couldn't find session - * associated with the given session identifier - * \retval ::ROCPROFILER_STATUS_ERROR_SESSION_PROPERTIES_MISMATCH The given - * properties data are mismatching the properties kind - * \retval ::ROCPROFILER_STATUS_ERROR_PROPERTY_DATA_CORRUPTED Data can't be read - * or corrupted - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_create_buffer(rocprofiler_session_id_t session_id, - rocprofiler_buffer_callback_t buffer_callback, - size_t buffer_size, - rocprofiler_buffer_id_t* buffer_id) ROCPROFILER_VERSION_9_0; - -/** - * Setting Buffer Properties - * This function will set buffer properties - * - * \param[in] session_id Session id where the buffer is associated with - * \param[in] buffer_id Buffer id of the buffer that the properties are going - * to be associated with for that filters - * \param[in] buffer_properties Array of Flush Properties provided by the user - * \param[in] buffer_properties_count The count of the flush properties in the - * array - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_SESSION_NOT_FOUND Couldn't find session - * associated with the given session identifier - * \retval ::ROCPROFILER_STATUS_BUFFER_NOT_FOUND Couldn't find buffer - * associated with the given buffer identifier - * \retval ::ROCPROFILER_STATUS_ERROR_SESSION_PROPERTIES_MISMATCH The given - * properties data are mismatching the properties kind - * \retval ::ROCPROFILER_STATUS_ERROR_PROPERTY_DATA_CORRUPTED Data can't be read - * or corrupted - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_set_buffer_properties(rocprofiler_session_id_t session_id, - rocprofiler_buffer_id_t buffer_id, - rocprofiler_buffer_property_t* buffer_properties, - uint32_t buffer_properties_count) ROCPROFILER_VERSION_9_0; - -/** - * Destroy Buffer - * This function will destroy a buffer given its id and session id - * - * \param[in] session_id Session id where these filters will applied to - * \param[in] buffer_id Buffer id that will b e destroyed - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_SESSION_NOT_FOUND Couldn't find session - * associated with the given session identifier - * \retval ::ROCPROFILER_STATUS_BUFFER_NOT_FOUND Couldn't find buffer - * associated with the given buffer identifier - * \retval ::ROCPROFILER_STATUS_ERROR_SESSION_PROPERTIES_MISMATCH The given - * properties data are mismatching the properties kind - * \retval ::ROCPROFILER_STATUS_ERROR_PROPERTY_DATA_CORRUPTED Data can't be read - * or corrupted - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_destroy_buffer(rocprofiler_session_id_t session_id, - rocprofiler_buffer_id_t buffer_id) ROCPROFILER_VERSION_9_0; +rocprofiler_status_t ROCPROFILER_API +rocprofiler_sample_agent_profile_counting_service(rocprofiler_agent_profile_counting_data_t* data); /** @} */ /** - * Create Ready Session - * A one call to create a ready profiling or tracing session, so that the - * session will be ready to collect counters with a one call to - * ::rocprofiler_start_session. - * ::rocprofiler_session_set_filter can be used to set optional filters like - * specific GPUs/Kernel Names/Counter Names and more. The Creation of the - * session is responsible for the creation of the buffer saving the records - * generated while the session is active. Session can be started using - * ::rocprofiler_start_session and can be stopped using - * ::rocprofiler_terminate_session + * @brief Query Counter name. * - * \param[in] counters counter filter data, it is required from the user to - * create the filter with ::ROCPROFILER_FILTER_PROFILER_COUNTER_NAMES and to - * provide an array of counter names needed and their count - * \param[in] replay_mode The Replay strategy that should be used if replay is - * needed - * \param[in] filter_kind Filter kind associated with these filters - * \param[in] data Pointer to the filter data - * \param[in] data_count Filter data array count - * \param[in] buffer_size Size of the memory pool that will be used to save the - * data from profiling or/and tracing, if the buffer was allocated before it - * will be reallocated with the new size in addition to the old size - * \param[in] buffer_callback Asynchronous callback using Memory buffers saving - * the data and then it will be flushed if the user called - * ::rocprofiler_flush_data or if the buffer is full or if the application - * finished execution - * \param[out] session_id Pointer to the created session id, the session is - * alive up till ::rocprofiler_destroy_session being called, however, the session - * id can be used while the session is active which can be activated using - * ::rocprofiler_start_session and deactivated using - * ::rocprofiler_terminate_session but ::rocprofiler_flush_data can use session_id - * even if it is deactivated for flushing the saved records - * \param[in] property Filter Property (Optional) - * \param[in] callback Synchronous callback for API traces (Optional) - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_SESSION_MODE_FILTER_MISMATCH \n The session - * doesn't have the required mode for that filter type - * \retval ::ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED \n Data can't be read or - * corrupted - * \retval ::ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE If the size is less than one - * potential record size + * @param [in] counter_id + * @param [out] name if nullptr, size will be returned + * @param [out] size + * @return ::rocprofiler_status_t */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_create_ready_session(rocprofiler_replay_mode_t replay_mode, - rocprofiler_filter_kind_t filter_kind, - rocprofiler_filter_data_t data, - uint64_t data_count, - size_t buffer_size, - rocprofiler_buffer_callback_t buffer_callback, - rocprofiler_session_id_t* session_id, - rocprofiler_filter_property_t property, - rocprofiler_sync_callback_t callback) ROCPROFILER_VERSION_9_0; - -// TODO(aelwazir): Multiple sessions activate for different set of filters -/** - * Activate Session - * Activating session created by ::rocprofiler_create_session, please refer to - * the samples for how to use. - * - * \param[in] session_id Session ID representing the created session - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully - * \retval ::ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED, if rocprofiler_initialize - * wasn't called before or if rocprofiler_finalize is called - * \retval ::ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND \n may return if - * the session is not found - * \retval ::ROCPROFILER_STATUS_ERROR_SESSION_MODE_NOT_ADDED if there is no - * session_mode added - * \retval ::ROCPROFILER_STATUS_ERROR_MISSING_SESSION_CALLBACK if any - * session_mode is missing callback set - * \retval ::ROCPROFILER_STATUS_ERROR_HAS_ACTIVE_SESSION \n if there is already - * active session - */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_start_session(rocprofiler_session_id_t session_id) ROCPROFILER_VERSION_9_0; +rocprofiler_status_t ROCPROFILER_API +rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char* name, size_t* size) + ROCPROFILER_NONNULL(3); /** - * Deactivate Session - * Deactivate session created by ::rocprofiler_create_session, please refer to - * the samples for how to use. + * @brief Query Counter Instances Count. * - * \param[in] session_id Session ID for the session that will be terminated - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully - * \retval ::ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND \n may return if - * the session is not found - * \retval ::ROCPROFILER_STATUS_ERROR_SESSION_NOT_ACTIVE if the session is not - * active + * @param [in] counter_id + * @param [out] instance_count + * @return rocprofiler_status_t */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_query_counter_instance_count(rocprofiler_counter_id_t counter_id, + size_t* instance_count) ROCPROFILER_NONNULL(2); -ROCPROFILER_API rocprofiler_status_t -rocprofiler_terminate_session(rocprofiler_session_id_t session_id) ROCPROFILER_VERSION_9_0; +/** + * @brief Query Agent Counters Availability. + * + * @param [in] agent + * @param [out] counters_list + * @param [out] counters_count + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_query_agent_supported_counters(rocprofiler_agent_t agent, + rocprofiler_counter_id_t* counters_list, + size_t* counters_count) ROCPROFILER_NONNULL(2, 3); /** @} */ -/** \defgroup device_profiling Device Profiling API +/** @defgroup PC_SAMPLING_SERVICE PC Sampling Service * @{ */ +/** + * @brief ROCProfiler PC Sampling Record. + * + */ typedef struct { - double value; -} rocprofiler_counter_value_t; + uint64_t pc; + uint64_t dispatch_id; + uint64_t timestamp; + uint64_t hardware_id; + union + { + uint8_t arb_value; + }; + union + { + void* data; + }; +} rocprofiler_pc_sampling_record_t; +/** + * @brief PC Sampling Method. + * + */ +typedef enum +{ + ROCPROFILER_PC_SAMPLING_METHOD_NONE = 0, + ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC = 1, + ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP = 2, + ROCPROFILER_PC_SAMPLING_METHOD_LAST, +} rocprofiler_pc_sampling_method_t; + +/** + * @brief PC Sampling Unit. + * + */ +typedef enum +{ + ROCPROFILER_PC_SAMPLING_UNIT_NONE = 0, + ROCPROFILER_PC_SAMPLING_UNIT_INSTRUCTIONS = 1, + ROCPROFILER_PC_SAMPLING_UNIT_CYCLES = 2, + ROCPROFILER_PC_SAMPLING_UNIT_TIME = 3, + ROCPROFILER_PC_SAMPLING_UNIT_LAST, +} rocprofiler_pc_sampling_unit_t; + +/** + * @brief Create PC Sampling Service. + * + * @param [in] context_id + * @param [in] agent_id + * @param [in] method + * @param [in] unit + * @param [in] interval + * @param [in] buffer_id + * @return ::rocprofiler_status_t + * + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t context_id, + rocprofiler_agent_t agent, + rocprofiler_pc_sampling_method_t method, + rocprofiler_pc_sampling_unit_t unit, + uint64_t interval, + rocprofiler_buffer_id_t buffer_id); + +struct rocprofiler_pc_sampling_configuration_s +{ + rocprofiler_pc_sampling_method_t method; + rocprofiler_pc_sampling_unit_t unit; + size_t min_interval; + size_t max_interval; + uint64_t flags; +}; + +/** + * @brief Query PC Sampling Configuration. + * + * @param [in] agent_id + * @param [out] config + * @param [out] config_count + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t agent, + rocprofiler_pc_sampling_configuration_t* config, + size_t* config_count) ROCPROFILER_NONNULL(2, 3); + +/** @} */ + +/** @defgroup SPM_SERVICE SPM Service + * @{ + */ + +/** + * @brief ROCProfiler SPM Record. + * + */ typedef struct { - char metric_name[64]; - rocprofiler_counter_value_t value; -} rocprofiler_device_profile_metric_t; + /** + * Counters, including identifiers to get counter information and Counters + * values + */ + rocprofiler_record_counter_t* counters; + uint64_t counters_count; +} rocprofiler_spm_record_t; /** - * Create a device profiling session + * @brief Configure SPM Service. * - * A device profiling session allows the user to profile the GPU device - * for counters irrespective of the running applications on the GPU. - * This is different from application profiling. device profiling session - * doesn't care about the host running processes and threads. It directly - * provides low level profiling information. - * - * \param[in] counter_names The names of the counters to be collected. - * \param[in] num_counters The number of counters specifief to be collected - * \param[out] session_id Pointer to the created session id. - * \param[in] cpu_index index of the cpu to be used - * \param[in] gpu_index index of the gpu to be used - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. + * @param [in] context_id + * @param [in] buffer_id + * @param [in] profile_config + * @param [in] interval + * @return ::rocprofiler_status_t */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_device_profiling_session_create(const char** counter_names, - uint64_t num_counters, - rocprofiler_session_id_t* session_id, - int cpu_index, - int gpu_index) ROCPROFILER_VERSION_9_0; +rocprofiler_status_t ROCPROFILER_API +rocprofiler_configure_spm_service(rocprofiler_context_id_t context_id, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_profile_config_id_t profile_config, + uint64_t interval); + +/** @} */ + +/** @} */ + +/** @defgroup BUFFER_HANDLING Buffer + * @{ + * + * Every Buffer is associated with a specific service kind. + * OR + * Every Buffer is associated with a specific service ID. + * + */ + +// TODO: We need to add rocprofiler_record_header_t +/** + * @brief Generic record with a type and a pointer to data + */ +typedef struct +{ + uint64_t kind; + void* payload; +} rocprofiler_record_header_t; + +typedef rocprofiler_record_header_t rocprofiler_record_tracer_t; /** - * Start the device profiling session that was created previously. - * This will enable the GPU device to start incrementing counters + * @brief Async callback function. * - * \param[in] session_id session id of the session to start - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. + * @code{.cpp} + * for(size_t i = 0; i < num_headers; ++i) + * { + * rocprofiler_record_header_t* hdr = headers[i]; + * if(hdr->kind == ROCPROFILER_RECORD_KIND_PC_SAMPLE) + * { + * auto* data = static_cast(&hdr->payload); + * ... + * } + * } + * @endcode */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_device_profiling_session_start(rocprofiler_session_id_t session_id) - ROCPROFILER_VERSION_9_0; +typedef void (*rocprofiler_buffer_callback_t)(rocprofiler_context_id_t context, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* data, + uint64_t drop_count); /** - * Poll the device profiling session to read counters from the GPU device. - * This will read out the values of the counters from the GPU device at the - * specific instant when this API is called. This is a thread-blocking call. - * Any thread that calls this API will have to wait until - * the counter values are being read out. + * @brief Actions when Buffer is full. * - * \param[in] session_id session id of the session to start - * \param[out] data records of counter data read out from device - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_device_profiling_session_poll(rocprofiler_session_id_t session_id, - rocprofiler_device_profile_metric_t* data) - ROCPROFILER_VERSION_9_0; +typedef enum +{ + ROCPROFILER_BUFFER_POLICY_NONE = 0, + /** + * Drop records when buffer is full. + */ + ROCPROFILER_BUFFER_POLICY_DISCARD = 1, + /** + * Block when buffer is full. + */ + ROCPROFILER_BUFFER_POLICY_LOSSLESS = 2, + ROCPROFILER_BUFFER_POLICY_LAST, +} rocprofiler_buffer_policy_t; /** - * Stop the device profiling session that was created previously. - * This will inform the GPU device to stop counters collection. + * @brief Create buffer. * - * \param[in] session_id session id of the session to start - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. + * @param [in] context Context identifier associated with buffer + * @param [in] size Size of the buffer in bytes + * @param [in] watermark - watermark size, where the callback is called, if set + * to 0 then the callback will be called on every record + * @param [in] policy Behavior policy when buffer is full + * @param [in] callback Callback to invoke when buffer is flushed/full + * @param [out] buffer_id Identification handle for buffer + * @return ::rocprofiler_status_t */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_device_profiling_session_stop(rocprofiler_session_id_t session_id) - ROCPROFILER_VERSION_9_0; +rocprofiler_status_t ROCPROFILER_API +rocprofiler_create_buffer(rocprofiler_context_id_t context, + size_t size, + size_t watermark, + rocprofiler_buffer_policy_t policy, + rocprofiler_buffer_callback_t callback, + void* callback_data, + rocprofiler_buffer_id_t* buffer_id) ROCPROFILER_NONNULL(5, 7); /** - * Destroy the device profiling session that was created previously. + * @brief Destroy buffer. * - * \param[in] session_id session id of the session to start - * \retval ::ROCPROFILER_STATUS_SUCCESS The function has been executed - * successfully. + * @param [in] buffer_id + * @return ::rocprofiler_status_t + * + * Note: This will destroy the buffer even if it is not empty. The user can + * call @ref ::rocprofiler_flush_buffer before it to make sure the buffer is empty. */ -ROCPROFILER_API rocprofiler_status_t -rocprofiler_device_profiling_session_destroy(rocprofiler_session_id_t session_id) - ROCPROFILER_VERSION_9_0; +rocprofiler_status_t ROCPROFILER_API +rocprofiler_destroy_buffer(rocprofiler_buffer_id_t buffer_id); + +/** + * @brief Flush buffer. + * + * @param [in] buffer_id + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_flush_buffer(rocprofiler_buffer_id_t buffer_id); /** @} */ #ifdef __cplusplus } // extern "C" block #endif // __cplusplus - -#endif // INC_ROCPROFILER_H_ diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/rocprofiler_plugin.h b/projects/rocprofiler-sdk/source/include/rocprofiler/rocprofiler_plugin.h index b5ae40d5ee..afe8b8d6d1 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler/rocprofiler_plugin.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/rocprofiler_plugin.h @@ -1,22 +1,24 @@ -/* Copyright (c) 2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. /** \section rocprofiler_plugin_api ROCProfiler Plugin API * @@ -39,13 +41,12 @@ * ROCProfiler Tool Plugin API interface. */ -#ifndef ROCPROFILER_PLUGIN_H_ -#define ROCPROFILER_PLUGIN_H_ +#pragma once + +#include "rocprofiler/rocprofiler.h" #include -#include "rocprofiler.h" - #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ @@ -109,15 +110,15 @@ rocprofiler_plugin_finalize(); * * @param[in] begin Pointer to the first record. * @param[in] end Pointer to one past the last record. - * @param[in] session_id Session ID + * @param[in] context_id context ID * @param[in] buffer_id Buffer ID * @return Returns 0 on success and -1 on error. */ ROCPROFILER_EXPORT int -rocprofiler_plugin_write_buffer_records(const rocprofiler_record_header_t* begin, - const rocprofiler_record_header_t* end, - rocprofiler_session_id_t session_id, - rocprofiler_buffer_id_t buffer_id); +rocprofiler_plugin_write_buffer_records(rocprofiler_context_id_t context_id, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_record_header_t** headers, + size_t num_headers); /** * Report Synchronous Record. @@ -138,5 +139,3 @@ rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record); #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ - -#endif /* ROCPROFILER_PLUGIN_H_ */ diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/version.h.in b/projects/rocprofiler-sdk/source/include/rocprofiler/version.h.in new file mode 100644 index 0000000000..4e24a75f48 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/version.h.in @@ -0,0 +1,61 @@ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +/** + * @def ROCPROFILER_VERSION_MAJOR + * @brief The major version of the interface as a macro so it can be used + * by the preprocessor. + * @addtogroup VERSIONING_GROUP + * + * @def ROCPROFILER_VERSION_MINOR + * @brief The minor version of the interface as a macro so it can be used + * by the preprocessor. + * @addtogroup VERSIONING_GROUP + */ + +// clang-format off +#define ROCPROFILER_VERSION_MAJOR @PROJECT_VERSION_MAJOR@ +#define ROCPROFILER_VERSION_MINOR @PROJECT_VERSION_MINOR@ +#define ROCPROFILER_VERSION_PATCH @PROJECT_VERSION_PATCH@ +#define ROCPROFILER_VERSION_STRING "@FULL_VERSION_STRING@" +#define ROCPROFILER_SOVERSION "@PROJECT_VERSION_MAJOR@" +#define ROCPROFILER_GIT_DESCRIBE "@ROCPROFILER_GIT_DESCRIBE@" +#define ROCPROFILER_GIT_REVISION "@ROCPROFILER_GIT_REVISION@" + +// system info during compilation +#define ROCPROFILER_LIBRARY_ARCH "@CMAKE_LIBRARY_ARCHITECTURE@" +#define ROCPROFILER_SYSTEM_NAME "@CMAKE_SYSTEM_NAME@" +#define ROCPROFILER_SYSTEM_PROCESSOR "@CMAKE_SYSTEM_PROCESSOR@" +#define ROCPROFILER_SYSTEM_VERSION "@CMAKE_SYSTEM_VERSION@" + +// compiler information +#define ROCPROFILER_COMPILER_ID "@CMAKE_CXX_COMPILER_ID@" +#define ROCPROFILER_COMPILER_VERSION "@CMAKE_CXX_COMPILER_VERSION@" +// clang-format on + +#define ROCPROFILER_COMPILER_STRING ROCPROFILER_COMPILER_ID " v" ROCPROFILER_COMPILER_VERSION + +#define ROCPROFILER_VERSION \ + ((10000 * ROCPROFILER_VERSION_MAJOR) + (100 * ROCPROFILER_VERSION_MINOR) + \ + ROCPROFILER_VERSION_PATCH) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler/CMakeLists.txt index 58f0f5a3b3..b478d2c1db 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/CMakeLists.txt @@ -24,4 +24,5 @@ set_target_properties( rocprofiler-library PROPERTIES OUTPUT_NAME rocprofiler64 SOVERSION ${PROJECT_VERSION_MAJOR} - VERSION ${PROJECT_VERSION}) + VERSION ${PROJECT_VERSION} + DEFINE_SYMBOL rocprofiler_EXPORTS)