6eb06cf201
Squashed commit of the following: commit f029195705a15700380c6f832ba5d15d46fd6de7 Author: Jonathan R. Madsen <jrmadsen@users.noreply.github.com> Date: Thu Jul 13 14:38:56 2023 -0500 Formatting workflows for source (clang-format) and cmake (cmake-format) (#4) * Add .cmake-format.yaml file * Add formatting workflow * provide base input for creating PR * Update scheme for extracting branch name - disable running formatting on push to amd-staging branch * patch .cmake-format.yaml for find_package signature - apparently cmake-format doesn't format the full signature of find_package * run formatting (clang-format v11) (#7) Co-authored-by: jrmadsen <jrmadsen@users.noreply.github.com> * run cmake formatting (cmake-format) (#6) Co-authored-by: jrmadsen <jrmadsen@users.noreply.github.com> --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> commit bc4d135fdd8a1a9e51235f18a5d575fd2b3735e6 Author: Ammar ELWazir <aelwazir@amd.com> Date: Thu Jul 13 12:55:17 2023 -0500 Removing Build cache for potential issues with auto-generated header files (#5) Change-Id: I9e2319f4335e2f88585ffa6fac2bd88a1c952e6e commit ce86dea6a311d44d880fa684eb78f3329295e2a4 Author: Jonathan R. 
Madsen <jrmadsen@users.noreply.github.com> Date: Thu Jul 13 11:08:58 2023 -0500 Fix decltype(<hsa-function>) function pointer usage (#3) - the following is done in several places: decltype(hsa_memory_allocate)* hsa_memory_allocate - above can cause compiler errors - replace decltype(<hsa-function>) with decltype(::<hsa-function>) - this ensures that the type within the decltype is recognized as the global scope HSA function, not the variable - in many places, the variable has a "_fn" suffix to prevent this issue but added '::' anyway for consistency commit ac49fdd92a72e9c99394253a02da413a6c2e3b3a Merge: a07946a 03a0855 Author: Ammar ELWazir <aelwazir@amd.com> Date: Wed Jul 12 11:36:24 2023 -0500 Merge pull request #2 from ROCm-Developer-Tools/gerrit-amd-staging Pull from gerrit commit 03a085588cffe863e8f466de67be1cfb205b675a Merge:c26b32ba07946a Author: Ammar ELWazir <aelwazir@amd.com> Date: Wed Jul 12 10:57:30 2023 -0500 Merge branch 'amd-staging' into gerrit-amd-staging commit a07946a5cd4c670c83c27ad1a076a9d4567ce6d7 Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Wed Jul 12 15:46:04 2023 +0000 Enabling Cached Builds commit 525e494a7f13941077a8fd4ad6840904db4d27d4 Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Wed Jul 12 04:53:54 2023 +0000 Updating missed GPU Targets commit 42c75862f628c9bee7cfb7dc04dff2619430efbc Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Wed Jul 12 04:43:02 2023 +0000 Adding V1 Testing commit 9d72fd4aee85e4b0c12e717060d2730fa5b73be1 Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Wed Jul 12 03:34:31 2023 +0000 Fixing Artifacts directory path commit f4000cc558b3b2e4676f7994f7ce8c8e6f94518e Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Wed Jul 12 03:27:26 2023 +0000 Fixing CMake for test build job commit 2ce8115d4c33948c3c8f957f545a95a04e1d6cd2 Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Wed Jul 12 03:16:18 2023 +0000 Fixing Ubuntu CMake for ubuntu test build commit 6d0ed439191be900748d0c025157f9d689a73ec7 
Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Wed Jul 12 01:28:41 2023 +0000 Removing Navi21 commit e349a7642e5ae5eb03ab9fcd0a0f74f09f78cab5 Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Wed Jul 12 01:14:14 2023 +0000 Removing Navi21 commit fefd02fe68d2a4bca7ec2e381960ad004ee9fc5b Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Wed Jul 12 00:42:48 2023 +0000 Fixing CMake Job commit 2ea46abf7bf92643efa8c549fa70346ffbd79d65 Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Wed Jul 12 00:35:13 2023 +0000 Fixing CMake Job commit d99d681ed1999c5fcf291dc678b11a77205fb0f3 Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Wed Jul 12 00:32:13 2023 +0000 Fixing Pull Latest Dockers and CMake Jobs commit dfc4498072d13b4a1df3a63047d34c682c3d9a29 Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Tue Jul 11 23:54:21 2023 +0000 Fixing CMake job commit 919efe04de707f7c702031be15c3e2c5f8442cbb Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Tue Jul 11 23:52:13 2023 +0000 Adding Pull Last dockers job commit be1b1256e8b0e05308e8f7e7e69bee3acca55281 Author: Ammar ELWazir <aelwazir@amd.com> Date: Tue Jul 11 18:25:40 2023 -0500 Update cmake.yml commit 212299fa4355ae6ec18f9aaacbb79c51ea6c6f97 Author: Ammar ELWazir <aelwazir@amd.com> Date: Tue Jul 11 18:23:35 2023 -0500 Update cmake.yml commit 7c2c1327086a61466cc6cac39f70865c051a8bc7 Author: Ammar ELWazir <aelwazir@amd.com> Date: Tue Jul 11 18:18:53 2023 -0500 Update cmake.yml commit 191b5ce007e612e814c1d7a3afb4ad398f3852e1 Author: Ammar ELWazir <aelwazir@amd.com> Date: Tue Jul 11 16:03:22 2023 -0500 Update cmake.yml commit 8824113d95f3e13c7ce4d0af8e0d9d8f522a6c4a Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Tue Jul 11 16:28:09 2023 +0000 Fixing Pull from Gerrit job name Change-Id: I9e7ed9a27a13ca49d62c93bdadb30f0057e4d385 commit cc3d5e4b02ffb439e8cc2b3efa53527c376f9982 Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Tue Jul 11 16:21:43 2023 +0000 Adding Staging sync job Change-Id: 
I0551f43878b0678ce4b3e74e27d62357cf95ad95 commit b9be2eee71380a2e6dd34d520e92d0c4209277a0 Author: Ammar ELWazir <Ammar.ELWazir@amd.com> Date: Tue Jul 11 15:57:11 2023 +0000 Fixing build.sh Change-Id: Ia987b0244f0875370d5fe69907b3f5e9cea914de commit 9eee33a95a1abd656a7ac5ca10a9f245e9825431 Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 21:39:46 2023 -0500 Update cmake.yml commit 7093b85a78497140e8b52632ca2a002bdaeacd62 Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 21:33:29 2023 -0500 Update cmake.yml commit f54697172c72a67740f9fdfa0c217b6ea6931576 Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 21:01:26 2023 -0500 Update cmake.yml commit 1b6620e16f8940386b0f4f04e69e2410d21c0e26 Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 20:21:02 2023 -0500 Update cmake.yml commit a94bec740c6b42c4b79c87bca20fa87b99bf060d Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 19:46:35 2023 -0500 Update cmake.yml commit 85d6b29d4375a69d575c18ece8542c50f2ddfcc3 Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 19:34:39 2023 -0500 Update cmake.yml commit 8c004887cf1435f1a6214c3d2455299a8a27bd4c Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 19:31:17 2023 -0500 Update cmake.yml commit a14a9168e17d9348a53c6e9c9a47ba1edb4c4509 Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 19:25:46 2023 -0500 Update cmake.yml commit 000f2f40b84e6a2f7d4becdbf5aed01436ca4c83 Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 19:08:18 2023 -0500 Update cmake.yml commit a28a53d56731cad848fa9133d1c4dbaa8fc7afa7 Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 19:03:39 2023 -0500 Update cmake.yml commit a6a2db01027f0b01fdfbb5997ddb772c7f51b649 Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 18:21:53 2023 -0500 Update cmake.yml commit 118ef2a88b2d44e3207c31c343da3e5e5ec6f176 Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 17:55:57 2023 -0500 Update cmake.yml commit 
03c4c232396440cd0be6d2dd7baf4ceea1c2589d Author: Ammar ELWazir <aelwazir@amd.com> Date: Mon Jul 10 17:48:49 2023 -0500 Create cmake.yml Change-Id: I77992f15694e77cbae49c56f9ff02f4f9079235d [ROCm/rocprofiler commit:d4a33cf33a]
601 linhas
25 KiB
C
601 linhas
25 KiB
C
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/

////////////////////////////////////////////////////////////////////////////////
//
// ROC Profiler API
//
// The goal of the implementation is to provide a HW specific low-level
// performance analysis interface for profiling of GPU compute applications.
// The profiling includes HW performance counters (PMC) with complex
// performance metrics and traces.
//
// The library can be used by a tool library loaded by HSA runtime or by
// higher level HW independent performance analysis API like PAPI.
//
// The library is written in C and is based on the AMD-specific AQLprofile
// HSA extension. The library implementation requires HSA API intercepting and
// a profiling queue supporting a submit callback interface.
//
//

#ifndef INC_ROCPROFILER_H_
|
|
#define INC_ROCPROFILER_H_
|
|
|
|
/* Placeholder for calling convention and import/export macros */
#if !defined(ROCPROFILER_CALL)
#define ROCPROFILER_CALL
#endif /* !defined (ROCPROFILER_CALL) */

/* Export decorator: default ELF visibility on GNU-compatible compilers,
 * dllexport on MSVC. On any other compiler it is defined empty so that
 * ROCPROFILER_EXPORT never expands to an undeclared identifier. */
#if !defined(ROCPROFILER_EXPORT_DECORATOR)
#if defined(__GNUC__)
#define ROCPROFILER_EXPORT_DECORATOR __attribute__((visibility("default")))
#elif defined(_MSC_VER)
#define ROCPROFILER_EXPORT_DECORATOR __declspec(dllexport)
#else
#define ROCPROFILER_EXPORT_DECORATOR
#endif /* defined (__GNUC__) / defined (_MSC_VER) */
#endif /* !defined (ROCPROFILER_EXPORT_DECORATOR) */

/* Import decorator: empty on GNU-compatible compilers, dllimport on MSVC,
 * empty on any other compiler for the same reason as above. */
#if !defined(ROCPROFILER_IMPORT_DECORATOR)
#if defined(__GNUC__)
#define ROCPROFILER_IMPORT_DECORATOR
#elif defined(_MSC_VER)
#define ROCPROFILER_IMPORT_DECORATOR __declspec(dllimport)
#else
#define ROCPROFILER_IMPORT_DECORATOR
#endif /* defined (__GNUC__) / defined (_MSC_VER) */
#endif /* !defined (ROCPROFILER_IMPORT_DECORATOR) */

#define ROCPROFILER_EXPORT ROCPROFILER_EXPORT_DECORATOR ROCPROFILER_CALL
#define ROCPROFILER_IMPORT ROCPROFILER_IMPORT_DECORATOR ROCPROFILER_CALL

/* ROCPROFILER_API selects export when building the library
 * (ROCPROFILER_EXPORTS defined) and import otherwise.
 * NOTE(review): the outer guard tests ROCPROFILER, not ROCPROFILER_API, so a
 * pre-defined ROCPROFILER_API would be redefined here -- kept as in the
 * original; confirm whether ROCPROFILER_API was intended. */
#if !defined(ROCPROFILER)
#if defined(ROCPROFILER_EXPORTS)
#define ROCPROFILER_API ROCPROFILER_EXPORT
#else /* !defined (ROCPROFILER_EXPORTS) */
#define ROCPROFILER_API ROCPROFILER_IMPORT
#endif /* !defined (ROCPROFILER_EXPORTS) */
#endif /* !defined (ROCPROFILER) */

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

#include <hsa/amd_hsa_kernel_code.h>
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include <hsa/hsa_ven_amd_aqlprofile.h>

// Version of the API declared by this header
#define ROCPROFILER_VERSION_MAJOR 8
#define ROCPROFILER_VERSION_MINOR 0

////////////////////////////////////////////////////////////////////////////////
// Returning library version
//
// Declared with (void): in C an empty parameter list leaves the arguments
// unspecified, so calls with stray arguments would not be diagnosed.
uint32_t rocprofiler_version_major(void);
uint32_t rocprofiler_version_minor(void);

////////////////////////////////////////////////////////////////////////////////
// Global properties structure
//
// Plain aggregate of library-wide settings. This header does not document
// the meaning or the valid range of the individual fields.
typedef struct {
  uint32_t intercept_mode;
  uint32_t code_obj_tracking;
  uint32_t memcopy_tracking;
  uint32_t trace_size;
  uint32_t trace_local;
  uint64_t timeout;  // the only 64-bit field; units are not stated here
  uint32_t timestamp_on;
  uint32_t hsa_intercepting;
  uint32_t k_concurrent;
  uint32_t opt_mode;
  uint32_t obj_dumping;
} rocprofiler_settings_t;

////////////////////////////////////////////////////////////////////////////////
|
|
// Returning the error string method
|
|
|
|
hsa_status_t rocprofiler_error_string(
|
|
const char** str); // [out] the API error string pointer returning
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
// Profiling features and data
//
// Profiling features objects have profiling feature info, type, parameters and data
// Also profiling data samples can be iterated using a callback

// Profiling feature kind
// NOTE(review): the *_MOD values are distinct bits (2 and 4), which suggests
// they are modifiers combined with a base kind -- confirm against the users.
typedef enum {
  ROCPROFILER_FEATURE_KIND_METRIC = 0,
  ROCPROFILER_FEATURE_KIND_TRACE = 1,
  ROCPROFILER_FEATURE_KIND_SPM_MOD = 2,
  ROCPROFILER_FEATURE_KIND_PCSMP_MOD = 4
} rocprofiler_feature_kind_t;

// Profiling feture parameter
|
|
typedef hsa_ven_amd_aqlprofile_parameter_t rocprofiler_parameter_t;
|
|
|
|
// Profiling data kind
// Each non-zero value has a same-named member in the rocprofiler_data_t union.
typedef enum {
  ROCPROFILER_DATA_KIND_UNINIT = 0,
  ROCPROFILER_DATA_KIND_INT32 = 1,
  ROCPROFILER_DATA_KIND_INT64 = 2,
  ROCPROFILER_DATA_KIND_FLOAT = 3,
  ROCPROFILER_DATA_KIND_DOUBLE = 4,
  ROCPROFILER_DATA_KIND_BYTES = 5
} rocprofiler_data_kind_t;

// Profiling data type
|
|
typedef struct {
|
|
rocprofiler_data_kind_t kind; // result kind
|
|
union {
|
|
uint32_t result_int32; // 32bit integer result
|
|
uint64_t result_int64; // 64bit integer result
|
|
float result_float; // float single-precision result
|
|
double result_double; // float double-precision result
|
|
struct {
|
|
void* ptr;
|
|
uint32_t size;
|
|
uint32_t instance_count;
|
|
bool copy;
|
|
} result_bytes; // data by ptr and byte size
|
|
};
|
|
} rocprofiler_data_t;
|
|
|
|
// Profiling feature type
|
|
typedef struct {
|
|
rocprofiler_feature_kind_t kind; // feature kind
|
|
union {
|
|
const char* name; // feature name
|
|
struct {
|
|
const char* block; // counter block name
|
|
uint32_t event; // counter event id
|
|
} counter;
|
|
};
|
|
const rocprofiler_parameter_t* parameters; // feature parameters array
|
|
uint32_t parameter_count; // feature parameters count
|
|
rocprofiler_data_t data; // profiling data
|
|
} rocprofiler_feature_t;
|
|
|
|
// Profiling features set type
// Opaque: only ever used through a pointer in this header.
typedef void rocprofiler_feature_set_t;

////////////////////////////////////////////////////////////////////////////////
// Profiling context
//
// Profiling context object accumulates all profiling information

// Profiling context object
// Opaque: only ever used through a pointer in this header.
typedef void rocprofiler_t;

// Profiling group object
|
|
typedef struct {
|
|
unsigned index; // group index
|
|
rocprofiler_feature_t** features; // profiling info array
|
|
uint32_t feature_count; // profiling info count
|
|
rocprofiler_t* context; // context object
|
|
} rocprofiler_group_t;
|
|
|
|
// Profiling mode mask
// Values are distinct bits; the open functions take the mode as a uint32_t mask.
typedef enum {
  ROCPROFILER_MODE_STANDALONE = 1,   // standalone mode when ROC profiler supports a queue
  ROCPROFILER_MODE_CREATEQUEUE = 2,  // ROC profiler creates queue in standalone mode
  ROCPROFILER_MODE_SINGLEGROUP = 4   // only one group is allowed, failed otherwise
} rocprofiler_mode_t;

// Profiling handler, calling on profiling completion
|
|
typedef bool (*rocprofiler_handler_t)(rocprofiler_group_t group, void* arg);
|
|
|
|
// Profiling preperties
|
|
typedef struct {
|
|
hsa_queue_t* queue; // queue for STANDALONE mode
|
|
// the queue is created and returned in CREATEQUEUE mode
|
|
uint32_t queue_depth; // created queue depth
|
|
rocprofiler_handler_t handler; // handler on completion
|
|
void* handler_arg; // the handler arg
|
|
} rocprofiler_properties_t;
|
|
|
|
// Create new profiling context
|
|
hsa_status_t rocprofiler_open(hsa_agent_t agent, // GPU handle
|
|
rocprofiler_feature_t* features, // [in] profiling features array
|
|
uint32_t feature_count, // profiling info count
|
|
rocprofiler_t** context, // [out] context object
|
|
uint32_t mode, // profiling mode mask
|
|
rocprofiler_properties_t* properties); // profiling properties
|
|
|
|
// Add feature to a features set
|
|
hsa_status_t rocprofiler_add_feature(
|
|
const rocprofiler_feature_t* feature, // [in]
|
|
rocprofiler_feature_set_t* features_set); // [in/out] profiling features set
|
|
|
|
// Create new profiling context
|
|
hsa_status_t rocprofiler_features_set_open(
|
|
hsa_agent_t agent, // GPU handle
|
|
rocprofiler_feature_set_t* features_set, // [in] profiling features set
|
|
rocprofiler_t** context, // [out] context object
|
|
uint32_t mode, // profiling mode mask
|
|
rocprofiler_properties_t* properties); // profiling properties
|
|
|
|
// Delete profiling info
|
|
hsa_status_t rocprofiler_close(rocprofiler_t* context); // [in] profiling context
|
|
|
|
// Context reset before reusing
|
|
hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling context
|
|
uint32_t group_index); // group index
|
|
|
|
// Return context agent
|
|
hsa_status_t rocprofiler_get_agent(rocprofiler_t* context, // [in] profiling context
|
|
hsa_agent_t* agent); // [out] GPU handle
|
|
|
|
// Supported time value ID
// Selects the clock that rocprofiler_get_time converts a timestamp to.
typedef enum {
  ROCPROFILER_TIME_ID_CLOCK_REALTIME = 0,          // Linux realtime clock time
  ROCPROFILER_TIME_ID_CLOCK_REALTIME_COARSE = 1,   // Linux realtime-coarse clock time
  ROCPROFILER_TIME_ID_CLOCK_MONOTONIC = 2,         // Linux monotonic clock time
  ROCPROFILER_TIME_ID_CLOCK_MONOTONIC_COARSE = 3,  // Linux monotonic-coarse clock time
  ROCPROFILER_TIME_ID_CLOCK_MONOTONIC_RAW = 4      // Linux monotonic-raw clock time
} rocprofiler_time_id_t;

// Return time value for a given time ID and profiling timestamp
|
|
hsa_status_t rocprofiler_get_time(
|
|
rocprofiler_time_id_t time_id, // identifier of the particular time to convert the timesatmp
|
|
uint64_t timestamp, // profiling timestamp
|
|
uint64_t* value_ns, // [out] returned time 'ns' value, ignored if NULL
|
|
uint64_t* error_ns); // [out] returned time error 'ns' value, ignored if NULL
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
// Queue callbacks
//
// Queue callbacks for initiating profiling per kernel dispatch and to wait
// the profiling data on the queue destroy.

// Dispatch record
// Four timestamps of one kernel dispatch, all in nanoseconds.
typedef struct {
  uint64_t dispatch;  // dispatch timestamp, ns
  uint64_t begin;     // kernel begin timestamp, ns
  uint64_t end;       // kernel end timestamp, ns
  uint64_t complete;  // completion signal timestamp, ns
} rocprofiler_dispatch_record_t;

// Profiling callback data
|
|
typedef struct {
|
|
hsa_agent_t agent; // GPU agent handle
|
|
uint32_t agent_index; // GPU index (GPU Driver Node ID as reported in the sysfs topology)
|
|
const hsa_queue_t* queue; // HSA queue
|
|
uint64_t queue_index; // Index in the queue
|
|
uint32_t queue_id; // Queue id
|
|
hsa_signal_t completion_signal; // Completion signal
|
|
const hsa_kernel_dispatch_packet_t* packet; // HSA dispatch packet
|
|
const char* kernel_name; // Kernel name
|
|
uint64_t kernel_object; // Kernel object address
|
|
const amd_kernel_code_t* kernel_code; // Kernel code pointer
|
|
uint32_t thread_id; // Thread id
|
|
const rocprofiler_dispatch_record_t* record; // Dispatch record
|
|
} rocprofiler_callback_data_t;
|
|
|
|
// Profiling callback type
|
|
typedef hsa_status_t (*rocprofiler_callback_t)(
|
|
const rocprofiler_callback_data_t* callback_data, // [in] callback data
|
|
void* user_data, // [in/out] user data passed to the callback
|
|
rocprofiler_group_t* group); // [out] returned profiling group
|
|
|
|
// Queue callbacks
|
|
typedef struct {
|
|
rocprofiler_callback_t dispatch; // dispatch callback
|
|
hsa_status_t (*create)(hsa_queue_t* queue, void* data); // create callback
|
|
hsa_status_t (*destroy)(hsa_queue_t* queue, void* data); // destroy callback
|
|
} rocprofiler_queue_callbacks_t;
|
|
|
|
// Set queue callbacks
|
|
hsa_status_t rocprofiler_set_queue_callbacks(rocprofiler_queue_callbacks_t callbacks, // callbacks
|
|
void* data); // [in/out] passed callbacks data
|
|
|
|
// Remove queue callbacks
|
|
hsa_status_t rocprofiler_remove_queue_callbacks();
|
|
|
|
// Start/stop queue callbacks
|
|
hsa_status_t rocprofiler_start_queue_callbacks();
|
|
hsa_status_t rocprofiler_stop_queue_callbacks();
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Start/stop profiling
|
|
//
|
|
// Start/stop the context profiling invocation, have to be as many as
|
|
// contect.invocations' to collect all profiling data
|
|
|
|
// Start profiling
|
|
hsa_status_t rocprofiler_start(rocprofiler_t* context, // [in/out] profiling context
|
|
uint32_t group_index); // group index
|
|
|
|
// Stop profiling
|
|
hsa_status_t rocprofiler_stop(rocprofiler_t* context, // [in/out] profiling context
|
|
uint32_t group_index); // group index
|
|
|
|
// Read profiling
|
|
hsa_status_t rocprofiler_read(rocprofiler_t* context, // [in/out] profiling context
|
|
uint32_t group_index); // group index
|
|
|
|
// Read profiling data
|
|
hsa_status_t rocprofiler_get_data(rocprofiler_t* context, // [in/out] profiling context
|
|
uint32_t group_index); // group index
|
|
|
|
// Get profiling groups count
|
|
hsa_status_t rocprofiler_group_count(const rocprofiler_t* context, // [in] profiling context
|
|
uint32_t* group_count); // [out] profiling groups count
|
|
|
|
// Get profiling group for a given index
|
|
hsa_status_t rocprofiler_get_group(rocprofiler_t* context, // [in] profiling context
|
|
uint32_t group_index, // profiling group index
|
|
rocprofiler_group_t* group); // [out] profiling group
|
|
|
|
// Start profiling
|
|
hsa_status_t rocprofiler_group_start(rocprofiler_group_t* group); // [in/out] profiling group
|
|
|
|
// Stop profiling
|
|
hsa_status_t rocprofiler_group_stop(rocprofiler_group_t* group); // [in/out] profiling group
|
|
|
|
// Read profiling
|
|
hsa_status_t rocprofiler_group_read(rocprofiler_group_t* group); // [in/out] profiling group
|
|
|
|
// Get profiling data
|
|
hsa_status_t rocprofiler_group_get_data(rocprofiler_group_t* group); // [in/out] profiling group
|
|
|
|
// Get metrics data
|
|
hsa_status_t rocprofiler_get_metrics(const rocprofiler_t* context); // [in/out] profiling context
|
|
|
|
// Definition of output data iterator callback
|
|
typedef hsa_ven_amd_aqlprofile_data_callback_t rocprofiler_trace_data_callback_t;
|
|
|
|
// Method for iterating the events output data
|
|
hsa_status_t rocprofiler_iterate_trace_data(
|
|
rocprofiler_t* context, // [in] profiling context
|
|
rocprofiler_trace_data_callback_t callback, // callback to iterate the output data
|
|
void* data); // [in/out] callback data
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
// Profiling info
//
// Queries describing the metrics, traces and trace parameters that are
// available; the info records can be iterated using a callback

// Profiling info kind
typedef enum {
  ROCPROFILER_INFO_KIND_METRIC = 0,                // metric info
  ROCPROFILER_INFO_KIND_METRIC_COUNT = 1,          // metric features count, int32
  ROCPROFILER_INFO_KIND_TRACE = 2,                 // trace info
  ROCPROFILER_INFO_KIND_TRACE_COUNT = 3,           // trace features count, int32
  ROCPROFILER_INFO_KIND_TRACE_PARAMETER = 4,       // trace parameter info
  ROCPROFILER_INFO_KIND_TRACE_PARAMETER_COUNT = 5  // trace parameter count, int32
} rocprofiler_info_kind_t;

// Profiling info query
|
|
typedef union {
|
|
rocprofiler_info_kind_t info_kind; // queried profiling info kind
|
|
struct {
|
|
const char* trace_name; // queried info trace name
|
|
} trace_parameter;
|
|
} rocprofiler_info_query_t;
|
|
|
|
// Profiling info data
|
|
typedef struct {
|
|
uint32_t
|
|
agent_index; // GPU HSA agent index (GPU Driver Node ID as reported in the sysfs topology)
|
|
rocprofiler_info_kind_t kind; // info data kind
|
|
union {
|
|
struct {
|
|
const char* name; // metric name
|
|
uint32_t instances; // instances number
|
|
const char* expr; // metric expression, NULL for basic counters
|
|
const char* description; // metric description
|
|
const char* block_name; // block name
|
|
uint32_t block_counters; // number of block counters
|
|
} metric;
|
|
struct {
|
|
const char* name; // trace name
|
|
const char* description; // trace description
|
|
uint32_t parameter_count; // supported by the trace number parameters
|
|
} trace;
|
|
struct {
|
|
uint32_t code; // parameter code
|
|
const char* trace_name; // trace name
|
|
const char* parameter_name; // parameter name
|
|
const char* description; // trace parameter description
|
|
} trace_parameter;
|
|
};
|
|
} rocprofiler_info_data_t;
|
|
|
|
// Return the info for a given info kind
|
|
hsa_status_t rocprofiler_get_info(const hsa_agent_t* agent, // [in] GFXIP handle
|
|
rocprofiler_info_kind_t kind, // kind of iterated info
|
|
void* data); // [in/out] returned data
|
|
|
|
// Iterate over the info for a given info kind, and invoke an application-defined callback on every
|
|
// iteration
|
|
hsa_status_t rocprofiler_iterate_info(const hsa_agent_t* agent, // [in] GFXIP handle
|
|
rocprofiler_info_kind_t kind, // kind of iterated info
|
|
hsa_status_t (*callback)(const rocprofiler_info_data_t info,
|
|
void* data), // callback
|
|
void* data); // [in/out] data passed to callback
|
|
|
|
// Iterate over the info for a given info query, and invoke an application-defined callback on every
|
|
// iteration
|
|
hsa_status_t rocprofiler_query_info(const hsa_agent_t* agent, // [in] GFXIP handle
|
|
rocprofiler_info_query_t query, // iterated info query
|
|
hsa_status_t (*callback)(const rocprofiler_info_data_t info,
|
|
void* data), // callback
|
|
void* data); // [in/out] data passed to callback
|
|
|
|
// Create a profiled queue. All dispatches on this queue will be profiled
|
|
hsa_status_t rocprofiler_queue_create_profiled(
|
|
hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
|
|
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
|
|
uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
// Profiling pool
//
// Support for profiling contexts pool
// The API provides the capability to create a contexts pool for a given agent and a set of
// features, to fetch/release a context entry, to register a callback for the contexts completion.

// Profiling pool handle
// Opaque: only ever used through a pointer in this header.
typedef void rocprofiler_pool_t;

// Profiling pool entry
|
|
typedef struct {
|
|
rocprofiler_t* context; // context object
|
|
void* payload; // payload data object
|
|
} rocprofiler_pool_entry_t;
|
|
|
|
// Profiling handler, calling on profiling completion
|
|
typedef bool (*rocprofiler_pool_handler_t)(const rocprofiler_pool_entry_t* entry, void* arg);
|
|
|
|
// Profiling preperties
|
|
typedef struct {
|
|
uint32_t num_entries; // pool size entries
|
|
uint32_t payload_bytes; // payload size bytes
|
|
rocprofiler_pool_handler_t handler; // handler on context completion
|
|
void* handler_arg; // the handler arg
|
|
} rocprofiler_pool_properties_t;
|
|
|
|
// Open profiling pool
|
|
hsa_status_t rocprofiler_pool_open(
|
|
hsa_agent_t agent, // GPU handle
|
|
rocprofiler_feature_t* features, // [in] profiling features array
|
|
uint32_t feature_count, // profiling info count
|
|
rocprofiler_pool_t** pool, // [out] context object
|
|
uint32_t mode, // profiling mode mask
|
|
rocprofiler_pool_properties_t*); // pool properties
|
|
|
|
// Close profiling pool
|
|
hsa_status_t rocprofiler_pool_close(rocprofiler_pool_t* pool); // profiling pool handle
|
|
|
|
// Fetch profiling pool entry
|
|
hsa_status_t rocprofiler_pool_fetch(
|
|
rocprofiler_pool_t* pool, // profiling pool handle
|
|
rocprofiler_pool_entry_t* entry); // [out] empty profiling pool entry
|
|
|
|
// Release profiling pool entry
|
|
hsa_status_t rocprofiler_pool_release(
|
|
rocprofiler_pool_entry_t* entry); // released profiling pool entry
|
|
|
|
// Iterate fetched profiling pool entries
|
|
hsa_status_t rocprofiler_pool_iterate(rocprofiler_pool_t* pool, // profiling pool handle
|
|
hsa_status_t (*callback)(rocprofiler_pool_entry_t* entry,
|
|
void* data), // callback
|
|
void* data); // [in/out] data passed to callback
|
|
|
|
// Flush completed entries in profiling pool
|
|
hsa_status_t rocprofiler_pool_flush(rocprofiler_pool_t* pool); // profiling pool handle
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
// HSA intercepting API

// HSA callbacks ID enumeration
// Each ID has a same-named member in rocprofiler_hsa_callback_data_t and in
// rocprofiler_hsa_callbacks_t.
typedef enum {
  ROCPROFILER_HSA_CB_ID_ALLOCATE = 0,  // Memory allocate callback
  ROCPROFILER_HSA_CB_ID_DEVICE = 1,    // Device assign callback
  ROCPROFILER_HSA_CB_ID_MEMCOPY = 2,   // Memcopy callback
  ROCPROFILER_HSA_CB_ID_SUBMIT = 3,    // Packet submit callback
  ROCPROFILER_HSA_CB_ID_KSYMBOL = 4,   // Loading/unloading of kernel symbol
  ROCPROFILER_HSA_CB_ID_CODEOBJ = 5    // Loading/unloading of code object
} rocprofiler_hsa_cb_id_t;

// HSA callback data type
|
|
typedef struct {
|
|
union {
|
|
struct {
|
|
const void* ptr; // allocated area ptr
|
|
size_t size; // allocated area size, zero size means 'free' callback
|
|
hsa_amd_segment_t segment; // allocated area's memory segment type
|
|
hsa_amd_memory_pool_global_flag_t global_flag; // allocated area's memory global flag
|
|
int is_code; // equal to 1 if code is allocated
|
|
} allocate;
|
|
struct {
|
|
hsa_device_type_t type; // type of assigned device
|
|
uint32_t id; // id of assigned device
|
|
hsa_agent_t agent; // device HSA agent handle
|
|
const void* ptr; // ptr the device is assigned to
|
|
} device;
|
|
struct {
|
|
const void* dst; // memcopy dst ptr
|
|
const void* src; // memcopy src ptr
|
|
size_t size; // memcopy size bytes
|
|
} memcopy;
|
|
struct {
|
|
const void* packet; // submitted to GPU packet
|
|
const char* kernel_name; // kernel name, not NULL if dispatch
|
|
hsa_queue_t* queue; // HSA queue the kernel was submitted to
|
|
uint32_t device_type; // type of device the packed is submitted to
|
|
uint32_t device_id; // id of device the packed is submitted to
|
|
} submit;
|
|
struct {
|
|
uint64_t object; // kernel symbol object
|
|
const char* name; // kernel symbol name
|
|
uint32_t name_length; // kernel symbol name length
|
|
int unload; // symbol executable destroy
|
|
} ksymbol;
|
|
struct {
|
|
uint32_t storage_type; // code object storage type
|
|
int storage_file; // origin file descriptor
|
|
uint64_t memory_base; // origin memory base
|
|
uint64_t memory_size; // origin memory size
|
|
uint64_t load_base; // codeobj load base
|
|
uint64_t load_size; // codeobj load size
|
|
uint64_t load_delta; // codeobj load size
|
|
uint32_t uri_length; // URI string length
|
|
char* uri; // URI string
|
|
int unload; // unload flag
|
|
} codeobj;
|
|
};
|
|
} rocprofiler_hsa_callback_data_t;
|
|
|
|
// HSA callback function type
|
|
typedef hsa_status_t (*rocprofiler_hsa_callback_fun_t)(
|
|
rocprofiler_hsa_cb_id_t id, // callback id
|
|
const rocprofiler_hsa_callback_data_t* data, // [in] callback data
|
|
void* arg); // [in/out] user passed data
|
|
|
|
// HSA callbacks structure
|
|
typedef struct {
|
|
rocprofiler_hsa_callback_fun_t allocate; // memory allocate callback
|
|
rocprofiler_hsa_callback_fun_t device; // agent assign callback
|
|
rocprofiler_hsa_callback_fun_t memcopy; // memory copy callback
|
|
rocprofiler_hsa_callback_fun_t submit; // packet submit callback
|
|
rocprofiler_hsa_callback_fun_t ksymbol; // kernel symbol callback
|
|
rocprofiler_hsa_callback_fun_t codeobj; // codeobject load/unload callback
|
|
} rocprofiler_hsa_callbacks_t;
|
|
|
|
// Set callbacks. If the callback is NULL then it is disabled.
|
|
// If callback returns a value that is not HSA_STATUS_SUCCESS the callback
|
|
// will be unregistered.
|
|
hsa_status_t rocprofiler_set_hsa_callbacks(
|
|
const rocprofiler_hsa_callbacks_t callbacks, // HSA callback function
|
|
void* arg); // callback user data
|
|
|
|
#ifdef __cplusplus
|
|
} // extern "C" block
|
|
#endif // __cplusplus
|
|
|
|
#endif // INC_ROCPROFILER_H_
|