// File: rocm-systems/inc/rocprofiler.h (C, 260 lines, 10 KiB)

////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
// ROC Profiler API
//
// The goal of the implementation is to provide a HW specific low-level
// performance analysis interface for profiling of GPU compute applications.
// The profiling includes HW performance counters (PMC) with complex
// performance metrics and thread traces (SQTT). The profiling is supported
// by the SQTT, PMC and Callback APIs.
//
// The library can be used by a tool library loaded by HSA runtime or by
// higher level HW independent performance analysis API like PAPI.
//
// The library is written in C and will be based on AQLprofile AMD specific
// HSA extension. The library implementation requires HSA API intercepting and
// a profiling queue supporting a submit callback interface.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef INC_ROCPROFILER_H_
#define INC_ROCPROFILER_H_
#include <hsa.h>
#include <hsa_ven_amd_aqlprofile.h>
#include <stdbool.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
////////////////////////////////////////////////////////////////////////////////
// Profiling info
//
// Profiling info objects have profiling feature info, type, parameters and data.
// Also, profiling data samples can be iterated using a callback.
// Kind of a profiling feature.
typedef enum {
  // metric feature
  ROCPROFILER_FEATURE_KIND_METRIC = 0,
  // trace feature
  ROCPROFILER_FEATURE_KIND_TRACE = 1
} rocprofiler_feature_kind_t;
// Profiling feature parameter.
// This is an alias of the AQLprofile extension parameter type.
typedef hsa_ven_amd_aqlprofile_parameter_t rocprofiler_parameter_t;
// Kind of a profiling data result.
typedef enum {
  // no result kind set
  ROCPROFILER_DATA_KIND_UNINIT = 0,
  // 32-bit integer
  ROCPROFILER_DATA_KIND_INT32 = 1,
  // 64-bit integer
  ROCPROFILER_DATA_KIND_INT64 = 2,
  // single-precision float
  ROCPROFILER_DATA_KIND_FLOAT = 3,
  // double-precision float
  ROCPROFILER_DATA_KIND_DOUBLE = 4,
  // bytes
  ROCPROFILER_DATA_KIND_BYTES = 5
} rocprofiler_data_kind_t;
// Profiling data: a result kind together with the result value.
// The value is stored in an anonymous union, so its members are accessed
// directly on the struct (e.g. data.result_int64).
// The 'copy' member is a 'bool'; in C this type comes from <stdbool.h>.
typedef struct {
  // result kind
  rocprofiler_data_kind_t kind;
  union {
    // 32bit integer result
    uint32_t result_int32;
    // 64bit integer result
    uint64_t result_int64;
    // float single-precision result
    float result_float;
    // float double-precision result
    double result_double;
    // data by ptr and byte size
    // NOTE(review): the meaning of 'instance_count' and 'copy' is not
    // documented in this header - confirm against the implementation.
    struct {
      void* ptr;
      uint32_t size;
      uint32_t instance_count;
      bool copy;
    } result_bytes;
  };
} rocprofiler_data_t;
// Profiling feature info: one feature together with its profiling data.
typedef struct {
  // feature kind
  rocprofiler_feature_kind_t kind;
  // feature name
  const char* name;
  // feature parameters array
  const rocprofiler_parameter_t* parameters;
  // feature parameters count
  uint32_t parameter_count;
  // profiling data
  rocprofiler_data_t data;
} rocprofiler_feature_t;
////////////////////////////////////////////////////////////////////////////////
// Profiling context
//
// The profiling context object accumulates all profiling information
// Profiling context object.
// Declared as an alias of void, so callers only ever hold a rocprofiler_t*
// as an opaque handle.
typedef void rocprofiler_t;
// Profiling group object.
typedef struct {
  // group index
  unsigned index;
  // profiling info array
  rocprofiler_feature_t** features;
  // profiling info count
  uint32_t feature_count;
  // context object
  rocprofiler_t* context;
} rocprofiler_group_t;
// Profiling mode mask; each enumerator is a distinct bit.
typedef enum {
  // standalone mode when ROC profiler supports a queue
  ROCPROFILER_MODE_STANDALONE = 0x1,
  // ROC profiler creates queue in standalone mode
  ROCPROFILER_MODE_CREATEQUEUE = 0x2,
  // only one group is allowed, failed otherwise
  ROCPROFILER_MODE_SINGLEGROUP = 0x4
} rocprofiler_mode_t;
// Profiling handler, called on profiling completion.
//   group - profiling group, passed by value
//   arg   - handler argument
typedef void (*rocprofiler_handler_t)(rocprofiler_group_t group, void* arg);
// Profiling properties, as accepted by rocprofiler_open().
typedef struct {
  // queue for STANDALONE mode;
  // the queue is created and returned in CREATEQUEUE mode
  hsa_queue_t* queue;
  // created queue depth
  uint32_t queue_depth;
  // handler on completion
  rocprofiler_handler_t handler;
  // the handler arg
  void* handler_arg;
} rocprofiler_properties_t;
// Create a new profiling context.
//   agent         - GPU handle
//   features      - [in] profiling info array
//   feature_count - profiling info count
//   context       - [out] context object
//   mode          - profiling mode mask
//   properties    - profiling properties
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_open(hsa_agent_t agent, rocprofiler_feature_t* features,
                              uint32_t feature_count, rocprofiler_t** context, uint32_t mode,
                              rocprofiler_properties_t* properties);
// Delete profiling info.
//   context - [in] profiling context
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_close(rocprofiler_t* context);
// Reset the context before reusing it.
//   context     - [in] profiling context
//   group_index - group index
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_reset(rocprofiler_t* context, uint32_t group_index);
////////////////////////////////////////////////////////////////////////////////
// Runtime API observer
//
// The runtime API observer is called on entry to and exit from the API
// Profiling callback data, handed to a rocprofiler_callback_t.
// NOTE(review): the fields carry no comments in the original header; the
// descriptions below are inferred from the field names - confirm.
typedef struct {
  // presumably the agent the callback refers to
  hsa_agent_t agent;
  // presumably an index identifying the queue
  uint64_t queue_index;
  // presumably the kernel object
  uint64_t kernel_object;
  // presumably the kernel name string
  const char* kernel_name;
} rocprofiler_callback_data_t;
// Profiling callback type.
//   callback_data - [in] callback data
//   user_data     - [in/out] user data passed to the callback
//   group         - [out] profiling group
// Returns an hsa_status_t status code.
// NOTE(review): the original comment described callback_data as a union
// whose contents depend on the callback API id; the type declared in this
// header is a plain struct - confirm which is intended.
typedef hsa_status_t (*rocprofiler_callback_t)(const rocprofiler_callback_data_t* callback_data,
                                               void* user_data, rocprofiler_group_t* group);
// Set/remove kernel dispatch observer
// Set the kernel dispatch observer.
//   callback - observer callback
//   data     - [in/out] passed callback data
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_set_dispatch_callback(rocprofiler_callback_t callback, void* data);
// Remove the kernel dispatch observer.
// Takes no arguments. The parameter list is spelled '(void)' because in C an
// empty '()' declares a function with unspecified parameters rather than a
// prototype, which disables argument checking for C callers.
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_remove_dispatch_callback(void);
////////////////////////////////////////////////////////////////////////////////
// Start/stop profiling
//
// Start/stop the context profiling invocation; this has to be done as many times as
// 'context.invocations' to collect all profiling data
// Start profiling.
//   context     - [in/out] profiling context
//   group_index - group index
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_start(rocprofiler_t* context, uint32_t group_index);
// Stop profiling.
//   context     - [in/out] profiling context
//   group_index - group index
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_stop(rocprofiler_t* context, uint32_t group_index);
// Read profiling data.
//   context     - [in/out] profiling context
//   group_index - group index
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_get_data(rocprofiler_t* context, uint32_t group_index);
// Get the profiling groups count.
//   context     - [in] profiling context
//   group_count - [out] profiling groups count
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_group_count(const rocprofiler_t* context, uint32_t* group_count);
// Get the profiling group for a given index.
//   context     - [in] profiling context
//   group_index - [in] profiling group index
//   group       - [out] profiling group
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_get_group(rocprofiler_t* context, uint32_t group_index,
                                   rocprofiler_group_t* group);
// Start profiling for a profiling group.
//   group - [in/out] profiling group
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_group_start(rocprofiler_group_t* group);
// Stop profiling for a profiling group.
//   group - [in/out] profiling group
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_group_stop(rocprofiler_group_t* group);
// Get profiling data for a profiling group.
//   group - [in/out] profiling group
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_group_get_data(rocprofiler_group_t* group);
// Get metrics data.
//   context - profiling context
// Returns an hsa_status_t status code.
// NOTE(review): the original annotation marked 'context' as [in/out], but the
// pointer is const-qualified - confirm the intended direction.
hsa_status_t rocprofiler_get_metrics(const rocprofiler_t* context);
// Output data iterator callback.
// This is an alias of the AQLprofile extension data callback type.
typedef hsa_ven_amd_aqlprofile_data_callback_t rocprofiler_trace_data_callback_t;
// Iterate the events output data.
//   context  - [in] profiling context
//   callback - [in] callback to iterate the output data
//   data     - [in/out] callback data
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_iterate_trace_data(rocprofiler_t* context,
                                            rocprofiler_trace_data_callback_t callback,
                                            void* data);
////////////////////////////////////////////////////////////////////////////////
// Return the API error string.
//   str - [out] receives the pointer to the API error string
// Returns an hsa_status_t status code.
hsa_status_t rocprofiler_error_string(const char** str);
#ifdef __cplusplus
} // extern "C" block
#endif // __cplusplus
#endif // INC_ROCPROFILER_H_