Split ROCTx API tables and update intercept table API (#421)

* Update include/rocprofiler-sdk

- buffer_tracing.h
  - fix doxygen for rocprofiler_buffer_tracing_hip_api_record_t
  - update doxygen for rocprofiler_buffer_tracing_marker_api_record_t
    - remove unused marker_id field
- fwd.h
  - Split ROCPROFILER_CALLBACK_TRACING_MARKER_API into ROCPROFILER_CALLBACK_TRACING_MARKER_{CORE,CONTROL,NAME}_API
  - Split ROCPROFILER_BUFFER_TRACING_MARKER_API into ROCPROFILER_BUFFER_TRACING_MARKER_{CORE,CONTROL,NAME}_API
  - split rocprofiler_runtime_library_t into rocprofiler_runtime_library_t and rocprofiler_intercept_table_t
    - after split of ROCTx into 3 tables, specifying rocprofiler_at_internal_thread_create became confusing

* Update include/rocprofiler-sdk-roctx/api_trace.h

- Split into three tables: core, control, and name
  - core: what it sounds like
  - control: functions for controling the profiler
  - name: functions for giving resources names

* Update lib/rocprofiler-sdk-roctx/roctx.cpp

- modifications following split into multiple tables

* Update lib/rocprofiler-sdk/marker/*

- modifications following split of ROCTx API into multiple intercept tables

* Update lib/rocprofiler-sdk/tests

- common.hpp
  - add enums to get_callback_tracing_names() and get_buffer_tracing_names()
- intercept_table.cpp
  - update test to use rocprofiler_intercept_table_t (and enums) instead of rocproifler_runtime_library_t
  - update OR combos tested
- roctx.cpp
  - updates following split of ROCTx API table into multiple tables
  - use simplified specification of control API

* Update lib/rocprofiler-sdk

- buffer_tracing.cpp
  - Updates for ROCPROFILER_BUFFER_TRACING_MARKER_{CORE,CONTROL,NAME}_API enum values
- callback_tracing.cpp
  - Updates for ROCPROFILER_CALLBACK_TRACING_MARKER_{CORE,CONTROL,NAME}_API enum values
- intercept_table.hpp
  - notify_runtime_api_registration -> notify_intercept_table_registration
- intercept_table.cpp
  - updates for new rocprofiler_intercept_table_t enum and new ROCTx tables
- registration.cpp
  - updates for new rocprofiler_intercept_table_t enum and new ROCTx tables
  - updates for notify_runtime_api_registration -> notify_intercept_table_registration

* Update lib/rocprofiler-sdk-tool

- helper.cpp
  - Updates for new enums in get_callback_id_names() and get_buffer_id_names()
- tool.cpp
  - migrate to new enums for split ROCTx tables
  - use simplified split for control table vs. core+name tables

* Update samples/{api_callback_tracing,intercept_table}

- intercept_table/client.cpp
  - rocprofiler_runtime_library_t -> rocprofiler_intercept_table_t
- api_callback_tracing/client.cpp
  - Updates for new enums in get_callback_id_names()
  - use simplified split for control table vs. core+name tables
  - migrate to new enums for split ROCTx tables

* Update tests

- rocprofv3/tracing/validate.py
  - handle new marker domain names
- tools/json-tool.cpp
  - Updates for new enums in get_callback_id_names() and get_buffer_id_names()
  - use simplified split for control table vs. core+name tables
  - migrate to new enums for split ROCTx tables

* Update tests/rocprofv3/tracing/CMakeLists.txt

- fix FAIL_REGULAR_EXPRESSION for rocprofv3-test-trace-execute

* Update lib/rocprofiler-sdk-tool/{output_file,tool}.*

- logging in output_file dtor
- support stdout/stderr

* Update lib/common/container/record_header_buffer.hpp

- reduce probability of is_empty() returning true while emplace is happening

* Update lib/rocprofiler-sdk-tool/tool.cpp

- logging for buffered_tracing_callback
- counter collection uses CSV encoder

* Update bin/rocprofv3

- remove -i flag from help menu

[ROCm/rocprofiler-sdk commit: 9efafc4d23]
此提交包含在:
Jonathan R. Madsen
2024-01-26 13:56:15 -06:00
提交者 GitHub
父節點 24c8d296ba
當前提交 bc67b1e823
共有 32 個檔案被更改,包括 881 行新增628 行删除
+31 -46
查看文件
@@ -136,7 +136,11 @@ get_callback_id_names()
static auto supported = std::unordered_set<rocprofiler_callback_tracing_kind_t>{
ROCPROFILER_CALLBACK_TRACING_HSA_API,
ROCPROFILER_CALLBACK_TRACING_HIP_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_API};
ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API,
};
auto cb_name_info = callback_name_info{};
//
@@ -192,14 +196,14 @@ tool_tracing_ctrl_callback(rocprofiler_callback_tracing_record_t record,
auto* ctx = static_cast<rocprofiler_context_id_t*>(client_data);
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER &&
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API &&
record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerPause)
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API &&
record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerPause)
{
ROCPROFILER_CALL(rocprofiler_stop_context(*ctx), "pausing client context");
}
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT &&
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API &&
record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerResume)
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API &&
record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerResume)
{
ROCPROFILER_CALL(rocprofiler_start_context(*ctx), "resuming client context");
}
@@ -254,35 +258,9 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
_mutex.unlock();
}
std::vector<uint32_t>
void
tool_control_init(rocprofiler_context_id_t& primary_ctx)
{
struct RoctxOperations
{
std::vector<uint32_t> core = {};
std::vector<uint32_t> cntrl = {};
};
auto roctx_ops = RoctxOperations();
// get all the operations for ROCPROFILER_CALLBACK_TRACING_MARKER_API and
// separate them into two arrays; one which contains the pause/resume operations
// and one with everything else
ROCPROFILER_CALL(
rocprofiler_iterate_callback_tracing_kind_operations(
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
[](rocprofiler_callback_tracing_kind_t, uint32_t operation_v, void* data_v) {
auto* roctx_ops_v = static_cast<RoctxOperations*>(data_v);
if(operation_v == ROCPROFILER_MARKER_API_ID_roctxProfilerPause ||
operation_v == ROCPROFILER_MARKER_API_ID_roctxProfilerResume)
roctx_ops_v->cntrl.emplace_back(operation_v);
else
roctx_ops_v->core.emplace_back(operation_v);
return 0;
},
&roctx_ops),
"iterating callback tracing kind operations");
// Create a specialized (throw-away) context for handling ROCTx profiler pause and resume.
// A separate context is used because if the context that is associated with roctxProfilerPause
// disabled that same context, a call to roctxProfilerResume would be ignored because the
@@ -291,19 +269,17 @@ tool_control_init(rocprofiler_context_id_t& primary_ctx)
ROCPROFILER_CALL(rocprofiler_create_context(&cntrl_ctx), "control context creation failed");
// enable callback marker tracing with only the pause/resume operations
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(cntrl_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
roctx_ops.cntrl.data(),
roctx_ops.cntrl.size(),
tool_tracing_ctrl_callback,
&primary_ctx),
"callback tracing service failed to configure");
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
cntrl_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
nullptr,
0,
tool_tracing_ctrl_callback,
&primary_ctx),
"callback tracing service failed to configure");
// start the context so that it is always active
ROCPROFILER_CALL(rocprofiler_start_context(cntrl_ctx), "start of control context");
return roctx_ops.core;
}
int
@@ -344,7 +320,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
ROCPROFILER_CALL(rocprofiler_create_context(&client_ctx), "context creation failed");
// enable the control
auto roctx_ops = tool_control_init(client_ctx);
tool_control_init(client_ctx);
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(client_ctx,
@@ -366,9 +342,18 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(client_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
roctx_ops.data(),
roctx_ops.size(),
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
nullptr,
0,
tool_tracing_callback,
tool_data),
"callback tracing service failed to configure");
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(client_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API,
nullptr,
0,
tool_tracing_callback,
tool_data),
"callback tracing service failed to configure");
+7 -6
查看文件
@@ -173,14 +173,14 @@ generate_wrapper(const char* name, RetT (*func)(Args...))
TABLE->FUNC##_fn = generate_wrapper<__COUNTER__>(#FUNC, TABLE->FUNC##_fn)
void
api_registration_callback(rocprofiler_runtime_library_t type,
api_registration_callback(rocprofiler_intercept_table_t type,
uint64_t lib_version,
uint64_t lib_instance,
void** tables,
uint64_t num_tables,
void* user_data)
{
if(type != ROCPROFILER_HSA_LIBRARY)
if(type != ROCPROFILER_HSA_TABLE)
throw std::runtime_error{"unexpected library type: " +
std::to_string(static_cast<int>(type))};
if(lib_instance != 0) throw std::runtime_error{"multiple instances of HSA runtime library"};
@@ -282,10 +282,11 @@ rocprofiler_configure(uint32_t version,
client_tool_data->emplace_back(
client::source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});
ROCPROFILER_CALL(rocprofiler_at_runtime_api_registration(client::api_registration_callback,
ROCPROFILER_HSA_LIBRARY,
static_cast<void*>(client_tool_data)),
"runtime api registration");
ROCPROFILER_CALL(
rocprofiler_at_intercept_table_registration(client::api_registration_callback,
ROCPROFILER_HSA_TABLE,
static_cast<void*>(client_tool_data)),
"runtime api registration");
// create configure data
static auto cfg =
+1 -1
查看文件
@@ -19,7 +19,7 @@ usage() {
if [ -z "${EC}" ]; then EC=1; fi
echo -e "${RESET}ROCProfilerV3 Run Script Usage:"
echo -e "${GREEN}-h | --help ${RESET} For showing this message"
echo -e "${GREEN}-i | --input ${RESET} For adding counters file path (every line in the text file represents a counter)"
# echo -e "${GREEN}-i | --input ${RESET} For adding counters file path (every line in the text file represents a counter)"
echo -e "${GREEN}--hsa-trace ${RESET} For Collecting HSA API Traces"
echo -e "${GREEN}--kernel-trace ${RESET} For Collecting Kernel Dispatch Traces"
echo -e "${GREEN}--memory-copy-trace ${RESET} For Collecting Memory Copy Traces"
+31 -12
查看文件
@@ -35,6 +35,15 @@ ROCTX_EXTERN_C_INIT
#define ROCTX_API_TABLE_VERSION_MAJOR 0
#define ROCTX_API_TABLE_VERSION_STEP 0
#define ROCTX_CORE_API_TABLE_VERSION_MAJOR 0
#define ROCTX_CORE_API_TABLE_VERSION_STEP 0
#define ROCTX_CONTROL_API_TABLE_VERSION_MAJOR 0
#define ROCTX_CONTROL_API_TABLE_VERSION_STEP 0
#define ROCTX_RESOURCE_API_TABLE_VERSION_MAJOR 0
#define ROCTX_RESOURCE_API_TABLE_VERSION_STEP 0
typedef uint64_t roctx_range_id_t;
typedef void (*roctxMarkA_fn_t)(const char* message);
typedef int (*roctxRangePushA_fn_t)(const char* message);
@@ -49,21 +58,31 @@ typedef int (*roctxNameHipDevice_fn_t)(const char* name, int device_id);
typedef int (*roctxNameHipStream_fn_t)(const char* name, const struct ihipStream_t* stream);
typedef int (*roctxGetThreadId_fn_t)(roctx_thread_id_t* tid);
typedef struct roctxApiTable_t
typedef struct roctxCoreApiTable_t
{
uint64_t size;
roctxMarkA_fn_t roctxMarkA_fn;
roctxRangePushA_fn_t roctxRangePushA_fn;
roctxRangePop_fn_t roctxRangePop_fn;
roctxRangeStartA_fn_t roctxRangeStartA_fn;
roctxRangeStop_fn_t roctxRangeStop_fn;
roctxGetThreadId_fn_t roctxGetThreadId_fn;
} roctxCoreApiTable_t;
typedef struct roctxControlApiTable_t
{
uint64_t size;
roctxMarkA_fn_t roctxMarkA_fn;
roctxRangePushA_fn_t roctxRangePushA_fn;
roctxRangePop_fn_t roctxRangePop_fn;
roctxRangeStartA_fn_t roctxRangeStartA_fn;
roctxRangeStop_fn_t roctxRangeStop_fn;
roctxProfilerPause_fn_t roctxProfilerPause_fn;
roctxProfilerResume_fn_t roctxProfilerResume_fn;
roctxNameOsThread_fn_t roctxNameOsThread_fn;
roctxNameHsaAgent_fn_t roctxNameHsaAgent_fn;
roctxNameHipDevice_fn_t roctxNameHipDevice_fn;
roctxNameHipStream_fn_t roctxNameHipStream_fn;
roctxGetThreadId_fn_t roctxGetThreadId_fn;
} roctxApiTable_t;
} roctxControlApiTable_t;
typedef struct roctxNameApiTable_t
{
uint64_t size;
roctxNameOsThread_fn_t roctxNameOsThread_fn;
roctxNameHsaAgent_fn_t roctxNameHsaAgent_fn;
roctxNameHipDevice_fn_t roctxNameHipDevice_fn;
roctxNameHipStream_fn_t roctxNameHipStream_fn;
} roctxNameApiTable_t;
ROCTX_EXTERN_C_FINI
+18 -15
查看文件
@@ -54,13 +54,14 @@ typedef struct
*/
typedef struct
{
uint64_t size; ///< size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_CALLBACK_TRACING_HIP_API
rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record
rocprofiler_tracing_operation_t operation; ///< ::rocprofiler_hip_api_id_t
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
rocprofiler_thread_id_t thread_id; ///< id for thread generating this record
uint64_t size; ///< size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_CALLBACK_TRACING_HIP_API
rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record
rocprofiler_tracing_operation_t
operation; ///< ::rocprofiler_hip_api_id_t or ::rocprofiler_hip_compiler_api_id_t
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
rocprofiler_thread_id_t thread_id; ///< id for thread generating this record
} rocprofiler_buffer_tracing_hip_api_record_t;
/**
@@ -69,14 +70,16 @@ typedef struct
typedef struct
{
uint64_t size; ///< size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_CALLBACK_TRACING_MARKER_API
rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record
rocprofiler_tracing_operation_t operation; ///< ::rocprofiler_marker_api_id_t
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
rocprofiler_thread_id_t thread_id; ///< id for thread generating this record
uint64_t marker_id; ///< rocprofiler_marker_id_t
// const char* message; // (Need Review?)
rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
///< ::ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
///< or ::ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API
rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record
rocprofiler_tracing_operation_t
operation; ///< ::rocprofiler_marker_core_api_id_t, ::rocprofiler_marker_control_api_id_t,
///< or ::rocprofiler_marker_name_api_id_t
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
rocprofiler_thread_id_t thread_id; ///< id for thread generating this record
} rocprofiler_buffer_tracing_marker_api_record_t;
/**
+32 -16
查看文件
@@ -128,12 +128,14 @@ typedef enum // NOLINT(performance-enum-size)
typedef enum // NOLINT(performance-enum-size)
{
ROCPROFILER_CALLBACK_TRACING_NONE = 0,
ROCPROFILER_CALLBACK_TRACING_HSA_API, ///< Callbacks for HSA functions
ROCPROFILER_CALLBACK_TRACING_HIP_API, ///< Callbacks for HIP functions
ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API, ///< Callbacks for HIP compiler functions
ROCPROFILER_CALLBACK_TRACING_MARKER_API, ///< Callbacks for ROCTx functions
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, ///< Callbacks for code object info
ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH, ///< Callbacks for kernel dispatches
ROCPROFILER_CALLBACK_TRACING_HSA_API, ///< Callbacks for HSA functions
ROCPROFILER_CALLBACK_TRACING_HIP_API, ///< Callbacks for HIP functions
ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API, ///< Callbacks for HIP compiler functions
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API, ///< Callbacks for ROCTx functions
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API, ///< Callbacks for ROCTx functions
ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API, ///< Callbacks for ROCTx functions
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, ///< Callbacks for code object info
ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH, ///< Callbacks for kernel dispatches
ROCPROFILER_CALLBACK_TRACING_LAST,
} rocprofiler_callback_tracing_kind_t;
@@ -146,7 +148,9 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_BUFFER_TRACING_HSA_API, ///< Buffer HSA function calls
ROCPROFILER_BUFFER_TRACING_HIP_API, ///< Buffer HIP function calls
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, ///< Buffer HIP compiler function calls
ROCPROFILER_BUFFER_TRACING_MARKER_API, ///< Buffer ROCTx function calls
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API, ///< Buffer ROCTx core function calls
ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API, ///< Buffer ROCTx name function calls
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API, ///< Buffer ROCTx name function calls
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, ///< Buffer memory copy info
ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, ///< Buffer kernel dispatch info
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, ///< Buffer page migration info
@@ -218,20 +222,32 @@ typedef enum // NOLINT(performance-enum-size)
/**
* @brief Enumeration for specifying runtime libraries supported by rocprofiler. This enumeration is
* used for intercept tables and thread creation callbacks. @see INTERCEPT_TABLE and @see
* INTERNAL_THREADING.
* used for thread creation callbacks. @see INTERNAL_THREADING.
*/
typedef enum
{
ROCPROFILER_LIBRARY = (1 << 0),
ROCPROFILER_HSA_LIBRARY = (1 << 1),
ROCPROFILER_HIP_LIBRARY = (1 << 2),
ROCPROFILER_HIP_RUNTIME_LIBRARY = ROCPROFILER_HIP_LIBRARY,
ROCPROFILER_MARKER_LIBRARY = (1 << 3),
ROCPROFILER_HIP_COMPILER_LIBRARY = (1 << 4),
ROCPROFILER_LIBRARY_LAST = ROCPROFILER_HIP_COMPILER_LIBRARY,
ROCPROFILER_LIBRARY = (1 << 0),
ROCPROFILER_HSA_LIBRARY = (1 << 1),
ROCPROFILER_HIP_LIBRARY = (1 << 2),
ROCPROFILER_MARKER_LIBRARY = (1 << 3),
ROCPROFILER_LIBRARY_LAST = ROCPROFILER_MARKER_LIBRARY,
} rocprofiler_runtime_library_t;
/**
* @brief Enumeration for specifying intercept tables supported by rocprofiler. This enumeration is
* used for intercept tables. @see INTERCEPT_TABLE.
*/
typedef enum
{
ROCPROFILER_HSA_TABLE = (1 << 0),
ROCPROFILER_HIP_RUNTIME_TABLE = (1 << 1),
ROCPROFILER_HIP_COMPILER_TABLE = (1 << 2),
ROCPROFILER_MARKER_CORE_TABLE = (1 << 3),
ROCPROFILER_MARKER_CONTROL_TABLE = (1 << 4),
ROCPROFILER_MARKER_NAME_TABLE = (1 << 5),
ROCPROFILER_TABLE_LAST = ROCPROFILER_MARKER_NAME_TABLE,
} rocprofiler_intercept_table_t;
//--------------------------------------------------------------------------------------//
//
// ALIASES
+37 -36
查看文件
@@ -50,7 +50,7 @@ ROCPROFILER_EXTERN_C_INIT
/**
* @brief Callback type when a new runtime library is loaded. @see
* rocprofiler_at_runtime_api_registration
* rocprofiler_at_intercept_table_registration
* @param [in] type Type of API table
* @param [in] lib_version Major, minor, and patch version of library encoded into single number
* similar to @ref ROCPROFILER_VERSION
@@ -58,9 +58,9 @@ ROCPROFILER_EXTERN_C_INIT
* @param [in] tables An array of pointers to the API tables
* @param [in] num_tables The size of the array of pointers to the API tables
* @param [in] user_data The pointer to the data provided to @ref
* rocprofiler_at_runtime_api_registration
* rocprofiler_at_intercept_table_registration
*/
typedef void (*rocprofiler_intercept_library_cb_t)(rocprofiler_runtime_library_t type,
typedef void (*rocprofiler_intercept_library_cb_t)(rocprofiler_intercept_table_t type,
uint64_t lib_version,
uint64_t lib_instance,
void** tables,
@@ -69,9 +69,9 @@ typedef void (*rocprofiler_intercept_library_cb_t)(rocprofiler_runtime_library_t
/**
* @brief Invoke this function to receive callbacks when a ROCm library registers its API
* intercept table with rocprofiler. Use the @ref rocprofiler_runtime_library_t enumeration for
* intercept table with rocprofiler. Use the @ref rocprofiler_intercept_table_t enumeration for
* specifying which raw API tables the tool would like to have access to. E.g. including @ref
* ROCPROFILER_HSA_LIBRARY in the @ref rocprofiler_at_runtime_api_registration function call
* ROCPROFILER_HSA_TABLE in the @ref rocprofiler_at_intercept_table_registration function call
* communicates to rocprofiler that, when rocprofiler receives a `HsaApiTable` instance, the tool
* would like rocprofiler to provide it access too.
*
@@ -81,41 +81,42 @@ typedef void (*rocprofiler_intercept_library_cb_t)(rocprofiler_runtime_library_t
* `rocprofiler_configure` symbol is visible in the application's symbol table. The vast majority of
* tools will want to use the @ref CALLBACK_TRACING_SERVICE to trace these runtime APIs, however,
* some tools may want or require installing their own intercept functions in lieu of receiving
* these callbacks and those tools should use the @ref rocprofiler_at_runtime_api_registration to
* install their intercept functions. There are no restrictions to where or how early this function
* can be invoked but it will return ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED if it is
* invoked after rocprofiler has requested all the tool configurations. Thus, it is highly
* these callbacks and those tools should use the @ref rocprofiler_at_intercept_table_registration
* to install their intercept functions. There are no restrictions to where or how early this
* function can be invoked but it will return ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED if it
* is invoked after rocprofiler has requested all the tool configurations. Thus, it is highly
* recommended to invoke this function within the @ref rocprofiler_configure function or the
* callback passed to the @ref rocprofiler_force_configure function -- the reason for this
* recommendation is that if @ref rocprofiler_at_runtime_api_registration is invoked in one of these
* locations, rocprofiler can guarantee that the tool will be passed the API table because, at the
* first instance of a runtime registering it's API table, rocprofiler will ensure that, in the case
* of the former, rocprofiler will invoke all of the @ref rocprofiler_configure symbols that are
* visible before checking the list of tools which want to receive the API tables and, in the case
* of the latter, @ref rocprofiler_force_configure will fail with error code @ref
* recommendation is that if @ref rocprofiler_at_intercept_table_registration is invoked in one of
* these locations, rocprofiler can guarantee that the tool will be passed the API table because, at
* the first instance of a runtime registering it's API table, rocprofiler will ensure that, in the
* case of the former, rocprofiler will invoke all of the @ref rocprofiler_configure symbols that
* are visible before checking the list of tools which want to receive the API tables and, in the
* case of the latter, @ref rocprofiler_force_configure will fail with error code @ref
* ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED if a runtime has already been registered (and,
* therefore, already scanned and invoked the visible @ref rocprofiler_configure symbols and
* completed the tool initialization). If @ref rocprofiler_at_runtime_api_registration is invoked
* outside of these recommended places, even if it is done before the `main` function starts (e.g.
* in a library init/constructor function), it is possible that another library, such as ROCm-aware
* MPI, caused the HIP and HSA runtime libraries to be initialized when that library was loaded. In
* this aforementioned scenario, if the ROCm-aware MPI library library init/constructor function
* runs before your library init/constructor function, rocprofiler will have already processed the
* API table and will not provide the API table to the tool due to the fact that the API may already
* be in use and, thus, any modifications to the table might result in thread-safety violations or
* more disastrous consequences.
* completed the tool initialization). If @ref rocprofiler_at_intercept_table_registration is
* invoked outside of these recommended places, even if it is done before the `main` function starts
* (e.g. in a library init/constructor function), it is possible that another library, such as
* ROCm-aware MPI, caused the HIP and HSA runtime libraries to be initialized when that library was
* loaded. In this aforementioned scenario, if the ROCm-aware MPI library library init/constructor
* function runs before your library init/constructor function, rocprofiler will have already
* processed the API table and will not provide the API table to the tool due to the fact that the
* API may already be in use and, thus, any modifications to the table might result in thread-safety
* violations or more disastrous consequences.
*
* @param [in] callback Callback to tool invoked when a runtime registers their API table with
* rocprofiler
* @param [in] libs Bitwise-or of libraries, e.g. `ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY
* | ROCPROFILER_MARKER_LIBRARY` means the callbacks will be invoked whenever the HSA, HIP, and
* ROCTx libraries register the intercept table.
* @param [in] libs Bitwise-or of libraries, e.g. `ROCPROFILER_HSA_TABLE |
* ROCPROFILER_HIP_RUNTIME_TABLE | ROCPROFILER_MARKER_CORE_TABLE` means the callbacks will be
* invoked whenever the HSA, HIP runtime, and ROCTx core API tables register their intercept
* table(s).
* @param [in] data Data to provide to callback(s)
* @return ::rocprofiler_status_t
* @retval ::ROCPROFILER_STATUS_SUCCESS Callback was registered for specified runtime(s)
* @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED rocprofiler has already initialized
* @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT this error code is returned if
* `ROCPROFILER_LIBRARY` is included in bitwise-or of the libs
* `ROCPROFILER_TABLE` is included in bitwise-or of the libs
* @retval ::ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED this error code is returned if one of the
* specified libraries does not have support for API intercept tables (which should not be the case
* by the time this code is publicly released)
@@ -152,14 +153,14 @@ typedef void (*rocprofiler_intercept_library_cb_t)(rocprofiler_runtime_library_t
* // this is the function that gets called when the HSA runtime
* // intercept table is registered with rocprofiler
* void
* api_registration_callback(rocprofiler_runtime_library_t type,
* api_registration_callback(rocprofiler_intercept_table_t type,
* uint64_t lib_version,
* uint64_t lib_instance,
* void** tables,
* uint64_t num_tables,
* void* user_data)
* {
* if(type != ROCPROFILER_HSA_LIBRARY)
* if(type != ROCPROFILER_HSA_TABLE)
* throw std::runtime_error{"unexpected library type: " +
* std::to_string(static_cast<int>(type))};
* if(lib_instance != 0) throw std::runtime_error{"multiple instances of HSA runtime library"};
@@ -203,20 +204,20 @@ typedef void (*rocprofiler_intercept_library_cb_t)(rocprofiler_runtime_library_t
* id->name = "ExampleTool";
*
* // specify that we only want to intercept the HSA library
* rocprofiler_at_runtime_api_registration(api_registration_callback,
* ROCPROFILER_HSA_LIBRARY, nullptr);
* rocprofiler_at_intercept_table_registration(api_registration_callback,
* ROCPROFILER_HSA_TABLE, nullptr);
*
* return nullptr;
* }
* @endcode
*
* @example intercept_table/client.cpp
* Example demonstrating @ref rocprofiler_at_runtime_api_registration usage
* Example demonstrating @ref rocprofiler_at_intercept_table_registration usage
*/
rocprofiler_status_t
rocprofiler_at_runtime_api_registration(rocprofiler_intercept_library_cb_t callback,
int libs,
void* data) ROCPROFILER_API;
rocprofiler_at_intercept_table_registration(rocprofiler_intercept_library_cb_t callback,
int libs,
void* data) ROCPROFILER_API;
/** @} */
+7 -7
查看文件
@@ -29,9 +29,9 @@
typedef union rocprofiler_marker_api_retval_u
{
int32_t int32_t_retval;
int64_t int64_t_retval;
uint64_t uint64_t_retval;
int32_t int32_t_retval;
int64_t int64_t_retval;
roctx_range_id_t roctx_range_id_t_retval;
} rocprofiler_marker_api_retval_t;
typedef union rocprofiler_marker_api_args_u
@@ -56,6 +56,10 @@ typedef union rocprofiler_marker_api_args_u
roctx_range_id_t id;
} roctxRangeStop;
struct
{
roctx_thread_id_t* tid;
} roctxGetThreadId;
struct
{
roctx_thread_id_t tid;
} roctxProfilerPause;
@@ -82,8 +86,4 @@ typedef union rocprofiler_marker_api_args_u
const char* name;
const struct ihipStream_t* stream;
} roctxNameHipStream;
struct
{
roctx_thread_id_t* tid;
} roctxGetThreadId;
} rocprofiler_marker_api_args_t;
+27 -15
查看文件
@@ -27,18 +27,30 @@
*/
typedef enum // NOLINT(performance-enum-size)
{
ROCPROFILER_MARKER_API_ID_NONE = -1,
ROCPROFILER_MARKER_API_ID_roctxMarkA = 0,
ROCPROFILER_MARKER_API_ID_roctxRangePushA,
ROCPROFILER_MARKER_API_ID_roctxRangePop,
ROCPROFILER_MARKER_API_ID_roctxRangeStartA,
ROCPROFILER_MARKER_API_ID_roctxRangeStop,
ROCPROFILER_MARKER_API_ID_roctxProfilerPause,
ROCPROFILER_MARKER_API_ID_roctxProfilerResume,
ROCPROFILER_MARKER_API_ID_roctxNameOsThread,
ROCPROFILER_MARKER_API_ID_roctxNameHsaAgent,
ROCPROFILER_MARKER_API_ID_roctxNameHipDevice,
ROCPROFILER_MARKER_API_ID_roctxNameHipStream,
ROCPROFILER_MARKER_API_ID_roctxGetThreadId,
ROCPROFILER_MARKER_API_ID_LAST,
} rocprofiler_marker_api_id_t;
ROCPROFILER_MARKER_CORE_API_ID_NONE = -1,
ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA = 0,
ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA,
ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop,
ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA,
ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStop,
ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId,
ROCPROFILER_MARKER_CORE_API_ID_LAST,
} rocprofiler_marker_core_api_id_t;
typedef enum // NOLINT(performance-enum-size)
{
ROCPROFILER_MARKER_CONTROL_API_ID_NONE = -1,
ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerPause = 0,
ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerResume,
ROCPROFILER_MARKER_CONTROL_API_ID_LAST,
} rocprofiler_marker_control_api_id_t;
typedef enum // NOLINT(performance-enum-size)
{
ROCPROFILER_MARKER_NAME_API_ID_NONE = -1,
ROCPROFILER_MARKER_NAME_API_ID_roctxNameOsThread = 0,
ROCPROFILER_MARKER_NAME_API_ID_roctxNameHsaAgent,
ROCPROFILER_MARKER_NAME_API_ID_roctxNameHipDevice,
ROCPROFILER_MARKER_NAME_API_ID_roctxNameHipStream,
ROCPROFILER_MARKER_NAME_API_ID_LAST,
} rocprofiler_marker_name_api_id_t;
+5 -2
查看文件
@@ -25,6 +25,9 @@
// NOLINTNEXTLINE(performance-enum-size)
typedef enum
{
ROCPROFILER_MARKER_API_TABLE_ID_NONE = -1,
ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi = 0,
ROCPROFILER_MARKER_API_TABLE_ID_NONE = -1,
ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore = 0,
ROCPROFILER_MARKER_API_TABLE_ID_RoctxControl,
ROCPROFILER_MARKER_API_TABLE_ID_RoctxName,
ROCPROFILER_MARKER_API_TABLE_ID_LAST,
} rocprofiler_marker_table_api_id_t;
+31 -14
查看文件
@@ -134,11 +134,12 @@ private:
void write_unlock();
private:
std::atomic<int64_t> m_locked = {0};
std::atomic<size_t> m_index = {};
std::shared_mutex m_shared = {};
base_buffer_t m_buffer = {};
record_vec_t m_headers = {};
std::atomic<int64_t> m_requested = {0};
std::atomic<int64_t> m_locked = {0};
std::atomic<size_t> m_index = {};
std::shared_mutex m_shared = {};
base_buffer_t m_buffer = {};
record_vec_t m_headers = {};
};
inline bool
@@ -218,7 +219,7 @@ record_header_buffer::free() const
inline auto
record_header_buffer::is_empty() const
{
return m_buffer.is_empty() || m_headers.empty();
return (m_buffer.is_empty() && m_requested.load() == 0) || m_headers.empty();
}
inline auto
@@ -235,6 +236,9 @@ record_header_buffer::emplace(uint64_t _hash, Tp& _v)
constexpr auto request_size = sizeof(Tp);
// notify there was a request
m_requested.fetch_add(1);
// in theory, we shouldn't need to lock here but the thread sanitizer says there is a race.
// the lock will be short-lived so hopefully, it will scale fine
write_lock();
@@ -244,20 +248,25 @@ record_header_buffer::emplace(uint64_t _hash, Tp& _v)
read_lock();
if(_addr)
{
// placement new
new(_addr) Tp{_v};
// if there is space in the buffer, atomically get an index
// for where the header record should be placed.
// NOTE: m_headers was resized to be large enough to accomodate
// sizeof(Tp) == 1 for every entry in buffer
auto idx = m_index.fetch_add(1, std::memory_order_release);
auto idx = m_index.fetch_add(1, std::memory_order_release);
// placement new
new(_addr) Tp{_v};
rocprofiler_record_header_t record = {};
record.hash = _hash;
record.payload = _addr;
m_headers.at(idx) = record;
}
read_unlock();
// remove notification of request
m_requested.fetch_sub(1);
return (_addr != nullptr);
}
@@ -269,6 +278,9 @@ record_header_buffer::emplace(uint32_t _category, uint32_t _kind, Tp& _v)
constexpr auto request_size = sizeof(Tp);
// notify there was a request
m_requested.fetch_add(1);
// in theory, we shouldn't need to lock here but the thread sanitizer says there is a race.
// the lock will be short-lived so hopefully, it will scale fine
write_lock();
@@ -278,20 +290,25 @@ record_header_buffer::emplace(uint32_t _category, uint32_t _kind, Tp& _v)
read_lock();
if(_addr)
{
// placement new
new(_addr) Tp{_v};
// if there is space in the buffer, atomically get an index
// for where the header record should be placed.
// NOTE: m_headers was resized to be large enough to accomodate
// sizeof(Tp) == 1 for every entry in buffer
auto idx = m_index.fetch_add(1, std::memory_order_release);
auto idx = m_index.fetch_add(1, std::memory_order_release);
// placement new
new(_addr) Tp{_v};
m_headers.at(idx) = rocprofiler_record_header_t{};
m_headers.at(idx).category = _category;
m_headers.at(idx).kind = _kind;
m_headers.at(idx).payload = _addr;
}
read_unlock();
// remove notification of request
m_requested.fetch_sub(1);
return (_addr != nullptr);
}
+101 -74
查看文件
@@ -26,6 +26,7 @@
#include <rocprofiler-sdk-roctx/types.h>
#include "lib/common/logging.hpp"
#include "lib/common/static_object.hpp"
#include "lib/common/utility.hpp"
#include <glog/logging.h>
@@ -45,6 +46,8 @@ namespace roctx
{
namespace
{
namespace common = ::rocprofiler::common;
constexpr size_t
compute_table_offset(size_t n)
{
@@ -57,27 +60,38 @@ compute_table_size(size_t nmembers)
return (sizeof(uint64_t) + (nmembers * sizeof(void*)));
}
#define ROCTX_ASSERT_OFFSET(MEMBER, IDX) \
static_assert(offsetof(roctxApiTable_t, MEMBER) == compute_table_offset(IDX), \
#define ROCTX_ASSERT_OFFSET(TABLE, MEMBER, IDX) \
static_assert(offsetof(TABLE, MEMBER) == compute_table_offset(IDX), \
"Do not re-arrange the table members")
ROCTX_ASSERT_OFFSET(roctxMarkA_fn, 0);
ROCTX_ASSERT_OFFSET(roctxRangePushA_fn, 1);
ROCTX_ASSERT_OFFSET(roctxRangePop_fn, 2);
ROCTX_ASSERT_OFFSET(roctxRangeStartA_fn, 3);
ROCTX_ASSERT_OFFSET(roctxRangeStop_fn, 4);
ROCTX_ASSERT_OFFSET(roctxProfilerPause_fn, 5);
ROCTX_ASSERT_OFFSET(roctxProfilerResume_fn, 6);
ROCTX_ASSERT_OFFSET(roctxNameOsThread_fn, 7);
ROCTX_ASSERT_OFFSET(roctxNameHsaAgent_fn, 8);
ROCTX_ASSERT_OFFSET(roctxNameHipDevice_fn, 9);
ROCTX_ASSERT_OFFSET(roctxNameHipStream_fn, 10);
ROCTX_ASSERT_OFFSET(roctxGetThreadId_fn, 11);
// core
ROCTX_ASSERT_OFFSET(roctxCoreApiTable_t, roctxMarkA_fn, 0);
ROCTX_ASSERT_OFFSET(roctxCoreApiTable_t, roctxRangePushA_fn, 1);
ROCTX_ASSERT_OFFSET(roctxCoreApiTable_t, roctxRangePop_fn, 2);
ROCTX_ASSERT_OFFSET(roctxCoreApiTable_t, roctxRangeStartA_fn, 3);
ROCTX_ASSERT_OFFSET(roctxCoreApiTable_t, roctxRangeStop_fn, 4);
ROCTX_ASSERT_OFFSET(roctxCoreApiTable_t, roctxGetThreadId_fn, 5);
// control
ROCTX_ASSERT_OFFSET(roctxControlApiTable_t, roctxProfilerPause_fn, 0);
ROCTX_ASSERT_OFFSET(roctxControlApiTable_t, roctxProfilerResume_fn, 1);
// name
ROCTX_ASSERT_OFFSET(roctxNameApiTable_t, roctxNameOsThread_fn, 0);
ROCTX_ASSERT_OFFSET(roctxNameApiTable_t, roctxNameHsaAgent_fn, 1);
ROCTX_ASSERT_OFFSET(roctxNameApiTable_t, roctxNameHipDevice_fn, 2);
ROCTX_ASSERT_OFFSET(roctxNameApiTable_t, roctxNameHipStream_fn, 3);
#undef ROCTX_ASSERT_OFFSET
static_assert(
sizeof(roctxApiTable_t) == compute_table_size(12),
sizeof(roctxCoreApiTable_t) == compute_table_size(6),
"Update table major/step version and add a new offset assertion if this fails to compile");
static_assert(
sizeof(roctxControlApiTable_t) == compute_table_size(2),
"Update table major/step version and add a new offset assertion if this fails to compile");
static_assert(
sizeof(roctxNameApiTable_t) == compute_table_size(4),
"Update table major/step version and add a new offset assertion if this fails to compile");
auto&
@@ -155,26 +169,39 @@ roctxNameHipStream(const char*, const struct ihipStream_t*)
return 0;
}
auto&
struct roctx_api_table
{
roctxCoreApiTable_t core = common::init_public_api_struct(roctxCoreApiTable_t{});
roctxControlApiTable_t control = common::init_public_api_struct(roctxControlApiTable_t{});
roctxNameApiTable_t name = common::init_public_api_struct(roctxNameApiTable_t{});
};
auto*&
get_table_impl()
{
rocprofiler::common::init_logging("ROCTX_LOG_LEVEL");
static auto val = roctxApiTable_t{sizeof(roctxApiTable_t),
&::roctx::roctxMarkA,
&::roctx::roctxRangePushA,
&::roctx::roctxRangePop,
&::roctx::roctxRangeStartA,
&::roctx::roctxRangeStop,
&::roctx::roctxProfilerPause,
&::roctx::roctxProfilerResume,
&::roctx::roctxNameOsThread,
&::roctx::roctxNameHsaAgent,
&::roctx::roctxNameHipDevice,
&::roctx::roctxNameHipStream,
&::roctx::roctxGetThreadId};
auto*& tbl = rocprofiler::common::static_object<roctx_api_table>::construct();
auto table_array = std::array<void*, 1>{&val};
tbl->core = roctxCoreApiTable_t{sizeof(roctxCoreApiTable_t),
&::roctx::roctxMarkA,
&::roctx::roctxRangePushA,
&::roctx::roctxRangePop,
&::roctx::roctxRangeStartA,
&::roctx::roctxRangeStop,
&::roctx::roctxGetThreadId};
tbl->control = roctxControlApiTable_t{sizeof(roctxControlApiTable_t),
&::roctx::roctxProfilerPause,
&::roctx::roctxProfilerResume};
tbl->name = roctxNameApiTable_t{sizeof(roctxNameApiTable_t),
&::roctx::roctxNameOsThread,
&::roctx::roctxNameHsaAgent,
&::roctx::roctxNameHipDevice,
&::roctx::roctxNameHipStream};
auto table_array = std::array<void*, 3>{&tbl->core, &tbl->control, &tbl->name};
auto lib_id = rocprofiler_register_library_indentifier_t{};
auto rocp_reg_status =
rocprofiler_register_library_api_table("roctx",
@@ -192,13 +219,13 @@ get_table_impl()
<< "] rocprofiler-register failed with error code " << rocp_reg_status << ": "
<< rocprofiler_register_error_string(rocp_reg_status);
return val;
return tbl;
}
const auto*
get_table()
{
static const auto* tbl = &get_table_impl();
static auto*& tbl = get_table_impl();
return tbl;
}
} // namespace
@@ -209,73 +236,73 @@ ROCTX_EXTERN_C_INIT
void
roctxMarkA(const char* message)
{
::roctx::get_table()->roctxMarkA_fn(message);
::roctx::get_table()->core.roctxMarkA_fn(message);
}
int
roctxRangePushA(const char* message)
{
return ::roctx::get_table()->roctxRangePushA_fn(message);
return ::roctx::get_table()->core.roctxRangePushA_fn(message);
}
int
roctxRangePop()
{
return ::roctx::get_table()->roctxRangePop_fn();
return ::roctx::get_table()->core.roctxRangePop_fn();
}
roctx_range_id_t
roctxRangeStartA(const char* message)
{
return ::roctx::get_table()->roctxRangeStartA_fn(message);
return ::roctx::get_table()->core.roctxRangeStartA_fn(message);
}
void
roctxRangeStop(roctx_range_id_t id)
{
return ::roctx::get_table()->roctxRangeStop_fn(id);
}
int
roctxProfilerPause(roctx_thread_id_t tid)
{
return ::roctx::get_table()->roctxProfilerPause_fn(tid);
}
int
roctxProfilerResume(roctx_thread_id_t tid)
{
return ::roctx::get_table()->roctxProfilerResume_fn(tid);
}
int
roctxNameOsThread(const char* name)
{
return ::roctx::get_table()->roctxNameOsThread_fn(name);
}
int
roctxNameHsaAgent(const char* name, const struct hsa_agent_s* agent)
{
return ::roctx::get_table()->roctxNameHsaAgent_fn(name, agent);
}
int
roctxNameHipDevice(const char* name, int device_id)
{
return ::roctx::get_table()->roctxNameHipDevice_fn(name, device_id);
}
int
roctxNameHipStream(const char* name, const struct ihipStream_t* stream)
{
return ::roctx::get_table()->roctxNameHipStream_fn(name, stream);
return ::roctx::get_table()->core.roctxRangeStop_fn(id);
}
int
roctxGetThreadId(roctx_thread_id_t* tid)
{
return ::roctx::get_table()->roctxGetThreadId_fn(tid);
return ::roctx::get_table()->core.roctxGetThreadId_fn(tid);
}
int
roctxProfilerPause(roctx_thread_id_t tid)
{
return ::roctx::get_table()->control.roctxProfilerPause_fn(tid);
}
int
roctxProfilerResume(roctx_thread_id_t tid)
{
return ::roctx::get_table()->control.roctxProfilerResume_fn(tid);
}
int
roctxNameOsThread(const char* name)
{
return ::roctx::get_table()->name.roctxNameOsThread_fn(name);
}
int
roctxNameHsaAgent(const char* name, const struct hsa_agent_s* agent)
{
return ::roctx::get_table()->name.roctxNameHsaAgent_fn(name, agent);
}
int
roctxNameHipDevice(const char* name, int device_id)
{
return ::roctx::get_table()->name.roctxNameHipDevice_fn(name, device_id);
}
int
roctxNameHipStream(const char* name, const struct ihipStream_t* stream)
{
return ::roctx::get_table()->name.roctxNameHipStream_fn(name, stream);
}
ROCTX_EXTERN_C_FINI
+8 -2
查看文件
@@ -233,7 +233,10 @@ get_buffer_id_names()
ROCPROFILER_BUFFER_TRACING_HIP_API,
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API,
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
ROCPROFILER_BUFFER_TRACING_MARKER_API};
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API,
ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API,
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API,
};
auto cb_name_info = rocprofiler_tool_buffer_name_info_t{};
//
@@ -291,7 +294,10 @@ get_callback_id_names()
ROCPROFILER_CALLBACK_TRACING_HSA_API,
ROCPROFILER_CALLBACK_TRACING_HIP_API,
ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_API};
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API,
};
auto cb_name_info = rocprofiler_tool_callback_name_info_t{};
//
+21 -4
查看文件
@@ -34,10 +34,17 @@ namespace fs = common::filesystem;
std::pair<std::ostream*, void (*)(std::ostream*&)>
get_output_stream(const std::string& fname, const std::string& ext)
{
auto output_path = fs::path{tool::format(tool::get_config().output_path)};
auto output_file_name = tool::format(tool::get_config().output_file);
auto cfg_output_path = tool::format(tool::get_config().output_path);
if(output_path.string().empty()) return {&std::clog, [](auto*&) {}};
if(cfg_output_path == "stdout" || cfg_output_path == "STDOUT")
return {&std::cout, [](auto*&) {}};
else if(cfg_output_path == "stderr" || cfg_output_path == "STDERR")
return {&std::cout, [](auto*&) {}};
else if(cfg_output_path.empty())
return {&std::clog, [](auto*&) {}};
auto output_path = fs::path{cfg_output_path};
auto output_file_name = tool::format(tool::get_config().output_file);
if(fs::exists(output_path) && !fs::is_directory(fs::status(output_path)))
throw std::runtime_error{
@@ -50,7 +57,7 @@ get_output_stream(const std::string& fname, const std::string& ext)
if(!_ofs && !*_ofs)
throw std::runtime_error{fmt::format("Failed to open {} for output", output_file)};
LOG(ERROR) << "Results File: " << output_file;
LOG(ERROR) << "Opened result file: " << output_file;
return {_ofs, [](std::ostream*& v) {
if(v) dynamic_cast<std::ofstream*>(v)->close();
@@ -58,5 +65,15 @@ get_output_stream(const std::string& fname, const std::string& ext)
v = nullptr;
}};
}
output_file::~output_file()
{
if(m_stream)
LOG(INFO) << "Closing result file: " << m_name;
else
LOG(WARNING) << "output_file::~output_file does not have a output stream instance!";
m_dtor(m_stream);
}
} // namespace tool
} // namespace rocprofiler
+2 -8
查看文件
@@ -49,7 +49,7 @@ struct output_file
template <size_t N>
output_file(std::string name, csv::csv_encoder<N>, std::array<std::string_view, N>&& header);
~output_file() { m_dtor(m_stream); }
~output_file();
output_file(const output_file&) = delete;
output_file& operator=(const output_file&) = delete;
@@ -60,13 +60,7 @@ struct output_file
std::ostream& operator<<(T&& value)
{
auto _lk = std::unique_lock<std::mutex>{m_mutex};
return ((m_stream) ? *m_stream : std::cerr) << std::forward<T>(value);
}
std::ostream& operator<<(std::ostream& (*func)(std::ostream&) )
{
auto _lk = std::unique_lock<std::mutex>{m_mutex};
return ((m_stream) ? *m_stream : std::cerr) << func;
return ((m_stream) ? *m_stream : std::cerr) << std::forward<T>(value) << std::flush;
}
operator bool() const { return m_stream != nullptr; }
+50 -81
查看文件
@@ -237,7 +237,6 @@ flush()
{
LOG(INFO) << "flushing buffer " << itr.handle;
ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush");
ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush");
}
}
LOG(INFO) << "Buffers flushed";
@@ -250,15 +249,15 @@ cntrl_tracing_callback(rocprofiler_callback_tracing_record_t record,
{
auto* ctx = static_cast<rocprofiler_context_id_t*>(cb_data);
if(ctx && record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API)
if(ctx && record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER &&
record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerPause)
record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerPause)
{
ROCPROFILER_CALL(rocprofiler_stop_context(*ctx), "pausing context");
}
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT &&
record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerResume)
record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerResume)
{
ROCPROFILER_CALL(rocprofiler_start_context(*ctx), "resuming context");
}
@@ -298,7 +297,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
static auto global_range =
common::Synchronized<std::unordered_map<roctx_range_id_t, marker_entry>>{};
if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API)
if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API)
{
auto* marker_data =
static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload);
@@ -307,7 +306,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
rocprofiler_get_timestamp(&ts);
const auto* kind_name = callback_name_info.kind_names.at(record.kind);
if(record.operation == ROCPROFILER_MARKER_API_ID_roctxMarkA)
if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
{
@@ -323,7 +322,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
get_marker_api_file() << ss.str();
}
}
else if(record.operation == ROCPROFILER_MARKER_API_ID_roctxRangePushA)
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
{
@@ -336,7 +335,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
}
}
}
else if(record.operation == ROCPROFILER_MARKER_API_ID_roctxRangePop)
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
{
@@ -353,12 +352,12 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
get_marker_api_file() << ss.str();
}
}
else if(record.operation == ROCPROFILER_MARKER_API_ID_roctxRangeStartA)
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT &&
marker_data->args.roctxRangeStartA.message)
{
auto _id = marker_data->retval.uint64_t_retval;
auto _id = marker_data->retval.roctx_range_id_t_retval;
auto _entry = marker_entry{};
_entry.cid = record.correlation_id.internal;
_entry.data.value = ts;
@@ -368,7 +367,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
[_id, &_entry](auto& map) { map.emplace(_id, std::move(_entry)); });
}
}
else if(record.operation == ROCPROFILER_MARKER_API_ID_roctxRangeStop)
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStop)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
{
@@ -459,16 +458,13 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
void* /*user_data*/,
uint64_t /*drop_count*/)
{
static auto _sync = std::mutex{};
auto _lk = std::lock_guard<std::mutex>{_sync};
LOG(INFO) << "Executing buffered tracing callback for " << num_headers << " headers";
if(num_headers == 0)
throw std::runtime_error{"rocprofiler invoked a buffer callback with no headers "
"this should never happen"};
LOG_IF(ERROR, headers == nullptr)
<< "rocprofiler invoked a buffer callback with a null pointer to the array of headers. "
"this should never happen";
else if(headers == nullptr)
throw std::runtime_error{"rocprofiler invoked a buffer callback with a null pointer to the "
"array of headers. this should never happen"};
if(!headers) return;
for(size_t i = 0; i < num_headers; ++i)
{
@@ -585,36 +581,25 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
rocprofiler_query_record_dimension_position(profiler_record->id, 0, &pos);
auto counter_collection_ss = std::stringstream{};
counter_collection_ss << counter_id.handle << ","
<< kernel_properties.gpu_agent.id.handle << ","
<< kernel_properties.queue_id.handle << "," << getpid() << ","
<< kernel_properties.thread_id << ",";
counter_collection_ss << kernel_properties.grid_size << ","
<< kernel_properties.kernel_name << ","
<< kernel_properties.workgroup_size << ","
<< ((kernel_properties.lds_size + (lds_block_size - 1)) &
~(lds_block_size - 1))
<< "," << kernel_properties.scratch_size << ","
<< kernel_properties.arch_vgpr_count << ","
<< kernel_properties.sgpr_count << ",";
/*
Iterate through the N dimensional that is obtained for the counter.
given instance id what is the counter id
given counter id what is the counter name
given instance how many dimension
iterate through dimensions
what is the dimension id
what is the dimension name
what pos in the dimension.
*/
// ss << counter_name << "[" << info.name << "," << pos << "]" << ",";
// ss << profiler_record->counter_value << "\n";
counter_collection_ss << counter_name << "["
<< "," << pos << "]"
<< ",";
counter_collection_ss << counter_name << ",";
counter_collection_ss << profiler_record->counter_value << "\n";
get_counter_collection_file() << counter_collection_ss.str() << "\n";
tool::csv::counter_collection_csv_encoder::write_row(
counter_collection_ss,
counter_id.handle,
kernel_properties.gpu_agent.id.handle,
kernel_properties.queue_id.handle,
getpid(),
kernel_properties.thread_id,
kernel_properties.grid_size,
kernel_properties.kernel_name,
kernel_properties.workgroup_size,
((kernel_properties.lds_size + (lds_block_size - 1)) & ~(lds_block_size - 1)),
kernel_properties.scratch_size,
kernel_properties.arch_vgpr_count,
kernel_properties.sgpr_count,
fmt::format("{}[{}]", counter_name, pos),
profiler_record->counter_value);
get_counter_collection_file() << counter_collection_ss.str();
}
}
}
@@ -764,42 +749,26 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
if(tool::get_config().marker_api_trace)
{
auto operations = std::vector<uint32_t>{};
rocprofiler_iterate_callback_tracing_kind_operations(
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
[](rocprofiler_callback_tracing_kind_t, uint32_t operation, void* data) {
auto* _ops = static_cast<std::vector<uint32_t>*>(data);
if(operation != ROCPROFILER_MARKER_API_ID_roctxProfilerPause &&
operation != ROCPROFILER_MARKER_API_ID_roctxProfilerResume)
_ops->emplace_back(operation);
return 0;
},
&operations);
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(get_client_ctx(),
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
operations.data(),
operations.size(),
callback_tracing_callback,
nullptr),
"callback tracing service failed to configure");
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
get_client_ctx(),
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
nullptr,
0,
callback_tracing_callback,
nullptr),
"callback tracing service failed to configure");
auto pause_resume_ctx = rocprofiler_context_id_t{};
ROCPROFILER_CALL(rocprofiler_create_context(&pause_resume_ctx), "failed to create context");
auto pause_resume_ops =
std::array<uint32_t, 2>{ROCPROFILER_MARKER_API_ID_roctxProfilerPause,
ROCPROFILER_MARKER_API_ID_roctxProfilerResume};
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(pause_resume_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
pause_resume_ops.data(),
pause_resume_ops.size(),
cntrl_tracing_callback,
static_cast<void*>(&get_client_ctx())),
"callback tracing service failed to configure");
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
pause_resume_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
nullptr,
0,
cntrl_tracing_callback,
static_cast<void*>(&get_client_ctx())),
"callback tracing service failed to configure");
ROCPROFILER_CALL(rocprofiler_start_context(pause_resume_ctx), "start context failed");
}
+16 -5
查看文件
@@ -65,7 +65,9 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(NONE)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(HSA_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_COMPILER_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(MARKER_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(MARKER_CORE_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(MARKER_CONTROL_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(MARKER_NAME_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(MEMORY_COPY)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KERNEL_DISPATCH)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(PAGE_MIGRATION)
@@ -161,8 +163,13 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_
val = rocprofiler::hsa::name_by_id(operation);
else if(kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)
val = rocprofiler::hsa::async_copy::name_by_id(operation);
else if(kind == ROCPROFILER_BUFFER_TRACING_MARKER_API)
val = rocprofiler::marker::name_by_id<ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi>(operation);
else if(kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API)
val = rocprofiler::marker::name_by_id<ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore>(operation);
else if(kind == ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API)
val = rocprofiler::marker::name_by_id<ROCPROFILER_MARKER_API_TABLE_ID_RoctxControl>(
operation);
else if(kind == ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API)
val = rocprofiler::marker::name_by_id<ROCPROFILER_MARKER_API_TABLE_ID_RoctxName>(operation);
else if(kind == ROCPROFILER_BUFFER_TRACING_HIP_API)
val = rocprofiler::hip::name_by_id<ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi>(operation);
else if(kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API)
@@ -207,8 +214,12 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
ops = rocprofiler::hsa::get_ids();
else if(kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)
ops = rocprofiler::hsa::async_copy::get_ids();
else if(kind == ROCPROFILER_BUFFER_TRACING_MARKER_API)
ops = rocprofiler::marker::get_ids<ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi>();
else if(kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API)
ops = rocprofiler::marker::get_ids<ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore>();
else if(kind == ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API)
ops = rocprofiler::marker::get_ids<ROCPROFILER_MARKER_API_TABLE_ID_RoctxControl>();
else if(kind == ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API)
ops = rocprofiler::marker::get_ids<ROCPROFILER_MARKER_API_TABLE_ID_RoctxName>();
else if(kind == ROCPROFILER_BUFFER_TRACING_HIP_API)
ops = rocprofiler::hip::get_ids<ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi>();
else if(kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API)
+36 -7
查看文件
@@ -63,7 +63,9 @@ ROCPROFILER_CALLBACK_TRACING_KIND_STRING(NONE)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(HSA_API)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(HIP_API)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(HIP_COMPILER_API)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MARKER_API)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MARKER_CORE_API)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MARKER_CONTROL_API)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MARKER_NAME_API)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(CODE_OBJECT)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(KERNEL_DISPATCH)
@@ -145,8 +147,13 @@ rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_trac
const char* val = nullptr;
if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API)
val = rocprofiler::hsa::name_by_id(operation);
else if(kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API)
val = rocprofiler::marker::name_by_id<ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi>(operation);
else if(kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API)
val = rocprofiler::marker::name_by_id<ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore>(operation);
else if(kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API)
val = rocprofiler::marker::name_by_id<ROCPROFILER_MARKER_API_TABLE_ID_RoctxControl>(
operation);
else if(kind == ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API)
val = rocprofiler::marker::name_by_id<ROCPROFILER_MARKER_API_TABLE_ID_RoctxName>(operation);
else if(kind == ROCPROFILER_CALLBACK_TRACING_HIP_API)
val = rocprofiler::hip::name_by_id<ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi>(operation);
else if(kind == ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API)
@@ -190,8 +197,12 @@ rocprofiler_iterate_callback_tracing_kind_operations(
auto ops = std::vector<uint32_t>{};
if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API)
ops = rocprofiler::hsa::get_ids();
else if(kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API)
ops = rocprofiler::marker::get_ids<ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi>();
else if(kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API)
ops = rocprofiler::marker::get_ids<ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore>();
else if(kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API)
ops = rocprofiler::marker::get_ids<ROCPROFILER_MARKER_API_TABLE_ID_RoctxControl>();
else if(kind == ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API)
ops = rocprofiler::marker::get_ids<ROCPROFILER_MARKER_API_TABLE_ID_RoctxName>();
else if(kind == ROCPROFILER_CALLBACK_TRACING_HIP_API)
ops = rocprofiler::hip::get_ids<ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi>();
else if(kind == ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API)
@@ -222,9 +233,27 @@ rocprofiler_iterate_callback_tracing_kind_operation_args(
user_data);
return ROCPROFILER_STATUS_SUCCESS;
}
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API)
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API)
{
rocprofiler::marker::iterate_args(
rocprofiler::marker::iterate_args<ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore>(
record.operation,
*static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload),
callback,
user_data);
return ROCPROFILER_STATUS_SUCCESS;
}
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API)
{
rocprofiler::marker::iterate_args<ROCPROFILER_MARKER_API_TABLE_ID_RoctxControl>(
record.operation,
*static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload),
callback,
user_data);
return ROCPROFILER_STATUS_SUCCESS;
}
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API)
{
rocprofiler::marker::iterate_args<ROCPROFILER_MARKER_API_TABLE_ID_RoctxName>(
record.operation,
*static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload),
callback,
+49 -40
查看文件
@@ -46,21 +46,23 @@ namespace intercept_table
{
namespace
{
template <rocprofiler_runtime_library_t... Idx>
using library_sequence_t = std::integer_sequence<rocprofiler_runtime_library_t, Idx...>;
template <rocprofiler_intercept_table_t... Idx>
using library_sequence_t = std::integer_sequence<rocprofiler_intercept_table_t, Idx...>;
// this is used to loop over the different libraries
constexpr auto intercept_library_seq = library_sequence_t<ROCPROFILER_HSA_LIBRARY,
ROCPROFILER_HIP_RUNTIME_LIBRARY,
ROCPROFILER_MARKER_LIBRARY,
ROCPROFILER_HIP_COMPILER_LIBRARY>{};
constexpr auto intercept_library_seq = library_sequence_t<ROCPROFILER_HSA_TABLE,
ROCPROFILER_HIP_RUNTIME_TABLE,
ROCPROFILER_HIP_COMPILER_TABLE,
ROCPROFILER_MARKER_CORE_TABLE,
ROCPROFILER_MARKER_CONTROL_TABLE,
ROCPROFILER_MARKER_NAME_TABLE>{};
// check that intercept_library_seq is up to date
static_assert((1 << (intercept_library_seq.size())) == ROCPROFILER_LIBRARY_LAST,
static_assert((1 << (intercept_library_seq.size() - 1)) == ROCPROFILER_TABLE_LAST,
"Update intercept_library_seq to include new libraries");
// data structure holding list of callbacks
template <rocprofiler_runtime_library_t LibT>
template <rocprofiler_intercept_table_t LibT>
struct intercept
{
static constexpr auto value = LibT;
@@ -71,7 +73,7 @@ struct intercept
};
// static accessor for intercept instance
template <rocprofiler_runtime_library_t LibT>
template <rocprofiler_intercept_table_t LibT>
auto&
get_intercept()
{
@@ -80,7 +82,7 @@ get_intercept()
}
// adds callbacks to intercept instance(s)
template <rocprofiler_runtime_library_t... Idx>
template <rocprofiler_intercept_table_t... Idx>
void
update_intercepts(rocprofiler_intercept_library_cb_t cb,
int libs,
@@ -109,13 +111,13 @@ get_void_array(std::tuple<Tp*...> data, std::index_sequence<Idx...>)
};
// invokes creation notifiers
template <typename... ApiTableT, rocprofiler_runtime_library_t... Idx>
template <typename... ApiTableT, rocprofiler_intercept_table_t... Idx>
void
execute_intercepts(rocprofiler_runtime_library_t lib,
execute_intercepts(rocprofiler_intercept_table_t lib,
uint64_t lib_version,
uint64_t lib_instance,
std::tuple<ApiTableT*...> tables,
std::integer_sequence<rocprofiler_runtime_library_t, Idx...>)
std::integer_sequence<rocprofiler_intercept_table_t, Idx...>)
{
auto execute = [lib, lib_version, lib_instance, tables](auto& notifier) {
if(((lib & notifier.value) == notifier.value))
@@ -145,52 +147,59 @@ execute_intercepts(rocprofiler_runtime_library_t lib,
template <typename... ApiTableT>
void
notify_runtime_api_registration(rocprofiler_runtime_library_t lib,
uint64_t lib_version,
uint64_t lib_instance,
std::tuple<ApiTableT*...> tables)
notify_intercept_table_registration(rocprofiler_intercept_table_t lib,
uint64_t lib_version,
uint64_t lib_instance,
std::tuple<ApiTableT*...> tables)
{
execute_intercepts(lib, lib_version, lib_instance, tables, intercept_library_seq);
}
// template instantiation for HsaApiTable
template void notify_runtime_api_registration(rocprofiler_runtime_library_t,
uint64_t,
uint64_t,
std::tuple<HsaApiTable*>);
template void notify_intercept_table_registration(rocprofiler_intercept_table_t,
uint64_t,
uint64_t,
std::tuple<HsaApiTable*>);
template void notify_runtime_api_registration(rocprofiler_runtime_library_t,
uint64_t,
uint64_t,
std::tuple<roctxApiTable_t*>);
template void notify_intercept_table_registration(rocprofiler_intercept_table_t,
uint64_t,
uint64_t,
std::tuple<roctxCoreApiTable_t*>);
template void notify_runtime_api_registration(rocprofiler_runtime_library_t,
uint64_t,
uint64_t,
std::tuple<HipDispatchTable*>);
template void notify_intercept_table_registration(rocprofiler_intercept_table_t,
uint64_t,
uint64_t,
std::tuple<roctxControlApiTable_t*>);
template void notify_runtime_api_registration(rocprofiler_runtime_library_t,
uint64_t,
uint64_t,
std::tuple<HipCompilerDispatchTable*>);
template void notify_intercept_table_registration(rocprofiler_intercept_table_t,
uint64_t,
uint64_t,
std::tuple<roctxNameApiTable_t*>);
template void notify_intercept_table_registration(rocprofiler_intercept_table_t,
uint64_t,
uint64_t,
std::tuple<HipDispatchTable*>);
template void notify_intercept_table_registration(rocprofiler_intercept_table_t,
uint64_t,
uint64_t,
std::tuple<HipCompilerDispatchTable*>);
} // namespace intercept_table
} // namespace rocprofiler
extern "C" {
rocprofiler_status_t
rocprofiler_at_runtime_api_registration(rocprofiler_intercept_library_cb_t callback,
int libs,
void* data)
rocprofiler_at_intercept_table_registration(rocprofiler_intercept_library_cb_t callback,
int libs,
void* data)
{
// if this function is invoked after initialization, we cannot guarantee that the runtime
// intercept API has not already be registered and returned to the runtime.
if(rocprofiler::registration::get_init_status() > 0)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
if((libs & ROCPROFILER_LIBRARY) == ROCPROFILER_LIBRARY)
return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
else if((libs & ROCPROFILER_MARKER_LIBRARY) == ROCPROFILER_MARKER_LIBRARY)
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
LOG_IF(WARNING, libs == 0) << "invoking " << __FUNCTION__ << " with a value of zero is a no-op";
rocprofiler::intercept_table::update_intercepts(
callback, libs, data, rocprofiler::intercept_table::intercept_library_seq);
+4 -4
查看文件
@@ -32,9 +32,9 @@ namespace intercept_table
{
template <typename... ApiTableT>
void
notify_runtime_api_registration(rocprofiler_runtime_library_t lib,
uint64_t lib_version,
uint64_t lib_instance,
std::tuple<ApiTableT*...> tables);
notify_intercept_table_registration(rocprofiler_intercept_table_t lib,
uint64_t lib_version,
uint64_t lib_instance,
std::tuple<ApiTableT*...> tables);
}
} // namespace rocprofiler
+4 -6
查看文件
@@ -69,12 +69,10 @@ using creation_notifier_cb_t = void (*)(rocprofiler_runtime_library_t, void*);
using thread_pool_config_t = PTL::ThreadPool::Config;
// this is used to loop over the different libraries
constexpr auto creation_notifier_library_seq =
library_sequence_t<ROCPROFILER_LIBRARY,
ROCPROFILER_HSA_LIBRARY,
ROCPROFILER_HIP_LIBRARY,
ROCPROFILER_MARKER_LIBRARY,
ROCPROFILER_HIP_COMPILER_LIBRARY>{};
constexpr auto creation_notifier_library_seq = library_sequence_t<ROCPROFILER_LIBRARY,
ROCPROFILER_HSA_LIBRARY,
ROCPROFILER_HIP_LIBRARY,
ROCPROFILER_MARKER_LIBRARY>{};
// check that creation_notifier_library_seq is up to date
static_assert((1 << (creation_notifier_library_seq.size() - 1)) == ROCPROFILER_LIBRARY_LAST,
+18 -3
查看文件
@@ -274,13 +274,28 @@
{ \
namespace marker \
{ \
namespace \
{ \
template <> \
auto* get_table<TABLE_ID>() \
{ \
return get_table_impl<TYPE>(); \
} \
} \
\
template <> \
struct roctx_table_lookup<TABLE_ID> \
{ \
using type = TYPE; \
auto& operator()(roctx_api_table_t& _v) const { return _v; } \
auto& operator()(roctx_api_table_t* _v) const { return *_v; } \
auto& operator()() const { return (*this)(get_table()); } \
auto& operator()(type& _v) const { return _v; } \
auto& operator()(type* _v) const { return *_v; } \
auto& operator()() const { return (*this)(get_table<TABLE_ID>()); } \
}; \
\
template <> \
struct roctx_table_id_lookup<TYPE> \
{ \
static constexpr auto value = TABLE_ID; \
}; \
} \
}
+58 -46
查看文件
@@ -22,6 +22,7 @@
#include "lib/rocprofiler-sdk/marker/marker.hpp"
#include "lib/common/defines.hpp"
#include "lib/common/static_object.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
@@ -68,20 +69,25 @@ set_data_retval(DataT& _data, Tp _val)
_data.int32_t_retval = _val;
else if constexpr(std::is_same<int64_t, Tp>::value)
_data.int64_t_retval = _val;
else if constexpr(std::is_same<uint64_t, Tp>::value)
_data.uint64_t_retval = _val;
else if constexpr(std::is_same<roctx_range_id_t, Tp>::value)
_data.roctx_range_id_t_retval = _val;
else
static_assert(std::is_empty<Tp>::value, "Error! unsupported return type");
}
} // namespace
roctx_api_table_t&
get_table()
template <typename Tp>
Tp*
get_table_impl()
{
static auto _v = common::init_public_api_struct(roctx_api_table_t{});
static auto*& _v = common::static_object<Tp>::construct(common::init_public_api_struct(Tp{}));
return _v;
}
template <size_t TableIdx>
auto*
get_table();
} // namespace
template <size_t TableIdx, size_t OpIdx>
template <typename DataArgsT, typename... Args>
auto
@@ -332,7 +338,7 @@ roctx_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
} // namespace marker
} // namespace rocprofiler
#define ROCPROFILER_LIB_ROCPROFILER_MARKER_MARKER_CPP_IMPL 1
#define ROCPROFILER_LIB_ROCPROFILER_SDK_MARKER_MARKER_CPP_IMPL 1
// template specializations
#include "marker.def.cpp"
@@ -343,16 +349,6 @@ namespace marker
{
namespace
{
template <size_t TableIdx>
struct api_id_bounds;
template <>
struct api_id_bounds<ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi>
{
static constexpr auto none = ROCPROFILER_MARKER_API_ID_NONE;
static constexpr auto last = ROCPROFILER_MARKER_API_ID_LAST;
};
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
const char*
name_by_id(const uint32_t id, std::index_sequence<OpIdx, OpIdxTail...>)
@@ -375,7 +371,7 @@ id_by_name(const char* name, std::index_sequence<OpIdx, OpIdxTail...>)
if constexpr(sizeof...(OpIdxTail) > 0)
return id_by_name<TableIdx>(name, std::index_sequence<OpIdxTail...>{});
else
return api_id_bounds<TableIdx>::none;
return roctx_domain_info<TableIdx>::none;
}
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
@@ -383,7 +379,7 @@ void
get_ids(std::vector<uint32_t>& _id_list, std::index_sequence<OpIdx, OpIdxTail...>)
{
auto _idx = roctx_api_info<TableIdx, OpIdx>::operation_idx;
if(_idx < api_id_bounds<TableIdx>::last) _id_list.emplace_back(_idx);
if(_idx < roctx_domain_info<TableIdx>::last) _id_list.emplace_back(_idx);
if constexpr(sizeof...(OpIdxTail) > 0)
get_ids<TableIdx>(_id_list, std::index_sequence<OpIdxTail...>{});
@@ -400,7 +396,7 @@ get_names(std::vector<const char*>& _name_list, std::index_sequence<OpIdx, OpIdx
get_names<TableIdx>(_name_list, std::index_sequence<OpIdxTail...>{});
}
template <size_t OpIdx, size_t... OpIdxTail>
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
void
iterate_args(const uint32_t id,
const rocprofiler_callback_tracing_marker_api_data_t& data,
@@ -410,7 +406,7 @@ iterate_args(const uint32_t id,
{
if(OpIdx == id)
{
using info_type = roctx_api_info<ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, OpIdx>;
using info_type = roctx_api_info<TableIdx, OpIdx>;
auto&& arg_list = info_type::as_arg_list(data);
auto&& arg_addr = info_type::as_arg_addr(data);
for(size_t i = 0; i < std::min(arg_list.size(), arg_addr.size()); ++i)
@@ -426,7 +422,7 @@ iterate_args(const uint32_t id,
}
}
if constexpr(sizeof...(OpIdxTail) > 0)
iterate_args(id, data, func, user_data, std::index_sequence<OpIdxTail...>{});
iterate_args<TableIdx>(id, data, func, user_data, std::index_sequence<OpIdxTail...>{});
}
bool
@@ -475,7 +471,7 @@ copy_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
// 3. get the sub-table containing the function pointer in saved table
// 4. get reference to function pointer in sub-table in saved table
// 5. save the original function in the saved table
auto& _saved = _info.get_table(get_table());
auto& _saved = _info.get_table(*get_table<TableIdx>());
auto& _ofunc = _info.get_table_func(_saved);
_ofunc = _func;
}
@@ -539,21 +535,22 @@ template <size_t TableIdx>
const char*
name_by_id(uint32_t id)
{
return name_by_id<TableIdx>(id, std::make_index_sequence<api_id_bounds<TableIdx>::last>{});
return name_by_id<TableIdx>(id, std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
}
template <size_t TableIdx>
uint32_t
id_by_name(const char* name)
{
return id_by_name<TableIdx>(name, std::make_index_sequence<api_id_bounds<TableIdx>::last>{});
return id_by_name<TableIdx>(name,
std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
}
template <size_t TableIdx>
std::vector<uint32_t>
get_ids()
{
constexpr auto last_api_id = api_id_bounds<TableIdx>::last;
constexpr auto last_api_id = roctx_domain_info<TableIdx>::last;
auto _data = std::vector<uint32_t>{};
_data.reserve(last_api_id);
get_ids<TableIdx>(_data, std::make_index_sequence<last_api_id>{});
@@ -564,13 +561,14 @@ template <size_t TableIdx>
std::vector<const char*>
get_names()
{
constexpr auto last_api_id = api_id_bounds<TableIdx>::last;
constexpr auto last_api_id = roctx_domain_info<TableIdx>::last;
auto _data = std::vector<const char*>{};
_data.reserve(last_api_id);
get_names<TableIdx>(_data, std::make_index_sequence<last_api_id>{});
return _data;
}
template <size_t TableIdx>
void
iterate_args(uint32_t id,
const rocprofiler_callback_tracing_marker_api_data_t& data,
@@ -578,35 +576,49 @@ iterate_args(uint32_t id,
void* user_data)
{
if(callback)
iterate_args(id,
data,
callback,
user_data,
std::make_index_sequence<ROCPROFILER_MARKER_API_ID_LAST>{});
iterate_args<TableIdx>(id,
data,
callback,
user_data,
std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
}
#define INSTANTIATE_MARKER_TABLE_FUNC(TABLE) \
template const char* name_by_id<TABLE>(uint32_t); \
template uint32_t id_by_name<TABLE>(const char*); \
template std::vector<uint32_t> get_ids<TABLE>(); \
template std::vector<const char*> get_names<TABLE>();
INSTANTIATE_MARKER_TABLE_FUNC(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi)
template <typename TableT>
void
copy_table(roctx_api_table_t* _orig)
copy_table(TableT* _orig)
{
constexpr auto TableIdx = roctx_table_id_lookup<TableT>::value;
if(_orig)
copy_table<ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi>(
_orig, std::make_index_sequence<ROCPROFILER_MARKER_API_ID_LAST>{});
copy_table<TableIdx>(_orig, std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
}
template <typename TableT>
void
update_table(roctx_api_table_t* _orig)
update_table(TableT* _orig)
{
constexpr auto TableIdx = roctx_table_id_lookup<TableT>::value;
if(_orig)
update_table<ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi>(
_orig, std::make_index_sequence<ROCPROFILER_MARKER_API_ID_LAST>{});
update_table<TableIdx>(_orig,
std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
}
using iterate_args_data_t = rocprofiler_callback_tracing_marker_api_data_t;
using iterate_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
#define INSTANTIATE_MARKER_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \
template void copy_table<TABLE_TYPE>(TABLE_TYPE * _tbl); \
template void update_table<TABLE_TYPE>(TABLE_TYPE * _tbl); \
template const char* name_by_id<TABLE_IDX>(uint32_t); \
template uint32_t id_by_name<TABLE_IDX>(const char*); \
template std::vector<uint32_t> get_ids<TABLE_IDX>(); \
template std::vector<const char*> get_names<TABLE_IDX>(); \
template void iterate_args<TABLE_IDX>( \
uint32_t, const iterate_args_data_t&, iterate_args_cb_t, void*);
INSTANTIATE_MARKER_TABLE_FUNC(roctx_core_api_table_t, ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore)
INSTANTIATE_MARKER_TABLE_FUNC(roctx_ctrl_api_table_t, ROCPROFILER_MARKER_API_TABLE_ID_RoctxControl)
INSTANTIATE_MARKER_TABLE_FUNC(roctx_name_api_table_t, ROCPROFILER_MARKER_API_TABLE_ID_RoctxName)
#undef INSTANTIATE_MARKER_TABLE_FUNC
} // namespace marker
} // namespace rocprofiler
+58 -21
查看文件
@@ -22,41 +22,78 @@
#include "lib/rocprofiler-sdk/marker/defines.hpp"
#include "lib/rocprofiler-sdk/marker/marker.hpp"
#include "rocprofiler-sdk/marker/table_api_id.h"
namespace rocprofiler
{
namespace marker
{
template <>
struct roctx_domain_info<ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi>
struct roctx_domain_info<ROCPROFILER_MARKER_API_TABLE_ID_LAST>
{
static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_MARKER_API;
static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_MARKER_API;
using args_type = rocprofiler_marker_api_args_t;
using retval_type = rocprofiler_marker_api_retval_t;
using callback_data_type = rocprofiler_callback_tracing_marker_api_data_t;
using args_type = rocprofiler_marker_api_args_t;
using retval_type = rocprofiler_marker_api_retval_t;
using callback_data_type = rocprofiler_callback_tracing_marker_api_data_t;
using buffer_data_type = rocprofiler_buffer_tracing_marker_api_record_t;
};
template <>
struct roctx_domain_info<ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore>
: roctx_domain_info<ROCPROFILER_MARKER_API_TABLE_ID_LAST>
{
static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API;
static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API;
static constexpr auto none = ROCPROFILER_MARKER_CORE_API_ID_NONE;
static constexpr auto last = ROCPROFILER_MARKER_CORE_API_ID_LAST;
using enum_type = rocprofiler_marker_core_api_id_t;
};
template <>
struct roctx_domain_info<ROCPROFILER_MARKER_API_TABLE_ID_RoctxControl>
: roctx_domain_info<ROCPROFILER_MARKER_API_TABLE_ID_LAST>
{
static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API;
static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API;
static constexpr auto none = ROCPROFILER_MARKER_CONTROL_API_ID_NONE;
static constexpr auto last = ROCPROFILER_MARKER_CONTROL_API_ID_LAST;
using enum_type = rocprofiler_marker_control_api_id_t;
};
template <>
struct roctx_domain_info<ROCPROFILER_MARKER_API_TABLE_ID_RoctxName>
: roctx_domain_info<ROCPROFILER_MARKER_API_TABLE_ID_LAST>
{
static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API;
static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API;
static constexpr auto none = ROCPROFILER_MARKER_NAME_API_ID_NONE;
static constexpr auto last = ROCPROFILER_MARKER_NAME_API_ID_LAST;
using enum_type = rocprofiler_marker_name_api_id_t;
};
} // namespace marker
} // namespace rocprofiler
#if defined(ROCPROFILER_LIB_ROCPROFILER_MARKER_MARKER_CPP_IMPL) && \
ROCPROFILER_LIB_ROCPROFILER_MARKER_MARKER_CPP_IMPL == 1
#if defined(ROCPROFILER_LIB_ROCPROFILER_SDK_MARKER_MARKER_CPP_IMPL) && \
ROCPROFILER_LIB_ROCPROFILER_SDK_MARKER_MARKER_CPP_IMPL == 1
// clang-format off
MARKER_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, roctx_api_table_t)
MARKER_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore, roctx_core_api_table_t)
MARKER_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_MARKER_API_TABLE_ID_RoctxControl, roctx_ctrl_api_table_t)
MARKER_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_MARKER_API_TABLE_ID_RoctxName, roctx_name_api_table_t)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxMarkA, roctxMarkA, roctxMarkA_fn, message)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxRangePushA, roctxRangePushA, roctxRangePushA_fn, message)
MARKER_API_INFO_DEFINITION_0(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxRangePop, roctxRangePop, roctxRangePop_fn)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxRangeStartA, roctxRangeStartA, roctxRangeStartA_fn, message)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxRangeStop, roctxRangeStop, roctxRangeStop_fn, id)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxProfilerPause, roctxProfilerPause, roctxProfilerPause_fn, tid)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxProfilerResume, roctxProfilerResume, roctxProfilerResume_fn, tid)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxNameOsThread, roctxNameOsThread, roctxNameOsThread_fn, name)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxNameHsaAgent, roctxNameHsaAgent, roctxNameHsaAgent_fn, name, agent)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxNameHipDevice, roctxNameHipDevice, roctxNameHipDevice_fn, name, device_id)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxNameHipStream, roctxNameHipStream, roctxNameHipStream_fn, name, stream)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxApi, ROCPROFILER_MARKER_API_ID_roctxGetThreadId, roctxGetThreadId, roctxGetThreadId_fn, tid)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore, ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA, roctxMarkA, roctxMarkA_fn, message)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore, ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA, roctxRangePushA, roctxRangePushA_fn, message)
MARKER_API_INFO_DEFINITION_0(ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore, ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop, roctxRangePop, roctxRangePop_fn)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore, ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA, roctxRangeStartA, roctxRangeStartA_fn, message)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore, ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStop, roctxRangeStop, roctxRangeStop_fn, id)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxCore, ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId, roctxGetThreadId, roctxGetThreadId_fn, tid)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxControl, ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerPause, roctxProfilerPause, roctxProfilerPause_fn, tid)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxControl, ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerResume, roctxProfilerResume, roctxProfilerResume_fn, tid)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxName, ROCPROFILER_MARKER_NAME_API_ID_roctxNameOsThread, roctxNameOsThread, roctxNameOsThread_fn, name)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxName, ROCPROFILER_MARKER_NAME_API_ID_roctxNameHsaAgent, roctxNameHsaAgent, roctxNameHsaAgent_fn, name, agent)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxName, ROCPROFILER_MARKER_NAME_API_ID_roctxNameHipDevice, roctxNameHipDevice, roctxNameHipDevice_fn, name, device_id)
MARKER_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_API_TABLE_ID_RoctxName, ROCPROFILER_MARKER_NAME_API_ID_roctxNameHipStream, roctxNameHipStream, roctxNameHipStream_fn, name, stream)
// clang-format on
#else
+13 -4
查看文件
@@ -33,14 +33,20 @@ namespace rocprofiler
{
namespace marker
{
using roctx_api_table_t = ::roctxApiTable_t;
using roctx_core_api_table_t = ::roctxCoreApiTable_t;
using roctx_ctrl_api_table_t = ::roctxControlApiTable_t;
using roctx_name_api_table_t = ::roctxNameApiTable_t;
roctx_api_table_t&
template <typename Tp>
Tp*
get_table();
template <size_t OpIdx>
struct roctx_table_lookup;
template <typename Tp>
struct roctx_table_id_lookup;
template <size_t TableIdx>
struct roctx_domain_info;
@@ -68,6 +74,7 @@ template <size_t TableIdx>
uint32_t
id_by_name(const char* name);
template <size_t TableIdx>
void
iterate_args(uint32_t id,
const rocprofiler_callback_tracing_marker_api_data_t& data,
@@ -82,10 +89,12 @@ template <size_t TableIdx>
std::vector<uint32_t>
get_ids();
template <typename TableT>
void
copy_table(roctx_api_table_t* _orig);
copy_table(TableT* _orig);
template <typename TableT>
void
update_table(roctx_api_table_t* _orig);
update_table(TableT* _orig);
} // namespace marker
} // namespace rocprofiler
+25 -13
查看文件
@@ -657,8 +657,8 @@ rocprofiler_set_api_table(const char* name,
// install rocprofiler API wrappers
rocprofiler::hip::update_table(hip_runtime_api_table);
rocprofiler::intercept_table::notify_runtime_api_registration(
ROCPROFILER_HIP_RUNTIME_LIBRARY,
rocprofiler::intercept_table::notify_intercept_table_registration(
ROCPROFILER_HIP_RUNTIME_TABLE,
lib_version,
lib_instance,
std::make_tuple(hip_runtime_api_table));
@@ -680,8 +680,8 @@ rocprofiler_set_api_table(const char* name,
// install rocprofiler API wrappers
rocprofiler::hip::update_table(hip_compiler_api_table);
rocprofiler::intercept_table::notify_runtime_api_registration(
ROCPROFILER_HIP_COMPILER_LIBRARY,
rocprofiler::intercept_table::notify_intercept_table_registration(
ROCPROFILER_HIP_COMPILER_TABLE,
lib_version,
lib_instance,
std::make_tuple(hip_compiler_api_table));
@@ -715,30 +715,42 @@ rocprofiler_set_api_table(const char* name,
rocprofiler::hsa::update_table(hsa_api_table);
// allow tools to install API wrappers
rocprofiler::intercept_table::notify_runtime_api_registration(
ROCPROFILER_HSA_LIBRARY, lib_version, lib_instance, std::make_tuple(hsa_api_table));
rocprofiler::intercept_table::notify_intercept_table_registration(
ROCPROFILER_HSA_TABLE, lib_version, lib_instance, std::make_tuple(hsa_api_table));
}
else if(std::string_view{name} == "roctx")
{
// pass to roctx init
LOG_IF(ERROR, num_tables > 1)
LOG_IF(ERROR, num_tables >= 3)
<< " rocprofiler expected ROCTX library to pass 1 API table, not " << num_tables;
auto* roctx_api_table = static_cast<roctxApiTable_t*>(*tables);
auto* roctx_core = static_cast<roctxCoreApiTable_t*>(tables[0]);
auto* roctx_ctrl = static_cast<roctxControlApiTable_t*>(tables[1]);
auto* roctx_name = static_cast<roctxNameApiTable_t*>(tables[2]);
// any internal modifications to the roctxApiTable_t need to be done before we make
// the copy or else those modifications will be lost when ROCTx tracing is enabled because
// the ROCTx tracing invokes the function pointers from the copy below
rocprofiler::marker::copy_table(roctx_api_table);
rocprofiler::marker::copy_table(roctx_core);
rocprofiler::marker::copy_table(roctx_ctrl);
rocprofiler::marker::copy_table(roctx_name);
// install rocprofiler API wrappers
rocprofiler::marker::update_table(roctx_api_table);
rocprofiler::marker::update_table(roctx_core);
rocprofiler::marker::update_table(roctx_ctrl);
rocprofiler::marker::update_table(roctx_name);
rocprofiler::intercept_table::notify_runtime_api_registration(
ROCPROFILER_MARKER_LIBRARY,
rocprofiler::intercept_table::notify_intercept_table_registration(
ROCPROFILER_MARKER_CORE_TABLE, lib_version, lib_instance, std::make_tuple(roctx_core));
rocprofiler::intercept_table::notify_intercept_table_registration(
ROCPROFILER_MARKER_CONTROL_TABLE,
lib_version,
lib_instance,
std::make_tuple(roctx_api_table));
std::make_tuple(roctx_ctrl));
rocprofiler::intercept_table::notify_intercept_table_registration(
ROCPROFILER_MARKER_NAME_TABLE, lib_version, lib_instance, std::make_tuple(roctx_name));
}
else
{
+14 -2
查看文件
@@ -77,7 +77,13 @@ inline auto
get_callback_tracing_names()
{
static const auto supported_kinds = std::unordered_set<rocprofiler_callback_tracing_kind_t>{
ROCPROFILER_CALLBACK_TRACING_HSA_API, ROCPROFILER_CALLBACK_TRACING_MARKER_API};
ROCPROFILER_CALLBACK_TRACING_HSA_API,
ROCPROFILER_CALLBACK_TRACING_HIP_API,
ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API,
};
auto cb_name_info = callback_name_info{};
//
@@ -141,7 +147,13 @@ inline buffer_name_info
get_buffer_tracing_names()
{
static const auto supported_kinds = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{
ROCPROFILER_BUFFER_TRACING_HSA_API, ROCPROFILER_BUFFER_TRACING_MARKER_API};
ROCPROFILER_BUFFER_TRACING_HSA_API,
ROCPROFILER_BUFFER_TRACING_HIP_API,
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API,
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API,
ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API,
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API};
auto cb_name_info = buffer_name_info{};
//
+43 -59
查看文件
@@ -40,6 +40,7 @@
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <random>
#include <sstream>
@@ -216,7 +217,7 @@ generate_wrapper(const char* name, RetT (*func)(Args...))
TABLE->FUNC##_fn = generate_wrapper<__COUNTER__>(#FUNC, TABLE->FUNC##_fn)
void
api_registration_callback(rocprofiler_runtime_library_t type,
api_registration_callback(rocprofiler_intercept_table_t type,
uint64_t lib_version,
uint64_t lib_instance,
void** tables,
@@ -227,7 +228,7 @@ api_registration_callback(rocprofiler_runtime_library_t type,
cb_data->client_workflow_count++;
EXPECT_EQ(type, ROCPROFILER_HSA_LIBRARY) << "unexpected library type: " << type;
EXPECT_EQ(type, ROCPROFILER_HSA_TABLE) << "unexpected library type: " << type;
EXPECT_EQ(lib_instance, 0) << "multiple instances of HSA runtime library";
EXPECT_EQ(num_tables, 1) << "expected only one table of type HsaApiTable";
EXPECT_GT(lib_version, 0) << "expected library version > 0";
@@ -238,6 +239,26 @@ api_registration_callback(rocprofiler_runtime_library_t type,
GENERATE_WRAPPER(hsa_api_table->core_, hsa_iterate_agents);
GENERATE_WRAPPER(hsa_api_table->core_, hsa_shut_down);
}
using init_list_t = std::initializer_list<int>;
auto valid_intercept_combos = init_list_t{
(ROCPROFILER_HSA_TABLE | ROCPROFILER_HIP_RUNTIME_TABLE | ROCPROFILER_HIP_COMPILER_TABLE |
ROCPROFILER_MARKER_CORE_TABLE | ROCPROFILER_MARKER_CONTROL_TABLE |
ROCPROFILER_MARKER_NAME_TABLE),
(ROCPROFILER_HSA_TABLE | ROCPROFILER_HIP_RUNTIME_TABLE | ROCPROFILER_HIP_COMPILER_TABLE |
ROCPROFILER_MARKER_CORE_TABLE | ROCPROFILER_MARKER_CONTROL_TABLE),
(ROCPROFILER_HSA_TABLE | ROCPROFILER_HIP_RUNTIME_TABLE | ROCPROFILER_HIP_COMPILER_TABLE |
ROCPROFILER_MARKER_CORE_TABLE),
(ROCPROFILER_HSA_TABLE | ROCPROFILER_HIP_RUNTIME_TABLE | ROCPROFILER_HIP_COMPILER_TABLE),
(ROCPROFILER_HSA_TABLE | ROCPROFILER_HIP_RUNTIME_TABLE),
(ROCPROFILER_HSA_TABLE),
(ROCPROFILER_HIP_RUNTIME_TABLE | ROCPROFILER_HIP_COMPILER_TABLE |
ROCPROFILER_MARKER_CORE_TABLE | ROCPROFILER_MARKER_CONTROL_TABLE |
ROCPROFILER_MARKER_NAME_TABLE),
(ROCPROFILER_HIP_COMPILER_TABLE | ROCPROFILER_MARKER_CORE_TABLE |
ROCPROFILER_MARKER_CONTROL_TABLE | ROCPROFILER_MARKER_NAME_TABLE),
(ROCPROFILER_MARKER_CORE_TABLE | ROCPROFILER_MARKER_CONTROL_TABLE |
ROCPROFILER_MARKER_NAME_TABLE)};
} // namespace
TEST(rocprofiler_lib, intercept_table_and_callback_tracing)
@@ -321,35 +342,15 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing)
cb_data.client_id = client_id;
cb_data.client_id->name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
ROCPROFILER_CALL_EXPECT(
rocprofiler_at_runtime_api_registration(api_registration_callback,
ROCPROFILER_LIBRARY | ROCPROFILER_HSA_LIBRARY |
ROCPROFILER_HIP_LIBRARY |
ROCPROFILER_MARKER_LIBRARY,
static_cast<void*>(&cb_data)),
"function should return invalid argument if ROCPROFILER_LIBRARY included",
ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT);
using init_list_t = std::initializer_list<int>;
for(auto itr : init_list_t{(ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY |
ROCPROFILER_HIP_COMPILER_LIBRARY | ROCPROFILER_MARKER_LIBRARY),
(ROCPROFILER_HSA_LIBRARY | ROCPROFILER_MARKER_LIBRARY),
(ROCPROFILER_MARKER_LIBRARY)})
for(auto itr : valid_intercept_combos)
{
ROCPROFILER_CALL_EXPECT(
rocprofiler_at_runtime_api_registration(
rocprofiler_at_intercept_table_registration(
api_registration_callback, itr, static_cast<void*>(&cb_data)),
"test should be updated if new (non-HSA, non-HIP) intercept table is supported",
ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED);
ROCPROFILER_STATUS_SUCCESS);
}
ROCPROFILER_CALL(rocprofiler_at_runtime_api_registration(
api_registration_callback,
ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY |
ROCPROFILER_HIP_COMPILER_LIBRARY,
static_cast<void*>(&cb_data)),
"HSA and HIP intercept table registration failed");
return &cfg_result;
};
@@ -381,7 +382,13 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing)
cb_data.client_fini_func(*cb_data.client_id);
EXPECT_EQ(cb_data.client_workflow_count, 3);
size_t num_hsa_intercepts = 0;
for(auto itr : valid_intercept_combos)
{
if((itr & ROCPROFILER_HSA_TABLE) == ROCPROFILER_HSA_TABLE) ++num_hsa_intercepts;
}
EXPECT_EQ(cb_data.client_workflow_count, num_hsa_intercepts + 2);
for(auto itr : cb_data.client_callback_count)
{
@@ -498,44 +505,15 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing_disable_context)
cb_data.client_id = client_id;
cb_data.client_id->name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
ROCPROFILER_CALL_EXPECT(
rocprofiler_at_runtime_api_registration(
api_registration_callback,
ROCPROFILER_LIBRARY | ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY |
ROCPROFILER_HIP_COMPILER_LIBRARY | ROCPROFILER_MARKER_LIBRARY,
static_cast<void*>(&cb_data)),
"function should return invalid argument if ROCPROFILER_LIBRARY included",
ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT);
using init_list_t = std::initializer_list<int>;
for(auto itr : init_list_t{(ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY |
ROCPROFILER_HIP_COMPILER_LIBRARY | ROCPROFILER_MARKER_LIBRARY),
(ROCPROFILER_HSA_LIBRARY | ROCPROFILER_MARKER_LIBRARY),
(ROCPROFILER_HIP_LIBRARY | ROCPROFILER_MARKER_LIBRARY),
(ROCPROFILER_HIP_COMPILER_LIBRARY | ROCPROFILER_MARKER_LIBRARY),
(ROCPROFILER_MARKER_LIBRARY)})
for(auto itr : valid_intercept_combos)
{
ROCPROFILER_CALL_EXPECT(
rocprofiler_at_runtime_api_registration(
rocprofiler_at_intercept_table_registration(
api_registration_callback, itr, static_cast<void*>(&cb_data)),
"test should be updated if new (non-HSA, non-HIP) intercept table is supported",
ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED);
ROCPROFILER_STATUS_SUCCESS);
}
ROCPROFILER_CALL_EXPECT(rocprofiler_at_runtime_api_registration(
api_registration_callback,
ROCPROFILER_HSA_LIBRARY | ROCPROFILER_MARKER_LIBRARY,
static_cast<void*>(&cb_data)),
"test should be updated if ROCTx intercept table is supported",
ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED);
ROCPROFILER_CALL(rocprofiler_at_runtime_api_registration(
api_registration_callback,
ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY |
ROCPROFILER_HIP_COMPILER_LIBRARY,
static_cast<void*>(&cb_data)),
"HSA and HIP API intercept table registration failed");
return &cfg_result;
};
@@ -578,7 +556,13 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing_disable_context)
cb_data.client_fini_func(*cb_data.client_id);
EXPECT_EQ(cb_data.client_workflow_count, 3);
size_t num_hsa_intercepts = 0;
for(auto itr : valid_intercept_combos)
{
if((itr & ROCPROFILER_HSA_TABLE) == ROCPROFILER_HSA_TABLE) ++num_hsa_intercepts;
}
EXPECT_EQ(cb_data.client_workflow_count, num_hsa_intercepts + 2);
auto get_tool_count = [](std::string_view func_name) {
// we already checked that first == second so we can just check first here
+62 -67
查看文件
@@ -101,14 +101,14 @@ tool_tracing_ctrl_callback(rocprofiler_callback_tracing_record_t record,
auto* cb_data = static_cast<callback_data*>(client_data);
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER &&
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API &&
record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerPause)
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API &&
record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerPause)
{
ROCPROFILER_CALL(rocprofiler_stop_context(cb_data->client_ctx), "pausing client context");
}
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT &&
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API &&
record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerResume)
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API &&
record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerResume)
{
ROCPROFILER_CALL(rocprofiler_start_context(cb_data->client_ctx), "resuming client context");
}
@@ -132,8 +132,12 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
static auto name_map = get_callback_tracing_names();
EXPECT_EQ(name_map.kind_names.size(), ROCPROFILER_CALLBACK_TRACING_LAST);
EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_CALLBACK_TRACING_MARKER_API).size(),
ROCPROFILER_MARKER_API_ID_LAST);
EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API).size(),
ROCPROFILER_MARKER_CORE_API_ID_LAST);
EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API).size(),
ROCPROFILER_MARKER_CONTROL_API_ID_LAST);
EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API).size(),
ROCPROFILER_MARKER_NAME_API_ID_LAST);
std::cout << "[" << __FILE__ << ":" << __LINE__ << "] "
<< name_map.operation_names[record.kind][record.operation] << "\n"
@@ -181,9 +185,9 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operation_args(
record, info_data_cb, static_cast<void*>(&info_data_v)),
"Failure iterating trace operation args");
if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API)
if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API)
{
if(record.operation != ROCPROFILER_MARKER_API_ID_roctxRangePop)
if(record.operation != ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop)
{
EXPECT_GT(info_data_v.num_args, 0)
<< name_map.operation_names[record.kind][record.operation]
@@ -210,8 +214,12 @@ tool_tracing_buffered(rocprofiler_context_id_t context,
static auto name_map = get_buffer_tracing_names();
EXPECT_EQ(name_map.kind_names.size(), ROCPROFILER_BUFFER_TRACING_LAST);
EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_API).size(),
ROCPROFILER_MARKER_API_ID_LAST);
EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API).size(),
ROCPROFILER_MARKER_CORE_API_ID_LAST);
EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API).size(),
ROCPROFILER_MARKER_CONTROL_API_ID_LAST);
EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API).size(),
ROCPROFILER_MARKER_NAME_API_ID_LAST);
auto v_records = std::vector<rocprofiler_buffer_tracing_marker_api_record_t*>{};
v_records.reserve(num_headers);
@@ -224,7 +232,8 @@ tool_tracing_buffered(rocprofiler_context_id_t context,
auto hash = rocprofiler_record_header_compute_hash(header->category, header->kind);
EXPECT_EQ(header->hash, hash);
EXPECT_TRUE(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_API);
(header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API ||
header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API));
v_records.emplace_back(
static_cast<rocprofiler_buffer_tracing_marker_api_record_t*>(header->payload));
@@ -300,39 +309,32 @@ TEST(rocprofiler_lib, roctx_callback_tracing)
ROCPROFILER_CALL(rocprofiler_create_context(&cb_data->client_ctx),
"failed to create context");
auto operations = std::vector<uint32_t>{};
rocprofiler_iterate_callback_tracing_kind_operations(
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
[](rocprofiler_callback_tracing_kind_t, uint32_t operation, void* data) {
auto* _ops = static_cast<std::vector<uint32_t>*>(data);
if(operation != ROCPROFILER_MARKER_API_ID_roctxProfilerPause &&
operation != ROCPROFILER_MARKER_API_ID_roctxProfilerResume)
_ops->emplace_back(operation);
return 0;
},
&operations);
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
cb_data->client_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
nullptr,
0,
tool_tracing_callback,
client_data),
"callback tracing service failed to configure");
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(cb_data->client_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
operations.data(),
operations.size(),
tool_tracing_callback,
client_data),
"callback tracing service failed to configure");
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
cb_data->client_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API,
nullptr,
0,
tool_tracing_callback,
client_data),
"callback tracing service failed to configure");
auto pause_resume_ops =
std::array<uint32_t, 2>{ROCPROFILER_MARKER_API_ID_roctxProfilerPause,
ROCPROFILER_MARKER_API_ID_roctxProfilerResume};
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(pause_resume_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
pause_resume_ops.data(),
pause_resume_ops.size(),
tool_tracing_ctrl_callback,
client_data),
"callback tracing service failed to configure");
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
pause_resume_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
nullptr,
0,
tool_tracing_ctrl_callback,
client_data),
"callback tracing service failed to configure");
int valid_ctx = 0;
ROCPROFILER_CALL(rocprofiler_context_is_valid(cb_data->client_ctx, &valid_ctx),
@@ -416,19 +418,16 @@ TEST(rocprofiler_lib, roctx_buffered_tracing)
cb_data->client_fini_func = fini_func;
auto pause_resume_ctx = rocprofiler_context_id_t{};
auto pause_resume_ops =
std::array<uint32_t, 2>{ROCPROFILER_MARKER_API_ID_roctxProfilerPause,
ROCPROFILER_MARKER_API_ID_roctxProfilerResume};
ROCPROFILER_CALL(rocprofiler_create_context(&pause_resume_ctx), "failed to create context");
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(pause_resume_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
pause_resume_ops.data(),
pause_resume_ops.size(),
tool_tracing_ctrl_callback,
client_data),
"callback tracing service failed to configure");
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
pause_resume_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
nullptr,
0,
tool_tracing_ctrl_callback,
client_data),
"callback tracing service failed to configure");
ROCPROFILER_CALL(rocprofiler_start_context(pause_resume_ctx),
"rocprofiler context start failed");
@@ -444,23 +443,19 @@ TEST(rocprofiler_lib, roctx_buffered_tracing)
&cb_data->client_buffer),
"buffer creation failed");
auto operations = std::vector<uint32_t>{};
rocprofiler_iterate_buffer_tracing_kind_operations(
ROCPROFILER_BUFFER_TRACING_MARKER_API,
[](rocprofiler_buffer_tracing_kind_t, uint32_t operation, void* data) {
auto* _ops = static_cast<std::vector<uint32_t>*>(data);
if(operation != ROCPROFILER_MARKER_API_ID_roctxProfilerPause &&
operation != ROCPROFILER_MARKER_API_ID_roctxProfilerResume)
_ops->emplace_back(operation);
return 0;
},
&operations);
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(cb_data->client_ctx,
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API,
nullptr,
0,
cb_data->client_buffer),
"buffer tracing service failed to configure");
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(cb_data->client_ctx,
ROCPROFILER_BUFFER_TRACING_MARKER_API,
operations.data(),
operations.size(),
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API,
nullptr,
0,
cb_data->client_buffer),
"buffer tracing service failed to configure");
+11 -3
查看文件
@@ -31,8 +31,16 @@ set(tracing-env
"ROCPROFILER_LOG_LEVEL=${LOG_LEVEL}"
"HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler-shared-library>")
set_tests_properties(rocprofv3-test-trace-execute PROPERTIES LABELS "integration-tests"
ENVIRONMENT "${tracing-env}")
set_tests_properties(
rocprofv3-test-trace-execute
PROPERTIES
LABELS
"integration-tests"
ENVIRONMENT
"${tracing-env}"
FAIL_REGULAR_EXPRESSION
"HSA_API|HIP_API|HIP_COMPILER_API|MARKER_CORE_API|MARKER_CONTROL_API|MARKER_NAME_API|KERNEL_DISPATCH|CODE_OBJECT"
)
foreach(FILENAME validate.py conftest.py)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}
@@ -66,6 +74,6 @@ set_tests_properties(
DEPENDS
rocprofv3-test-trace-execute
FAIL_REGULAR_EXPRESSION
"AssertionError|HSA_API|HIP_API|MARKER_API|KERNEL_DISPATCH|CODE_OBJECT"
"AssertionError"
ATTACHED_FILES_ON_FAIL
"${VALIDATION_FILES}")
+7 -1
查看文件
@@ -51,6 +51,8 @@ def test_memory_copy_trace(memory_copy_input_data):
for row in memory_copy_input_data:
assert row["Kind"] == "MEMORY_COPY"
assert len(memory_copy_input_data) == 2
row = memory_copy_input_data[0]
assert row["Direction"] == "HOST_TO_DEVICE"
assert int(row["Source_Agent_Id"]) == 0
@@ -69,7 +71,11 @@ def test_memory_copy_trace(memory_copy_input_data):
def test_marker_api_trace(marker_input_data):
functions = []
for row in marker_input_data:
assert row["Domain"] == "MARKER_API"
assert row["Domain"] in [
"MARKER_CORE_API",
"MARKER_CONTROL_API",
"MARKER_NAME_API",
]
assert int(row["Process_Id"]) > 0
assert int(row["Thread_Id"]) == 0 or int(row["Thread_Id"]) >= int(
row["Process_Id"]
+54 -8
查看文件
@@ -130,8 +130,12 @@ get_callback_tracing_names()
{
static const auto supported = std::unordered_set<rocprofiler_callback_tracing_kind_t>{
ROCPROFILER_CALLBACK_TRACING_HSA_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
ROCPROFILER_CALLBACK_TRACING_HIP_API};
ROCPROFILER_CALLBACK_TRACING_HIP_API,
ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API,
};
auto cb_name_info = callback_name_info{};
//
@@ -184,8 +188,12 @@ get_buffer_tracing_names()
{
static const auto supported = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{
ROCPROFILER_BUFFER_TRACING_HSA_API,
ROCPROFILER_BUFFER_TRACING_MARKER_API,
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY};
ROCPROFILER_BUFFER_TRACING_HIP_API,
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API,
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API,
ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API,
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API};
auto cb_name_info = buffer_name_info{};
//
@@ -402,7 +410,9 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
hip_api_cb_records.emplace_back(
hip_api_callback_record_t{ts, record, *data, std::move(args)});
}
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API)
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API ||
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API ||
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API)
{
auto* data = static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload);
marker_api_cb_records.emplace_back(marker_api_callback_record_t{ts, record, *data});
@@ -467,7 +477,9 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
hsa_api_bf_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_API)
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API ||
header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API ||
header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_marker_api_record_t*>(header->payload);
@@ -642,7 +654,25 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(marker_api_callback_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
nullptr,
0,
tool_tracing_callback,
nullptr),
"hsa api tracing service configure");
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
marker_api_callback_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
nullptr,
0,
tool_tracing_callback,
nullptr),
"hsa api tracing service configure");
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(marker_api_callback_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API,
nullptr,
0,
tool_tracing_callback,
@@ -715,7 +745,23 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(marker_api_buffered_ctx,
ROCPROFILER_BUFFER_TRACING_MARKER_API,
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API,
nullptr,
0,
marker_api_buffered_buffer),
"buffer tracing service configure");
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(marker_api_buffered_ctx,
ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API,
nullptr,
0,
marker_api_buffered_buffer),
"buffer tracing service configure");
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(marker_api_buffered_ctx,
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API,
nullptr,
0,
marker_api_buffered_buffer),