PC sampling bringup
Change-Id: I0d041c4c8c3778f2c328cde38432bc72223706a3
pc sampling integration fix
Change-Id: Ia66ff876d2d99ec4d561daf8320b65d75f5cd2fe
[ROCm/rocprofiler commit: 9df9fddcfb]
This commit is contained in:
@@ -76,8 +76,9 @@ if (NOT USE_PROF_API)
|
||||
endif()
|
||||
|
||||
# Protocol header lookup
|
||||
set(PROF_API_HEADER_NAME prof_protocol.h)
|
||||
if(USE_PROF_API EQUAL 1)
|
||||
find_path(PROF_API_HEADER_DIR prof_protocol.h
|
||||
find_path(PROF_API_HEADER_DIR ${PROF_API_HEADER_NAME}
|
||||
HINTS
|
||||
${PROF_API_HEADER_PATH}
|
||||
PATHS
|
||||
@@ -86,11 +87,11 @@ if(USE_PROF_API EQUAL 1)
|
||||
include/ext
|
||||
)
|
||||
if(NOT PROF_API_HEADER_DIR)
|
||||
MESSAGE(ERROR "Profiling API header not found. Tracer integration disabled. Use -DPROF_API_HEADER_PATH=<path to prof_protocol.h header>")
|
||||
MESSAGE(ERROR ": Profiling API header not found. Tracer integration disabled. Use -DPROF_API_HEADER_PATH=<path to ${PROF_API_HEADER_NAME} header>")
|
||||
else()
|
||||
add_definitions(-DUSE_PROF_API=1)
|
||||
include_directories(${PROF_API_HEADER_DIR})
|
||||
MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}")
|
||||
MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}/${PROF_API_HEADER_NAME}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
Executable
@@ -0,0 +1,9 @@
|
||||
#!/bin/sh -x
# Rebuilds the project from scratch in a 'build' directory next to this
# script, builds the 'mytest' target and then launches run.sh from the
# build directory.

# Stop at the first failing step so that a failed configure/cd does not
# lead to running make or run.sh in the wrong directory.
set -e

BIN_DIR=$(dirname "$0")
BLD_DIR="$BIN_DIR/build"

export CMAKE_PREFIX_PATH=/opt/rocm/include/hsa:/opt/rocm

# Recreate the build directory and configure
rm -rf "$BLD_DIR"
mkdir "$BLD_DIR"
cd "$BLD_DIR"
cmake ..

# Build and run
make -j
make mytest
./run.sh
|
||||
@@ -89,7 +89,9 @@ hsa_status_t rocprofiler_error_string(
|
||||
// Profiling feature kind
|
||||
typedef enum {
|
||||
ROCPROFILER_FEATURE_KIND_METRIC = 0,
|
||||
ROCPROFILER_FEATURE_KIND_TRACE = 1
|
||||
ROCPROFILER_FEATURE_KIND_TRACE = 1,
|
||||
ROCPROFILER_FEATURE_KIND_SPM_MOD = 2,
|
||||
ROCPROFILER_FEATURE_KIND_PCSMP_MOD = 4
|
||||
} rocprofiler_feature_kind_t;
|
||||
|
||||
// Profiling feature parameter
|
||||
@@ -201,6 +203,10 @@ hsa_status_t rocprofiler_close(rocprofiler_t* context); // [in] profiling conte
|
||||
hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling context
|
||||
uint32_t group_index); // group index
|
||||
|
||||
// Return context agent
|
||||
hsa_status_t rocprofiler_get_agent(rocprofiler_t* context, // [in] profiling context
|
||||
hsa_agent_t* agent); // [out] GPU handle
|
||||
|
||||
// Supported time value ID
|
||||
typedef enum {
|
||||
ROCPROFILER_TIME_ID_CLOCK_REALTIME = 0, // Linux realtime clock time
|
||||
|
||||
@@ -30,8 +30,9 @@ set ( LIB_SRC
|
||||
${LIB_DIR}/core/simple_proxy_queue.cpp
|
||||
${LIB_DIR}/core/intercept_queue.cpp
|
||||
${LIB_DIR}/core/metrics.cpp
|
||||
${LIB_DIR}/core/activity.cpp
|
||||
${LIB_DIR}/util/hsa_rsrc_factory.cpp
|
||||
)
|
||||
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
|
||||
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
|
||||
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_KMT_LIB_PATH}/.. )
|
||||
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++)
|
||||
|
||||
@@ -0,0 +1,171 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <string>
|
||||
|
||||
// Tracer messages protocol
|
||||
#include <prof_protocol.h>
|
||||
|
||||
#include "core/context.h"
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
|
||||
// Fatal error handler.
// Flushes stdout, writes the message followed by an empty line to stderr,
// flushes stderr and aborts the process. Does not return.
void fatal(const std::string msg) {
  // Push out pending stdout output before the error text is written
  fflush(stdout);

  const std::string text = msg + "\n\n";
  fputs(text.c_str(), stderr);
  fflush(stderr);

  abort();
}
|
||||
|
||||
// Check returned HSA API status
|
||||
void check_status(hsa_status_t status) {
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char* error_string = NULL;
|
||||
rocprofiler_error_string(&error_string);
|
||||
fprintf(stderr, "ERROR: %s\n", error_string);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Activity primitives
|
||||
namespace activity_prim {
|
||||
// Data passed through the trace data iteration to the PC sampling callback
struct pcsmp_callback_data_t {
  // sampled kernel name
  const char* kernel_name;
  // host buffer for tracing data
  void* data_buffer;
  // sample id
  uint64_t id;
  // sample cycle
  uint64_t cycle;
  // sample PC
  uint64_t pc;
};
|
||||
|
||||
uint32_t activity_op = UINT32_MAX;
|
||||
void* activity_arg = NULL;
|
||||
std::atomic<activity_async_callback_t> activity_callback{NULL};
|
||||
rocprofiler_t* context = NULL;
|
||||
|
||||
// Trace data iteration callback.
// Builds an activity record from the PC sampling callback data passed via
// 'data' and forwards it to the registered activity callback. When no
// activity callback is registered, the kernel name held by the callback data
// is released instead.
//   info_type, info_data - not used by this callback
//   data                 - pointer to a pcsmp_callback_data_t
// Always returns HSA_STATUS_SUCCESS.
hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type,
                           hsa_ven_amd_aqlprofile_info_data_t* info_data,
                           void* data) {
  pcsmp_callback_data_t* pcsmp_data = static_cast<pcsmp_callback_data_t*>(data);

  activity_record_t record{};
  record.op = activity_op;
  record.pc_sample.se = pcsmp_data->id;
  record.pc_sample.cycle = pcsmp_data->cycle;
  record.pc_sample.pc = pcsmp_data->pc;

  activity_async_callback_t fun = activity_callback.load(std::memory_order_acquire);
  if (fun) {
    // NOTE(review): the kernel name is not released on this path - confirm
    // who owns it when an activity callback is registered.
    (fun)(activity_op, &record, activity_arg);
  } else {
    // Release the kernel name and clear the pointer, so that another
    // invocation with the same callback data does not free it a second time.
    free(const_cast<char*>(pcsmp_data->kernel_name));
    pcsmp_data->kernel_name = NULL;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Profiling context handler.
// Resolves the agent of the group's context, prepares the PC sampling
// callback data (kernel name taken from 'arg', plus a system memory buffer
// of TraceProfile::GetSize() bytes) and iterates the context trace data
// with trace_data_cb. Any failing rocprofiler call aborts via check_status.
// Always returns false.
bool context_handler(rocprofiler_group_t group, void* arg) {
  // Agent the profiling context was opened on
  hsa_agent_t agent{};
  check_status(rocprofiler_get_agent(group.context, &agent));
  const rocprofiler::util::AgentInfo* agent_info =
      rocprofiler::util::HsaRsrcFactory::Instance().GetAgentInfo(agent);

  // Callback data for the trace data iteration
  // NOTE(review): data_buffer is not released or read in this function -
  // confirm where it is consumed and freed.
  pcsmp_callback_data_t pcsmp_data{};
  pcsmp_data.kernel_name = static_cast<const char*>(arg);
  pcsmp_data.data_buffer = rocprofiler::util::HsaRsrcFactory::Instance().AllocateSysMemory(
      agent_info, rocprofiler::TraceProfile::GetSize());

  check_status(rocprofiler_iterate_trace_data(group.context, trace_data_cb, &pcsmp_data));

  return false;
}
|
||||
|
||||
// Kernel disoatch callback
|
||||
hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* user_data,
|
||||
rocprofiler_group_t* group) {
|
||||
// context features
|
||||
const rocprofiler_feature_kind_t trace_kind =
|
||||
(rocprofiler_feature_kind_t)(ROCPROFILER_FEATURE_KIND_TRACE | ROCPROFILER_FEATURE_KIND_PCSMP_MOD);
|
||||
const uint32_t feature_count = 1;
|
||||
const uint32_t parameter_count = 1;
|
||||
rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count];
|
||||
memset(features, 0, feature_count * sizeof(rocprofiler_feature_t));
|
||||
rocprofiler_parameter_t* parameters = new rocprofiler_parameter_t[parameter_count];
|
||||
memset(features, 0, parameter_count * sizeof(rocprofiler_parameter_t));
|
||||
parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET;
|
||||
parameters[0].value = 0;
|
||||
|
||||
features[0].kind = trace_kind;
|
||||
features[0].parameters = parameters;
|
||||
features[0].parameter_count = parameter_count;
|
||||
|
||||
// context properties
|
||||
rocprofiler_properties_t properties{};
|
||||
properties.handler = context_handler;
|
||||
properties.handler_arg = (void*)strdup(callback_data->kernel_name);
|
||||
|
||||
// Open profiling context
|
||||
hsa_status_t status = rocprofiler_open(callback_data->agent, features, feature_count,
|
||||
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
|
||||
check_status(status);
|
||||
|
||||
// Get group[0]
|
||||
status = rocprofiler_get_group(context, 0, group);
|
||||
check_status(status);
|
||||
|
||||
return status;
|
||||
}
|
||||
} // namespace activity_prim
|
||||
|
||||
extern "C" {
|
||||
PUBLIC_API const char* GetOpName(uint32_t op) { return strdup("PCSAMPLE"); }
|
||||
|
||||
PUBLIC_API bool RegisterApiCallback(uint32_t op, void* callback, void* arg) { return true; }
|
||||
|
||||
PUBLIC_API bool RemoveApiCallback(uint32_t op) { return true; }
|
||||
|
||||
PUBLIC_API bool InitActivityCallback(void* callback, void* arg) {
|
||||
activity_prim::activity_arg = arg;
|
||||
activity_prim::activity_callback.store((activity_async_callback_t)callback, std::memory_order_release);
|
||||
|
||||
rocprofiler_queue_callbacks_t queue_callbacks{};
|
||||
queue_callbacks.dispatch = activity_prim::dispatch_callback;
|
||||
rocprofiler_set_queue_callbacks(queue_callbacks, NULL);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
PUBLIC_API bool EnableActivityCallback(uint32_t op, bool enable) {
|
||||
if (enable) {
|
||||
activity_prim::activity_op = op;
|
||||
rocprofiler_start_queue_callbacks();
|
||||
} else {
|
||||
rocprofiler_stop_queue_callbacks();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // extern "C"
|
||||
@@ -276,6 +276,7 @@ class Context {
|
||||
profile_vector_t profile_vector;
|
||||
set_[0].GetTraceProfiles(profile_vector);
|
||||
for (auto& tuple : profile_vector) {
|
||||
if (pcsmp_mode_) const_cast<profile_t*>(tuple.profile)->event_count = UINT32_MAX;
|
||||
const hsa_status_t status =
|
||||
api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, callback, data);
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "context iterate data failed");
|
||||
@@ -293,6 +294,7 @@ class Context {
|
||||
return false;
|
||||
}
|
||||
|
||||
hsa_agent_t GetAgent() const { return agent_; }
|
||||
Group* GetGroup(const uint32_t& index) { return &set_[index]; }
|
||||
rocprofiler_handler_t GetHandler(void** arg) const { *arg = handler_arg_; return handler_; }
|
||||
|
||||
@@ -306,7 +308,8 @@ class Context {
|
||||
api_(hsa_rsrc_->AqlProfileApi()),
|
||||
metrics_(NULL),
|
||||
handler_(handler),
|
||||
handler_arg_(handler_arg)
|
||||
handler_arg_(handler_arg),
|
||||
pcsmp_mode_(false)
|
||||
{}
|
||||
|
||||
~Context() { Destruct(); }
|
||||
@@ -434,10 +437,13 @@ class Context {
|
||||
const uint32_t group_index = block_status.group_index;
|
||||
set_[group_index].Insert(profile_info_t{event, NULL, 0, info});
|
||||
}
|
||||
} else if (kind == ROCPROFILER_FEATURE_KIND_TRACE) { // Processing traces features
|
||||
if (info->parameters != NULL) {
|
||||
set_[0].Insert(profile_info_t{NULL, info->parameters, info->parameter_count, info});
|
||||
} else {
|
||||
} else if (kind & ROCPROFILER_FEATURE_KIND_TRACE) { // Processing traces features
|
||||
info->kind = ROCPROFILER_FEATURE_KIND_TRACE;
|
||||
|
||||
const event_t* event = NULL;
|
||||
if (kind & ROCPROFILER_FEATURE_KIND_PCSMP_MOD) { // PC sampling
|
||||
pcsmp_mode_ = true;
|
||||
} else if (kind & ROCPROFILER_FEATURE_KIND_SPM_MOD) { // SPM trace
|
||||
const Metric* metric = metrics_->Get(name);
|
||||
if (metric == NULL)
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "input metric '" << name << "' is not found");
|
||||
@@ -445,9 +451,9 @@ class Context {
|
||||
if (counters_vec.size() != 1)
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "trace bad metric '" << name << "' is not base counter");
|
||||
const counter_t* counter = counters_vec[0];
|
||||
const event_t* event = &(counter->event);
|
||||
set_[0].Insert(profile_info_t{event, NULL, 0, info});
|
||||
event = &(counter->event);
|
||||
}
|
||||
set_[0].Insert(profile_info_t{event, info->parameters, info->parameter_count, info});
|
||||
} else {
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "bad rocprofiler feature kind (" << kind << ")");
|
||||
}
|
||||
@@ -584,6 +590,9 @@ class Context {
|
||||
// Context completion handler
|
||||
rocprofiler_handler_t handler_;
|
||||
void* handler_arg_;
|
||||
|
||||
// PC sampling mode
|
||||
bool pcsmp_mode_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -130,24 +130,35 @@ class InterceptQueue {
|
||||
Queue* proxy = obj->proxy_;
|
||||
|
||||
if (submit_callback_fun_) {
|
||||
for (uint64_t j = 0; j < count; ++j) {
|
||||
const packet_t* packet = &packets_arr[j];
|
||||
const hsa_kernel_dispatch_packet_t* dispatch_packet =
|
||||
reinterpret_cast<const hsa_kernel_dispatch_packet_t*>(packet);
|
||||
mutex_.lock();
|
||||
auto* callback_fun = submit_callback_fun_;
|
||||
void* callback_arg = submit_callback_arg_;
|
||||
mutex_.unlock();
|
||||
|
||||
uint64_t kernel_object = dispatch_packet->kernel_object;
|
||||
const amd_kernel_code_t* kernel_code = GetKernelCode(kernel_object);
|
||||
const char* kernel_name = (GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) ?
|
||||
QueryKernelName(kernel_object, kernel_code) : NULL;
|
||||
if (callback_fun) {
|
||||
for (uint64_t j = 0; j < count; ++j) {
|
||||
const packet_t* packet = &packets_arr[j];
|
||||
const hsa_kernel_dispatch_packet_t* dispatch_packet =
|
||||
reinterpret_cast<const hsa_kernel_dispatch_packet_t*>(packet);
|
||||
|
||||
// Preparing submit callback data
|
||||
rocprofiler_hsa_callback_data_t data{};
|
||||
data.submit.packet = (void*)packet;
|
||||
data.submit.kernel_name = kernel_name;
|
||||
data.submit.queue = obj->queue_;
|
||||
data.submit.device_type = obj->agent_info_->dev_type;
|
||||
data.submit.device_id = obj->agent_info_->dev_index;
|
||||
submit_callback_fun_(ROCPROFILER_HSA_CB_ID_SUBMIT, &data, submit_callback_arg_);
|
||||
if (GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
|
||||
uint64_t kernel_object = dispatch_packet->kernel_object;
|
||||
const amd_kernel_code_t* kernel_code = GetKernelCode(kernel_object);
|
||||
const char* kernel_name = (GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) ?
|
||||
QueryKernelName(kernel_object, kernel_code) : NULL;
|
||||
|
||||
// Preparing submit callback data
|
||||
rocprofiler_hsa_callback_data_t data{};
|
||||
data.submit.packet = (void*)packet;
|
||||
data.submit.kernel_name = kernel_name;
|
||||
data.submit.queue = obj->queue_;
|
||||
data.submit.device_type = obj->agent_info_->dev_type;
|
||||
data.submit.device_id = obj->agent_info_->dev_index;
|
||||
callback_fun(ROCPROFILER_HSA_CB_ID_SUBMIT, &data, callback_arg);
|
||||
} else {
|
||||
callback_fun(ROCPROFILER_HSA_CB_ID_SUBMIT, NULL, callback_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,6 +25,8 @@ THE SOFTWARE.
|
||||
#include <hsa.h>
|
||||
#include <hsa_api_trace.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#include "core/context.h"
|
||||
@@ -169,7 +171,9 @@ enum {
|
||||
uint32_t LoadTool() {
|
||||
uint32_t intercept_mode = 0;
|
||||
const char* tool_lib = getenv("ROCP_TOOL_LIB");
|
||||
ONLOAD_TRACE("load tool library(" << tool_lib << ")");
|
||||
std::ostringstream oss;
|
||||
if (tool_lib) oss << "load tool library(" << tool_lib << ")";
|
||||
ONLOAD_TRACE(oss.str());
|
||||
|
||||
if (tool_lib) {
|
||||
intercept_mode = DISPATCH_INTERCEPT_MODE;
|
||||
@@ -555,6 +559,14 @@ PUBLIC_API hsa_status_t rocprofiler_reset(rocprofiler_t* handle, uint32_t group_
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Return context agent
|
||||
PUBLIC_API hsa_status_t rocprofiler_get_agent(rocprofiler_t* handle, hsa_agent_t* agent) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
|
||||
*agent = context->GetAgent();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Get profiling group count
|
||||
PUBLIC_API hsa_status_t rocprofiler_group_count(const rocprofiler_t* handle,
|
||||
uint32_t* group_count) {
|
||||
|
||||
@@ -139,6 +139,7 @@ bool is_trace_local = true;
|
||||
// SPM trace enabled
|
||||
bool is_spm_trace = false;
|
||||
|
||||
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
static inline uint32_t GetTid() { return syscall(__NR_gettid); }
|
||||
|
||||
// Error handler
|
||||
@@ -909,7 +910,7 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
|
||||
}
|
||||
if (rcfile != NULL) {
|
||||
// Getting defaults
|
||||
printf("ROCProfiler: rc-file '%s'\n", rcpath.c_str());
|
||||
printf("ROCProfiler pid(%u): rc-file '%s'\n", GetPid(), rcpath.c_str());
|
||||
auto defaults_list = rcfile->GetNodes("top.defaults");
|
||||
for (auto* entry : defaults_list) {
|
||||
const auto& opts = entry->opts;
|
||||
|
||||
مرجع در شماره جدید
Block a user