Adding Async output for API Tracing

If rocprofiler_set_api_trace_sync_callback is not called by the API client then it will be automatically saved in the buffer and it will be in async to the user to see the API trace data, sample for async api tracing is added to samples/tracer

Change-Id: I01266a12b0eec172fdcffd7f04c89c2fe96174bb


[ROCm/rocprofiler commit: 29b25a3ef5]
This commit is contained in:
Ammar ELWazir
2023-04-20 13:29:02 +00:00
committed by Ammar Elwazir
parent 03a0e9bec0
commit a2dcf8f84e
7 changed files with 116 additions and 8 deletions
@@ -117,6 +117,15 @@ target_link_options(tracer_hip_hsa PRIVATE "-Wl,--build-id=md5")
add_dependencies(samples tracer_hip_hsa)
install(TARGETS tracer_hip_hsa RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples COMPONENT samples)
## Build HIP/HSA Trace with async output api trace data Sample
set_source_files_properties(tracer/sample_async.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_executable(tracer_hip_hsa_async tracer/sample_async.cpp ${ROCPROFILER_UTIL_SRC_FILES})
target_include_directories(tracer_hip_hsa_async PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc ${CMAKE_CURRENT_SOURCE_DIR}/common)
target_link_libraries(tracer_hip_hsa_async PRIVATE ${ROCPROFILER_TARGET} amd_comgr)
target_link_options(tracer_hip_hsa_async PRIVATE "-Wl,--build-id=md5")
add_dependencies(samples tracer_hip_hsa_async)
install(TARGETS tracer_hip_hsa_async RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples COMPONENT samples)
# ############################################################################################################################################
# PC Sampling Samples
# ############################################################################################################################################
@@ -0,0 +1,77 @@
#include "../common/common.h"
int main(int argc, char** argv) {
int* gpuMem;
prepare();
// Initialize the tools
CHECK_ROCPROFILER(rocprofiler_initialize());
// Creating the session with given replay mode
rocprofiler_session_id_t session_id;
CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id));
// Creating Output Buffer for the data
rocprofiler_buffer_id_t buffer_id;
CHECK_ROCPROFILER(rocprofiler_create_buffer(
session_id,
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
WriteBufferRecords(record, end_record, session_id, buffer_id);
},
0x9999, &buffer_id));
// Tracing Filter
std::vector<rocprofiler_tracer_activity_domain_t> apis_requested;
apis_requested.emplace_back(ACTIVITY_DOMAIN_HIP_API);
apis_requested.emplace_back(ACTIVITY_DOMAIN_HIP_OPS);
apis_requested.emplace_back(ACTIVITY_DOMAIN_HSA_API);
apis_requested.emplace_back(ACTIVITY_DOMAIN_HSA_OPS);
apis_requested.emplace_back(ACTIVITY_DOMAIN_ROCTX);
rocprofiler_filter_id_t api_tracing_filter_id;
CHECK_ROCPROFILER(rocprofiler_create_filter(
session_id, ROCPROFILER_API_TRACE, rocprofiler_filter_data_t{&apis_requested[0]},
apis_requested.size(), &api_tracing_filter_id, rocprofiler_filter_property_t{}));
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, api_tracing_filter_id, buffer_id));
// Kernel Tracing
rocprofiler_filter_id_t kernel_tracing_filter_id;
CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION,
rocprofiler_filter_data_t{}, 0, &kernel_tracing_filter_id,
rocprofiler_filter_property_t{}));
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, kernel_tracing_filter_id, buffer_id));
// Normal HIP Calls won't be traced
hipDeviceProp_t devProp;
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int)));
// KernelA and KernelB won't be traced
kernelCalls('A');
kernelCalls('B');
// Activating Profiling Session to profile whatever kernel launches occurs up
// till the next terminate session
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
// KernelC, KernelD, KernelE and KernelF to be traced as part of the session
kernelCalls('C');
kernelCalls('D');
kernelCalls('E');
kernelCalls('F');
// Normal HIP Calls that will be traced
HIP_CALL(hipFree(gpuMem));
// Deactivating session
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
// Manual Flush user buffer request
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
// Destroy sessions
CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));
// Destroy all profiling related objects(User buffer, sessions, filters,
// etc..)
CHECK_ROCPROFILER(rocprofiler_finalize());
return 0;
}
@@ -233,9 +233,16 @@ std::variant<std::vector<std::string>, uint32_t*> Filter::GetProperty(
return property;
}
void Filter::SetCallback(rocprofiler_sync_callback_t& callback) { callback_ = callback; }
void Filter::SetCallback(rocprofiler_sync_callback_t& callback) {
callback_ = callback;
has_sync_callback_ = true;
}
rocprofiler_sync_callback_t& Filter::GetCallback() { return callback_; }
bool Filter::HasCallback() { return has_sync_callback_; }
rocprofiler_sync_callback_t& Filter::GetCallback() {
return callback_;
}
size_t Filter::GetPropertiesCount(rocprofiler_filter_property_kind_t kind) {
switch (kind) {
@@ -50,6 +50,7 @@ class Filter {
std::vector<rocprofiler_att_parameter_t> GetAttParametersData();
void SetCallback(rocprofiler_sync_callback_t& callback);
rocprofiler_sync_callback_t& GetCallback();
bool HasCallback();
void SetProperty(rocprofiler_filter_property_t property);
std::variant<std::vector<std::string>, uint32_t*> GetProperty(
@@ -75,6 +76,7 @@ class Filter {
std::vector<rocprofiler_att_parameter_t> att_parameters_; // ATT Parameters
rocprofiler_counters_sampler_parameters_t counters_sampler_parameters_; // sampled counters parameters
bool has_sync_callback_{false};
rocprofiler_sync_callback_t callback_;
};
@@ -131,7 +131,9 @@ void Session::Start() {
GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetTraceData();
if (!tracer_started_.load(std::memory_order_release)) {
tracer_ = new tracer::Tracer(
session_id_, GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetCallback(),
session_id_, (GetFilter(
GetFilterIdWithKind(ROCPROFILER_API_TRACE))->HasCallback() ? GetFilter(
GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetCallback() : nullptr),
GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetBufferId(), domains);
tracer_started_.exchange(true, std::memory_order_release);
}
@@ -39,9 +39,7 @@ Tracer::Tracer(rocprofiler_session_id_t session_id, rocprofiler_sync_callback_t
: domains_(domains), callback_(callback), buffer_id_(buffer_id), session_id_(session_id) {
assert(!is_active_.load(std::memory_order_release) && "Error: The tracer was initialized!");
std::lock_guard<std::mutex> lock(tracer_lock_);
callback_data_ = api_callback_data_t{callback, session_id};
is_active_.exchange(true, std::memory_order_release);
}
@@ -406,12 +404,20 @@ void Tracer::InitRoctracer(
switch (domain.first) {
case ACTIVITY_DOMAIN_ROCTX: {
assert(!domain.second && "Error: ROCTX API can't be filtered!");
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, &callback_data_);
if(callback_data_.user_sync_callback)
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, &callback_data_);
else
roctracer_enable_domain_activity(ACTIVITY_DOMAIN_ROCTX,
session_buffer_id_t{session_id_, buffer_id_});
break;
}
case ACTIVITY_DOMAIN_HSA_API: {
if (!domain.second) {
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, api_callback, &callback_data_);
if(callback_data_.user_sync_callback)
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, api_callback, &callback_data_);
else
roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_API,
session_buffer_id_t{session_id_, buffer_id_});
} else {
assert(!api_filter_data_vector.empty() &&
"Error: HSA API calls filter data is empty and domain "
@@ -421,7 +427,11 @@ void Tracer::InitRoctracer(
}
case ACTIVITY_DOMAIN_HIP_API: {
if (!domain.second) {
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, &callback_data_);
if(callback_data_.user_sync_callback)
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, &callback_data_);
else
roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API,
session_buffer_id_t{session_id_, buffer_id_});
} else {
assert(!api_filter_data_vector.empty() &&
"Error: HIP API calls filter data is empty and domain "
@@ -91,6 +91,7 @@ class Tracer {
roctx_report_activity_;
std::vector<rocprofiler_tracer_activity_domain_t> domains_;
bool is_sync_;
rocprofiler_sync_callback_t callback_;
rocprofiler_buffer_id_t buffer_id_;
rocprofiler_session_id_t session_id_;