From 67e1502ce5c02a962e8e5271af46b255da4fde26 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 23 Jan 2020 20:56:42 -0600 Subject: [PATCH] PC sampling bringup Change-Id: I0d041c4c8c3778f2c328cde38432bc72223706a3 pc sampling integration fix Change-Id: Ia66ff876d2d99ec4d561daf8320b65d75f5cd2fe [ROCm/rocprofiler commit: 9df9fddcfb7e5798ce0d2f1dc465dfc30e6cf31e] --- projects/rocprofiler/CMakeLists.txt | 7 +- projects/rocprofiler/build.sh | 9 + projects/rocprofiler/inc/rocprofiler.h | 8 +- projects/rocprofiler/src/CMakeLists.txt | 3 +- projects/rocprofiler/src/core/activity.cpp | 171 ++++++++++++++++++ projects/rocprofiler/src/core/context.h | 23 ++- .../rocprofiler/src/core/intercept_queue.h | 43 +++-- projects/rocprofiler/src/core/rocprofiler.cpp | 14 +- projects/rocprofiler/test/tool/tool.cpp | 3 +- 9 files changed, 251 insertions(+), 30 deletions(-) create mode 100755 projects/rocprofiler/build.sh create mode 100644 projects/rocprofiler/src/core/activity.cpp diff --git a/projects/rocprofiler/CMakeLists.txt b/projects/rocprofiler/CMakeLists.txt index 5222c836d5..cca655222c 100644 --- a/projects/rocprofiler/CMakeLists.txt +++ b/projects/rocprofiler/CMakeLists.txt @@ -76,8 +76,9 @@ if (NOT USE_PROF_API) endif() # Protocol header lookup +set(PROF_API_HEADER_NAME prof_protocol.h) if(USE_PROF_API EQUAL 1) - find_path(PROF_API_HEADER_DIR prof_protocol.h + find_path(PROF_API_HEADER_DIR ${PROF_API_HEADER_NAME} HINTS ${PROF_API_HEADER_PATH} PATHS @@ -86,11 +87,11 @@ if(USE_PROF_API EQUAL 1) include/ext ) if(NOT PROF_API_HEADER_DIR) - MESSAGE(ERROR "Profiling API header not found. Tracer integration disabled. Use -DPROF_API_HEADER_PATH=") + MESSAGE(ERROR ": Profiling API header not found. Tracer integration disabled. 
Use -DPROF_API_HEADER_PATH=") else() add_definitions(-DUSE_PROF_API=1) include_directories(${PROF_API_HEADER_DIR}) - MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}") + MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}/${PROF_API_HEADER_NAME}") endif() endif() diff --git a/projects/rocprofiler/build.sh b/projects/rocprofiler/build.sh new file mode 100755 index 0000000000..f90f09dbbe --- /dev/null +++ b/projects/rocprofiler/build.sh @@ -0,0 +1,9 @@ +#!/bin/sh -x +BIN_DIR=`dirname $0` +BLD_DIR=$BIN_DIR/build + +export CMAKE_PREFIX_PATH=/opt/rocm/include/hsa:/opt/rocm +rm -rf $BLD_DIR && mkdir $BLD_DIR && cd $BLD_DIR && cmake .. +make -j +make mytest +./run.sh diff --git a/projects/rocprofiler/inc/rocprofiler.h b/projects/rocprofiler/inc/rocprofiler.h index 6b595bc486..5889432510 100644 --- a/projects/rocprofiler/inc/rocprofiler.h +++ b/projects/rocprofiler/inc/rocprofiler.h @@ -89,7 +89,9 @@ hsa_status_t rocprofiler_error_string( // Profiling feature kind typedef enum { ROCPROFILER_FEATURE_KIND_METRIC = 0, - ROCPROFILER_FEATURE_KIND_TRACE = 1 + ROCPROFILER_FEATURE_KIND_TRACE = 1, + ROCPROFILER_FEATURE_KIND_SPM_MOD = 2, + ROCPROFILER_FEATURE_KIND_PCSMP_MOD = 4 } rocprofiler_feature_kind_t; // Profiling feture parameter @@ -201,6 +203,10 @@ hsa_status_t rocprofiler_close(rocprofiler_t* context); // [in] profiling conte hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling context uint32_t group_index); // group index +// Return context agent +hsa_status_t rocprofiler_get_agent(rocprofiler_t* context, // [in] profiling context + hsa_agent_t* agent); // [out] GPU handle + // Supported time value ID typedef enum { ROCPROFILER_TIME_ID_CLOCK_REALTIME = 0, // Linux realtime clock time diff --git a/projects/rocprofiler/src/CMakeLists.txt b/projects/rocprofiler/src/CMakeLists.txt index 9a3984112d..4c97ea6f51 100644 --- a/projects/rocprofiler/src/CMakeLists.txt +++ b/projects/rocprofiler/src/CMakeLists.txt @@ -30,8 +30,9 @@ set ( LIB_SRC 
${LIB_DIR}/core/simple_proxy_queue.cpp ${LIB_DIR}/core/intercept_queue.cpp ${LIB_DIR}/core/metrics.cpp + ${LIB_DIR}/core/activity.cpp ${LIB_DIR}/util/hsa_rsrc_factory.cpp ) add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} ) -target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ) +target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_KMT_LIB_PATH}/.. ) target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++) diff --git a/projects/rocprofiler/src/core/activity.cpp b/projects/rocprofiler/src/core/activity.cpp new file mode 100644 index 0000000000..c72977e127 --- /dev/null +++ b/projects/rocprofiler/src/core/activity.cpp @@ -0,0 +1,171 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*******************************************************************************/ + +#include +#include +#include +#include + +#include +#include + +// Tracer messages protocol +#include + +#include "core/context.h" +#include "inc/rocprofiler.h" +#include "util/hsa_rsrc_factory.h" + +#define PUBLIC_API __attribute__((visibility("default"))) + +// Error handler +void fatal(const std::string msg) { + fflush(stdout); + fprintf(stderr, "%s\n\n", msg.c_str()); + fflush(stderr); + abort(); +} + +// Check returned HSA API status +void check_status(hsa_status_t status) { + if (status != HSA_STATUS_SUCCESS) { + const char* error_string = NULL; + rocprofiler_error_string(&error_string); + fprintf(stderr, "ERROR: %s\n", error_string); + abort(); + } +} + +// Activity primitives +namespace activity_prim { +// PC sampling callback data +struct pcsmp_callback_data_t { + const char* kernel_name; // sampled kernel name + void* data_buffer; // host buffer for tracing data + uint64_t id; // sample id + uint64_t cycle; // sample cycle + uint64_t pc; // sample PC +}; + +uint32_t activity_op = UINT32_MAX; +void* activity_arg = NULL; +std::atomic activity_callback{NULL}; +rocprofiler_t* context = NULL; + +hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type, + hsa_ven_amd_aqlprofile_info_data_t* info_data, + void* data) { + const pcsmp_callback_data_t* pcsmp_data = (pcsmp_callback_data_t*) data; + + activity_record_t record{}; + record.op = activity_op; + record.pc_sample.se = pcsmp_data->id; + record.pc_sample.cycle = pcsmp_data->cycle; + record.pc_sample.pc = pcsmp_data->pc; + activity_async_callback_t fun = activity_callback.load(std::memory_order_acquire); + if (fun) { + (fun)(activity_op, &record, activity_arg); + } else { + free((void*)(pcsmp_data->kernel_name)); + } + return HSA_STATUS_SUCCESS; +} + +bool context_handler(rocprofiler_group_t group, void* arg) { + hsa_agent_t agent{}; + hsa_status_t status = rocprofiler_get_agent(group.context, &agent); + 
check_status(status); + const rocprofiler::util::AgentInfo* agent_info = rocprofiler::util::HsaRsrcFactory::Instance().GetAgentInfo(agent); + + pcsmp_callback_data_t pcsmp_data{}; + pcsmp_data.kernel_name = (const char*)arg; + pcsmp_data.data_buffer = rocprofiler::util::HsaRsrcFactory::Instance().AllocateSysMemory(agent_info, rocprofiler::TraceProfile::GetSize()); + status = rocprofiler_iterate_trace_data(group.context, trace_data_cb, &pcsmp_data); + check_status(status); + return false; +} + +// Kernel dispatch callback: opens a PC sampling trace context and returns its group[0] via 'group' +hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* user_data, + rocprofiler_group_t* group) { + // context features + const rocprofiler_feature_kind_t trace_kind = + (rocprofiler_feature_kind_t)(ROCPROFILER_FEATURE_KIND_TRACE | ROCPROFILER_FEATURE_KIND_PCSMP_MOD); + const uint32_t feature_count = 1; + const uint32_t parameter_count = 1; + rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count]; + memset(features, 0, feature_count * sizeof(rocprofiler_feature_t)); + rocprofiler_parameter_t* parameters = new rocprofiler_parameter_t[parameter_count]; + memset(parameters, 0, parameter_count * sizeof(rocprofiler_parameter_t)); + parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET; + parameters[0].value = 0; + + features[0].kind = trace_kind; + features[0].parameters = parameters; + features[0].parameter_count = parameter_count; + + // context properties + rocprofiler_properties_t properties{}; + properties.handler = context_handler; + properties.handler_arg = (void*)strdup(callback_data->kernel_name); + + // Open profiling context + hsa_status_t status = rocprofiler_open(callback_data->agent, features, feature_count, + &context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties); + check_status(status); + + // Get group[0] + status = rocprofiler_get_group(context, 0, group); + check_status(status); + + return status; +} +} // namespace activity_prim + +extern "C" {
+PUBLIC_API const char* GetOpName(uint32_t op) { return strdup("PCSAMPLE"); } + +PUBLIC_API bool RegisterApiCallback(uint32_t op, void* callback, void* arg) { return true; } + +PUBLIC_API bool RemoveApiCallback(uint32_t op) { return true; } + +PUBLIC_API bool InitActivityCallback(void* callback, void* arg) { + activity_prim::activity_arg = arg; + activity_prim::activity_callback.store((activity_async_callback_t)callback, std::memory_order_release); + + rocprofiler_queue_callbacks_t queue_callbacks{}; + queue_callbacks.dispatch = activity_prim::dispatch_callback; + rocprofiler_set_queue_callbacks(queue_callbacks, NULL); + + return true; +} + +PUBLIC_API bool EnableActivityCallback(uint32_t op, bool enable) { + if (enable) { + activity_prim::activity_op = op; + rocprofiler_start_queue_callbacks(); + } else { + rocprofiler_stop_queue_callbacks(); + } + return true; +} +} // extern "C" diff --git a/projects/rocprofiler/src/core/context.h b/projects/rocprofiler/src/core/context.h index 856c702485..88320ef32f 100644 --- a/projects/rocprofiler/src/core/context.h +++ b/projects/rocprofiler/src/core/context.h @@ -276,6 +276,7 @@ class Context { profile_vector_t profile_vector; set_[0].GetTraceProfiles(profile_vector); for (auto& tuple : profile_vector) { + if (pcsmp_mode_) const_cast(tuple.profile)->event_count = UINT32_MAX; const hsa_status_t status = api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, callback, data); if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "context iterate data failed"); @@ -293,6 +294,7 @@ class Context { return false; } + hsa_agent_t GetAgent() const { return agent_; } Group* GetGroup(const uint32_t& index) { return &set_[index]; } rocprofiler_handler_t GetHandler(void** arg) const { *arg = handler_arg_; return handler_; } @@ -306,7 +308,8 @@ class Context { api_(hsa_rsrc_->AqlProfileApi()), metrics_(NULL), handler_(handler), - handler_arg_(handler_arg) + handler_arg_(handler_arg), + pcsmp_mode_(false) {} ~Context() { Destruct(); 
} @@ -434,10 +437,13 @@ class Context { const uint32_t group_index = block_status.group_index; set_[group_index].Insert(profile_info_t{event, NULL, 0, info}); } - } else if (kind == ROCPROFILER_FEATURE_KIND_TRACE) { // Processing traces features - if (info->parameters != NULL) { - set_[0].Insert(profile_info_t{NULL, info->parameters, info->parameter_count, info}); - } else { + } else if (kind & ROCPROFILER_FEATURE_KIND_TRACE) { // Processing traces features + info->kind = ROCPROFILER_FEATURE_KIND_TRACE; + + const event_t* event = NULL; + if (kind & ROCPROFILER_FEATURE_KIND_PCSMP_MOD) { // PC sampling + pcsmp_mode_ = true; + } else if (kind & ROCPROFILER_FEATURE_KIND_SPM_MOD) { // SPM trace const Metric* metric = metrics_->Get(name); if (metric == NULL) EXC_RAISING(HSA_STATUS_ERROR, "input metric '" << name << "' is not found"); @@ -445,9 +451,9 @@ class Context { if (counters_vec.size() != 1) EXC_RAISING(HSA_STATUS_ERROR, "trace bad metric '" << name << "' is not base counter"); const counter_t* counter = counters_vec[0]; - const event_t* event = &(counter->event); - set_[0].Insert(profile_info_t{event, NULL, 0, info}); + event = &(counter->event); } + set_[0].Insert(profile_info_t{event, info->parameters, info->parameter_count, info}); } else { EXC_RAISING(HSA_STATUS_ERROR, "bad rocprofiler feature kind (" << kind << ")"); } @@ -584,6 +590,9 @@ class Context { // Context completion handler rocprofiler_handler_t handler_; void* handler_arg_; + + // PC sampling mode + bool pcsmp_mode_; }; } // namespace rocprofiler diff --git a/projects/rocprofiler/src/core/intercept_queue.h b/projects/rocprofiler/src/core/intercept_queue.h index a0176c4d8f..0d184e557d 100644 --- a/projects/rocprofiler/src/core/intercept_queue.h +++ b/projects/rocprofiler/src/core/intercept_queue.h @@ -130,24 +130,35 @@ class InterceptQueue { Queue* proxy = obj->proxy_; if (submit_callback_fun_) { - for (uint64_t j = 0; j < count; ++j) { - const packet_t* packet = &packets_arr[j]; - const 
hsa_kernel_dispatch_packet_t* dispatch_packet = - reinterpret_cast(packet); + mutex_.lock(); + auto* callback_fun = submit_callback_fun_; + void* callback_arg = submit_callback_arg_; + mutex_.unlock(); - uint64_t kernel_object = dispatch_packet->kernel_object; - const amd_kernel_code_t* kernel_code = GetKernelCode(kernel_object); - const char* kernel_name = (GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) ? - QueryKernelName(kernel_object, kernel_code) : NULL; + if (callback_fun) { + for (uint64_t j = 0; j < count; ++j) { + const packet_t* packet = &packets_arr[j]; + const hsa_kernel_dispatch_packet_t* dispatch_packet = + reinterpret_cast(packet); - // Prepareing submit callback data - rocprofiler_hsa_callback_data_t data{}; - data.submit.packet = (void*)packet; - data.submit.kernel_name = kernel_name; - data.submit.queue = obj->queue_; - data.submit.device_type = obj->agent_info_->dev_type; - data.submit.device_id = obj->agent_info_->dev_index; - submit_callback_fun_(ROCPROFILER_HSA_CB_ID_SUBMIT, &data, submit_callback_arg_); + if (GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) { + uint64_t kernel_object = dispatch_packet->kernel_object; + const amd_kernel_code_t* kernel_code = GetKernelCode(kernel_object); + const char* kernel_name = (GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) ? 
+ QueryKernelName(kernel_object, kernel_code) : NULL; + + // Preparing submit callback data + rocprofiler_hsa_callback_data_t data{}; + data.submit.packet = (void*)packet; + data.submit.kernel_name = kernel_name; + data.submit.queue = obj->queue_; + data.submit.device_type = obj->agent_info_->dev_type; + data.submit.device_id = obj->agent_info_->dev_index; + callback_fun(ROCPROFILER_HSA_CB_ID_SUBMIT, &data, callback_arg); + } else { + callback_fun(ROCPROFILER_HSA_CB_ID_SUBMIT, NULL, callback_arg); + } + } } } diff --git a/projects/rocprofiler/src/core/rocprofiler.cpp b/projects/rocprofiler/src/core/rocprofiler.cpp index 3a45fdf6ed..38aa5f9330 100644 --- a/projects/rocprofiler/src/core/rocprofiler.cpp +++ b/projects/rocprofiler/src/core/rocprofiler.cpp @@ -25,6 +25,8 @@ THE SOFTWARE. #include #include #include + +#include #include #include "core/context.h" @@ -169,7 +171,9 @@ enum { uint32_t LoadTool() { uint32_t intercept_mode = 0; const char* tool_lib = getenv("ROCP_TOOL_LIB"); - ONLOAD_TRACE("load tool library(" << tool_lib << ")"); + std::ostringstream oss; + if (tool_lib) oss << "load tool library(" << tool_lib << ")"; + ONLOAD_TRACE(oss.str()); if (tool_lib) { intercept_mode = DISPATCH_INTERCEPT_MODE; @@ -555,6 +559,14 @@ PUBLIC_API hsa_status_t rocprofiler_reset(rocprofiler_t* handle, uint32_t group_ API_METHOD_SUFFIX } +// Return context agent +PUBLIC_API hsa_status_t rocprofiler_get_agent(rocprofiler_t* handle, hsa_agent_t* agent) { + API_METHOD_PREFIX + rocprofiler::Context* context = reinterpret_cast(handle); + *agent = context->GetAgent(); + API_METHOD_SUFFIX +} + // Get profiling group count PUBLIC_API hsa_status_t rocprofiler_group_count(const rocprofiler_t* handle, uint32_t* group_count) { diff --git a/projects/rocprofiler/test/tool/tool.cpp b/projects/rocprofiler/test/tool/tool.cpp index 363b4095a8..0713cc6742 100644 --- a/projects/rocprofiler/test/tool/tool.cpp +++ b/projects/rocprofiler/test/tool/tool.cpp @@ -139,6 +139,7 @@ bool is_trace_local =
true; // SPM trace enabled bool is_spm_trace = false; +static inline uint32_t GetPid() { return syscall(__NR_getpid); } static inline uint32_t GetTid() { return syscall(__NR_gettid); } // Error handler @@ -909,7 +910,7 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) } if (rcfile != NULL) { // Getting defaults - printf("ROCProfiler: rc-file '%s'\n", rcpath.c_str()); + printf("ROCProfiler pid(%u): rc-file '%s'\n", GetPid(), rcpath.c_str()); auto defaults_list = rcfile->GetNodes("top.defaults"); for (auto* entry : defaults_list) { const auto& opts = entry->opts;