diff --git a/Readme.txt b/Readme.txt index 3b2ca62448..81025053b2 100644 --- a/Readme.txt +++ b/Readme.txt @@ -31,14 +31,25 @@ $ export CMAKE_PREFIX_PATH=/home/evgeny/git/compute/out/ubuntu-16.04/16.04 $ cmake .. $ make +or + $ cmake -DCMAKE_PREFIX_PATH=/opt/rocm .. $ make To run the test: $ cd .../rocprofiler/build -$ export LD_LIBRARY_PATH=$PWD +$ export LD_LIBRARY_PATH=.: # paths to ROC profiler and other libraries $ export HSA_TOOLS_LIB=librocprofiler64.so +$ export ROCP_TOOL_LIB=test/libtool.so # tool library loaded by ROC profiler +$ export ROCP_HSA_INTERCEPT=1 # enable HSA dispatch intercepting +$ export ROCP_METRICS=metrics.xml # ROC profiler metrics config file +$ export ROCP_INPUT=input.xml # input file for the tool library +$ export ROCP_OUTPUT_DIR=./ # output directory for the tool library, for metrics results file 'results.txt' and SQTT trace files 'thread_trace.se.out' +$ + +Internal 'simple_convolution' test run script: +$ cd .../rocprofiler/build $ run.sh To enabled error messages logging to '/tmp/rocprofiler_log.txt': diff --git a/inc/rocprofiler.h b/inc/rocprofiler.h index 154aafc775..31de79585e 100644 --- a/inc/rocprofiler.h +++ b/inc/rocprofiler.h @@ -79,8 +79,8 @@ extern "C" { // Profiling feature type typedef enum { - ROCPROFILER_FEATURE_KIND_METRIC = 0, - ROCPROFILER_FEATURE_KIND_TRACE = 1 + ROCPROFILER_FEATURE_KIND_METRIC = 0, + ROCPROFILER_FEATURE_KIND_TRACE = 1 } rocprofiler_feature_kind_t; // Profiling feture parameter @@ -88,38 +88,38 @@ typedef hsa_ven_amd_aqlprofile_parameter_t rocprofiler_parameter_t; // Profiling data kind typedef enum { - ROCPROFILER_DATA_KIND_UNINIT = 0, - ROCPROFILER_DATA_KIND_INT32 = 1, - ROCPROFILER_DATA_KIND_INT64 = 2, - ROCPROFILER_DATA_KIND_FLOAT = 3, - ROCPROFILER_DATA_KIND_DOUBLE = 4, - ROCPROFILER_DATA_KIND_BYTES = 5 + ROCPROFILER_DATA_KIND_UNINIT = 0, + ROCPROFILER_DATA_KIND_INT32 = 1, + ROCPROFILER_DATA_KIND_INT64 = 2, + ROCPROFILER_DATA_KIND_FLOAT = 3, + ROCPROFILER_DATA_KIND_DOUBLE = 4, + 
ROCPROFILER_DATA_KIND_BYTES = 5 } rocprofiler_data_kind_t; // Profiling data type typedef struct { rocprofiler_data_kind_t kind; // result kind union { - uint32_t result_int32; // 32bit integer result - uint64_t result_int64; // 64bit integer result - float result_float; // float single-precision result - double result_double; // float double-precision result + uint32_t result_int32; // 32bit integer result + uint64_t result_int64; // 64bit integer result + float result_float; // float single-precision result + double result_double; // float double-precision result struct { void* ptr; uint32_t size; uint32_t instance_count; bool copy; - } result_bytes; // data by ptr and byte size + } result_bytes; // data by ptr and byte size }; } rocprofiler_data_t; -// Profiling feature info +// Profiling feature info typedef struct { - rocprofiler_feature_kind_t kind; // feature kind - const char* name; // feature name - const rocprofiler_parameter_t* parameters; // feature parameters array - uint32_t parameter_count; // feature parameters count - rocprofiler_data_t data; // profiling data + rocprofiler_feature_kind_t kind; // feature kind + const char* name; // feature name + const rocprofiler_parameter_t* parameters; // feature parameters array + uint32_t parameter_count; // feature parameters count + rocprofiler_data_t data; // profiling data } rocprofiler_feature_t; //////////////////////////////////////////////////////////////////////////////// @@ -132,17 +132,17 @@ typedef void rocprofiler_t; // Profiling group object typedef struct { - unsigned index; // group index - rocprofiler_feature_t** features; // profiling info array - uint32_t feature_count; // profiling info count - rocprofiler_t* context; // context object + unsigned index; // group index + rocprofiler_feature_t** features; // profiling info array + uint32_t feature_count; // profiling info count + rocprofiler_t* context; // context object } rocprofiler_group_t; // Profiling mode mask typedef enum { - 
ROCPROFILER_MODE_STANDALONE = 1, // standalone mode when ROC profiler supports a queue - ROCPROFILER_MODE_CREATEQUEUE = 2, // ROC profiler creates queue in standalone mode - ROCPROFILER_MODE_SINGLEGROUP = 4 // only one group is allowed, failed otherwise + ROCPROFILER_MODE_STANDALONE = 1, // standalone mode when ROC profiler supports a queue + ROCPROFILER_MODE_CREATEQUEUE = 2, // ROC profiler creates queue in standalone mode + ROCPROFILER_MODE_SINGLEGROUP = 4 // only one group is allowed, failed otherwise } rocprofiler_mode_t; // Profiling handler, calling on profiling completion @@ -150,30 +150,27 @@ typedef void (*rocprofiler_handler_t)(rocprofiler_group_t group, void* arg); // Profiling preperties typedef struct { - hsa_queue_t* queue; // queue for STANDALONE mode - // the queue is created and returned in CREATEQUEUE mode - uint32_t queue_depth; // created queue depth - rocprofiler_handler_t handler; // handler on completion - void* handler_arg; // the handler arg + hsa_queue_t* queue; // queue for STANDALONE mode + // the queue is created and returned in CREATEQUEUE mode + uint32_t queue_depth; // created queue depth + rocprofiler_handler_t handler; // handler on completion + void* handler_arg; // the handler arg } rocprofiler_properties_t; // Create new profiling context -hsa_status_t rocprofiler_open( - hsa_agent_t agent, // GPU handle - rocprofiler_feature_t* features, // [in] profiling info array - uint32_t feature_count, // profiling info count - rocprofiler_t** context, // [out] context object - uint32_t mode, // profiling mode mask - rocprofiler_properties_t* properties); // profiling properties +hsa_status_t rocprofiler_open(hsa_agent_t agent, // GPU handle + rocprofiler_feature_t* features, // [in] profiling info array + uint32_t feature_count, // profiling info count + rocprofiler_t** context, // [out] context object + uint32_t mode, // profiling mode mask + rocprofiler_properties_t* properties); // profiling properties // Delete profiling info 
-hsa_status_t rocprofiler_close( - rocprofiler_t* context); // [in] profiling context +hsa_status_t rocprofiler_close(rocprofiler_t* context); // [in] profiling context // Context reset before reusing -hsa_status_t rocprofiler_reset( - rocprofiler_t* context, // [in] profiling context - uint32_t group_index); // group index +hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling context + uint32_t group_index); // group index //////////////////////////////////////////////////////////////////////////////// // Runtime API observer @@ -182,22 +179,22 @@ hsa_status_t rocprofiler_reset( // Profiling callback data typedef struct { - hsa_agent_t agent; - uint64_t kernel_object; - uint64_t queue_index; + hsa_agent_t agent; + uint64_t kernel_object; + uint64_t queue_index; } rocprofiler_callback_data_t; // Profiling callback type typedef hsa_status_t (*rocprofiler_callback_t)( const rocprofiler_callback_data_t* callback_data, // [in] callback data union, data depends on // the callback API id - void* user_data, // [in/out] user data passed to the callback + void* user_data, // [in/out] user data passed to the callback rocprofiler_group_t* group); // [out] profiling group // Set/remove kernel dispatch observer hsa_status_t rocprofiler_set_dispatch_callback( - rocprofiler_callback_t callback, // observer callback - void* data); // [in/out] passed callback data + rocprofiler_callback_t callback, // observer callback + void* data); // [in/out] passed callback data hsa_status_t rocprofiler_remove_dispatch_callback(); @@ -208,46 +205,37 @@ hsa_status_t rocprofiler_remove_dispatch_callback(); // contect.invocations' to collect all profiling data // Start profiling -hsa_status_t rocprofiler_start( - rocprofiler_t* context, // [in/out] profiling context - uint32_t group_index = 0); // group index +hsa_status_t rocprofiler_start(rocprofiler_t* context, // [in/out] profiling context + uint32_t group_index = 0); // group index // Stop profiling -hsa_status_t 
rocprofiler_stop( - rocprofiler_t* context, // [in/out] profiling context - uint32_t group_index = 0); // group index +hsa_status_t rocprofiler_stop(rocprofiler_t* context, // [in/out] profiling context + uint32_t group_index = 0); // group index // Read profiling data -hsa_status_t rocprofiler_get_data( - rocprofiler_t* context, // [in/out] profiling context - uint32_t group_index = 0); // group index +hsa_status_t rocprofiler_get_data(rocprofiler_t* context, // [in/out] profiling context + uint32_t group_index = 0); // group index // Get profiling groups count -hsa_status_t rocprofiler_group_count( - const rocprofiler_t* context, // [in] profiling context - uint32_t* group_count); // [out] profiling groups count +hsa_status_t rocprofiler_group_count(const rocprofiler_t* context, // [in] profiling context + uint32_t* group_count); // [out] profiling groups count // Get profiling group for a given index -hsa_status_t rocprofiler_get_group( - rocprofiler_t* context, // [in] profiling context - uint32_t group_index, // [in] profiling group index - rocprofiler_group_t* group); // [out] profiling group +hsa_status_t rocprofiler_get_group(rocprofiler_t* context, // [in] profiling context + uint32_t group_index, // [in] profiling group index + rocprofiler_group_t* group); // [out] profiling group // Start profiling -hsa_status_t rocprofiler_group_start( - rocprofiler_group_t* group); // [in/out] profiling group +hsa_status_t rocprofiler_group_start(rocprofiler_group_t* group); // [in/out] profiling group // Stop profiling -hsa_status_t rocprofiler_group_stop( - rocprofiler_group_t* group); // [in/out] profiling group +hsa_status_t rocprofiler_group_stop(rocprofiler_group_t* group); // [in/out] profiling group // Get profiling data -hsa_status_t rocprofiler_group_get_data( - rocprofiler_group_t* group); // [in/out] profiling group +hsa_status_t rocprofiler_group_get_data(rocprofiler_group_t* group); // [in/out] profiling group // Get metrics data -hsa_status_t 
rocprofiler_get_metrics( - const rocprofiler_t* context); // [in/out] profiling context +hsa_status_t rocprofiler_get_metrics(const rocprofiler_t* context); // [in/out] profiling context // Definition of output data iterator callback typedef hsa_ven_amd_aqlprofile_data_callback_t rocprofiler_trace_data_callback_t; @@ -256,21 +244,18 @@ typedef hsa_ven_amd_aqlprofile_data_callback_t rocprofiler_trace_data_callback_t hsa_status_t rocprofiler_iterate_trace_data( rocprofiler_t* context, // [in] profiling context rocprofiler_trace_data_callback_t callback, // [in] callback to iterate the output data - void* data); // [in/out] callback data + void* data); // [in/out] callback data //////////////////////////////////////////////////////////////////////////////// // Returning the error string method -hsa_status_t rocprofiler_error_string ( - const char** str); // [out] the API error string pointer returning +hsa_status_t rocprofiler_error_string( + const char** str); // [out] the API error string pointer returning //////////////////////////////////////////////////////////////////////////////// // HSA-runtime tool on-load method -bool OnLoad( - HsaApiTable* table, - uint64_t runtime_version, - uint64_t failed_tool_count, - const char* const * failed_tool_names); +bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names); #ifdef __cplusplus } // extern "C" block diff --git a/src/core/context.h b/src/core/context.h index 6fa7286939..3fad27437f 100644 --- a/src/core/context.h +++ b/src/core/context.h @@ -19,7 +19,9 @@ namespace rocprofiler { struct rocprofiler_contex_t; class Context; -inline unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } +inline unsigned align_size(unsigned size, unsigned alignment) { + return ((size + alignment - 1) & ~(alignment - 1)); +} // Block descriptor struct block_des_t { @@ -42,9 +44,8 @@ struct block_status_t { }; // 
Metrics arguments -template -class MetricArgs : public xml::args_cache_t { - public: +template class MetricArgs : public xml::args_cache_t { + public: MetricArgs(const Map& map) : map_(map) {} bool Lookup(const std::string& name, uint64_t& result) const { rocprofiler_feature_t* info = NULL; @@ -53,26 +54,29 @@ class MetricArgs : public xml::args_cache_t { info = it->second; if (info) { result = info->data.result_int64; - if (info->data.kind == ROCPROFILER_DATA_KIND_UNINIT) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is uninitialized"); - if (info->data.kind != ROCPROFILER_DATA_KIND_INT64) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is of incompatible type, not INT64"); - } else EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' info is NULL"); + if (info->data.kind == ROCPROFILER_DATA_KIND_UNINIT) + EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is uninitialized"); + if (info->data.kind != ROCPROFILER_DATA_KIND_INT64) + EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is of incompatible type, not INT64"); + } else + EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' info is NULL"); return (info != NULL); } - private: + + private: const Map& map_; }; // Profiling group class Group { - public: - Group(const util::AgentInfo* agent_info, Context *context, const uint32_t& index) : - pmc_profile_(agent_info), - sqtt_profile_(agent_info), - n_profiles_(0), - refs_(1), - context_(context), - index_(index) - {} + public: + Group(const util::AgentInfo* agent_info, Context* context, const uint32_t& index) + : pmc_profile_(agent_info), + sqtt_profile_(agent_info), + n_profiles_(0), + refs_(1), + context_(context), + index_(index) {} void Insert(const profile_info_t& info) { const rocprofiler_feature_kind_t kind = info.rinfo->kind; @@ -106,9 +110,7 @@ class Group { sqtt_profile_.GetProfiles(vec); } - void GetTraceProfiles(profile_vector_t& vec) { - sqtt_profile_.GetProfiles(vec); - } + void GetTraceProfiles(profile_vector_t& vec) { 
sqtt_profile_.GetProfiles(vec); } info_vector_t& GetInfoVector() { return info_vector_; } const pkt_vector_t& GetStartVector() const { return start_vector_; } @@ -125,7 +127,7 @@ class Group { return refs_; } - private: + private: PmcProfile pmc_profile_; SqttProfile sqtt_profile_; info_vector_t info_vector_; @@ -139,19 +141,19 @@ class Group { // Profiling context class Context { - public: + public: typedef std::mutex mutex_t; typedef std::map info_map_t; - Context(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info, const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg) : - agent_(agent_info->dev_id), - agent_info_(agent_info), - queue_(queue), - hsa_rsrc_(&util::HsaRsrcFactory::Instance()), - api_(hsa_rsrc_->AqlProfileApi()), - handler_(handler), - handler_arg_(handler_arg) - { + Context(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info, + const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg) + : agent_(agent_info->dev_id), + agent_info_(agent_info), + queue_(queue), + hsa_rsrc_(&util::HsaRsrcFactory::Instance()), + api_(hsa_rsrc_->AqlProfileApi()), + handler_(handler), + handler_arg_(handler_arg) { metrics_ = MetricsDict::Create(agent_info); if (metrics_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "MetricsDict create failed"); Initialize(info, info_count); @@ -163,12 +165,8 @@ class Context { const profile_vector_t profile_vector = GetProfiles(group_index); for (auto& tuple : profile_vector) { // Handler for stop packet completion - hsa_amd_signal_async_handler( - tuple.completion_signal, - HSA_SIGNAL_CONDITION_LT, - 1, - Handler, - &set_[group_index]); + hsa_amd_signal_async_handler(tuple.completion_signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, + &set_[group_index]); } } } @@ -178,7 +176,8 @@ class Context { for (const auto& v : info_map_) { const std::string& name = v.first; const rocprofiler_feature_t* info = v.second; - if ((info->kind == 
ROCPROFILER_FEATURE_KIND_METRIC) && (metrics_map_.find(name) == metrics_map_.end())) { + if ((info->kind == ROCPROFILER_FEATURE_KIND_METRIC) && + (metrics_map_.find(name) == metrics_map_.end())) { delete info; } } @@ -206,14 +205,18 @@ class Context { const rocprofiler_feature_kind_t kind = info->kind; const char* name = info->name; - if (kind == ROCPROFILER_FEATURE_KIND_METRIC) { // Processing metrics features + if (kind == ROCPROFILER_FEATURE_KIND_METRIC) { // Processing metrics features const Metric* metric = metrics_->Get(name); - if (metric == NULL) EXC_RAISING(HSA_STATUS_ERROR, "input metric '" << name << "' is not found"); + if (metric == NULL) + EXC_RAISING(HSA_STATUS_ERROR, "input metric '" << name << "' is not found"); auto ret = metrics_map_.insert({name, metric}); - if (!ret.second) EXC_RAISING(HSA_STATUS_ERROR, "input metric '" << name << "' is registered more then once"); + if (!ret.second) + EXC_RAISING(HSA_STATUS_ERROR, "input metric '" << name + << "' is registered more then once"); counters_vec_t counters_vec = metric->GetCounters(); - if (counters_vec.empty()) EXC_RAISING(HSA_STATUS_ERROR, "bad metric '" << name << "' is empty"); + if (counters_vec.empty()) + EXC_RAISING(HSA_STATUS_ERROR, "bad metric '" << name << "' is empty"); for (const counter_t* counter : counters_vec) { // For metrics expressions checking that there is no the same counter in the input metrics @@ -238,9 +241,10 @@ class Context { query.agent = agent_; query.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC; query.events = event; - + uint32_t block_counters; - hsa_status_t status = api_->hsa_ven_amd_aqlprofile_get_info(&query, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS, &block_counters); + hsa_status_t status = api_->hsa_ven_amd_aqlprofile_get_info( + &query, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS, &block_counters); if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "get block_counters info"); block_status.max_counters = block_counters; } @@ -254,9 +258,9 @@ class 
Context { const uint32_t group_index = block_status.group_index; set_[group_index].Insert(profile_info_t{event, NULL, 0, info}); } - } else if (kind == ROCPROFILER_FEATURE_KIND_TRACE) { // Processing traces features + } else if (kind == ROCPROFILER_FEATURE_KIND_TRACE) { // Processing traces features set_[0].Insert(profile_info_t{NULL, info->parameters, info->parameter_count, info}); - } else { + } else { EXC_RAISING(HSA_STATUS_ERROR, "bad rocprofiler feature kind (" << kind << ")"); } } @@ -269,9 +273,7 @@ class Context { } } - void Reset(const uint32_t& group_index) { - set_[group_index].ResetRefs(); - } + void Reset(const uint32_t& group_index) { set_[group_index].ResetRefs(); } uint32_t GetGroupCount() const { return set_.size(); } @@ -285,8 +287,12 @@ class Context { return group; } - const pkt_vector_t& StartPackets(const uint32_t& group_index) const { return set_[group_index].GetStartVector(); } - const pkt_vector_t& StopPackets(const uint32_t& group_index) const { return set_[group_index].GetStopVector(); } + const pkt_vector_t& StartPackets(const uint32_t& group_index) const { + return set_[group_index].GetStartVector(); + } + const pkt_vector_t& StopPackets(const uint32_t& group_index) const { + return set_[group_index].GetStopVector(); + } void Start(const uint32_t& group_index, Queue* const queue = NULL) { const pkt_vector_t& start_packets = StartPackets(group_index); @@ -315,14 +321,11 @@ class Context { const profile_vector_t profile_vector = GetProfiles(group_index); for (auto& tuple : profile_vector) { // Wait for stop packet to complete - hsa_signal_wait_scacquire( - tuple.completion_signal, - HSA_SIGNAL_CONDITION_LT, - 1, - (uint64_t)-1, - HSA_WAIT_STATE_BLOCKED); + hsa_signal_wait_scacquire(tuple.completion_signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1, + HSA_WAIT_STATE_BLOCKED); callback_data_t callback_data{tuple.info_vector, tuple.info_vector->size(), NULL}; - const hsa_status_t status = 
api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, DataCallback, &callback_data); + const hsa_status_t status = + api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, DataCallback, &callback_data); if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "context iterate data failed"); } } @@ -335,7 +338,8 @@ class Context { const xml::Expr* expr = metric->GetExpr(); if (expr) { auto it = info_map_.find(name); - if (it == info_map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "', rocprofiler info is not found"); + if (it == info_map_.end()) + EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "', rocprofiler info is not found"); rocprofiler_feature_t* info = it->second; info->data.result_int64 = expr->Eval(args); info->data.kind = ROCPROFILER_DATA_KIND_INT64; @@ -343,16 +347,17 @@ class Context { } } - void IterateTraceData(rocprofiler_trace_data_callback_t callback, void *data) { + void IterateTraceData(rocprofiler_trace_data_callback_t callback, void* data) { profile_vector_t profile_vector; set_[0].GetTraceProfiles(profile_vector); for (auto& tuple : profile_vector) { - const hsa_status_t status = api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, callback, data); + const hsa_status_t status = + api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, callback, data); if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "context iterate data failed"); } } - private: + private: // Getting profling packets profile_vector_t GetProfiles(const uint32_t& index) { profile_vector_t vec; @@ -374,8 +379,7 @@ class Context { } static hsa_status_t DataCallback(hsa_ven_amd_aqlprofile_info_type_t ainfo_type, - hsa_ven_amd_aqlprofile_info_data_t* ainfo_data, - void* data) { + hsa_ven_amd_aqlprofile_info_data_t* ainfo_data, void* data) { hsa_status_t status = HSA_STATUS_SUCCESS; callback_data_t* callback_data = reinterpret_cast(data); info_vector_t& info_vector = *(callback_data->info_vector); @@ -403,7 +407,7 @@ class Context { char* ptr = 
(sample_id == 0) ? result_bytes_ptr : callback_data->ptr; uint64_t* header = reinterpret_cast(ptr); char* dest = ptr + sizeof(*header); - + if ((dest + size) < end) { hsa_status_t status = hsa_memory_copy(dest, src, size); if (status == HSA_STATUS_SUCCESS) { @@ -412,7 +416,8 @@ class Context { rinfo->data.result_bytes.instance_count = sample_id + 1; callback_data->ptr = dest + align_size(size, sizeof(uint64_t)); } - } else status = HSA_STATUS_ERROR; + } else + status = HSA_STATUS_ERROR; } else { if (sample_id == 0) { rinfo->data.kind = ROCPROFILER_DATA_KIND_BYTES; @@ -421,8 +426,10 @@ class Context { } rinfo->data.result_bytes.instance_count += 1; } - } else status = HSA_STATUS_ERROR; - } else status = HSA_STATUS_ERROR; + } else + status = HSA_STATUS_ERROR; + } else + status = HSA_STATUS_ERROR; return status; } @@ -445,7 +452,7 @@ class Context { const pfn_t* api_; // Profile group set std::vector set_; - // Metrics dictionary + // Metrics dictionary MetricsDict* metrics_; // Groups map std::map groups_map_; diff --git a/src/core/hsa_proxy_queue.h b/src/core/hsa_proxy_queue.h index b1a8167e66..e1c33da0fe 100644 --- a/src/core/hsa_proxy_queue.h +++ b/src/core/hsa_proxy_queue.h @@ -15,26 +15,23 @@ extern decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_ extern decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn; class HsaProxyQueue : public ProxyQueue { - public: + public: hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) { return hsa_amd_queue_intercept_register_fn(queue_, on_submit_cb, data); } - void Submit(const packet_t* packet) { EXC_RAISING(HSA_STATUS_ERROR, "HsaProxyQueue::Submit() is not supported"); } + void Submit(const packet_t* packet) { + EXC_RAISING(HSA_STATUS_ERROR, "HsaProxyQueue::Submit() is not supported"); + } - private: - hsa_status_t Init( - hsa_agent_t agent, - uint32_t size, - hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), 
- void *data, - uint32_t private_segment_size, - uint32_t group_segment_size, - hsa_queue_t **queue) - { + private: + hsa_status_t Init(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), + void* data, uint32_t private_segment_size, uint32_t group_segment_size, + hsa_queue_t** queue) { printf("HsaProxyQueue::Init()\n"); - const auto status = hsa_amd_queue_intercept_create_fn(agent, size, type, callback, data, private_segment_size, group_segment_size, &queue_); + const auto status = hsa_amd_queue_intercept_create_fn( + agent, size, type, callback, data, private_segment_size, group_segment_size, &queue_); *queue = queue_; return status; } @@ -44,6 +41,6 @@ class HsaProxyQueue : public ProxyQueue { hsa_queue_t* queue_; }; -} // namespace rocprofiler +} // namespace rocprofiler -#endif // _SRC_CORE_HSA_PROXY_QUEUE_H +#endif // _SRC_CORE_HSA_PROXY_QUEUE_H diff --git a/src/core/hsa_queue.h b/src/core/hsa_queue.h index 992dae8ac1..fcaecfd5b2 100644 --- a/src/core/hsa_queue.h +++ b/src/core/hsa_queue.h @@ -9,7 +9,7 @@ namespace rocprofiler { class HsaQueue : public Queue { - public: + public: typedef void (HsaQueue::*submit_fptr_t)(const packet_t* packet); enum { LEGACY_SLOT_SIZE_W = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_word_t), @@ -19,9 +19,7 @@ class HsaQueue : public Queue { packet_word_t words[LEGACY_SLOT_SIZE_W]; }; - HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) : - queue_(queue) - {} + HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) : queue_(queue) {} void Submit(const packet_t* packet) { // Compute the write index of queue and copy Aql packet into it @@ -51,10 +49,10 @@ class HsaQueue : public Queue { hsa_signal_store_relaxed(queue_->doorbell_signal, que_idx); } - private: + private: hsa_queue_t* queue_; }; -} // namespace rocprofiler +} // namespace rocprofiler -#endif // _SRC_CORE_HSA_QUEUE_H +#endif // _SRC_CORE_HSA_QUEUE_H 
diff --git a/src/core/intercept_queue.cpp b/src/core/intercept_queue.cpp index 29c3a00aa2..06821ff0ee 100644 --- a/src/core/intercept_queue.cpp +++ b/src/core/intercept_queue.cpp @@ -12,4 +12,4 @@ void* InterceptQueue::on_dispatch_cb_data_ = NULL; const char* InterceptQueue::tool_lib_ = NULL; void* InterceptQueue::tool_handle_ = NULL; InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL; -} // namespace rocprofiler +} // namespace rocprofiler diff --git a/src/core/intercept_queue.h b/src/core/intercept_queue.h index 443c16d986..54995c6f1d 100644 --- a/src/core/intercept_queue.h +++ b/src/core/intercept_queue.h @@ -17,7 +17,7 @@ extern decltype(hsa_queue_create)* hsa_queue_create_fn; extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn; class InterceptQueue { - public: + public: typedef std::recursive_mutex mutex_t; typedef std::map obj_map_t; @@ -25,18 +25,15 @@ class InterceptQueue { static void SetTool(const char* tool) { tool_lib_ = tool; } - static void UnloadTool() { if (tool_handle_) dlclose(tool_handle_); } + static void UnloadTool() { + if (tool_handle_) dlclose(tool_handle_); + } - static hsa_status_t QueueCreate( - hsa_agent_t agent, - uint32_t size, - hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), - void *data, - uint32_t private_segment_size, - uint32_t group_segment_size, - hsa_queue_t **queue) - { + static hsa_status_t QueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, + void* data), + void* data, uint32_t private_segment_size, + uint32_t group_segment_size, hsa_queue_t** queue) { std::lock_guard lck(mutex_); hsa_status_t status = HSA_STATUS_ERROR; @@ -53,7 +50,8 @@ class InterceptQueue { if (!obj_map_) obj_map_ = new obj_map_t; - ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, &status); + ProxyQueue* proxy = ProxyQueue::Create(agent, 
size, type, callback, data, private_segment_size, + group_segment_size, queue, &status); if (status != HSA_STATUS_SUCCESS) { InterceptQueue* obj = new InterceptQueue(agent, proxy); (*obj_map_)[(uint64_t)(*queue)] = obj; @@ -63,14 +61,14 @@ class InterceptQueue { return status; } - static hsa_status_t QueueDestroy(hsa_queue_t *queue) { + static hsa_status_t QueueDestroy(hsa_queue_t* queue) { std::lock_guard lck(mutex_); hsa_status_t status = HSA_STATUS_ERROR; obj_map_t::iterator it = obj_map_->find((uint64_t)queue); if (it != obj_map_->end()) { const InterceptQueue* obj = it->second; - delete obj; + delete obj; obj_map_->erase(it); status = HSA_STATUS_SUCCESS; } @@ -78,8 +76,9 @@ class InterceptQueue { return status; } - static void OnSubmitCB(const void* in_packets, uint64_t count, uint64_t user_que_idx, void* data, hsa_amd_queue_intercept_packet_writer writer) { - const packet_t* packets_arr = reinterpret_cast(in_packets); + static void OnSubmitCB(const void* in_packets, uint64_t count, uint64_t user_que_idx, void* data, + hsa_amd_queue_intercept_packet_writer writer) { + const packet_t* packets_arr = reinterpret_cast(in_packets); InterceptQueue* obj = reinterpret_cast(data); Queue* proxy = obj->proxy_; @@ -89,8 +88,10 @@ class InterceptQueue { if ((GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) && (on_dispatch_cb_ != NULL)) { rocprofiler_group_t group = {}; - const hsa_kernel_dispatch_packet_t* dispatch_packet = reinterpret_cast(packet); - rocprofiler_callback_data_t data = {obj->agent_info_->dev_id, dispatch_packet->kernel_object, user_que_idx}; + const hsa_kernel_dispatch_packet_t* dispatch_packet = + reinterpret_cast(packet); + rocprofiler_callback_data_t data = {obj->agent_info_->dev_id, + dispatch_packet->kernel_object, user_que_idx}; hsa_status_t status = on_dispatch_cb_(&data, on_dispatch_cb_data_, &group); if (status == HSA_STATUS_SUCCESS) { Context* context = reinterpret_cast(group.context); @@ -110,11 +111,11 @@ class InterceptQueue { } if 
(to_submit) { - if (writer != NULL) { - writer(packet, 1); - } else { - proxy->Submit(packet, 1); - } + if (writer != NULL) { + writer(packet, 1); + } else { + proxy->Submit(packet, 1); + } } packet += 1; @@ -133,12 +134,12 @@ class InterceptQueue { on_dispatch_cb_data_ = NULL; } - private: + private: InterceptQueue(const hsa_agent_t& agent, ProxyQueue* proxy) : proxy_(proxy) { agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent); } ~InterceptQueue() { ProxyQueue::Destroy(proxy_); } - + static packet_word_t GetHeaderType(const packet_t* packet) { const packet_word_t* header = reinterpret_cast(packet); return (*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask; @@ -156,6 +157,6 @@ class InterceptQueue { const util::AgentInfo* agent_info_; }; -} // namespace rocprofiler +} // namespace rocprofiler -#endif // _SRC_CORE_INTERCEPT_QUEUE_H +#endif // _SRC_CORE_INTERCEPT_QUEUE_H diff --git a/src/core/metrics.h b/src/core/metrics.h index e4d045f584..db7e6ea896 100644 --- a/src/core/metrics.h +++ b/src/core/metrics.h @@ -25,61 +25,69 @@ struct counter_t { typedef std::vector counters_vec_t; class Metric { - public: + public: Metric(const std::string& name) : name_(name) {} std::string GetName() const { return name_; } - virtual void GetCounters(counters_vec_t &vec) const = 0; + virtual void GetCounters(counters_vec_t& vec) const = 0; counters_vec_t GetCounters() const { counters_vec_t counters; GetCounters(counters); return counters; } virtual const xml::Expr* GetExpr() const = 0; - private: + + private: std::string name_; }; class BaseMetric : public Metric { - public: + public: BaseMetric(const std::string& name, const counter_t& counter) : Metric(name), counter_(counter) {} - void GetCounters(counters_vec_t &vec) const { vec.push_back(&counter_); } + void GetCounters(counters_vec_t& vec) const { vec.push_back(&counter_); } const xml::Expr* GetExpr() const { return NULL; } - private: + + private: const counter_t counter_; }; class ExprMetric : public 
Metric { - public: - ExprMetric(const std::string& name, const counters_vec_t& counters, const xml::Expr* expr) : Metric(name), counters_(counters), expr_(expr) {} - void GetCounters(counters_vec_t& vec) const { vec.insert(vec.end(), counters_.begin(), counters_.end()); } + public: + ExprMetric(const std::string& name, const counters_vec_t& counters, const xml::Expr* expr) + : Metric(name), counters_(counters), expr_(expr) {} + void GetCounters(counters_vec_t& vec) const { + vec.insert(vec.end(), counters_.begin(), counters_.end()); + } const xml::Expr* GetExpr() const { return expr_; } - private: + + private: const counters_vec_t counters_; const xml::Expr* expr_; }; class MetricsDict { - public: + public: typedef std::map cache_t; typedef cache_t::const_iterator const_iterator_t; typedef std::map map_t; typedef std::mutex mutex_t; class ExprCache : public xml::expr_cache_t { - public: + public: ExprCache(const cache_t* cache) : cache_(cache) {} bool Lookup(const std::string& name, std::string& result) const { bool ret = false; auto it = cache_->find(name); if (it != cache_->end()) { ret = true; - const rocprofiler::ExprMetric* expr_metric = dynamic_cast(it->second); + const rocprofiler::ExprMetric* expr_metric = + dynamic_cast(it->second); if (expr_metric) result = expr_metric->GetExpr()->GetStr(); } return ret; } - private: + + private: const cache_t* const cache_; }; @@ -98,7 +106,7 @@ class MetricsDict { return metric; } - private: + private: MetricsDict(const util::AgentInfo* agent_info) : xml_(NULL) { const char* xml_name = getenv("ROCP_METRICS"); if (xml_name != NULL) { @@ -113,23 +121,26 @@ class MetricsDict { auto scope_list = xml_->GetNodes("top." 
+ std::string(scope) + ".metric"); if (!scope_list.empty()) { std::cout << " " << scope_list.size() << " " << scope << " metrics found" << std::endl; - + for (auto node : scope_list) { const std::string name = node->opts["name"]; - if (cache_.find(name) != cache_.end()) EXC_RAISING(HSA_STATUS_ERROR, "ImportMetrics: metrics redefined '" << name << "'"); + if (cache_.find(name) != cache_.end()) + EXC_RAISING(HSA_STATUS_ERROR, "ImportMetrics: metrics redefined '" << name << "'"); const std::string expr_str = node->opts["expr"]; if (expr_str.empty()) { const std::string block_name = node->opts["block"]; const uint32_t event_id = atoi(node->opts["event"].c_str()); - + hsa_ven_amd_aqlprofile_profile_t profile; profile.agent = agent_info->dev_id; hsa_ven_amd_aqlprofile_id_query_t query = {block_name.c_str(), 0, 0}; - hsa_status_t status = util::HsaRsrcFactory::Instance().AqlProfileApi()-> - hsa_ven_amd_aqlprofile_get_info(&profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID, &query); + hsa_status_t status = + util::HsaRsrcFactory::Instance().AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info( + &profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID, &query); if (status == HSA_STATUS_SUCCESS) { - const hsa_ven_amd_aqlprofile_block_name_t block_id = (hsa_ven_amd_aqlprofile_block_name_t)query.id; + const hsa_ven_amd_aqlprofile_block_name_t block_id = + (hsa_ven_amd_aqlprofile_block_name_t)query.id; if (query.instance_count > 1) { for (unsigned block_index = 0; block_index < query.instance_count; ++block_index) { std::ostringstream os; @@ -142,14 +153,18 @@ class MetricsDict { const counter_t counter = {name, {block_id, 0, event_id}}; cache_[name] = new BaseMetric(name, counter); } - } else AQL_EXC_RAISING(HSA_STATUS_ERROR, "ImportMetrics: bad block name '" << block_name << "'"); + } else + AQL_EXC_RAISING(HSA_STATUS_ERROR, "ImportMetrics: bad block name '" << block_name + << "'"); } else { xml::Expr* expr_obj = new xml::Expr(expr_str, new ExprCache(&cache_)); std::cout << " " << name << " = 
" << expr_obj->String() << std::endl; counters_vec_t counters_vec; for (const std::string var : expr_obj->GetVars()) { auto it = cache_.find(var); - if (it == cache_.end()) EXC_RAISING(HSA_STATUS_ERROR, "Bad metric '" << name << "', var '" << var << "' is not found"); + if (it == cache_.end()) + EXC_RAISING(HSA_STATUS_ERROR, "Bad metric '" << name << "', var '" << var + << "' is not found"); it->second->GetCounters(counters_vec); } cache_[name] = new ExprMetric(name, counters_vec, expr_obj); diff --git a/src/core/profile.h b/src/core/profile.h index 690d995da4..3847cb565d 100644 --- a/src/core/profile.h +++ b/src/core/profile.h @@ -26,13 +26,13 @@ struct profile_tuple_t { }; typedef std::vector profile_vector_t; -template class ConfigBase {}; +template class ConfigBase {}; -template<> class ConfigBase { - public: - ConfigBase(profile_t *profile) : profile_(profile) {} +template <> class ConfigBase { + public: + ConfigBase(profile_t* profile) : profile_(profile) {} - protected: + protected: void* Array() { return const_cast(profile_->events); } unsigned Count() const { return profile_->event_count; } void Set(event_t* events, const unsigned& count) { @@ -42,11 +42,11 @@ template<> class ConfigBase { profile_t* profile_; }; -template<> class ConfigBase { - public: - ConfigBase(profile_t *profile) : profile_(profile) {} +template <> class ConfigBase { + public: + ConfigBase(profile_t* profile) : profile_(profile) {} - protected: + protected: void* Array() { return const_cast(profile_->parameters); } unsigned Count() const { return profile_->parameter_count; } void Set(parameter_t* parameters, const unsigned& count) { @@ -56,23 +56,25 @@ template<> class ConfigBase { profile_t* profile_; }; -template -class Config : protected ConfigBase { +template class Config : protected ConfigBase { typedef ConfigBase Parent; - public: - Config(profile_t *profile) : Parent(profile) {} + + public: + Config(profile_t* profile) : Parent(profile) {} void Insert(const Item& item) { auto 
count = Parent::Count(); count += 1; - Item* array = reinterpret_cast(realloc(const_cast(Parent::Array()), count * sizeof(Item))); + Item* array = + reinterpret_cast(realloc(const_cast(Parent::Array()), count * sizeof(Item))); array[count - 1] = item; Parent::Set(array, count); } }; class Profile { - public: - static const uint32_t LEGACY_SLOT_SIZE_PKT = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t); + public: + static const uint32_t LEGACY_SLOT_SIZE_PKT = + HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t); Profile(const util::AgentInfo* agent_info) : agent_info_(agent_info) { profile_ = {}; @@ -86,9 +88,7 @@ class Profile { free(const_cast(profile_.parameters)); } - virtual void Insert(const profile_info_t& info) { - info_vector_.push_back(info.rinfo); - } + virtual void Insert(const profile_info_t& info) { info_vector_.push_back(info.rinfo); } hsa_status_t Finalize(pkt_vector_t& start_vector, pkt_vector_t& stop_vector) { hsa_status_t status = HSA_STATUS_SUCCESS; @@ -124,10 +124,14 @@ class Profile { start_vector.insert(start_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{}); stop_vector.insert(stop_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{}); - status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(&start, reinterpret_cast(&start_vector[start_index])); - if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); - status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(&stop, reinterpret_cast(&stop_vector[stop_index])); - if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4( + &start, reinterpret_cast(&start_vector[start_index])); + if (status != HSA_STATUS_SUCCESS) + AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4( + &stop, reinterpret_cast(&stop_vector[stop_index])); + if (status != HSA_STATUS_SUCCESS) + 
AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); } else { start_vector.push_back(start); stop_vector.push_back(stop); @@ -145,7 +149,7 @@ class Profile { bool Empty() const { return info_vector_.empty(); } - protected: + protected: virtual hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) = 0; const util::AgentInfo* const agent_info_; @@ -156,7 +160,7 @@ class Profile { }; class PmcProfile : public Profile { - public: + public: PmcProfile(const util::AgentInfo* agent_info) : Profile(agent_info) { profile_.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC; } @@ -167,14 +171,16 @@ class PmcProfile : public Profile { } hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) { - profile_.command_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size); + profile_.command_buffer.ptr = + rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size); profile_.output_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.output_buffer.size); - return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; + return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS + : HSA_STATUS_ERROR; } }; class SqttProfile : public Profile { - public: + public: static const uint32_t output_buffer_size = 0x2000000; // 32M SqttProfile(const util::AgentInfo* agent_info) : Profile(agent_info) { @@ -197,9 +203,12 @@ class SqttProfile : public Profile { hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) { profile_.output_buffer.size = output_buffer_size; - profile_.command_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size); - profile_.output_buffer.ptr = rsrc->AllocateLocalMemory(agent_info_, profile_.output_buffer.size); - return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? 
HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; + profile_.command_buffer.ptr = + rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size); + profile_.output_buffer.ptr = + rsrc->AllocateLocalMemory(agent_info_, profile_.output_buffer.size); + return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS + : HSA_STATUS_ERROR; } }; diff --git a/src/core/proxy_queue.cpp b/src/core/proxy_queue.cpp index 166d84be9e..5f76980d51 100644 --- a/src/core/proxy_queue.cpp +++ b/src/core/proxy_queue.cpp @@ -10,25 +10,22 @@ void ProxyQueue::HsaIntercept(HsaApiTable* table) { if (rocp_type_) SimpleProxyQueue::HsaIntercept(table); } -ProxyQueue* ProxyQueue::Create( - hsa_agent_t agent, - uint32_t size, - hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), - void *data, - uint32_t private_segment_size, - uint32_t group_segment_size, - hsa_queue_t **queue, - hsa_status_t* status) -{ +ProxyQueue* ProxyQueue::Create(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, + void* data), + void* data, uint32_t private_segment_size, + uint32_t group_segment_size, hsa_queue_t** queue, + hsa_status_t* status) { hsa_status_t suc = HSA_STATUS_ERROR; #ifdef ROCP_HSA_PROXY - ProxyQueue* instance = (rocp_type_) ? (ProxyQueue*) new SimpleProxyQueue() : (ProxyQueue*) new HsaProxyQueue(); + ProxyQueue* instance = + (rocp_type_) ? 
(ProxyQueue*)new SimpleProxyQueue() : (ProxyQueue*)new HsaProxyQueue(); #else ProxyQueue* instance = new SimpleProxyQueue(); #endif if (instance != NULL) { - const auto suc = instance->Init(agent, size, type, callback, data, private_segment_size, group_segment_size, queue); + const auto suc = instance->Init(agent, size, type, callback, data, private_segment_size, + group_segment_size, queue); if (suc != HSA_STATUS_SUCCESS) { delete instance; instance = NULL; @@ -45,4 +42,4 @@ hsa_status_t ProxyQueue::Destroy(const ProxyQueue* obj) { } bool ProxyQueue::rocp_type_ = false; -} // namespace rocprofiler +} // namespace rocprofiler diff --git a/src/core/proxy_queue.h b/src/core/proxy_queue.h index eb97402eb5..12580245cf 100644 --- a/src/core/proxy_queue.h +++ b/src/core/proxy_queue.h @@ -14,10 +14,11 @@ struct HsaApiTable; namespace rocprofiler { typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* packets, uint64_t count); -typedef void (*on_submit_cb_t)(const void* packet, uint64_t count, uint64_t que_idx, void* data, hsa_amd_queue_intercept_packet_writer writer); +typedef void (*on_submit_cb_t)(const void* packet, uint64_t count, uint64_t que_idx, void* data, + hsa_amd_queue_intercept_packet_writer writer); class ProxyQueue : public Queue { - public: + public: static void InitFactory() { #ifdef ROCP_HSA_PROXY const char* type = getenv("ROCP_PROXY_QUEUE"); @@ -31,40 +32,28 @@ class ProxyQueue : public Queue { static void HsaIntercept(HsaApiTable* table); - static ProxyQueue* Create( - hsa_agent_t agent, - uint32_t size, - hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), - void *data, - uint32_t private_segment_size, - uint32_t group_segment_size, - hsa_queue_t **queue, - hsa_status_t* status); + static ProxyQueue* Create(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), + void* data, uint32_t private_segment_size, uint32_t 
group_segment_size, + hsa_queue_t** queue, hsa_status_t* status); static hsa_status_t Destroy(const ProxyQueue* obj); - virtual hsa_status_t Init( - hsa_agent_t agent, - uint32_t size, - hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), - void *data, - uint32_t private_segment_size, - uint32_t group_segment_size, - hsa_queue_t **queue - ) = 0; + virtual hsa_status_t Init(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), + void* data, uint32_t private_segment_size, uint32_t group_segment_size, + hsa_queue_t** queue) = 0; virtual hsa_status_t Cleanup() const = 0; virtual hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) = 0; virtual void Submit(const packet_t* packet) = 0; - protected: - virtual ~ProxyQueue() {}; + protected: + virtual ~ProxyQueue(){}; - private: + private: static bool rocp_type_; }; -} // namespace rocprofiler +} // namespace rocprofiler -#endif // _SRC_CORE_PROXY_QUEUE_H +#endif // _SRC_CORE_PROXY_QUEUE_H diff --git a/src/core/queue.h b/src/core/queue.h index 9d7393fdaf..567ccd9359 100644 --- a/src/core/queue.h +++ b/src/core/queue.h @@ -6,7 +6,7 @@ namespace rocprofiler { class Queue { - public: + public: Queue() {} virtual ~Queue() {} virtual void Submit(const packet_t* packet) = 0; @@ -15,6 +15,6 @@ class Queue { } }; -} // namespace rocprofiler +} // namespace rocprofiler -#endif // _SRC_CORE_QUEUE_H +#endif // _SRC_CORE_QUEUE_H diff --git a/src/core/rocprofiler.cpp b/src/core/rocprofiler.cpp index 60aa325b98..58707ce135 100644 --- a/src/core/rocprofiler.cpp +++ b/src/core/rocprofiler.cpp @@ -18,15 +18,15 @@ #define CONSTRUCTOR_API __attribute__((constructor)) #define DESTRUCTOR_API __attribute__((destructor)) -#define API_METHOD_PREFIX \ - hsa_status_t status = HSA_STATUS_SUCCESS; \ +#define API_METHOD_PREFIX \ + hsa_status_t status = HSA_STATUS_SUCCESS; \ try { - -#define API_METHOD_SUFFIX \ - 
} catch (std::exception& e) { \ - ERR_LOGGING(__FUNCTION__ << "(), " << e.what()); \ - status = rocprofiler::GetExcStatus(e); \ - } \ +#define API_METHOD_SUFFIX \ + } \ + catch (std::exception & e) { \ + ERR_LOGGING(__FUNCTION__ << "(), " << e.what()); \ + status = rocprofiler::GetExcStatus(e); \ + } \ return status; namespace rocprofiler { @@ -80,12 +80,12 @@ DESTRUCTOR_API void destructor() { hsa_status_t GetExcStatus(const std::exception& e) { const util::exception* rocprofiler_exc_ptr = dynamic_cast(&e); - return (rocprofiler_exc_ptr) ? static_cast(rocprofiler_exc_ptr->status()) : HSA_STATUS_ERROR; + return (rocprofiler_exc_ptr) ? static_cast(rocprofiler_exc_ptr->status()) + : HSA_STATUS_ERROR; } util::Logger::mutex_t util::Logger::mutex_; util::Logger* util::Logger::instance_ = NULL; - } extern "C" { @@ -98,14 +98,9 @@ PUBLIC_API hsa_status_t rocprofiler_error_string(const char** str) { } // Create new profiling context -PUBLIC_API hsa_status_t rocprofiler_open( - hsa_agent_t agent, - rocprofiler_feature_t* info, - uint32_t info_count, - rocprofiler_t** handle, - uint32_t mode, - rocprofiler_properties_t* properties) -{ +PUBLIC_API hsa_status_t rocprofiler_open(hsa_agent_t agent, rocprofiler_feature_t* info, + uint32_t info_count, rocprofiler_t** handle, uint32_t mode, + rocprofiler_properties_t* properties) { API_METHOD_PREFIX rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance(); const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent); @@ -117,7 +112,8 @@ PUBLIC_API hsa_status_t rocprofiler_open( if (mode != 0) { if (mode & ROCPROFILER_MODE_STANDALONE) { if (mode & ROCPROFILER_MODE_CREATEQUEUE) { - if (hsa_rsrc->CreateQueue(agent_info, properties->queue_depth, &(properties->queue)) == false) { + if (hsa_rsrc->CreateQueue(agent_info, properties->queue_depth, &(properties->queue)) == + false) { EXC_RAISING(HSA_STATUS_ERROR, "CreateQueue() failed"); } } @@ -127,13 +123,13 @@ PUBLIC_API hsa_status_t 
rocprofiler_open( } } - *handle = new rocprofiler::Context(agent_info, queue, info, info_count, properties->handler, properties->handler_arg); + *handle = new rocprofiler::Context(agent_info, queue, info, info_count, properties->handler, + properties->handler_arg); API_METHOD_SUFFIX } // Delete profiling info -PUBLIC_API hsa_status_t rocprofiler_close(rocprofiler_t* handle) -{ +PUBLIC_API hsa_status_t rocprofiler_close(rocprofiler_t* handle) { API_METHOD_PREFIX rocprofiler::Context* context = reinterpret_cast(handle); if (context) delete context; @@ -141,8 +137,7 @@ PUBLIC_API hsa_status_t rocprofiler_close(rocprofiler_t* handle) } // Reset context -PUBLIC_API hsa_status_t rocprofiler_reset(rocprofiler_t* handle, uint32_t group_index) -{ +PUBLIC_API hsa_status_t rocprofiler_reset(rocprofiler_t* handle, uint32_t group_index) { API_METHOD_PREFIX rocprofiler::Context* context = reinterpret_cast(handle); context->Reset(group_index); @@ -150,7 +145,8 @@ PUBLIC_API hsa_status_t rocprofiler_reset(rocprofiler_t* handle, uint32_t group_ } // Get profiling group count -PUBLIC_API hsa_status_t rocprofiler_group_count(const rocprofiler_t* handle, uint32_t* group_count) { +PUBLIC_API hsa_status_t rocprofiler_group_count(const rocprofiler_t* handle, + uint32_t* group_count) { API_METHOD_PREFIX const rocprofiler::Context* context = reinterpret_cast(handle); *group_count = context->GetGroupCount(); @@ -158,7 +154,8 @@ PUBLIC_API hsa_status_t rocprofiler_group_count(const rocprofiler_t* handle, uin } // Get profiling group for a given group index -PUBLIC_API hsa_status_t rocprofiler_get_group(rocprofiler_t* handle, uint32_t group_index, rocprofiler_group_t* group) { +PUBLIC_API hsa_status_t rocprofiler_get_group(rocprofiler_t* handle, uint32_t group_index, + rocprofiler_group_t* group) { API_METHOD_PREFIX rocprofiler::Context* context = reinterpret_cast(handle); *group = context->GetGroupInfo(group_index); @@ -220,7 +217,8 @@ PUBLIC_API hsa_status_t rocprofiler_get_metrics(const 
rocprofiler_t* handle) { } // Set kernel dispatch observer -PUBLIC_API hsa_status_t rocprofiler_set_dispatch_callback(rocprofiler_callback_t callback, void* data) { +PUBLIC_API hsa_status_t rocprofiler_set_dispatch_callback(rocprofiler_callback_t callback, + void* data) { API_METHOD_PREFIX rocprofiler::InterceptQueue::SetDispatchCB(callback, data); API_METHOD_SUFFIX @@ -234,18 +232,16 @@ PUBLIC_API hsa_status_t rocprofiler_remove_dispatch_callback() { } // Method for iterating the events output data -PUBLIC_API hsa_status_t rocprofiler_iterate_trace_data(rocprofiler_t* handle, hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) { +PUBLIC_API hsa_status_t rocprofiler_iterate_trace_data( + rocprofiler_t* handle, hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) { API_METHOD_PREFIX rocprofiler::Context* context = reinterpret_cast(handle); context->IterateTraceData(callback, data); API_METHOD_SUFFIX } -PUBLIC_API bool OnLoad( - HsaApiTable* table, - uint64_t runtime_version, - uint64_t failed_tool_count, - const char* const * failed_tool_names) { +PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names) { rocprofiler::SaveHsaApi(table); rocprofiler::ProxyQueue::InitFactory(); rocprofiler::InterceptQueue::SetTool(getenv("ROCP_TOOL_LIB")); @@ -257,8 +253,6 @@ PUBLIC_API bool OnLoad( return true; } -PUBLIC_API void OnUnload() { - rocprofiler::RestoreHsaApi(); -} +PUBLIC_API void OnUnload() { rocprofiler::RestoreHsaApi(); } -} // extern "C" +} // extern "C" diff --git a/src/core/simple_proxy_queue.cpp b/src/core/simple_proxy_queue.cpp index a4dadf4e5e..e138a042f6 100644 --- a/src/core/simple_proxy_queue.cpp +++ b/src/core/simple_proxy_queue.cpp @@ -8,4 +8,4 @@ void SimpleProxyQueue::HsaIntercept(HsaApiTable* table) { } std::map SimpleProxyQueue::queue_map_; -} // namespace rocprofiler +} // namespace rocprofiler diff --git a/src/core/simple_proxy_queue.h 
b/src/core/simple_proxy_queue.h index 9a9f6221c9..bdfbcf6cb7 100644 --- a/src/core/simple_proxy_queue.h +++ b/src/core/simple_proxy_queue.h @@ -20,13 +20,10 @@ typedef decltype(hsa_signal_t::handle) signal_handle_t; class SimpleProxyQueue : public ProxyQueue { - public: + public: static void HsaIntercept(HsaApiTable* table); - - static void SignalStore( - hsa_signal_t signal, - hsa_signal_value_t que_idx) - { + + static void SignalStore(hsa_signal_t signal, hsa_signal_value_t que_idx) { auto it = queue_map_.find(signal.handle); if (it != queue_map_.end()) { SimpleProxyQueue* instance = it->second; @@ -37,17 +34,17 @@ class SimpleProxyQueue : public ProxyQueue { // Submited packet const uint32_t idx = j & instance->queue_mask_; packet_t* packet = reinterpret_cast(instance->queue_->base_address) + idx; - if (instance->on_submit_cb_ != NULL) instance->on_submit_cb_(packet, 1, j, instance->on_submit_cb_data_, NULL); - else instance->Submit(packet); + if (instance->on_submit_cb_ != NULL) + instance->on_submit_cb_(packet, 1, j, instance->on_submit_cb_data_, NULL); + else + instance->Submit(packet); } } else { hsa_signal_store_relaxed_fn(signal, que_idx); } } - - static uint64_t LoadIndex( - const hsa_queue_t *queue) - { + + static uint64_t LoadIndex(const hsa_queue_t* queue) { uint64_t index = 0; auto it = queue_map_.find(queue->doorbell_signal.handle); if (it != queue_map_.end()) { @@ -60,10 +57,7 @@ class SimpleProxyQueue : public ProxyQueue { return index; } - static void StoreIndex( - const hsa_queue_t *queue, - uint64_t value) - { + static void StoreIndex(const hsa_queue_t* queue, uint64_t value) { auto it = queue_map_.find(queue->doorbell_signal.handle); if (it != queue_map_.end()) { SimpleProxyQueue* instance = it->second; @@ -107,31 +101,24 @@ class SimpleProxyQueue : public ProxyQueue { hsa_signal_store_relaxed_fn(doorbell_signal_, que_idx); } - SimpleProxyQueue() : - agent_info_(NULL), - queue_(NULL), - base_address_(NULL), - doorbell_signal_({}), - 
queue_index_(0), - queue_mask_(0), - submit_index_(0), - on_submit_cb_(0), - on_submit_cb_data_(0) - {} + SimpleProxyQueue() + : agent_info_(NULL), + queue_(NULL), + base_address_(NULL), + doorbell_signal_({}), + queue_index_(0), + queue_mask_(0), + submit_index_(0), + on_submit_cb_(0), + on_submit_cb_data_(0) {} ~SimpleProxyQueue() {} - private: - hsa_status_t Init( - hsa_agent_t agent, - uint32_t size, - hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), - void *data, - uint32_t private_segment_size, - uint32_t group_segment_size, - hsa_queue_t **queue) - { + private: + hsa_status_t Init(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), + void* data, uint32_t private_segment_size, uint32_t group_segment_size, + hsa_queue_t** queue) { auto status = Init(agent, size); *queue = queue_; return status; @@ -142,13 +129,14 @@ class SimpleProxyQueue : public ProxyQueue { agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent); if (agent_info_ != NULL) { if (agent_info_->dev_type == HSA_DEVICE_TYPE_GPU) { - status = hsa_queue_create_fn(agent, size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue_); + status = hsa_queue_create_fn(agent, size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, + UINT32_MAX, &queue_); if (status == HSA_STATUS_SUCCESS) { base_address_ = reinterpret_cast(queue_->base_address); doorbell_signal_ = queue_->doorbell_signal; data_array_ = calloc(size + 1, sizeof(packet_t)); uintptr_t addr = (uintptr_t)data_array_; - queue_->base_address = (void*) ((addr + align_mask_) & ~align_mask_); + queue_->base_address = (void*)((addr + align_mask_) & ~align_mask_); status = hsa_signal_create(1, 0, NULL, &(queue_->doorbell_signal)); queue_mask_ = size - 1; queue_map_[queue_->doorbell_signal.handle] = this; @@ -182,6 +170,6 @@ class SimpleProxyQueue : public ProxyQueue { void* data_array_; }; -} // namespace 
rocprofiler +} // namespace rocprofiler -#endif // _SRC_CORE_SIMPLE_PROXY_QUEUE_H +#endif // _SRC_CORE_SIMPLE_PROXY_QUEUE_H diff --git a/src/util/exception.h b/src/util/exception.h index d539a6734f..7dc7884fbd 100644 --- a/src/util/exception.h +++ b/src/util/exception.h @@ -7,24 +7,27 @@ #include #include -#define EXC_RAISING(error, stream) { \ - std::ostringstream oss; oss << __FUNCTION__ << "(), " << stream; \ - throw rocprofiler::util::exception(error, oss.str()); \ -} +#define EXC_RAISING(error, stream) \ + { \ + std::ostringstream oss; \ + oss << __FUNCTION__ << "(), " << stream; \ + throw rocprofiler::util::exception(error, oss.str()); \ + } -#define AQL_EXC_RAISING(error, stream) { \ - const char* error_string = NULL; \ - const rocprofiler::pfn_t* api = util::HsaRsrcFactory::Instance().AqlProfileApi(); \ - api->hsa_ven_amd_aqlprofile_error_string(&error_string); \ - EXC_RAISING(error, stream << ", " << error_string); \ -} +#define AQL_EXC_RAISING(error, stream) \ + { \ + const char* error_string = NULL; \ + const rocprofiler::pfn_t* api = util::HsaRsrcFactory::Instance().AqlProfileApi(); \ + api->hsa_ven_amd_aqlprofile_error_string(&error_string); \ + EXC_RAISING(error, stream << ", " << error_string); \ + } namespace rocprofiler { namespace util { class exception : public std::exception { public: - explicit exception(const uint32_t &status, const std::string& msg) : status_(status), str_(msg) {} + explicit exception(const uint32_t& status, const std::string& msg) : status_(status), str_(msg) {} const char* what() const throw() { return str_.c_str(); } uint32_t status() const throw() { return status_; } diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp index 18ce098ce2..022021e99b 100644 --- a/src/util/hsa_rsrc_factory.cpp +++ b/src/util/hsa_rsrc_factory.cpp @@ -124,11 +124,11 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, 
&agent_info->max_queue_size); agent_info->profile = hsa_profile_t(108); hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); - + // Initialize memory regions to zero agent_info->kernarg_region.handle = 0; agent_info->coarse_region.handle = 0; - + // Find and Bind Memory regions of the Gpu agent hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info); @@ -216,7 +216,8 @@ bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) // // @return bool true if successful, false otherwise // -bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue) { +bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, + hsa_queue_t** queue) { hsa_status_t status; status = hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, queue); @@ -388,5 +389,5 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL; HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; -} // namespace util -} // namespace rocprofiler +} // namespace util +} // namespace rocprofiler diff --git a/src/util/hsa_rsrc_factory.h b/src/util/hsa_rsrc_factory.h index 8b4e2227a0..d1de693d67 100644 --- a/src/util/hsa_rsrc_factory.h +++ b/src/util/hsa_rsrc_factory.h @@ -256,7 +256,7 @@ class HsaRsrcFactory { hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_api_; }; -} // namespace util -} // namespace rocprofiler +} // namespace util +} // namespace rocprofiler #endif // SRC_UTIL_HSA_RSRC_FACTORY_H_ diff --git a/src/util/logger.h b/src/util/logger.h index 1688dd735f..812fcc68d1 100644 --- a/src/util/logger.h +++ b/src/util/logger.h @@ -134,30 +134,26 @@ class Logger { } // namespace util } // namespace rocprofiler -#define ERR_LOGGING(stream) { \ - rocprofiler::util::Logger::Instance() \ - << "error: " \ - << rocprofiler::util::Logger::begm \ - << stream \ - << rocprofiler::util::Logger::endl; \ -} 
+#define ERR_LOGGING(stream) \ + { \ + rocprofiler::util::Logger::Instance() << "error: " << rocprofiler::util::Logger::begm \ + << stream << rocprofiler::util::Logger::endl; \ + } -#define INFO_LOGGING(stream) { \ - rocprofiler::util::Logger::Instance() \ - << "info: " \ - << rocprofiler::util::Logger::begm \ - << stream \ - << rocprofiler::util::Logger::endl; \ -} +#define INFO_LOGGING(stream) \ + { \ + rocprofiler::util::Logger::Instance() << "info: " << rocprofiler::util::Logger::begm << stream \ + << rocprofiler::util::Logger::endl; \ + } #ifdef DEBUG -# define DBG_LOGGING(stream) { \ - rocprofiler::util::Logger::Instance() << rocprofiler::util::Logger::begm \ - << "debug: \"" << stream << "\"" << \ - << " in " << __FUNCTION__ \ - << " at " << __FILE__ << " line " << __LINE__ \ - << rocprofiler::util::Logger::endl; \ -} +#define DBG_LOGGING(stream) \ + { \ + rocprofiler::util::Logger::Instance() << rocprofiler::util::Logger::begm << "debug: \"" \ + << stream << "\"" < < < < \ + " in " << __FUNCTION__ << " at " << __FILE__ << " line " << __LINE__ \ + << rocprofiler::util::Logger::endl; \ + } #endif #endif // SRC_UTIL_LOGGER_H_ diff --git a/src/xml/expr.h b/src/xml/expr.h index 1f450d186e..385975975a 100644 --- a/src/xml/expr.h +++ b/src/xml/expr.h @@ -13,6 +13,7 @@ class exception_t : public std::exception { public: explicit exception_t(const std::string& msg) : str_(msg) {} const char* what() const throw() { return str_.c_str(); } + protected: const std::string str_; }; @@ -21,9 +22,8 @@ typedef uint64_t args_t; typedef std::map args_map_t; class Expr; -template -class any_cache_t { - public: +template class any_cache_t { + public: virtual ~any_cache_t() {} virtual bool Lookup(const std::string& name, T& result) const = 0; }; @@ -32,9 +32,10 @@ typedef any_cache_t expr_cache_t; typedef any_cache_t args_cache_t; class bin_expr_t { - public: - static const bin_expr_t* CreateExpr(const bin_expr_t* arg1, const bin_expr_t* arg2, const char op); - static const 
bin_expr_t* CreateArg(Expr *obj, const std::string str); + public: + static const bin_expr_t* CreateExpr(const bin_expr_t* arg1, const bin_expr_t* arg2, + const char op); + static const bin_expr_t* CreateArg(Expr* obj, const std::string str); bin_expr_t() : arg1_(NULL), arg2_(NULL) {} bin_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : arg1_(arg1), arg2_(arg2) {} @@ -44,37 +45,34 @@ class bin_expr_t { std::string String() const { std::string str; - if (arg1_) { str = "(" + arg1_->String() + " " + Symbol() + " " + arg2_->String() + ")"; } - else str = Symbol(); + if (arg1_) { + str = "(" + arg1_->String() + " " + Symbol() + " " + arg2_->String() + ")"; + } else + str = Symbol(); return str; } - protected: + protected: const bin_expr_t* arg1_; const bin_expr_t* arg2_; }; class Expr { - public: - explicit Expr(const std::string& expr, const expr_cache_t* cache) : - expr_(expr), - pos_(0), - sub_count_(0), - cache_(cache) - { + public: + explicit Expr(const std::string& expr, const expr_cache_t* cache) + : expr_(expr), pos_(0), sub_count_(0), cache_(cache) { sub_vec_ = new std::vector; var_vec_ = new std::vector; tree_ = ParseExpr(); } - explicit Expr(const std::string& expr, const Expr* obj) : - expr_(expr), - pos_(0), - sub_count_(0), - cache_(obj->cache_), - sub_vec_(obj->sub_vec_), - var_vec_(obj->var_vec_) - { + explicit Expr(const std::string& expr, const Expr* obj) + : expr_(expr), + pos_(0), + sub_count_(0), + cache_(obj->cache_), + sub_vec_(obj->sub_vec_), + var_vec_(obj->var_vec_) { sub_vec_->push_back(this); tree_ = ParseExpr(); if (!SubCheck()) throw exception_t("expr '" + expr_ + "', bad parenthesis count"); @@ -95,13 +93,15 @@ class Expr { std::string Lookup(const std::string& str) const { std::string result; - if (cache_ && !(cache_->Lookup(str, result))) throw exception_t("expr '" + expr_ + "', lookup '" + str + "' failed"); + if (cache_ && !(cache_->Lookup(str, result))) + throw exception_t("expr '" + expr_ + "', lookup '" + str + "' failed"); 
return result; } void AddVar(const std::string& str) { bool found = false; - for (std::string s : *var_vec_) if (s == str) found = true; + for (std::string s : *var_vec_) + if (s == str) found = true; if (!found) var_vec_->push_back(str); } @@ -109,7 +109,7 @@ class Expr { std::string String() const { return tree_->String(); } - private: + private: const bin_expr_t* ParseExpr() { const bin_expr_t* expr = ParseArg(); while (!IsEnd()) { @@ -119,7 +119,8 @@ class Expr { Next(); SubClose(); break; - } if (IsSymb('*') || IsSymb('/')) { + } + if (IsSymb('*') || IsSymb('/')) { Next(); second_arg = ParseArg(); expr = bin_expr_t::CreateExpr(expr, second_arg, op); @@ -173,7 +174,7 @@ class Expr { } ++i; } - end: + end: return i; } std::string CutTill(const unsigned pos) { @@ -192,39 +193,44 @@ class Expr { }; class add_expr_t : public bin_expr_t { - public: + public: add_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {} args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) + arg2_->Eval(args)); } std::string Symbol() const { return "+"; } }; class sub_expr_t : public bin_expr_t { - public: + public: sub_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {} args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) - arg2_->Eval(args)); } std::string Symbol() const { return "-"; } }; class mul_expr_t : public bin_expr_t { - public: + public: mul_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {} args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) * arg2_->Eval(args)); } std::string Symbol() const { return "*"; } }; class div_expr_t : public bin_expr_t { - public: + public: div_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {} args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) / arg2_->Eval(args)); } std::string Symbol() const { return "/"; } }; class const_expr_t : public 
bin_expr_t { - public: + public: const_expr_t(const args_t value) : value_(value) {} args_t Eval(const args_cache_t&) const { return value_; } - std::string Symbol() const { std::ostringstream os; os << value_; return os.str(); } - private: + std::string Symbol() const { + std::ostringstream os; + os << value_; + return os.str(); + } + + private: const args_t value_; }; class var_expr_t : public bin_expr_t { - public: + public: var_expr_t(const std::string name) : name_(name) {} args_t Eval(const args_cache_t& args) const { args_t result = 0; @@ -232,11 +238,13 @@ class var_expr_t : public bin_expr_t { return result; } std::string Symbol() const { return name_; } - private: + + private: const std::string name_; }; -inline const bin_expr_t* bin_expr_t::CreateExpr(const bin_expr_t* arg1, const bin_expr_t* arg2, const char op) { +inline const bin_expr_t* bin_expr_t::CreateExpr(const bin_expr_t* arg1, const bin_expr_t* arg2, + const char op) { const bin_expr_t* expr = NULL; switch (op) { case '+': @@ -274,6 +282,6 @@ inline const bin_expr_t* bin_expr_t::CreateArg(Expr* obj, const std::string str) return arg; } -} // namespace xml +} // namespace xml -#endif // _SRC_XML_EXPR_H +#endif // _SRC_XML_EXPR_H diff --git a/src/xml/xml.h b/src/xml/xml.h index f1a8410ab9..c790ea618b 100644 --- a/src/xml/xml.h +++ b/src/xml/xml.h @@ -16,7 +16,7 @@ namespace xml { class Xml { - public: + public: typedef std::vector token_t; struct level_t { std::string tag; @@ -25,24 +25,20 @@ class Xml { }; typedef std::vector nodes_vec_t; - enum { - DECL_STATE, - BODY_STATE - }; + enum { DECL_STATE, BODY_STATE }; - Xml(const char* file_name) : - file_name_(file_name), - file_line_(0), - data_size_(0), - index_(0), - state_(BODY_STATE), - level_(NULL), - comment_(false) - { + Xml(const char* file_name) + : file_name_(file_name), + file_line_(0), + data_size_(0), + index_(0), + state_(BODY_STATE), + level_(NULL), + comment_(false) { AddLevel("top"); fd_ = open(file_name, O_RDONLY); - if (fd_ == 
-1) { + if (fd_ == -1) { std::cout << "XML file not found: " << file_name << std::endl; return; } @@ -54,11 +50,11 @@ class Xml { // End of file if (token.size() == 0) break; -// token_t token1 = token; -// token1.push_back('\0'); -// std::cout << "> " << &token1[0] << std::endl; + // token_t token1 = token; + // token1.push_back('\0'); + // std::cout << "> " << &token1[0] << std::endl; - switch(state_) { + switch (state_) { case BODY_STATE: if (token[0] == '<') { bool node_begin = true; @@ -69,14 +65,20 @@ class Xml { } unsigned i = ind; - while (i < token.size()) { if (token[i] == '>') break; ++i; } + while (i < token.size()) { + if (token[i] == '>') break; + ++i; + } for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]); if (i == token.size()) { - if (node_begin) state_ = DECL_STATE; - else BadFormat(token); + if (node_begin) + state_ = DECL_STATE; + else + BadFormat(token); token.push_back('\0'); - } else token[i] = '\0'; + } else + token[i] = '\0'; const char* tag = strdup(&token[ind]); if (node_begin) { @@ -88,7 +90,8 @@ class Xml { } UpLevel(); } - } else BadFormat(token); + } else + BadFormat(token); break; case DECL_STATE: if (token[0] == '>') { @@ -98,7 +101,8 @@ class Xml { } else { token.push_back('\0'); unsigned j = 0; - for (j = 0; j < token.size(); ++j) if (token[j] == '=') break; + for (j = 0; j < token.size(); ++j) + if (token[j] == '=') break; if (j == token.size()) BadFormat(token); token[j] = '\0'; const char* key = &token[0]; @@ -113,12 +117,10 @@ class Xml { } } - std::vector GetNodes(std::string global_tag) { - return map_[global_tag]; - } + std::vector GetNodes(std::string global_tag) { return map_[global_tag]; } void Print() const { - for(auto& elem : map_) { + for (auto& elem : map_) { for (auto node : elem.second) { if (node->opts.size()) { std::cout << elem.first << ":" << std::endl; @@ -130,7 +132,7 @@ class Xml { } } - private: + private: bool LineEndCheck() { bool found = false; if (buffer_[index_] == '\n') { 
@@ -153,16 +155,18 @@ class Xml { data_size_ = read(fd_, buffer_, buf_size_); if (data_size_ <= 0) break; } - if (token.empty()) while ((index_ < data_size_) && ((buffer_[index_] == ' ') || LineEndCheck())) { - ++index_; - } + if (token.empty()) + while ((index_ < data_size_) && ((buffer_[index_] == ' ') || LineEndCheck())) { + ++index_; + } while ((index_ < data_size_) && (buffer_[index_] != ' ') && !LineEndCheck()) { token.push_back(buffer_[index_++]); } if (index_ == data_size_) { index_ = 0; data_size_ = 0; - } else break; + } else + break; } return token; @@ -170,7 +174,8 @@ class Xml { void BadFormat(token_t token) { token.push_back('\0'); - std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" << &token[0] << "'" << std::endl; + std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" + << &token[0] << "'" << std::endl; exit(1); } @@ -184,7 +189,9 @@ class Xml { level_ = level; std::string global_tag; - for (level_t* level : stack_) { global_tag += level->tag + "."; } + for (level_t* level : stack_) { + global_tag += level->tag + "."; + } global_tag += tag; map_[global_tag].push_back(level_); } @@ -194,13 +201,9 @@ class Xml { stack_.pop_back(); } - std::string CurrentLevel() const { - return level_->tag; - } + std::string CurrentLevel() const { return level_->tag; } - void AddOption(const std::string& key, const std::string& value) { - level_->opts[key] = value; - } + void AddOption(const std::string& key, const std::string& value) { level_->opts[key] = value; } const char* file_name_; unsigned file_line_; diff --git a/test/ctrl/standalone_test.cpp b/test/ctrl/standalone_test.cpp index 29983ed99e..bd9c362243 100644 --- a/test/ctrl/standalone_test.cpp +++ b/test/ctrl/standalone_test.cpp @@ -10,15 +10,15 @@ #include "util/test_assert.h" int main(int argc, char** argv) { - bool ret_val = false; - // HSA status - hsa_status_t status = HSA_STATUS_ERROR; - // Profiling context - rocprofiler_t* context 
= NULL; - // Profiling properties - rocprofiler_properties_t properties; - // Number of context invocation - uint32_t invocation = 0; + bool ret_val = false; + // HSA status + hsa_status_t status = HSA_STATUS_ERROR; + // Profiling context + rocprofiler_t* context = NULL; + // Profiling properties + rocprofiler_properties_t properties; + // Number of context invocation + uint32_t invocation = 0; #if 0 // Profiling info objects @@ -29,48 +29,49 @@ int main(int argc, char** argv) { info[0].type = ROCPROFILER_TYPE_METRIC; info[0].name = "SQ_WAVES"; #else - // Profiling info objects - const unsigned info_count = 3; - rocprofiler_info_t info[info_count]; - // PMC events - memset(info, 0, sizeof(info)); - info[0].type = ROCPROFILER_TYPE_METRIC; - info[0].name = "SQ_WAVES"; - info[1].type = ROCPROFILER_TYPE_METRIC; - info[1].name = "SQ_ITEMS"; - // Tracing parameters - const unsigned parameter_count = 2; - rocprofiler_parameter_t parameters[parameter_count]; - info[2].name = "THREAD_TRACE"; - info[2].type = ROCPROFILER_TYPE_TRACE; - info[2].parameters = parameters; - info[2].parameter_count = parameter_count; - parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; - parameters[0].value = 0; - parameters[1].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; - parameters[1].value = 0; + // Profiling info objects + const unsigned info_count = 3; + rocprofiler_info_t info[info_count]; + // PMC events + memset(info, 0, sizeof(info)); + info[0].type = ROCPROFILER_TYPE_METRIC; + info[0].name = "SQ_WAVES"; + info[1].type = ROCPROFILER_TYPE_METRIC; + info[1].name = "SQ_ITEMS"; + // Tracing parameters + const unsigned parameter_count = 2; + rocprofiler_parameter_t parameters[parameter_count]; + info[2].name = "THREAD_TRACE"; + info[2].type = ROCPROFILER_TYPE_TRACE; + info[2].parameters = parameters; + info[2].parameter_count = parameter_count; + parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; + parameters[0].value = 0; 
+ parameters[1].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; + parameters[1].value = 0; #endif - // Creating profiling context - properties = {}; - properties.queue_depth = 128; - status = rocprofiler_open(TestHsa::HsaAgentId(), info, info_count, &context, ROCPROFILER_MODE_STANDALONE|ROCPROFILER_MODE_OWNQUEUE, &properties); - TEST_STATUS(status == HSA_STATUS_SUCCESS); + // Creating profiling context + properties = {}; + properties.queue_depth = 128; + status = rocprofiler_open(TestHsa::HsaAgentId(), info, info_count, &context, + ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_OWNQUEUE, &properties); + TEST_STATUS(status == HSA_STATUS_SUCCESS); - TestHsa::SetQueue(properties.queue); + TestHsa::SetQueue(properties.queue); - // Adding dispatch observer - status = rocprofiler_dispatch_observer(rocprofiler_dispatch_callback, context); - TEST_STATUS(status == HSA_STATUS_SUCCESS); + // Adding dispatch observer + status = rocprofiler_dispatch_observer(rocprofiler_dispatch_callback, context); + TEST_STATUS(status == HSA_STATUS_SUCCESS); - // Querying the number of context invocation - status = rocprofiler_invocation(context, &invocation); - TEST_STATUS(status == HSA_STATUS_SUCCESS); + // Querying the number of context invocation + status = rocprofiler_invocation(context, &invocation); + TEST_STATUS(status == HSA_STATUS_SUCCESS); - // Dispatching profiled kernel n-times to collect all counter groups data - unsigned n = 0; - while(1) { - std::cout << "> " << n << "/" << invocation << std::endl; + // Dispatching profiled kernel n-times to collect all counter groups data + unsigned n = 0; + while (1) { + std::cout << "> " << n << "/" << invocation << std::endl; #if 0 status = rocprofiler_start(context); TEST_STATUS(status == HSA_STATUS_SUCCESS); @@ -78,49 +79,49 @@ int main(int argc, char** argv) { status = rocprofiler_stop(context); TEST_STATUS(status == HSA_STATUS_SUCCESS); #else - ret_val = RunKernel(argc, argv); + ret_val = RunKernel(argc, argv); #endif 
- status = rocprofiler_sample(context); - TEST_STATUS(status == HSA_STATUS_SUCCESS); - - for (rocprofiler_info_t* p = info; p < info + info_count; ++p) { - std::cout << (p - info) << ": " << p->name; - switch (p->data.kind) { - case ROCPROFILER_DATA_KIND_INT64: - std::cout << std::dec << " result64 (" << p->data.result64 << ")" << std::endl; - break; - case ROCPROFILER_BYTES: { - const char* ptr = reinterpret_cast(p->data.result_bytes.ptr); - uint64_t size = 0; - for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) { - size = *reinterpret_cast(ptr); - const char* data = ptr + sizeof(size); - std::cout << std::endl; - std::cout << std::hex << " data (" << (void*)data << ")" << std::endl; - std::cout << std::dec << " size (" << size << ")" << std::endl; - ptr = data + size; - } - break; - } - default: - std::cout << "result kind (" << p->data.kind << ")" << std::endl; - TEST_ASSERT(false); - } - } - - ++n; - if (n < invocation) { - status = rocprofiler_next(context); - TEST_STATUS(status == HSA_STATUS_SUCCESS); - continue; - } - break; - } - - // Finishing cleanup - // Deleting profiling context will delete all allocated resources - status = rocprofiler_close(context); + status = rocprofiler_sample(context); TEST_STATUS(status == HSA_STATUS_SUCCESS); - return (ret_val) ? 
0 : 1; + for (rocprofiler_info_t* p = info; p < info + info_count; ++p) { + std::cout << (p - info) << ": " << p->name; + switch (p->data.kind) { + case ROCPROFILER_DATA_KIND_INT64: + std::cout << std::dec << " result64 (" << p->data.result64 << ")" << std::endl; + break; + case ROCPROFILER_BYTES: { + const char* ptr = reinterpret_cast(p->data.result_bytes.ptr); + uint64_t size = 0; + for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) { + size = *reinterpret_cast(ptr); + const char* data = ptr + sizeof(size); + std::cout << std::endl; + std::cout << std::hex << " data (" << (void*)data << ")" << std::endl; + std::cout << std::dec << " size (" << size << ")" << std::endl; + ptr = data + size; + } + break; + } + default: + std::cout << "result kind (" << p->data.kind << ")" << std::endl; + TEST_ASSERT(false); + } + } + + ++n; + if (n < invocation) { + status = rocprofiler_next(context); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + continue; + } + break; + } + + // Finishing cleanup + // Deleting profiling context will delete all allocated resources + status = rocprofiler_close(context); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + + return (ret_val) ? 0 : 1; } diff --git a/test/ctrl/test_aql.h b/test/ctrl/test_aql.h index 38909e6acd..4f2f65d690 100644 --- a/test/ctrl/test_aql.h +++ b/test/ctrl/test_aql.h @@ -37,7 +37,9 @@ OF THE POSSIBILITY OF SUCH DAMAGE. class TestAql { public: explicit TestAql(TestAql* t = 0) : test_(t) {} - virtual ~TestAql() { if (test_) delete test_; } + virtual ~TestAql() { + if (test_) delete test_; + } TestAql* Test() { return test_; } virtual AgentInfo* GetAgentInfo() { return (test_) ? 
test_->GetAgentInfo() : 0; } diff --git a/test/ctrl/test_hsa.cpp b/test/ctrl/test_hsa.cpp index 39d606f405..30aeb9e942 100644 --- a/test/ctrl/test_hsa.cpp +++ b/test/ctrl/test_hsa.cpp @@ -59,7 +59,7 @@ HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) { // Create an instance of Aql Queue if (hsa_queue_ == NULL) { uint32_t num_pkts = 128; - if(hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) { + if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) { hsa_queue_ = NULL; } } @@ -67,7 +67,9 @@ HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) { return hsa_rsrc_; } -void TestHsa::HsaShutdown() { if (hsa_rsrc_) hsa_rsrc_->Destroy(); } +void TestHsa::HsaShutdown() { + if (hsa_rsrc_) hsa_rsrc_->Destroy(); +} bool TestHsa::Initialize(int arg_cnt, char** arg_list) { std::clog << "TestHsa::Initialize :" << std::endl; diff --git a/test/ctrl/test_pgen_rocp.h b/test/ctrl/test_pgen_rocp.h index 06ed2edfd7..bdba66ecbf 100644 --- a/test/ctrl/test_pgen_rocp.h +++ b/test/ctrl/test_pgen_rocp.h @@ -35,8 +35,8 @@ OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "util/test_assert.h" hsa_status_t TestPGenRocpCallback(hsa_ven_amd_aqlprofile_info_type_t info_type, - hsa_ven_amd_aqlprofile_info_data_t* info_data, - void* callback_data) { + hsa_ven_amd_aqlprofile_info_data_t* info_data, + void* callback_data) { hsa_status_t status = HSA_STATUS_SUCCESS; typedef std::vector passed_data_t; reinterpret_cast(callback_data)->push_back(*info_data); diff --git a/test/ctrl/test_pmgr.cpp b/test/ctrl/test_pmgr.cpp index 87fba5676f..a78d76c754 100644 --- a/test/ctrl/test_pmgr.cpp +++ b/test/ctrl/test_pmgr.cpp @@ -48,8 +48,7 @@ bool TestPMgr::AddPacketGfx9(const packet_t* packet) { *slot = aql_packet; // After AQL packet is fully copied into queue buffer // update packet header from invalid state to valid state - auto header_atomic_ptr = - reinterpret_cast*>(&slot->header); + auto header_atomic_ptr = reinterpret_cast*>(&slot->header); header_atomic_ptr->store(header, std::memory_order_release); // Increment the write index and ring the doorbell to dispatch the kernel. 
diff --git a/test/ctrl/thr_tool.cpp b/test/ctrl/thr_tool.cpp index 86a89bae97..7359516dbb 100644 --- a/test/ctrl/thr_tool.cpp +++ b/test/ctrl/thr_tool.cpp @@ -48,11 +48,14 @@ void check_status(hsa_status_t status) { } } -unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } +unsigned align_size(unsigned size, unsigned alignment) { + return ((size + alignment - 1) & ~(alignment - 1)); +} -void print_info(FILE* file, const rocprofiler_info_t* info, const unsigned info_count, const char* str) { +void print_info(FILE* file, const rocprofiler_info_t* info, const unsigned info_count, + const char* str) { if (str) fprintf(file, "%s:\n", str); - for (unsigned i= 0; i < info_count; ++i) { + for (unsigned i = 0; i < info_count; ++i) { const rocprofiler_info_t* p = &info[i]; fprintf(file, " %s ", p->name); switch (p->data.kind) { @@ -81,29 +84,30 @@ void print_info(FILE* file, const rocprofiler_info_t* info, const unsigned info_ void print_group(FILE* file, const rocprofiler_group_t* group, const char* str) { if (str) fprintf(file, "%s:\n", str); - for (unsigned i= 0; i < group->info_count; ++i) { + for (unsigned i = 0; i < group->info_count; ++i) { print_info(file, group->info[i], 1, NULL); } } void store_context(context_entry_t context_entry) { - if(pthread_mutex_lock(&mutex) != 0) { + if (pthread_mutex_lock(&mutex) != 0) { perror("pthread_mutex_lock"); exit(1); } if ((context_array == NULL) || (context_array_index >= context_array_size)) { context_array_size *= 2; - context_array = reinterpret_cast(realloc(context_array, context_array_size * sizeof(context_entry_t))); + context_array = reinterpret_cast( + realloc(context_array, context_array_size * sizeof(context_entry_t))); } context_array_index += 1; context_array[context_array_index - 1] = context_entry; - if(pthread_mutex_unlock(&mutex) != 0) { + if (pthread_mutex_unlock(&mutex) != 0) { perror("pthread_mutex_unlock"); exit(1); } } -void dump_context(FILE *file, 
unsigned index) { +void dump_context(FILE* file, unsigned index) { hsa_status_t status = HSA_STATUS_ERROR; if (pthread_mutex_lock(&mutex) != 0) { @@ -122,7 +126,7 @@ void dump_context(FILE *file, unsigned index) { status = rocprofiler_get_group_data(group); check_status(status); - //print_group(file, group, "Group[0] data"); + // print_group(file, group, "Group[0] data"); status = rocprofiler_get_metrics(group->context); check_status(status); @@ -134,10 +138,8 @@ void dump_context(FILE *file, unsigned index) { } // Provided standard profiling callback -hsa_status_t dispatch_callback( - const rocprofiler_callback_data_t* callback_data, - void* user_data, - rocprofiler_group_t** group) { +hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* user_data, + rocprofiler_group_t** group) { hsa_status_t status = HSA_STATUS_ERROR; // Passed tool data dispatch_data_t* tool_data = reinterpret_cast(user_data); @@ -173,17 +175,30 @@ void* dumping_data(void*) { CONSTRUCTOR_API void constructor() { std::map parameters_dict; - parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET; - parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK; - parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; - parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; - parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK; + 
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2; #ifdef TOOL_THREAD int err = pthread_attr_init(&thr_attr); - if (err) { errno = err; perror("pthread_attr_init"); exit(1); } + if (err) { + errno = err; + perror("pthread_attr_init"); + exit(1); + } err = pthread_create(&thread, &thr_attr, dumping_data, NULL); - if (err) { errno = err; perror("pthread_create"); exit(1); } + if (err) { + errno = err; + perror("pthread_create"); + exit(1); + } #endif // Set output file @@ -194,7 +209,8 @@ CONSTRUCTOR_API void constructor() { perror("fopen"); exit(1); } - } else file_handle = stdout; + } else + file_handle = stdout; // Getting input const char* xml_name = getenv("ROCP_INPUT"); @@ -211,7 +227,7 @@ CONSTRUCTOR_API void constructor() { for (auto* entry : metrics_list) { const std::string entry_str = entry->opts["name"]; size_t pos1 = 0; - while(pos1 < entry_str.length()) { + while (pos1 < entry_str.length()) { const size_t pos2 = entry_str.find(",", pos1); const std::string metric_name = entry_str.substr(pos1, pos2 - pos1); metrics_vec.push_back(metric_name); @@ -224,10 +240,10 @@ CONSTRUCTOR_API void constructor() { auto traces_list = xml->GetNodes("top.trace"); const unsigned info_count = metrics_vec.size() + traces_list.size(); - rocprofiler_info_t* info= new rocprofiler_info_t[info_count]; + rocprofiler_info_t* info = new rocprofiler_info_t[info_count]; memset(info, 0, info_count * sizeof(rocprofiler_info_t)); - printf(" %d metrics\n", (int) metrics_vec.size()); + printf(" %d metrics\n", (int)metrics_vec.size()); for (unsigned i = 0; i < metrics_vec.size(); ++i) { const std::string& name = metrics_vec[i]; printf("%s%s", (i == 0) ? 
" " : ", ", name.c_str()); @@ -237,7 +253,7 @@ CONSTRUCTOR_API void constructor() { } if (metrics_vec.size()) printf("\n"); - printf(" %d traces\n", (int) traces_list.size()); + printf(" %d traces\n", (int)traces_list.size()); unsigned index = metrics_vec.size(); for (auto* entry : traces_list) { auto params_list = xml->GetNodes("top.trace.parameters"); @@ -253,7 +269,7 @@ CONSTRUCTOR_API void constructor() { for (auto* params : params_list) { const unsigned parameter_count = params->opts.size(); - rocprofiler_parameter_t *parameters = new rocprofiler_parameter_t[parameter_count]; + rocprofiler_parameter_t* parameters = new rocprofiler_parameter_t[parameter_count]; unsigned p_index = 0; for (auto& v : params->opts) { const std::string parameter_name = v.first; diff --git a/test/ctrl/tool.cpp b/test/ctrl/tool.cpp index c97be5c8b0..e7c6ee062b 100644 --- a/test/ctrl/tool.cpp +++ b/test/ctrl/tool.cpp @@ -65,14 +65,15 @@ void check_status(hsa_status_t status) { context_entry_t* alloc_context_entry() { context_entry_t* ptr = 0; - if(pthread_mutex_lock(&mutex) != 0) { + if (pthread_mutex_lock(&mutex) != 0) { perror("pthread_mutex_lock"); exit(1); } if ((context_array == NULL) || (context_array_count >= context_array_size)) { context_array_size *= 2; - context_array = reinterpret_cast(realloc(context_array, context_array_size * sizeof(context_entry_t))); + context_array = reinterpret_cast( + realloc(context_array, context_array_size * sizeof(context_entry_t))); } ptr = &context_array[context_array_count]; *ptr = {}; @@ -98,7 +99,7 @@ void dump_sqtt_trace(const uint32_t chunk, const void* data, const uint32_t& siz perror("result file fopen"); exit(1); } - + // Write the buffer in terms of shorts (16 bits) const unsigned short* ptr = reinterpret_cast(data); for (uint32_t i = 0; i < (size / sizeof(short)); ++i) { @@ -108,28 +109,30 @@ void dump_sqtt_trace(const uint32_t chunk, const void* data, const uint32_t& siz } // Trace data callback for getting trace data from GPU local 
mamory -hsa_status_t trace_data_cb( - hsa_ven_amd_aqlprofile_info_type_t info_type, - hsa_ven_amd_aqlprofile_info_data_t* info_data, - void* data) -{ +hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type, + hsa_ven_amd_aqlprofile_info_data_t* info_data, void* data) { FILE* file = reinterpret_cast(data); hsa_status_t status = HSA_STATUS_SUCCESS; if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) { - fprintf(file, " data ptr (%p), size(%u)\n", info_data->sqtt_data.ptr, info_data->sqtt_data.size); + fprintf(file, " data ptr (%p), size(%u)\n", info_data->sqtt_data.ptr, + info_data->sqtt_data.size); dump_sqtt_trace(info_data->sample_id, info_data->sqtt_data.ptr, info_data->sqtt_data.size); - } else status = HSA_STATUS_ERROR; + } else + status = HSA_STATUS_ERROR; return status; } // Align to specified alignment -unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } +unsigned align_size(unsigned size, unsigned alignment) { + return ((size + alignment - 1) & ~(alignment - 1)); +} // Output profiling results for input features -void output_results(FILE* file, const rocprofiler_feature_t* features, const unsigned feature_count, rocprofiler_t* context, const char* str) { +void output_results(FILE* file, const rocprofiler_feature_t* features, const unsigned feature_count, + rocprofiler_t* context, const char* str) { if (str) fprintf(file, "%s:\n", str); - for (unsigned i= 0; i < feature_count; ++i) { + for (unsigned i = 0; i < feature_count; ++i) { const rocprofiler_feature_t* p = &features[i]; fprintf(file, " %s ", p->name); switch (p->data.kind) { @@ -173,7 +176,7 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns // Output group intermeadate profiling results, created internally for complex metrics void output_group(FILE* file, const rocprofiler_group_t* group, const char* str) { if (str) fprintf(file, "%s:\n", str); - for (unsigned i= 0; i < 
group->feature_count; ++i) { + for (unsigned i = 0; i < group->feature_count; ++i) { output_results(file, group->features[i], 1, group->context, NULL); } } @@ -190,15 +193,15 @@ void dump_context(context_entry_t* entry) { FILE* file_handle = entry->file_handle; fprintf(file_handle, "Dispatch[%u], kernel_object(0x%lx):\n", index, entry->data.kernel_object); - + status = rocprofiler_group_get_data(&group); check_status(status); - //output_group(file, group, "Group[0] data"); - + // output_group(file, group, "Group[0] data"); + status = rocprofiler_get_metrics(group.context); check_status(status); output_results(file_handle, features, feature_count, group.context, NULL); - + // Finishing cleanup // Deleting profiling context will delete all allocated resources rocprofiler_close(group.context); @@ -240,10 +243,8 @@ void handler(rocprofiler_group_t group, void* arg) { } // Kernel disoatch callback -hsa_status_t dispatch_callback( - const rocprofiler_callback_data_t* callback_data, - void* user_data, - rocprofiler_group_t* group) { +hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* user_data, + rocprofiler_group_t* group) { // HSA status hsa_status_t status = HSA_STATUS_ERROR; // Passed tool data @@ -258,7 +259,8 @@ hsa_status_t dispatch_callback( properties.handler_arg = (void*)entry; // Open profiling context - status = rocprofiler_open(callback_data->agent, tool_data->features, tool_data->feature_count, &context, 0/*ROCPROFILER_MODE_SINGLEGROUP*/, &properties); + status = rocprofiler_open(callback_data->agent, tool_data->features, tool_data->feature_count, + &context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties); check_status(status); // Check that we have only one profiling group @@ -284,11 +286,16 @@ hsa_status_t dispatch_callback( // Tool constructor CONSTRUCTOR_API void constructor() { std::map parameters_dict; - parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] = 
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET; - parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK; - parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; - parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; - parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] = + HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2; // Set output file result_prefix = getenv("ROCP_OUTPUT_DIR"); @@ -301,7 +308,8 @@ CONSTRUCTOR_API void constructor() { perror("result file fopen"); exit(1); } - } else file_handle = stdout; + } else + file_handle = stdout; // Getting input const char* xml_name = getenv("ROCP_INPUT"); @@ -318,7 +326,7 @@ CONSTRUCTOR_API void constructor() { for (auto* entry : metrics_list) { const std::string entry_str = entry->opts["name"]; size_t pos1 = 0; - while(pos1 < entry_str.length()) { + while (pos1 < entry_str.length()) { const size_t pos2 = entry_str.find(",", pos1); const std::string metric_name = entry_str.substr(pos1, pos2 - pos1); metrics_vec.push_back(metric_name); @@ -331,10 +339,10 @@ CONSTRUCTOR_API void constructor() { auto traces_list = xml->GetNodes("top.trace"); const unsigned feature_count = metrics_vec.size() + 
traces_list.size(); - rocprofiler_feature_t* features= new rocprofiler_feature_t[feature_count]; + rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count]; memset(features, 0, feature_count * sizeof(rocprofiler_feature_t)); - printf(" %d metrics\n", (int) metrics_vec.size()); + printf(" %d metrics\n", (int)metrics_vec.size()); for (unsigned i = 0; i < metrics_vec.size(); ++i) { const std::string& name = metrics_vec[i]; printf("%s%s", (i == 0) ? " " : ", ", name.c_str()); @@ -344,7 +352,7 @@ CONSTRUCTOR_API void constructor() { } if (metrics_vec.size()) printf("\n"); - printf(" %d traces\n", (int) traces_list.size()); + printf(" %d traces\n", (int)traces_list.size()); unsigned index = metrics_vec.size(); for (auto* entry : traces_list) { auto params_list = xml->GetNodes("top.trace.parameters"); @@ -362,7 +370,7 @@ CONSTRUCTOR_API void constructor() { for (auto* params : params_list) { const unsigned parameter_count = params->opts.size(); - rocprofiler_parameter_t *parameters = new rocprofiler_parameter_t[parameter_count]; + rocprofiler_parameter_t* parameters = new rocprofiler_parameter_t[parameter_count]; unsigned p_index = 0; for (auto& v : params->opts) { const std::string parameter_name = v.first; diff --git a/test/run.sh b/test/run.sh index 2fcbefb107..d88c193f49 100755 --- a/test/run.sh +++ b/test/run.sh @@ -16,7 +16,7 @@ export ROCP_HSA_INTERCEPT=1 unset ROCP_PROXY_QUEUE export ROCP_METRICS=metrics.xml export ROCP_INPUT=input.xml -export ROCP_OUTPUT=output.txt +export ROCP_OUTPUT_DIR=./ echo "Run simple profiling test" if [ -n "$1" ] ; then diff --git a/test/util/test_assert.h b/test/util/test_assert.h index 101dc03434..a9b168375b 100644 --- a/test/util/test_assert.h +++ b/test/util/test_assert.h @@ -40,8 +40,7 @@ OF THE POSSIBILITY OF SUCH DAMAGE. 
#define TEST_STATUS(cond) \ { \ if (!(cond)) { \ - std::cerr << "Test error at " << __FILE__ << ", line " << __LINE__ \ - << std::endl; \ + std::cerr << "Test error at " << __FILE__ << ", line " << __LINE__ << std::endl; \ const char* message; \ rocprofiler_error_string(&message); \ std::cerr << "ERROR: " << message << std::endl; \ diff --git a/test/util/xml.h b/test/util/xml.h index ef84fb4e36..9fa88541e3 100644 --- a/test/util/xml.h +++ b/test/util/xml.h @@ -16,7 +16,7 @@ namespace xml { class Xml { - public: + public: typedef std::vector token_t; struct level_t { std::string tag; @@ -25,24 +25,20 @@ class Xml { }; typedef std::vector nodes_vec_t; - enum { - DECL_STATE, - BODY_STATE - }; + enum { DECL_STATE, BODY_STATE }; - Xml(const char* file_name) : - file_name_(file_name), - file_line_(0), - data_size_(0), - index_(0), - state_(BODY_STATE), - level_(NULL), - comment_(false) - { + Xml(const char* file_name) + : file_name_(file_name), + file_line_(0), + data_size_(0), + index_(0), + state_(BODY_STATE), + level_(NULL), + comment_(false) { AddLevel("top"); fd_ = open(file_name, O_RDONLY); - if (fd_ == -1) { + if (fd_ == -1) { std::cout << "XML file not found: " << file_name << std::endl; return; } @@ -54,11 +50,11 @@ class Xml { // End of file if (token.size() == 0) break; -// token_t token1 = token; -// token1.push_back('\0'); -// std::cout << "> " << &token1[0] << std::endl; + // token_t token1 = token; + // token1.push_back('\0'); + // std::cout << "> " << &token1[0] << std::endl; - switch(state_) { + switch (state_) { case BODY_STATE: if (token[0] == '<') { bool node_begin = true; @@ -69,14 +65,20 @@ class Xml { } unsigned i = ind; - while (i < token.size()) { if (token[i] == '>') break; ++i; } + while (i < token.size()) { + if (token[i] == '>') break; + ++i; + } for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]); if (i == token.size()) { - if (node_begin) state_ = DECL_STATE; - else BadFormat(token); + if (node_begin) + state_ = 
DECL_STATE; + else + BadFormat(token); token.push_back('\0'); - } else token[i] = '\0'; + } else + token[i] = '\0'; const char* tag = strdup(&token[ind]); if (node_begin) { @@ -88,7 +90,8 @@ class Xml { } UpLevel(); } - } else BadFormat(token); + } else + BadFormat(token); break; case DECL_STATE: if (token[0] == '>') { @@ -98,7 +101,8 @@ class Xml { } else { token.push_back('\0'); unsigned j = 0; - for (j = 0; j < token.size(); ++j) if (token[j] == '=') break; + for (j = 0; j < token.size(); ++j) + if (token[j] == '=') break; if (j == token.size()) BadFormat(token); token[j] = '\0'; const char* key = &token[0]; @@ -113,12 +117,10 @@ class Xml { } } - std::vector GetNodes(std::string global_tag) { - return map_[global_tag]; - } + std::vector GetNodes(std::string global_tag) { return map_[global_tag]; } void Print() const { - for(auto& elem : map_) { + for (auto& elem : map_) { for (auto node : elem.second) { if (node->opts.size()) { std::cout << elem.first << ":" << std::endl; @@ -130,7 +132,7 @@ class Xml { } } - private: + private: bool LineEndCheck() { bool found = false; if (buffer_[index_] == '\n') { @@ -153,16 +155,18 @@ class Xml { data_size_ = read(fd_, buffer_, buf_size_); if (data_size_ <= 0) break; } - if (token.empty()) while ((index_ < data_size_) && ((buffer_[index_] == ' ') || LineEndCheck())) { - ++index_; - } + if (token.empty()) + while ((index_ < data_size_) && ((buffer_[index_] == ' ') || LineEndCheck())) { + ++index_; + } while ((index_ < data_size_) && (buffer_[index_] != ' ') && !LineEndCheck()) { token.push_back(buffer_[index_++]); } if (index_ == data_size_) { index_ = 0; data_size_ = 0; - } else break; + } else + break; } return token; @@ -170,7 +174,8 @@ class Xml { void BadFormat(token_t token) { token.push_back('\0'); - std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" << &token[0] << "'" << std::endl; + std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" + << 
&token[0] << "'" << std::endl; exit(1); } @@ -184,7 +189,9 @@ class Xml { level_ = level; std::string global_tag; - for (level_t* level : stack_) { global_tag += level->tag + "."; } + for (level_t* level : stack_) { + global_tag += level->tag + "."; + } global_tag += tag; map_[global_tag].push_back(level_); } @@ -194,13 +201,9 @@ class Xml { stack_.pop_back(); } - std::string CurrentLevel() const { - return level_->tag; - } + std::string CurrentLevel() const { return level_->tag; } - void AddOption(const std::string& key, const std::string& value) { - level_->opts[key] = value; - } + void AddOption(const std::string& key, const std::string& value) { level_->opts[key] = value; } const char* file_name_; unsigned file_line_;