From 156d9327cc4fe8fc9ddbbbef3610a5a81de73dad Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sat, 15 Aug 2020 02:23:43 -0500 Subject: [PATCH] flush-rate option fixed Change-Id: I74ff83ebf2a0a4ba414d30b7cb193522f44660ce --- src/core/roctracer.cpp | 3 +- src/core/trace_buffer.h | 140 ++++++++++++++++++++++---------------- src/proxy/tracker.h | 13 ++-- test/run.sh | 5 +- test/tool/tracer_tool.cpp | 55 +++++++-------- 5 files changed, 118 insertions(+), 98 deletions(-) diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index dd47bb0f0a..52f1e28bf4 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -223,7 +223,6 @@ constexpr TraceBuffer::flush_prm_t trace_buffer_prm[] = { {KERNEL_ENTRY_TYPE, hsa_kernel_handler} }; TraceBuffer* trace_buffer = NULL; -//TraceBuffer trace_buffer("HSA GPU", 0x200000, trace_buffer_prm, 2); namespace hsa_support { // callbacks table @@ -1127,7 +1126,7 @@ PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* po API_METHOD_PREFIX if (pool == NULL) pool = roctracer_default_pool(); roctracer::MemoryPool* memory_pool = reinterpret_cast(pool); - memory_pool->Flush(); + if (memory_pool != NULL) memory_pool->Flush(); roctracer::TraceBufferBase::FlushAll(); API_METHOD_SUFFIX } diff --git a/src/core/trace_buffer.h b/src/core/trace_buffer.h index 8d99404631..cb6767f2c9 100644 --- a/src/core/trace_buffer.h +++ b/src/core/trace_buffer.h @@ -36,15 +36,17 @@ enum { TRACE_ENTRY_COMPL = 2 }; -enum { - API_ENTRY_TYPE, - COPY_ENTRY_TYPE, - KERNEL_ENTRY_TYPE +enum entry_type_t { + DFLT_ENTRY_TYPE = 0, + API_ENTRY_TYPE = 1, + COPY_ENTRY_TYPE = 2, + KERNEL_ENTRY_TYPE = 3, + NUM_ENTRY_TYPE = 4 }; struct trace_entry_t { std::atomic valid; - uint32_t type; + entry_type_t type; uint64_t dispatch; uint64_t begin; // kernel begin timestamp, ns uint64_t end; // kernel end timestamp, ns @@ -67,14 +69,26 @@ struct trace_entry_t { template struct push_element_fun { T* const elem_; - void fun(T* node) { if (node->next_elem_ == NULL) node->next_elem_ = elem_; } - push_element_fun(T* elem) : elem_(elem) {} + T** prev_; + bool fun(T* node) { + if (node->priority_ > elem_->priority_) { + *prev_ = elem_; + elem_->next_elem_ = node; + } else if (node->next_elem_ == NULL) { + node->next_elem_ = elem_; + } else { + prev_ = &(node->next_elem_); + return false; + } + return true; + } + push_element_fun(T* elem, T** prev) : elem_(elem), prev_(prev) {} }; template struct call_element_fun { void (T::*fptr_)(); - void fun(T* node) { (node->*fptr_)(); } + bool fun(T* node) const { (node->*fptr_)(); return false; } call_element_fun(void (T::*f)()) : fptr_(f) {} }; @@ -89,10 +103,10 @@ struct TraceBufferBase { static void Push(TraceBufferBase* elem) { if (head_elem_ == NULL) head_elem_ = elem; - else foreach(push_element_fun(elem)); + else foreach(push_element_fun(elem, &head_elem_)); } - TraceBufferBase() : next_elem_(NULL) {} + TraceBufferBase(const uint32_t& prior) : priority_(prior), next_elem_(NULL) {} template static void foreach(const F& f_in) { @@ -101,11 +115,12 @@ struct TraceBufferBase { TraceBufferBase* p = head_elem_; while (p != NULL) { TraceBufferBase* next = p->next_elem_; - f.fun(p); + if (f.fun(p) == true) break; p = next; } } + const uint32_t priority_; TraceBufferBase* next_elem_; static TraceBufferBase* head_elem_; static mutex_t mutex_; @@ -118,35 +133,41 @@ class TraceBuffer : protected TraceBufferBase { typedef TraceBuffer Obj; typedef uint64_t pointer_t; typedef std::recursive_mutex mutex_t; + typedef typename std::list buf_list_t; + typedef typename buf_list_t::iterator buf_list_it_t; struct flush_prm_t { - uint32_t type; + entry_type_t type; callback_t fun; }; TraceBuffer(const char* name, uint32_t size, const flush_prm_t* flush_prm_arr, uint32_t flush_prm_count, uint32_t prior = 0) : - is_flushed_(false), + TraceBufferBase(prior), + size_(size), work_thread_started_(false) { name_ = strdup(name); - size_ = size; data_ = allocate_fun(); next_ = allocate_fun(); read_pointer_ = 0; + write_pointer_ = 0; end_pointer_ = size; buf_list_.push_back(data_); - flush_prm_arr_ = flush_prm_arr; - flush_prm_count_ = flush_prm_count; - - priority_ = prior; + memset(f_array_, 0, sizeof(f_array_)); + for (const flush_prm_t* prm = flush_prm_arr; prm < flush_prm_arr + flush_prm_count; prm++) { + const entry_type_t type = prm->type; + if (type >= NUM_ENTRY_TYPE) FATAL("out of f_array bounds (" << type << ")"); + if (f_array_[type] != NULL) FATAL("handler function ptr redefinition (" << type << ")"); + f_array_[type] = prm->fun; + } TraceBufferBase::Push(this); } ~TraceBuffer() { StopWorkerThread(); - FlushAll(); + Flush(); } void StartWorkerThread() { @@ -171,52 +192,52 @@ class TraceBuffer : protected TraceBufferBase { } Entry* GetEntry() { - const pointer_t pointer = read_pointer_.fetch_add(1); + const pointer_t pointer = write_pointer_.fetch_add(1); if (pointer >= end_pointer_) wrap_buffer(pointer); if (pointer >= end_pointer_) FATAL("pointer >= end_pointer_ after buffer wrap"); - return data_ + (pointer + size_ - end_pointer_); + Entry* entry = data_ + (size_ + pointer - end_pointer_); + entry->valid = TRACE_ENTRY_INV; + entry->type = DFLT_ENTRY_TYPE; + return entry; } void Flush() { flush_buf(); } - void Flush(const bool& b) { - DisableFlushing(!b); - flush_buf(); - } - void DisableFlushing(const bool& b) { is_flushed_.exchange(b, std::memory_order_acquire); } private: void flush_buf() { std::lock_guard lck(mutex_); - const bool is_flushed = is_flushed_.exchange(true, std::memory_order_acquire); - if (priority_ != 0) { - priority_ -= 1; - return; - } + pointer_t pointer = read_pointer_; + pointer_t curr_pointer = write_pointer_.load(std::memory_order_relaxed); + buf_list_it_t it = buf_list_.begin(); + buf_list_it_t end_it = buf_list_.end(); + while(it != end_it) { + Entry* buf = *it; + Entry* ptr = buf + (pointer % size_); + Entry* end_ptr = buf + size_; + while ((ptr < end_ptr) && (pointer < curr_pointer)) { + if (ptr->valid != TRACE_ENTRY_COMPL) break; - if (is_flushed == false) { - for (const flush_prm_t* prm = flush_prm_arr_; prm < flush_prm_arr_ + flush_prm_count_; prm++) { - // Flushed entries type - uint32_t type = prm->type; - // Flushing function - callback_t fun = prm->fun; - if (fun == NULL) FATAL("flush function is not set"); + entry_type_t type = ptr->type; + if (type >= NUM_ENTRY_TYPE) FATAL("out of f_array bounds (" << type << ")"); + callback_t f_ptr = f_array_[type]; + if (f_ptr == NULL) FATAL("f_ptr == NULL"); + (*f_ptr)(ptr); - pointer_t pointer = 0; - for (Entry* ptr : buf_list_) { - Entry* end = ptr + size_; - while ((ptr < end) && (pointer < read_pointer_)) { - if (ptr->type == type) { - if (ptr->valid == TRACE_ENTRY_COMPL) { - fun(ptr); - } - } - ptr++; - pointer++; - } - } + ptr++; + pointer++; } + + buf_list_it_t prev = it; + it++; + if (ptr == end_ptr) { + free_fun(*prev); + buf_list_.erase(prev); + } + if (pointer == curr_pointer) break; } + + read_pointer_ = pointer; } inline Entry* allocate_fun() { @@ -226,6 +247,10 @@ class TraceBuffer : protected TraceBufferBase { return ptr; } + inline void free_fun(void* ptr) { + free(ptr); + } + static void* allocate_worker(void* arg) { Obj* obj = (Obj*)arg; @@ -258,17 +283,14 @@ class TraceBuffer : protected TraceBufferBase { } const char* name_; - uint32_t size_; + const uint32_t size_; Entry* data_; Entry* next_; - volatile std::atomic read_pointer_; + pointer_t read_pointer_; + volatile std::atomic write_pointer_; volatile std::atomic end_pointer_; - std::list buf_list_; - - const flush_prm_t* flush_prm_arr_; - uint32_t flush_prm_count_; - uint32_t priority_; - volatile std::atomic is_flushed_; + buf_list_t buf_list_; + callback_t f_array_[NUM_ENTRY_TYPE]; pthread_t work_thread_; pthread_mutex_t work_mutex_; diff --git a/src/proxy/tracker.h b/src/proxy/tracker.h index edb223b064..dc0322bd06 100644 --- a/src/proxy/tracker.h +++ b/src/proxy/tracker.h @@ -40,9 +40,10 @@ class Tracker { public: typedef util::HsaRsrcFactory::timestamp_t timestamp_t; typedef roctracer::trace_entry_t entry_t; + typedef roctracer::entry_type_t entry_type_t; // Add tracker entry - inline static void Enable(uint32_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, entry_t* entry) { + inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, entry_t* entry) { hsa_status_t status = HSA_STATUS_ERROR; util::HsaRsrcFactory* hsa_rsrc = &(util::HsaRsrcFactory::Instance()); @@ -88,13 +89,16 @@ class Tracker { } entry->complete = hsa_rsrc->TimestampNs(); + hsa_signal_t orig = entry->orig; + hsa_signal_t signal = entry->signal; + + // Releasing completed entry entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); // Original intercepted signal completion - hsa_signal_t orig = entry->orig; if (orig.handle) { amd_signal_t* orig_signal_ptr = reinterpret_cast(orig.handle); - amd_signal_t* prof_signal_ptr = reinterpret_cast(entry->signal.handle); + amd_signal_t* prof_signal_ptr = reinterpret_cast(signal.handle); orig_signal_ptr->start_ts = prof_signal_ptr->start_ts; orig_signal_ptr->end_ts = prof_signal_ptr->end_ts; @@ -102,7 +106,7 @@ class Tracker { if (signal_value != new_value) EXC_ABORT(HSA_STATUS_ERROR, "Tracker::Complete bad signal value"); hsa_signal_store_screlease(orig, signal_value); } - hsa_signal_destroy(entry->signal); + hsa_signal_destroy(signal); } // Handler for packet completion @@ -113,7 +117,6 @@ class Tracker { // Complete entry Tracker::Complete(signal_value, entry); - return false; } }; diff --git a/test/run.sh b/test/run.sh index 962033f6a2..c5c8aa4596 100755 --- a/test/run.sh +++ b/test/run.sh @@ -79,7 +79,9 @@ eval_test() { test_runnum=$((test_runnum + 1)) eval "$cmdline" >$test_trace 2>&1 is_failed=$? - cat $test_trace + if [ $is_failed != 0 ] ; then + cat $test_trace + fi if [ $IS_CI = 1 ] ; then is_failed=0; else @@ -87,6 +89,7 @@ eval_test() { python ./test/check_trace.py -in $test_name -ck $check_trace_flag is_failed=$? if [ $is_failed != 0 ] ; then + echo "Trace checker error:" python ./test/check_trace.py -v -in $test_name -ck $check_trace_flag fi fi diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index db87d04d8f..a075bc2987 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -176,6 +176,8 @@ void* control_thr_fun(void*) { usleep(dist_us); } } + + return NULL; } // Flushing control thread @@ -204,8 +206,8 @@ void* flush_thr_fun(void*) { // rocTX annotation tracing struct roctx_trace_entry_t { - uint32_t valid; - uint32_t type; + std::atomic valid; + roctracer::entry_type_t type; uint32_t cid; timestamp_t time; uint32_t pid; @@ -215,9 +217,8 @@ struct roctx_trace_entry_t { }; void roctx_flush_cb(roctx_trace_entry_t* entry); -constexpr roctracer::TraceBuffer::flush_prm_t roctx_flush_prm[1] = {{0, roctx_flush_cb}}; +constexpr roctracer::TraceBuffer::flush_prm_t roctx_flush_prm = {roctracer::DFLT_ENTRY_TYPE, roctx_flush_cb}; roctracer::TraceBuffer* roctx_trace_buffer = NULL; -//roctracer::TraceBuffer roctx_trace_buffer("rocTX API", 0x200000, roctx_flush_prm, 1); // rocTX callback function static inline void roctx_callback_fun( @@ -233,14 +234,13 @@ static inline void roctx_callback_fun( const timestamp_t time = timer->timestamp_fn_ns(); #endif roctx_trace_entry_t* entry = roctx_trace_buffer->GetEntry(); - entry->valid = roctracer::TRACE_ENTRY_COMPL; - entry->type = 0; entry->cid = cid; entry->time = time; entry->pid = GetPid(); entry->tid = tid; entry->rid = rid; entry->message = (message != NULL) ? strdup(message) : NULL; + entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); } void roctx_api_callback( @@ -283,8 +283,8 @@ void roctx_flush_cb(roctx_trace_entry_t* entry) { // HSA API tracing struct hsa_api_trace_entry_t { - uint32_t valid; - uint32_t type; + std::atomic valid; + roctracer::entry_type_t type; uint32_t cid; timestamp_t begin; timestamp_t end; @@ -294,9 +294,8 @@ struct hsa_api_trace_entry_t { }; void hsa_api_flush_cb(hsa_api_trace_entry_t* entry); -constexpr roctracer::TraceBuffer::flush_prm_t hsa_flush_prm[1] = {{0, hsa_api_flush_cb}}; +constexpr roctracer::TraceBuffer::flush_prm_t hsa_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hsa_api_flush_cb}; roctracer::TraceBuffer* hsa_api_trace_buffer = NULL; -//roctracer::TraceBuffer hsa_api_trace_buffer("HSA API", 0x200000, hsa_flush_prm, 1); // HSA API callback function void hsa_api_callback( @@ -312,14 +311,13 @@ void hsa_api_callback( } else { const timestamp_t end_timestamp = (cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : timer->timestamp_fn_ns(); hsa_api_trace_entry_t* entry = hsa_api_trace_buffer->GetEntry(); - entry->valid = roctracer::TRACE_ENTRY_COMPL; - entry->type = 0; entry->cid = cid; entry->begin = hsa_begin_timestamp; entry->end = end_timestamp; entry->pid = GetPid(); entry->tid = GetTid(); entry->data = *data; + entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); } } @@ -343,8 +341,8 @@ void hsa_activity_callback( // HIP API tracing struct hip_api_trace_entry_t { - uint32_t valid; - uint32_t type; + std::atomic valid; + roctracer::entry_type_t type; uint32_t domain; uint32_t cid; timestamp_t begin; @@ -357,9 +355,8 @@ struct hip_api_trace_entry_t { }; void hip_api_flush_cb(hip_api_trace_entry_t* entry); -constexpr roctracer::TraceBuffer::flush_prm_t hip_api_flush_prm[1] = {{0, hip_api_flush_cb}}; +constexpr roctracer::TraceBuffer::flush_prm_t hip_api_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hip_api_flush_cb}; roctracer::TraceBuffer* hip_api_trace_buffer = NULL; -//roctracer::TraceBuffer hip_api_trace_buffer("HIP API", 0x200000, hip_api_flush_prm, 1); static inline bool is_hip_kernel_launch_api(const uint32_t& cid) { bool ret = @@ -390,8 +387,6 @@ void hip_api_callback( const timestamp_t end_timestamp = timer->timestamp_fn_ns(); hip_api_trace_entry_t* entry = hip_api_trace_buffer->GetEntry(); - entry->valid = roctracer::TRACE_ENTRY_COMPL; - entry->type = 0; entry->cid = cid; entry->domain = domain; entry->begin = hip_begin_timestamp; @@ -437,6 +432,8 @@ void hip_api_callback( } } } + + entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); } } @@ -451,8 +448,6 @@ void mark_api_callback( const timestamp_t timestamp = timer->timestamp_fn_ns(); hip_api_trace_entry_t* entry = hip_api_trace_buffer->GetEntry(); - entry->valid = roctracer::TRACE_ENTRY_COMPL; - entry->type = 0; entry->cid = 0; entry->domain = domain; entry->begin = timestamp; @@ -462,6 +457,7 @@ void mark_api_callback( entry->data = {}; entry->name = strdup(name); entry->ptr = NULL; + entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); } typedef std::map hip_kernel_map_t; @@ -567,17 +563,16 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) { // HSA API tracing struct hip_act_trace_entry_t { - uint32_t valid; - uint32_t type; + std::atomic valid; + roctracer::entry_type_t type; uint32_t kind; timestamp_t dur; uint64_t correlation_id; }; void hip_act_flush_cb(hip_act_trace_entry_t* entry); -constexpr roctracer::TraceBuffer::flush_prm_t hip_act_flush_prm[1] = {{0, hip_act_flush_cb}}; +constexpr roctracer::TraceBuffer::flush_prm_t hip_act_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hip_act_flush_cb}; roctracer::TraceBuffer* hip_act_trace_buffer = NULL; -//roctracer::TraceBuffer hip_act_trace_buffer("HIP ACT", 0x200000, hip_act_flush_prm, 1); // HIP ACT trace buffer flush callback void hip_act_flush_cb(hip_act_trace_entry_t* entry) { @@ -616,11 +611,10 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) { case ACTIVITY_DOMAIN_HCC_OPS: if (hip_memcpy_stats != NULL) { hip_act_trace_entry_t* entry = hip_act_trace_buffer->GetEntry(); - entry->valid = roctracer::TRACE_ENTRY_COMPL; - entry->type = 0; entry->kind = record->kind; entry->dur = record->end_ns - record->begin_ns; entry->correlation_id = record->correlation_id; + entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); } else { fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n", record->begin_ns, record->end_ns, @@ -801,7 +795,6 @@ void tool_unload() { // Flush tracing pool close_tracing_pool(); roctracer::TraceBufferBase::FlushAll(); - hip_act_trace_buffer->Flush(true); close_file_handles(); ONLOAD_TRACE_END(); @@ -1117,10 +1110,10 @@ extern "C" PUBLIC_API void OnUnload() { extern "C" CONSTRUCTOR_API void constructor() { ONLOAD_TRACE_BEG(); - roctx_trace_buffer = new roctracer::TraceBuffer("rocTX API", 0x200000, roctx_flush_prm, 1); - hip_api_trace_buffer = new roctracer::TraceBuffer("HIP API", 0x200000, hip_api_flush_prm, 1); - hip_act_trace_buffer = new roctracer::TraceBuffer("HIP ACT", 0x200000, hip_act_flush_prm, 1, 1); - hsa_api_trace_buffer = new roctracer::TraceBuffer("HSA API", 0x200000, hsa_flush_prm, 1); + roctx_trace_buffer = new roctracer::TraceBuffer("rocTX API", 0x200000, &roctx_flush_prm, 1); + hip_api_trace_buffer = new roctracer::TraceBuffer("HIP API", 0x200000, &hip_api_flush_prm, 1); + hip_act_trace_buffer = new roctracer::TraceBuffer("HIP ACT", 0x200000, &hip_act_flush_prm, 1, 1); + hsa_api_trace_buffer = new roctracer::TraceBuffer("HSA API", 0x200000, &hsa_flush_prm, 1); roctracer_load(); tool_load(); ONLOAD_TRACE_END();