diff --git a/projects/roctracer/src/proxy/tracker.h b/projects/roctracer/src/proxy/tracker.h new file mode 100644 index 0000000000..69c71551cb --- /dev/null +++ b/projects/roctracer/src/proxy/tracker.h @@ -0,0 +1,281 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef SRC_CORE_TRACKER_H_ +#define SRC_CORE_TRACKER_H_ + +#include +#include +#include +#include + +#include +#include +#include + +#include "util/hsa_rsrc_factory.h" +#include "util/exception.h" +#include "util/logger.h" + +namespace proxy { +// Dispatch record +typedef struct { + uint64_t dispatch; // dispatch timestamp, ns + uint64_t begin; // kernel begin timestamp, ns + uint64_t end; // kernel end timestamp, ns + uint64_t complete; // completion signal timestamp, ns +} async_record_t; + +class Tracker { + public: + typedef std::mutex mutex_t; + typedef util::HsaRsrcFactory::timestamp_t timestamp_t; + typedef async_record_t record_t; + struct entry_t; + typedef std::list sig_list_t; + typedef sig_list_t::iterator sig_list_it_t; + typedef uint64_t counter_t; + + struct entry_t { + counter_t index; + std::atomic valid; + Tracker* tracker; + sig_list_t::iterator it; + hsa_agent_t agent; + hsa_signal_t orig; + hsa_signal_t signal; + record_t* record; + std::atomic handler; + void* arg; + bool is_memcopy; + }; + + static Tracker* Create() { + std::lock_guard lck(glob_mutex_); + if (instance_ == NULL) instance_ = new Tracker; + return instance_; + } + + static Tracker& Instance() { + if (instance_ == NULL) instance_ = Create(); + return *instance_; + } + + static void Destroy() { + std::lock_guard lck(glob_mutex_); + if (instance_ != NULL) delete instance_; + instance_ = NULL; + } + + // Add tracker entry + entry_t* Alloc(const hsa_agent_t& agent, const hsa_signal_t& orig) { + hsa_status_t status = HSA_STATUS_ERROR; + + // Creating a new tracker entry + entry_t* entry = new entry_t{}; + assert(entry); + entry->tracker = this; + entry->agent = agent; + entry->orig = orig; + + // Creating a record with the dispatch timestamps + record_t* record = new record_t{}; + assert(record); + record->dispatch = hsa_rsrc_->TimestampNs(); + entry->record = record; + + // Creating a proxy signal + status = hsa_signal_create(1, 0, NULL, &(entry->signal)); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_signal_create"); + status = hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler"); + + // Adding antry to the list + mutex_.lock(); + entry->it = sig_list_.insert(sig_list_.end(), entry); + entry->index = counter_++; + mutex_.unlock(); + + return entry; + } + + // Delete tracker entry + void Delete(entry_t* entry) { + hsa_signal_destroy(entry->signal); + mutex_.lock(); + sig_list_.erase(entry->it); + mutex_.unlock(); + delete entry; + } + + // Enable tracker entry + void Enable(entry_t* entry, void* handler, void* arg) { + // Set entry handler and release the entry + entry->arg = arg; + entry->handler.store(handler, std::memory_order_release); + + // Debug trace + if (trace_on_) { + auto outstanding = outstanding_.fetch_add(1); + fprintf(stdout, "Tracker::Add: entry %p, record %p, outst %lu\n", entry, entry->record, outstanding); + fflush(stdout); + } + } + + void EnableDispatch(entry_t* entry, hsa_amd_signal_handler handler, void* arg) { + entry->is_memcopy = false; + Enable(entry, reinterpret_cast(handler), arg); + } + void EnableMemcopy(entry_t* entry, hsa_amd_signal_handler handler, void* arg) { + entry->is_memcopy = true; + Enable(entry, reinterpret_cast(handler), arg); + } + + private: + Tracker() : + outstanding_(0), + hsa_rsrc_(&(util::HsaRsrcFactory::Instance())) + {} + + ~Tracker() { + auto it = sig_list_.begin(); + auto end = sig_list_.end(); + while (it != end) { + auto cur = it++; + hsa_rsrc_->SignalWait((*cur)->signal); + Erase(cur); + } + } + + // Delete an entry by iterator + void Erase(const sig_list_it_t& it) { Delete(*it); } + + // Entry completion + inline void Complete(hsa_signal_value_t signal_value, entry_t* entry) { + record_t* record = entry->record; + + // Debug trace + if (trace_on_) { + auto outstanding = outstanding_.fetch_sub(1); + fprintf(stdout, "Tracker::Handler: entry %p, record %p, outst %lu\n", entry, entry->record, outstanding); + fflush(stdout); + } + + // Query begin/end and complete timestamps + if (entry->is_memcopy) { + hsa_amd_profiling_async_copy_time_t async_copy_time{}; + hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_async_copy_time"); + record->begin = hsa_rsrc_->SysclockToNs(async_copy_time.start); + record->end = hsa_rsrc_->SysclockToNs(async_copy_time.end); + } else { + hsa_amd_profiling_dispatch_time_t dispatch_time{}; + hsa_status_t status = hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_dispatch_time"); + record->begin = hsa_rsrc_->SysclockToNs(dispatch_time.start); + record->end = hsa_rsrc_->SysclockToNs(dispatch_time.end); + } + + record->complete = hsa_rsrc_->TimestampNs(); + entry->valid.store(true, std::memory_order_release); + + // Original intercepted signal completion + hsa_signal_t orig = entry->orig; + if (orig.handle) { + amd_signal_t* orig_signal_ptr = reinterpret_cast(orig.handle); + amd_signal_t* prof_signal_ptr = reinterpret_cast(entry->signal.handle); + orig_signal_ptr->start_ts = prof_signal_ptr->start_ts; + orig_signal_ptr->end_ts = prof_signal_ptr->end_ts; + + const hsa_signal_value_t new_value = hsa_signal_load_relaxed(orig) - 1; + if (signal_value != new_value) EXC_ABORT(HSA_STATUS_ERROR, "Tracker::Complete bad signal value"); + hsa_signal_store_screlease(orig, signal_value); + } + } + + inline static void HandleEntry(hsa_signal_value_t signal_value, entry_t* entry) { + // Call entry handler + void* handler = static_cast(entry->handler); + reinterpret_cast(handler)(signal_value, entry->arg); + // Delete tracker entry + entry->tracker->Delete(entry); + } + + // Handler for packet completion + static bool Handler(hsa_signal_value_t signal_value, void* arg) { + // Acquire entry + entry_t* entry = reinterpret_cast(arg); + volatile std::atomic* ptr = &entry->handler; + while (ptr->load(std::memory_order_acquire) == NULL) sched_yield(); + + // Complete entry + Tracker* tracker = entry->tracker; + tracker->Complete(signal_value, entry); + + if (ordering_enabled_ == false) { + HandleEntry(signal_value, entry); + } else { + // Acquire last entry + entry_t* back = tracker->sig_list_.back(); + volatile std::atomic* ptr = &back->handler; + while (ptr->load(std::memory_order_acquire) == NULL) sched_yield(); + + tracker->handler_mutex_.lock(); + sig_list_it_t it = tracker->sig_list_.begin(); + sig_list_it_t end = back->it; + while (it != end) { + entry = *(it++); + if (entry->valid.load(std::memory_order_acquire)) { + HandleEntry(signal_value, entry); + } else { + break; + } + } + tracker->handler_mutex_.unlock(); + } + + return false; + } + + // instance + static Tracker* instance_; + static mutex_t glob_mutex_; + static counter_t counter_; + + // Tracked signals list + sig_list_t sig_list_; + // Inter-thread synchronization + mutex_t mutex_; + mutex_t handler_mutex_; + // Outstanding dispatches + std::atomic outstanding_; + // HSA resources factory + util::HsaRsrcFactory* hsa_rsrc_; + // Handling ordering enabled + static const bool ordering_enabled_ = false; + // Enable tracing + static const bool trace_on_ = false; +}; + +} // namespace rocprofiler + +#endif // SRC_CORE_TRACKER_H_