[ROCm/roctracer commit: 0c7fd6494d]
Dieser Commit ist enthalten in:
Evgeny
2019-01-22 19:22:57 -06:00
Ursprung a60563fda1
Commit e143019530
+281
Datei anzeigen
@@ -0,0 +1,281 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef SRC_CORE_TRACKER_H_
#define SRC_CORE_TRACKER_H_
#include <amd_hsa_signal.h>
#include <assert.h>
#include <hsa.h>
#include <hsa_ext_amd.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <atomic>
#include <list>
#include <memory>
#include <mutex>
#include "util/hsa_rsrc_factory.h"
#include "util/exception.h"
#include "util/logger.h"
namespace proxy {
// Dispatch record
// Timestamp record of one tracked asynchronous operation.
// Every field is expressed in nanoseconds.
struct async_record_t {
  uint64_t dispatch;  // when the operation was dispatched
  uint64_t begin;     // when execution began
  uint64_t end;       // when execution ended
  uint64_t complete;  // when the completion signal was observed
};
class Tracker {
public:
typedef std::mutex mutex_t;
typedef util::HsaRsrcFactory::timestamp_t timestamp_t;
typedef async_record_t record_t;
struct entry_t;
typedef std::list<entry_t*> sig_list_t;
typedef sig_list_t::iterator sig_list_it_t;
typedef uint64_t counter_t;
struct entry_t {
counter_t index;
std::atomic<bool> valid;
Tracker* tracker;
sig_list_t::iterator it;
hsa_agent_t agent;
hsa_signal_t orig;
hsa_signal_t signal;
record_t* record;
std::atomic<void*> handler;
void* arg;
bool is_memcopy;
};
static Tracker* Create() {
std::lock_guard<mutex_t> lck(glob_mutex_);
if (instance_ == NULL) instance_ = new Tracker;
return instance_;
}
static Tracker& Instance() {
if (instance_ == NULL) instance_ = Create();
return *instance_;
}
static void Destroy() {
std::lock_guard<mutex_t> lck(glob_mutex_);
if (instance_ != NULL) delete instance_;
instance_ = NULL;
}
// Add tracker entry
entry_t* Alloc(const hsa_agent_t& agent, const hsa_signal_t& orig) {
hsa_status_t status = HSA_STATUS_ERROR;
// Creating a new tracker entry
entry_t* entry = new entry_t{};
assert(entry);
entry->tracker = this;
entry->agent = agent;
entry->orig = orig;
// Creating a record with the dispatch timestamps
record_t* record = new record_t{};
assert(record);
record->dispatch = hsa_rsrc_->TimestampNs();
entry->record = record;
// Creating a proxy signal
status = hsa_signal_create(1, 0, NULL, &(entry->signal));
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_signal_create");
status = hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler");
// Adding antry to the list
mutex_.lock();
entry->it = sig_list_.insert(sig_list_.end(), entry);
entry->index = counter_++;
mutex_.unlock();
return entry;
}
// Delete tracker entry
void Delete(entry_t* entry) {
hsa_signal_destroy(entry->signal);
mutex_.lock();
sig_list_.erase(entry->it);
mutex_.unlock();
delete entry;
}
// Enable tracker entry
void Enable(entry_t* entry, void* handler, void* arg) {
// Set entry handler and release the entry
entry->arg = arg;
entry->handler.store(handler, std::memory_order_release);
// Debug trace
if (trace_on_) {
auto outstanding = outstanding_.fetch_add(1);
fprintf(stdout, "Tracker::Add: entry %p, record %p, outst %lu\n", entry, entry->record, outstanding);
fflush(stdout);
}
}
void EnableDispatch(entry_t* entry, hsa_amd_signal_handler handler, void* arg) {
entry->is_memcopy = false;
Enable(entry, reinterpret_cast<void*>(handler), arg);
}
void EnableMemcopy(entry_t* entry, hsa_amd_signal_handler handler, void* arg) {
entry->is_memcopy = true;
Enable(entry, reinterpret_cast<void*>(handler), arg);
}
private:
Tracker() :
outstanding_(0),
hsa_rsrc_(&(util::HsaRsrcFactory::Instance()))
{}
~Tracker() {
auto it = sig_list_.begin();
auto end = sig_list_.end();
while (it != end) {
auto cur = it++;
hsa_rsrc_->SignalWait((*cur)->signal);
Erase(cur);
}
}
// Delete an entry by iterator
void Erase(const sig_list_it_t& it) { Delete(*it); }
// Entry completion
inline void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
record_t* record = entry->record;
// Debug trace
if (trace_on_) {
auto outstanding = outstanding_.fetch_sub(1);
fprintf(stdout, "Tracker::Handler: entry %p, record %p, outst %lu\n", entry, entry->record, outstanding);
fflush(stdout);
}
// Query begin/end and complete timestamps
if (entry->is_memcopy) {
hsa_amd_profiling_async_copy_time_t async_copy_time{};
hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_async_copy_time");
record->begin = hsa_rsrc_->SysclockToNs(async_copy_time.start);
record->end = hsa_rsrc_->SysclockToNs(async_copy_time.end);
} else {
hsa_amd_profiling_dispatch_time_t dispatch_time{};
hsa_status_t status = hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_dispatch_time");
record->begin = hsa_rsrc_->SysclockToNs(dispatch_time.start);
record->end = hsa_rsrc_->SysclockToNs(dispatch_time.end);
}
record->complete = hsa_rsrc_->TimestampNs();
entry->valid.store(true, std::memory_order_release);
// Original intercepted signal completion
hsa_signal_t orig = entry->orig;
if (orig.handle) {
amd_signal_t* orig_signal_ptr = reinterpret_cast<amd_signal_t*>(orig.handle);
amd_signal_t* prof_signal_ptr = reinterpret_cast<amd_signal_t*>(entry->signal.handle);
orig_signal_ptr->start_ts = prof_signal_ptr->start_ts;
orig_signal_ptr->end_ts = prof_signal_ptr->end_ts;
const hsa_signal_value_t new_value = hsa_signal_load_relaxed(orig) - 1;
if (signal_value != new_value) EXC_ABORT(HSA_STATUS_ERROR, "Tracker::Complete bad signal value");
hsa_signal_store_screlease(orig, signal_value);
}
}
inline static void HandleEntry(hsa_signal_value_t signal_value, entry_t* entry) {
// Call entry handler
void* handler = static_cast<void*>(entry->handler);
reinterpret_cast<hsa_amd_signal_handler>(handler)(signal_value, entry->arg);
// Delete tracker entry
entry->tracker->Delete(entry);
}
// Handler for packet completion
static bool Handler(hsa_signal_value_t signal_value, void* arg) {
// Acquire entry
entry_t* entry = reinterpret_cast<entry_t*>(arg);
volatile std::atomic<void*>* ptr = &entry->handler;
while (ptr->load(std::memory_order_acquire) == NULL) sched_yield();
// Complete entry
Tracker* tracker = entry->tracker;
tracker->Complete(signal_value, entry);
if (ordering_enabled_ == false) {
HandleEntry(signal_value, entry);
} else {
// Acquire last entry
entry_t* back = tracker->sig_list_.back();
volatile std::atomic<void*>* ptr = &back->handler;
while (ptr->load(std::memory_order_acquire) == NULL) sched_yield();
tracker->handler_mutex_.lock();
sig_list_it_t it = tracker->sig_list_.begin();
sig_list_it_t end = back->it;
while (it != end) {
entry = *(it++);
if (entry->valid.load(std::memory_order_acquire)) {
HandleEntry(signal_value, entry);
} else {
break;
}
}
tracker->handler_mutex_.unlock();
}
return false;
}
// instance
static Tracker* instance_;
static mutex_t glob_mutex_;
static counter_t counter_;
// Tracked signals list
sig_list_t sig_list_;
// Inter-thread synchronization
mutex_t mutex_;
mutex_t handler_mutex_;
// Outstanding dispatches
std::atomic<uint64_t> outstanding_;
// HSA resources factory
util::HsaRsrcFactory* hsa_rsrc_;
// Handling ordering enabled
static const bool ordering_enabled_ = false;
// Enable tracing
static const bool trace_on_ = false;
};
} // namespace proxy
#endif // SRC_CORE_TRACKER_H_