flush-rate option fixed

Change-Id: I74ff83ebf2a0a4ba414d30b7cb193522f44660ce
Este commit está contenido en:
Evgeny
2020-08-15 02:23:43 -05:00
padre ddcb68d0a8
commit 156d9327cc
Se han modificado 5 ficheros con 118 adiciones y 98 borrados
+1 -2
Ver fichero
@@ -223,7 +223,6 @@ constexpr TraceBuffer<trace_entry_t>::flush_prm_t trace_buffer_prm[] = {
{KERNEL_ENTRY_TYPE, hsa_kernel_handler}
};
TraceBuffer<trace_entry_t>* trace_buffer = NULL;
//TraceBuffer<trace_entry_t> trace_buffer("HSA GPU", 0x200000, trace_buffer_prm, 2);
namespace hsa_support {
// callbacks table
@@ -1127,7 +1126,7 @@ PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* po
API_METHOD_PREFIX
if (pool == NULL) pool = roctracer_default_pool();
roctracer::MemoryPool* memory_pool = reinterpret_cast<roctracer::MemoryPool*>(pool);
memory_pool->Flush();
if (memory_pool != NULL) memory_pool->Flush();
roctracer::TraceBufferBase::FlushAll();
API_METHOD_SUFFIX
}
+81 -59
Ver fichero
@@ -36,15 +36,17 @@ enum {
TRACE_ENTRY_COMPL = 2
};
enum {
API_ENTRY_TYPE,
COPY_ENTRY_TYPE,
KERNEL_ENTRY_TYPE
enum entry_type_t {
DFLT_ENTRY_TYPE = 0,
API_ENTRY_TYPE = 1,
COPY_ENTRY_TYPE = 2,
KERNEL_ENTRY_TYPE = 3,
NUM_ENTRY_TYPE = 4
};
struct trace_entry_t {
std::atomic<uint32_t> valid;
uint32_t type;
entry_type_t type;
uint64_t dispatch;
uint64_t begin; // kernel begin timestamp, ns
uint64_t end; // kernel end timestamp, ns
@@ -67,14 +69,26 @@ struct trace_entry_t {
template <class T>
struct push_element_fun {
T* const elem_;
void fun(T* node) { if (node->next_elem_ == NULL) node->next_elem_ = elem_; }
push_element_fun(T* elem) : elem_(elem) {}
T** prev_;
bool fun(T* node) {
if (node->priority_ > elem_->priority_) {
*prev_ = elem_;
elem_->next_elem_ = node;
} else if (node->next_elem_ == NULL) {
node->next_elem_ = elem_;
} else {
prev_ = &(node->next_elem_);
return false;
}
return true;
}
push_element_fun(T* elem, T** prev) : elem_(elem), prev_(prev) {}
};
template <class T>
struct call_element_fun {
void (T::*fptr_)();
void fun(T* node) { (node->*fptr_)(); }
bool fun(T* node) const { (node->*fptr_)(); return false; }
call_element_fun(void (T::*f)()) : fptr_(f) {}
};
@@ -89,10 +103,10 @@ struct TraceBufferBase {
static void Push(TraceBufferBase* elem) {
if (head_elem_ == NULL) head_elem_ = elem;
else foreach(push_element_fun<TraceBufferBase>(elem));
else foreach(push_element_fun<TraceBufferBase>(elem, &head_elem_));
}
TraceBufferBase() : next_elem_(NULL) {}
TraceBufferBase(const uint32_t& prior) : priority_(prior), next_elem_(NULL) {}
template<class F>
static void foreach(const F& f_in) {
@@ -101,11 +115,12 @@ struct TraceBufferBase {
TraceBufferBase* p = head_elem_;
while (p != NULL) {
TraceBufferBase* next = p->next_elem_;
f.fun(p);
if (f.fun(p) == true) break;
p = next;
}
}
const uint32_t priority_;
TraceBufferBase* next_elem_;
static TraceBufferBase* head_elem_;
static mutex_t mutex_;
@@ -118,35 +133,41 @@ class TraceBuffer : protected TraceBufferBase {
typedef TraceBuffer<Entry> Obj;
typedef uint64_t pointer_t;
typedef std::recursive_mutex mutex_t;
typedef typename std::list<Entry*> buf_list_t;
typedef typename buf_list_t::iterator buf_list_it_t;
struct flush_prm_t {
uint32_t type;
entry_type_t type;
callback_t fun;
};
TraceBuffer(const char* name, uint32_t size, const flush_prm_t* flush_prm_arr, uint32_t flush_prm_count, uint32_t prior = 0) :
is_flushed_(false),
TraceBufferBase(prior),
size_(size),
work_thread_started_(false)
{
name_ = strdup(name);
size_ = size;
data_ = allocate_fun();
next_ = allocate_fun();
read_pointer_ = 0;
write_pointer_ = 0;
end_pointer_ = size;
buf_list_.push_back(data_);
flush_prm_arr_ = flush_prm_arr;
flush_prm_count_ = flush_prm_count;
priority_ = prior;
memset(f_array_, 0, sizeof(f_array_));
for (const flush_prm_t* prm = flush_prm_arr; prm < flush_prm_arr + flush_prm_count; prm++) {
const entry_type_t type = prm->type;
if (type >= NUM_ENTRY_TYPE) FATAL("out of f_array bounds (" << type << ")");
if (f_array_[type] != NULL) FATAL("handler function ptr redefinition (" << type << ")");
f_array_[type] = prm->fun;
}
TraceBufferBase::Push(this);
}
~TraceBuffer() {
StopWorkerThread();
FlushAll();
Flush();
}
void StartWorkerThread() {
@@ -171,52 +192,52 @@ class TraceBuffer : protected TraceBufferBase {
}
Entry* GetEntry() {
const pointer_t pointer = read_pointer_.fetch_add(1);
const pointer_t pointer = write_pointer_.fetch_add(1);
if (pointer >= end_pointer_) wrap_buffer(pointer);
if (pointer >= end_pointer_) FATAL("pointer >= end_pointer_ after buffer wrap");
return data_ + (pointer + size_ - end_pointer_);
Entry* entry = data_ + (size_ + pointer - end_pointer_);
entry->valid = TRACE_ENTRY_INV;
entry->type = DFLT_ENTRY_TYPE;
return entry;
}
void Flush() { flush_buf(); }
void Flush(const bool& b) {
DisableFlushing(!b);
flush_buf();
}
void DisableFlushing(const bool& b) { is_flushed_.exchange(b, std::memory_order_acquire); }
private:
void flush_buf() {
std::lock_guard<mutex_t> lck(mutex_);
const bool is_flushed = is_flushed_.exchange(true, std::memory_order_acquire);
if (priority_ != 0) {
priority_ -= 1;
return;
}
pointer_t pointer = read_pointer_;
pointer_t curr_pointer = write_pointer_.load(std::memory_order_relaxed);
buf_list_it_t it = buf_list_.begin();
buf_list_it_t end_it = buf_list_.end();
while(it != end_it) {
Entry* buf = *it;
Entry* ptr = buf + (pointer % size_);
Entry* end_ptr = buf + size_;
while ((ptr < end_ptr) && (pointer < curr_pointer)) {
if (ptr->valid != TRACE_ENTRY_COMPL) break;
if (is_flushed == false) {
for (const flush_prm_t* prm = flush_prm_arr_; prm < flush_prm_arr_ + flush_prm_count_; prm++) {
// Flushed entries type
uint32_t type = prm->type;
// Flushing function
callback_t fun = prm->fun;
if (fun == NULL) FATAL("flush function is not set");
entry_type_t type = ptr->type;
if (type >= NUM_ENTRY_TYPE) FATAL("out of f_array bounds (" << type << ")");
callback_t f_ptr = f_array_[type];
if (f_ptr == NULL) FATAL("f_ptr == NULL");
(*f_ptr)(ptr);
pointer_t pointer = 0;
for (Entry* ptr : buf_list_) {
Entry* end = ptr + size_;
while ((ptr < end) && (pointer < read_pointer_)) {
if (ptr->type == type) {
if (ptr->valid == TRACE_ENTRY_COMPL) {
fun(ptr);
}
}
ptr++;
pointer++;
}
}
ptr++;
pointer++;
}
buf_list_it_t prev = it;
it++;
if (ptr == end_ptr) {
free_fun(*prev);
buf_list_.erase(prev);
}
if (pointer == curr_pointer) break;
}
read_pointer_ = pointer;
}
inline Entry* allocate_fun() {
@@ -226,6 +247,10 @@ class TraceBuffer : protected TraceBufferBase {
return ptr;
}
inline void free_fun(void* ptr) {
free(ptr);
}
static void* allocate_worker(void* arg) {
Obj* obj = (Obj*)arg;
@@ -258,17 +283,14 @@ class TraceBuffer : protected TraceBufferBase {
}
const char* name_;
uint32_t size_;
const uint32_t size_;
Entry* data_;
Entry* next_;
volatile std::atomic<pointer_t> read_pointer_;
pointer_t read_pointer_;
volatile std::atomic<pointer_t> write_pointer_;
volatile std::atomic<pointer_t> end_pointer_;
std::list<Entry*> buf_list_;
const flush_prm_t* flush_prm_arr_;
uint32_t flush_prm_count_;
uint32_t priority_;
volatile std::atomic<bool> is_flushed_;
buf_list_t buf_list_;
callback_t f_array_[NUM_ENTRY_TYPE];
pthread_t work_thread_;
pthread_mutex_t work_mutex_;
+8 -5
Ver fichero
@@ -40,9 +40,10 @@ class Tracker {
public:
typedef util::HsaRsrcFactory::timestamp_t timestamp_t;
typedef roctracer::trace_entry_t entry_t;
typedef roctracer::entry_type_t entry_type_t;
// Add tracker entry
inline static void Enable(uint32_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, entry_t* entry) {
inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, entry_t* entry) {
hsa_status_t status = HSA_STATUS_ERROR;
util::HsaRsrcFactory* hsa_rsrc = &(util::HsaRsrcFactory::Instance());
@@ -88,13 +89,16 @@ class Tracker {
}
entry->complete = hsa_rsrc->TimestampNs();
hsa_signal_t orig = entry->orig;
hsa_signal_t signal = entry->signal;
// Releasing completed entry
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
// Original intercepted signal completion
hsa_signal_t orig = entry->orig;
if (orig.handle) {
amd_signal_t* orig_signal_ptr = reinterpret_cast<amd_signal_t*>(orig.handle);
amd_signal_t* prof_signal_ptr = reinterpret_cast<amd_signal_t*>(entry->signal.handle);
amd_signal_t* prof_signal_ptr = reinterpret_cast<amd_signal_t*>(signal.handle);
orig_signal_ptr->start_ts = prof_signal_ptr->start_ts;
orig_signal_ptr->end_ts = prof_signal_ptr->end_ts;
@@ -102,7 +106,7 @@ class Tracker {
if (signal_value != new_value) EXC_ABORT(HSA_STATUS_ERROR, "Tracker::Complete bad signal value");
hsa_signal_store_screlease(orig, signal_value);
}
hsa_signal_destroy(entry->signal);
hsa_signal_destroy(signal);
}
// Handler for packet completion
@@ -113,7 +117,6 @@ class Tracker {
// Complete entry
Tracker::Complete(signal_value, entry);
return false;
}
};
+4 -1
Ver fichero
@@ -79,7 +79,9 @@ eval_test() {
test_runnum=$((test_runnum + 1))
eval "$cmdline" >$test_trace 2>&1
is_failed=$?
cat $test_trace
if [ $is_failed != 0 ] ; then
cat $test_trace
fi
if [ $IS_CI = 1 ] ; then
is_failed=0;
else
@@ -87,6 +89,7 @@ eval_test() {
python ./test/check_trace.py -in $test_name -ck $check_trace_flag
is_failed=$?
if [ $is_failed != 0 ] ; then
echo "Trace checker error:"
python ./test/check_trace.py -v -in $test_name -ck $check_trace_flag
fi
fi
+24 -31
Ver fichero
@@ -176,6 +176,8 @@ void* control_thr_fun(void*) {
usleep(dist_us);
}
}
return NULL;
}
// Flushing control thread
@@ -204,8 +206,8 @@ void* flush_thr_fun(void*) {
// rocTX annotation tracing
struct roctx_trace_entry_t {
uint32_t valid;
uint32_t type;
std::atomic<uint32_t> valid;
roctracer::entry_type_t type;
uint32_t cid;
timestamp_t time;
uint32_t pid;
@@ -215,9 +217,8 @@ struct roctx_trace_entry_t {
};
void roctx_flush_cb(roctx_trace_entry_t* entry);
constexpr roctracer::TraceBuffer<roctx_trace_entry_t>::flush_prm_t roctx_flush_prm[1] = {{0, roctx_flush_cb}};
constexpr roctracer::TraceBuffer<roctx_trace_entry_t>::flush_prm_t roctx_flush_prm = {roctracer::DFLT_ENTRY_TYPE, roctx_flush_cb};
roctracer::TraceBuffer<roctx_trace_entry_t>* roctx_trace_buffer = NULL;
//roctracer::TraceBuffer<roctx_trace_entry_t> roctx_trace_buffer("rocTX API", 0x200000, roctx_flush_prm, 1);
// rocTX callback function
static inline void roctx_callback_fun(
@@ -233,14 +234,13 @@ static inline void roctx_callback_fun(
const timestamp_t time = timer->timestamp_fn_ns();
#endif
roctx_trace_entry_t* entry = roctx_trace_buffer->GetEntry();
entry->valid = roctracer::TRACE_ENTRY_COMPL;
entry->type = 0;
entry->cid = cid;
entry->time = time;
entry->pid = GetPid();
entry->tid = tid;
entry->rid = rid;
entry->message = (message != NULL) ? strdup(message) : NULL;
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
}
void roctx_api_callback(
@@ -283,8 +283,8 @@ void roctx_flush_cb(roctx_trace_entry_t* entry) {
// HSA API tracing
struct hsa_api_trace_entry_t {
uint32_t valid;
uint32_t type;
std::atomic<uint32_t> valid;
roctracer::entry_type_t type;
uint32_t cid;
timestamp_t begin;
timestamp_t end;
@@ -294,9 +294,8 @@ struct hsa_api_trace_entry_t {
};
void hsa_api_flush_cb(hsa_api_trace_entry_t* entry);
constexpr roctracer::TraceBuffer<hsa_api_trace_entry_t>::flush_prm_t hsa_flush_prm[1] = {{0, hsa_api_flush_cb}};
constexpr roctracer::TraceBuffer<hsa_api_trace_entry_t>::flush_prm_t hsa_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hsa_api_flush_cb};
roctracer::TraceBuffer<hsa_api_trace_entry_t>* hsa_api_trace_buffer = NULL;
//roctracer::TraceBuffer<hsa_api_trace_entry_t> hsa_api_trace_buffer("HSA API", 0x200000, hsa_flush_prm, 1);
// HSA API callback function
void hsa_api_callback(
@@ -312,14 +311,13 @@ void hsa_api_callback(
} else {
const timestamp_t end_timestamp = (cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : timer->timestamp_fn_ns();
hsa_api_trace_entry_t* entry = hsa_api_trace_buffer->GetEntry();
entry->valid = roctracer::TRACE_ENTRY_COMPL;
entry->type = 0;
entry->cid = cid;
entry->begin = hsa_begin_timestamp;
entry->end = end_timestamp;
entry->pid = GetPid();
entry->tid = GetTid();
entry->data = *data;
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
}
}
@@ -343,8 +341,8 @@ void hsa_activity_callback(
// HIP API tracing
struct hip_api_trace_entry_t {
uint32_t valid;
uint32_t type;
std::atomic<uint32_t> valid;
roctracer::entry_type_t type;
uint32_t domain;
uint32_t cid;
timestamp_t begin;
@@ -357,9 +355,8 @@ struct hip_api_trace_entry_t {
};
void hip_api_flush_cb(hip_api_trace_entry_t* entry);
constexpr roctracer::TraceBuffer<hip_api_trace_entry_t>::flush_prm_t hip_api_flush_prm[1] = {{0, hip_api_flush_cb}};
constexpr roctracer::TraceBuffer<hip_api_trace_entry_t>::flush_prm_t hip_api_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hip_api_flush_cb};
roctracer::TraceBuffer<hip_api_trace_entry_t>* hip_api_trace_buffer = NULL;
//roctracer::TraceBuffer<hip_api_trace_entry_t> hip_api_trace_buffer("HIP API", 0x200000, hip_api_flush_prm, 1);
static inline bool is_hip_kernel_launch_api(const uint32_t& cid) {
bool ret =
@@ -390,8 +387,6 @@ void hip_api_callback(
const timestamp_t end_timestamp = timer->timestamp_fn_ns();
hip_api_trace_entry_t* entry = hip_api_trace_buffer->GetEntry();
entry->valid = roctracer::TRACE_ENTRY_COMPL;
entry->type = 0;
entry->cid = cid;
entry->domain = domain;
entry->begin = hip_begin_timestamp;
@@ -437,6 +432,8 @@ void hip_api_callback(
}
}
}
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
}
}
@@ -451,8 +448,6 @@ void mark_api_callback(
const timestamp_t timestamp = timer->timestamp_fn_ns();
hip_api_trace_entry_t* entry = hip_api_trace_buffer->GetEntry();
entry->valid = roctracer::TRACE_ENTRY_COMPL;
entry->type = 0;
entry->cid = 0;
entry->domain = domain;
entry->begin = timestamp;
@@ -462,6 +457,7 @@ void mark_api_callback(
entry->data = {};
entry->name = strdup(name);
entry->ptr = NULL;
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
}
typedef std::map<uint64_t, const char*> hip_kernel_map_t;
@@ -567,17 +563,16 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
// HSA API tracing
struct hip_act_trace_entry_t {
uint32_t valid;
uint32_t type;
std::atomic<uint32_t> valid;
roctracer::entry_type_t type;
uint32_t kind;
timestamp_t dur;
uint64_t correlation_id;
};
void hip_act_flush_cb(hip_act_trace_entry_t* entry);
constexpr roctracer::TraceBuffer<hip_act_trace_entry_t>::flush_prm_t hip_act_flush_prm[1] = {{0, hip_act_flush_cb}};
constexpr roctracer::TraceBuffer<hip_act_trace_entry_t>::flush_prm_t hip_act_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hip_act_flush_cb};
roctracer::TraceBuffer<hip_act_trace_entry_t>* hip_act_trace_buffer = NULL;
//roctracer::TraceBuffer<hip_act_trace_entry_t> hip_act_trace_buffer("HIP ACT", 0x200000, hip_act_flush_prm, 1);
// HIP ACT trace buffer flush callback
void hip_act_flush_cb(hip_act_trace_entry_t* entry) {
@@ -616,11 +611,10 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) {
case ACTIVITY_DOMAIN_HCC_OPS:
if (hip_memcpy_stats != NULL) {
hip_act_trace_entry_t* entry = hip_act_trace_buffer->GetEntry();
entry->valid = roctracer::TRACE_ENTRY_COMPL;
entry->type = 0;
entry->kind = record->kind;
entry->dur = record->end_ns - record->begin_ns;
entry->correlation_id = record->correlation_id;
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
} else {
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n",
record->begin_ns, record->end_ns,
@@ -801,7 +795,6 @@ void tool_unload() {
// Flush tracing pool
close_tracing_pool();
roctracer::TraceBufferBase::FlushAll();
hip_act_trace_buffer->Flush(true);
close_file_handles();
ONLOAD_TRACE_END();
@@ -1117,10 +1110,10 @@ extern "C" PUBLIC_API void OnUnload() {
extern "C" CONSTRUCTOR_API void constructor() {
ONLOAD_TRACE_BEG();
roctx_trace_buffer = new roctracer::TraceBuffer<roctx_trace_entry_t>("rocTX API", 0x200000, roctx_flush_prm, 1);
hip_api_trace_buffer = new roctracer::TraceBuffer<hip_api_trace_entry_t>("HIP API", 0x200000, hip_api_flush_prm, 1);
hip_act_trace_buffer = new roctracer::TraceBuffer<hip_act_trace_entry_t>("HIP ACT", 0x200000, hip_act_flush_prm, 1, 1);
hsa_api_trace_buffer = new roctracer::TraceBuffer<hsa_api_trace_entry_t>("HSA API", 0x200000, hsa_flush_prm, 1);
roctx_trace_buffer = new roctracer::TraceBuffer<roctx_trace_entry_t>("rocTX API", 0x200000, &roctx_flush_prm, 1);
hip_api_trace_buffer = new roctracer::TraceBuffer<hip_api_trace_entry_t>("HIP API", 0x200000, &hip_api_flush_prm, 1);
hip_act_trace_buffer = new roctracer::TraceBuffer<hip_act_trace_entry_t>("HIP ACT", 0x200000, &hip_act_flush_prm, 1, 1);
hsa_api_trace_buffer = new roctracer::TraceBuffer<hsa_api_trace_entry_t>("HSA API", 0x200000, &hsa_flush_prm, 1);
roctracer_load();
tool_load();
ONLOAD_TRACE_END();