tool: adding wait list
Change-Id: I6f161ddc9aef953b62e52004bd081327d34a9470
Этот коммит содержится в:
@@ -41,9 +41,11 @@ class InterceptQueue {
|
||||
group_segment_size, queue, &status);
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
|
||||
if (tracker_on_ && (tracker_ == NULL)) tracker_ = new Tracker(timeout_);
|
||||
status = hsa_amd_profiling_set_profiler_enabled(*queue, true);
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
if (tracker_on_ && (tracker_ == NULL)) {
|
||||
tracker_ = new Tracker(timeout_);
|
||||
status = hsa_amd_profiling_set_profiler_enabled(*queue, true);
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
}
|
||||
|
||||
if (!obj_map_) obj_map_ = new obj_map_t;
|
||||
InterceptQueue* obj = new InterceptQueue(agent, *queue, proxy);
|
||||
|
||||
@@ -19,7 +19,7 @@ ProxyQueue* ProxyQueue::Create(hsa_agent_t agent, uint32_t size, hsa_queue_type3
|
||||
hsa_status_t suc = HSA_STATUS_ERROR;
|
||||
#ifdef ROCP_HSA_PROXY
|
||||
ProxyQueue* instance =
|
||||
(rocp_type_) ? (ProxyQueue*)new SimpleProxyQueue() : (ProxyQueue*)new HsaProxyQueue();
|
||||
(rocp_type_) ? (ProxyQueue*) new SimpleProxyQueue() : (ProxyQueue*) new HsaProxyQueue();
|
||||
#else
|
||||
ProxyQueue* instance = new SimpleProxyQueue();
|
||||
#endif
|
||||
|
||||
@@ -144,6 +144,7 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
on_submit_cb_data_(0)
|
||||
{
|
||||
printf("ROCProfiler: SimpleProxyQueue is enabled\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
~SimpleProxyQueue() {}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#ifndef SRC_CORE_TRACKER_H_
|
||||
#define SRC_CORE_TRACKER_H_
|
||||
|
||||
#include <amd_hsa_signal.h>
|
||||
#include <assert.h>
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_amd.h>
|
||||
@@ -29,7 +30,7 @@ class Tracker {
|
||||
record_t* record;
|
||||
};
|
||||
|
||||
Tracker(uint64_t timeout = UINT64_MAX) : timeout_(timeout) {}
|
||||
Tracker(uint64_t timeout = UINT64_MAX) : timeout_(timeout), outstanding(0) {}
|
||||
~Tracker() {
|
||||
mutex_.lock();
|
||||
for (entry_t* entry : sig_list_) {
|
||||
@@ -51,6 +52,7 @@ class Tracker {
|
||||
|
||||
// Add tracker entry
|
||||
entry_t* Add(const hsa_agent_t& agent, const hsa_signal_t& orig) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
entry_t* entry = new entry_t{};
|
||||
assert(entry);
|
||||
entry->tracker = this;
|
||||
@@ -60,7 +62,7 @@ class Tracker {
|
||||
|
||||
entry->agent = agent;
|
||||
entry->orig = orig;
|
||||
hsa_status_t status = hsa_signal_create(1, 0, NULL, &(entry->signal));
|
||||
status = hsa_signal_create(1, 0, NULL, &(entry->signal));
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_signal_create");
|
||||
|
||||
record_t* record = new record_t{};
|
||||
@@ -72,6 +74,14 @@ class Tracker {
|
||||
hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler");
|
||||
|
||||
if (trace_on_) {
|
||||
mutex_.lock();
|
||||
entry->tracker->outstanding++;
|
||||
fprintf(stdout, "Tracker::Add: entry %p, record %p, outst %lu\n", entry, entry->record, entry->tracker->outstanding);
|
||||
fflush(stdout);
|
||||
mutex_.unlock();
|
||||
}
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
@@ -90,6 +100,14 @@ class Tracker {
|
||||
entry_t* entry = reinterpret_cast<entry_t*>(arg);
|
||||
record_t* record = entry->record;
|
||||
|
||||
if (trace_on_) {
|
||||
mutex_.lock();
|
||||
entry->tracker->outstanding--;
|
||||
fprintf(stdout, "Tracker::Handler: entry %p, record %p, outst %lu\n", entry, entry->record, entry->tracker->outstanding);
|
||||
fflush(stdout);
|
||||
mutex_.unlock();
|
||||
}
|
||||
|
||||
hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &record->complete);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)");
|
||||
if (record->complete == 0) EXC_RAISING(status, "hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP), time is zero");
|
||||
@@ -103,8 +121,13 @@ class Tracker {
|
||||
|
||||
hsa_signal_t orig = entry->orig;
|
||||
if (orig.handle) {
|
||||
amd_signal_t* orig_signal_ptr = reinterpret_cast<amd_signal_t*>(orig.handle);
|
||||
amd_signal_t* prof_signal_ptr = reinterpret_cast<amd_signal_t*>(entry->signal.handle);
|
||||
orig_signal_ptr->start_ts = prof_signal_ptr->start_ts;
|
||||
orig_signal_ptr->end_ts = prof_signal_ptr->end_ts;
|
||||
|
||||
const hsa_signal_value_t value = hsa_signal_load_relaxed(orig);
|
||||
hsa_signal_store_relaxed(orig, value - 1);
|
||||
hsa_signal_store_screlease(orig, value - 1);
|
||||
}
|
||||
entry->tracker->Del(entry);
|
||||
|
||||
@@ -117,6 +140,10 @@ class Tracker {
|
||||
sig_list_t sig_list_;
|
||||
// Inter-thread synchronization
|
||||
static mutex_t mutex_;
|
||||
// Outstanding dispatches
|
||||
uint64_t outstanding;
|
||||
// Enable tracing
|
||||
static const bool trace_on_ = false;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
+124
-59
@@ -8,12 +8,12 @@
|
||||
#include <dirent.h>
|
||||
#include <hsa.h>
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
@@ -40,7 +40,6 @@ struct callbacks_data_t {
|
||||
|
||||
// Context stored entry type
|
||||
struct context_entry_t {
|
||||
int valid;
|
||||
uint32_t index;
|
||||
rocprofiler_group_t group;
|
||||
rocprofiler_feature_t* features;
|
||||
@@ -49,8 +48,10 @@ struct context_entry_t {
|
||||
FILE* file_handle;
|
||||
};
|
||||
|
||||
// Enable tracing
|
||||
static const bool trace_on = false;
|
||||
// Tool is unloaded
|
||||
bool is_unloaded = false;
|
||||
bool is_loaded = false;
|
||||
// Dispatch callbacks and context handlers synchronization
|
||||
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
|
||||
// Dispatch callback data
|
||||
@@ -58,6 +59,8 @@ callbacks_data_t* callbacks_data = NULL;
|
||||
// Stored contexts array
|
||||
typedef std::map<uint32_t, context_entry_t> context_array_t;
|
||||
context_array_t* context_array = NULL;
|
||||
typedef std::list<context_entry_t*> wait_list_t;
|
||||
wait_list_t* wait_list = NULL;
|
||||
// Contexts collected count
|
||||
uint32_t context_count = 0;
|
||||
uint32_t context_collected = 0;
|
||||
@@ -106,7 +109,6 @@ context_entry_t* alloc_context_entry() {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (context_array == NULL) context_array = new context_array_t;
|
||||
const uint32_t index = context_count;
|
||||
auto ret = context_array->insert({index, context_entry_t{}});
|
||||
if (ret.second == false) {
|
||||
@@ -247,58 +249,76 @@ void output_group(FILE* file, const rocprofiler_group_t* group, const char* str)
|
||||
bool dump_context(context_entry_t* entry) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
if (entry->valid) {
|
||||
const rocprofiler_dispatch_record_t* record = entry->data.record;
|
||||
if (record) {
|
||||
if (record->complete == 0) return false;
|
||||
}
|
||||
|
||||
++context_collected;
|
||||
entry->valid = 0;
|
||||
const uint32_t index = entry->index;
|
||||
FILE* file_handle = entry->file_handle;
|
||||
const rocprofiler_feature_t* features = entry->features;
|
||||
const unsigned feature_count = entry->feature_count;
|
||||
|
||||
fprintf(file_handle, "dispatch[%u], queue_index(%lu), kernel_name(\"%s\")",
|
||||
index,
|
||||
entry->data.queue_index,
|
||||
entry->data.kernel_name);
|
||||
if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)",
|
||||
record->dispatch,
|
||||
record->begin,
|
||||
record->end,
|
||||
record->complete);
|
||||
fprintf(file_handle, "\n");
|
||||
fflush(file_handle);
|
||||
|
||||
if (record) {
|
||||
delete record;
|
||||
entry->data.record = NULL;
|
||||
}
|
||||
|
||||
rocprofiler_group_t& group = entry->group;
|
||||
if (group.context != NULL) {
|
||||
status = rocprofiler_group_get_data(&group);
|
||||
check_status(status);
|
||||
// output_group(file, group, "Group[0] data");
|
||||
|
||||
status = rocprofiler_get_metrics(group.context);
|
||||
check_status(status);
|
||||
std::ostringstream oss;
|
||||
oss << index << "__" << entry->data.kernel_name;
|
||||
output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
|
||||
free(const_cast<char*>(entry->data.kernel_name));
|
||||
|
||||
// Finishing cleanup
|
||||
// Deleting profiling context will delete all allocated resources
|
||||
rocprofiler_close(group.context);
|
||||
const rocprofiler_dispatch_record_t* record = entry->data.record;
|
||||
if (record) {
|
||||
if (record->complete == 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
++context_collected;
|
||||
const uint32_t index = entry->index;
|
||||
FILE* file_handle = entry->file_handle;
|
||||
const rocprofiler_feature_t* features = entry->features;
|
||||
const unsigned feature_count = entry->feature_count;
|
||||
|
||||
fprintf(file_handle, "dispatch[%u], queue_index(%lu), kernel_name(\"%s\")",
|
||||
index,
|
||||
entry->data.queue_index,
|
||||
entry->data.kernel_name);
|
||||
if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)",
|
||||
record->dispatch,
|
||||
record->begin,
|
||||
record->end,
|
||||
record->complete);
|
||||
fprintf(file_handle, "\n");
|
||||
fflush(file_handle);
|
||||
|
||||
if (record) {
|
||||
delete record;
|
||||
entry->data.record = NULL;
|
||||
}
|
||||
|
||||
rocprofiler_group_t& group = entry->group;
|
||||
if (group.context != NULL) {
|
||||
status = rocprofiler_group_get_data(&group);
|
||||
check_status(status);
|
||||
// output_group(file, group, "Group[0] data");
|
||||
|
||||
status = rocprofiler_get_metrics(group.context);
|
||||
check_status(status);
|
||||
std::ostringstream oss;
|
||||
oss << index << "__" << entry->data.kernel_name;
|
||||
output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
|
||||
free(const_cast<char*>(entry->data.kernel_name));
|
||||
|
||||
// Finishing cleanup
|
||||
// Deleting profiling context will delete all allocated resources
|
||||
rocprofiler_close(group.context);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Profiling completion handler
|
||||
static inline bool dump_context_entry(context_entry_t* entry) {
|
||||
const bool ret = dump_context(entry);
|
||||
if (ret) dealloc_context_entry(entry);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Dump waiting entries
|
||||
static inline void dump_wait_list() {
|
||||
auto it = wait_list->begin();
|
||||
auto end = wait_list->begin();
|
||||
while (it != end) {
|
||||
auto cur = it++;
|
||||
if (dump_context_entry(*cur)) {
|
||||
wait_list->erase(cur);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Dump all stored contexts profiling output data
|
||||
void dump_context_array() {
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
@@ -306,7 +326,16 @@ void dump_context_array() {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (context_array) for (auto& v : *context_array) dump_context(&v.second);
|
||||
if (!wait_list->empty()) dump_wait_list();
|
||||
|
||||
if (context_array) {
|
||||
auto it = context_array->begin();
|
||||
auto end = context_array->end();
|
||||
while (it != end) {
|
||||
auto cur = it++;
|
||||
dump_context_entry(&(cur->second));
|
||||
}
|
||||
}
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
@@ -317,16 +346,21 @@ void dump_context_array() {
|
||||
// Profiling completion handler
|
||||
bool handler(rocprofiler_group_t group, void* arg) {
|
||||
context_entry_t* entry = reinterpret_cast<context_entry_t*>(arg);
|
||||
bool ret = false;
|
||||
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (context_array->find(entry->index) != context_array->end()) {
|
||||
if (dump_context(entry)) dealloc_context_entry(entry);
|
||||
else ret = true;
|
||||
if (!wait_list->empty()) dump_wait_list();
|
||||
|
||||
if (!dump_context_entry(entry)) {
|
||||
wait_list->push_back(entry);
|
||||
}
|
||||
|
||||
if (trace_on) {
|
||||
fprintf(stdout, "tool::handler: context_array %d\n", (int)(context_array->size()));
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
@@ -334,7 +368,7 @@ bool handler(rocprofiler_group_t group, void* arg) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return ret;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Kernel disoatch callback
|
||||
@@ -414,7 +448,11 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
|
||||
entry->data.kernel_name = strdup(callback_data->kernel_name);
|
||||
entry->file_handle = tool_data->file_handle;
|
||||
entry->index = next_context_count();
|
||||
entry->valid = 1;
|
||||
|
||||
if (trace_on) {
|
||||
fprintf(stdout, "tool::dispatch: context_array %d\n", (int)(context_array->size()));
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
@@ -468,6 +506,17 @@ void get_xml_array(xml::Xml* xml, const std::string& tag, const std::string& fie
|
||||
// Tool constructor
|
||||
extern "C" PUBLIC_API void OnLoadTool()
|
||||
{
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
exit(1);
|
||||
}
|
||||
if (is_loaded) return;
|
||||
is_loaded = true;
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::map<std::string, hsa_ven_amd_aqlprofile_parameter_name_t> parameters_dict;
|
||||
parameters_dict["COMPUTE_UNIT_TARGET"] =
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET;
|
||||
@@ -602,6 +651,9 @@ extern "C" PUBLIC_API void OnLoadTool()
|
||||
}
|
||||
fflush(stdout);
|
||||
|
||||
// Context array aloocation
|
||||
context_array = new context_array_t;
|
||||
wait_list = new wait_list_t;
|
||||
|
||||
// Adding dispatch observer
|
||||
rocprofiler_queue_callbacks_t callbacks_ptrs{0};
|
||||
@@ -624,7 +676,16 @@ extern "C" PUBLIC_API void OnLoadTool()
|
||||
|
||||
// Tool destructor
|
||||
extern "C" PUBLIC_API void OnUnloadTool() {
|
||||
is_unloaded = true;
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
exit(1);
|
||||
}
|
||||
if (!is_loaded) return;
|
||||
is_loaded = false;
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Unregister dispatch callback
|
||||
rocprofiler_remove_queue_callbacks();
|
||||
@@ -648,8 +709,12 @@ extern "C" PUBLIC_API void OnUnloadTool() {
|
||||
kernel_string_vec = NULL;
|
||||
delete range_vec;
|
||||
range_vec = NULL;
|
||||
delete context_array;
|
||||
context_array = NULL;
|
||||
delete wait_list;
|
||||
wait_list = NULL;
|
||||
}
|
||||
|
||||
extern "C" DESTRUCTOR_API void destructor() {
|
||||
if (is_unloaded == false) OnUnloadTool();
|
||||
if (is_loaded == true) OnUnloadTool();
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user