trace buffer fix
Этот коммит содержится в:
@@ -39,6 +39,7 @@ asyncronous activity records pool support.
|
||||
export CMAKE_PREFIX_PATH=/opt/rocm
|
||||
|
||||
- Build ROCtracer
|
||||
export CMAKE_BUILD_TYPE=<debug|release> # release by default
|
||||
cd <your path>/roctracer && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX=/opt/rocm .. && make -j <nproc>
|
||||
|
||||
- To build and run test
|
||||
|
||||
@@ -132,23 +132,23 @@ struct output_streamer<uint64_t> {
|
||||
|
||||
template<>
|
||||
struct output_streamer<bool*> {
|
||||
inline static std::ostream& put(std::ostream& out, bool* v) { out << std::hex << "<bool " << "0x" << *v << std::dec << ">"; return out; }
|
||||
inline static std::ostream& put(std::ostream& out, bool* v) { out << std::hex << "<bool " << "0x" << v << std::dec << ">"; return out; }
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<uint8_t*> {
|
||||
inline static std::ostream& put(std::ostream& out, uint8_t* v) { out << std::hex << "<uint8_t " << "0x" << *v << std::dec << ">"; return out; }
|
||||
inline static std::ostream& put(std::ostream& out, uint8_t* v) { out << std::hex << "<uint8_t " << "0x" << v << std::dec << ">"; return out; }
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<uint16_t*> {
|
||||
inline static std::ostream& put(std::ostream& out, uint16_t* v) { out << std::hex << "<uint16_t " << "0x" << *v << std::dec << ">"; return out; }
|
||||
inline static std::ostream& put(std::ostream& out, uint16_t* v) { out << std::hex << "<uint16_t " << "0x" << v << std::dec << ">"; return out; }
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<uint32_t*> {
|
||||
inline static std::ostream& put(std::ostream& out, uint32_t* v) { out << std::hex << "<uint32_t " << "0x" << *v << std::dec << ">"; return out; }
|
||||
inline static std::ostream& put(std::ostream& out, uint32_t* v) { out << std::hex << "<uint32_t " << "0x" << v << std::dec << ">"; return out; }
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<uint64_t*> {
|
||||
inline static std::ostream& put(std::ostream& out, uint64_t* v) { out << std::hex << "<uint64_t " << "0x" << *v << std::dec << ">"; return out; }
|
||||
inline static std::ostream& put(std::ostream& out, uint64_t* v) { out << std::hex << "<uint64_t " << "0x" << v << std::dec << ">"; return out; }
|
||||
};
|
||||
|
||||
template<>
|
||||
|
||||
@@ -88,6 +88,8 @@ THE SOFTWARE.
|
||||
(void)err; \
|
||||
return X;
|
||||
|
||||
#define onload_debug false
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Mark callback
|
||||
//
|
||||
@@ -470,7 +472,8 @@ void hsa_kernel_handler(::proxy::Tracker::entry_t* entry) {
|
||||
}
|
||||
fprintf(kernel_file_handle, "dispatch[%lu], gpu-id(%u), tid(%u), kernel-name(\"%s\"), time(%lu,%lu,%lu,%lu)\n",
|
||||
index,
|
||||
::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index,
|
||||
//::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index,
|
||||
entry->dev_index,
|
||||
entry->kernel.tid,
|
||||
entry->kernel.name,
|
||||
entry->dispatch,
|
||||
@@ -985,51 +988,51 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
// HSA-runtime tool on-load method
|
||||
PUBLIC_API bool roctracer_load(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
// printf("LIB roctracer_load\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB roctracer_load\n"); fflush(stdout);
|
||||
static bool is_loaded = false;
|
||||
if (is_loaded) return true;
|
||||
is_loaded = true;
|
||||
|
||||
// printf("LIB roctracer_load end\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB roctracer_load end\n"); fflush(stdout);
|
||||
return true;
|
||||
}
|
||||
|
||||
PUBLIC_API void roctracer_unload(bool destruct) {
|
||||
// printf("LIB roctracer_unload\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB roctracer_unload (%d)\n", (int)destruct); fflush(stdout);
|
||||
static bool is_unloaded = false;
|
||||
if (is_unloaded) return;
|
||||
is_unloaded = true;
|
||||
|
||||
if (destruct == false) roctracer::trace_buffer.Flush();
|
||||
//if (destruct == false) roctracer::trace_buffer.Flush();
|
||||
if ((roctracer::hsa_support::output_prefix != NULL) && (roctracer::kernel_file_handle != NULL)) fclose(roctracer::kernel_file_handle);
|
||||
// printf("LIB roctracer_unload end\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB roctracer_unload (%d) end\n", (int)destruct); fflush(stdout);
|
||||
}
|
||||
|
||||
PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
// printf("LIB OnLoad\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB OnLoad\n"); fflush(stdout);
|
||||
const bool ret = roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names);
|
||||
// printf("LIB OnLoad end\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB OnLoad end\n"); fflush(stdout);
|
||||
return ret;
|
||||
}
|
||||
PUBLIC_API void OnUnload() {
|
||||
// printf("LIB OnUnload\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB OnUnload\n"); fflush(stdout);
|
||||
roctracer_unload(false);
|
||||
// printf("LIB OnUnload end\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB OnUnload end\n"); fflush(stdout);
|
||||
}
|
||||
|
||||
CONSTRUCTOR_API void constructor() {
|
||||
// printf("LIB constructor\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB constructor\n"); fflush(stdout);
|
||||
roctracer::util::Logger::Create();
|
||||
// printf("LIB constructor end\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB constructor end\n"); fflush(stdout);
|
||||
}
|
||||
|
||||
DESTRUCTOR_API void destructor() {
|
||||
// printf("LIB destructor\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB destructor\n"); fflush(stdout);
|
||||
roctracer_unload(true);
|
||||
util::HsaRsrcFactory::Destroy();
|
||||
roctracer::util::Logger::Destroy();
|
||||
// printf("LIB destructor end\n"); fflush(stdout);
|
||||
if (onload_debug) printf("LIB destructor end\n"); fflush(stdout);
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <mutex>
|
||||
#include <pthread.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define PTHREAD_CALL(call) \
|
||||
do { \
|
||||
@@ -37,6 +38,7 @@ struct trace_entry_t {
|
||||
uint64_t end; // kernel end timestamp, ns
|
||||
uint64_t complete;
|
||||
hsa_agent_t agent;
|
||||
uint32_t dev_index;
|
||||
hsa_signal_t orig;
|
||||
hsa_signal_t signal;
|
||||
union {
|
||||
@@ -56,6 +58,7 @@ class TraceBuffer {
|
||||
typedef void (*callback_t)(Entry*);
|
||||
typedef TraceBuffer<Entry> Obj;
|
||||
typedef uint64_t pointer_t;
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
struct flush_prm_t {
|
||||
uint32_t type;
|
||||
@@ -87,17 +90,19 @@ class TraceBuffer {
|
||||
PTHREAD_CALL(pthread_join(work_thread_, &res));
|
||||
if (res != PTHREAD_CANCELED) abort_run("~TraceBuffer: consumer thread wasn't stopped correctly");
|
||||
|
||||
Flush();
|
||||
flush_buf();
|
||||
}
|
||||
|
||||
|
||||
Entry* GetEntry() {
|
||||
const pointer_t pointer = read_pointer_.fetch_add(1);
|
||||
if (pointer >= end_pointer_) wrap_buffer(pointer);
|
||||
return data_ + pointer;
|
||||
if (pointer >= end_pointer_) abort_run("pointer >= end_pointer_ after buffer wrap");
|
||||
return data_ + (pointer + size_ - end_pointer_);
|
||||
}
|
||||
|
||||
void Flush() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
flush_buf();
|
||||
}
|
||||
|
||||
@@ -148,6 +153,7 @@ class TraceBuffer {
|
||||
}
|
||||
|
||||
void wrap_buffer(const pointer_t pointer) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
PTHREAD_CALL(pthread_mutex_lock(&work_mutex_));
|
||||
if (pointer >= end_pointer_) {
|
||||
data_ = next_;
|
||||
@@ -170,8 +176,8 @@ class TraceBuffer {
|
||||
uint32_t size_;
|
||||
Entry* data_;
|
||||
Entry* next_;
|
||||
std::atomic<pointer_t> read_pointer_;
|
||||
pointer_t end_pointer_;
|
||||
volatile std::atomic<pointer_t> read_pointer_;
|
||||
volatile std::atomic<pointer_t> end_pointer_;
|
||||
std::list<Entry*> buf_list_;
|
||||
|
||||
flush_prm_t* flush_prm_arr_;
|
||||
@@ -181,6 +187,8 @@ class TraceBuffer {
|
||||
pthread_t work_thread_;
|
||||
pthread_mutex_t work_mutex_;
|
||||
pthread_cond_t work_cond_;
|
||||
|
||||
mutex_t mutex_;
|
||||
};
|
||||
} // namespace roctracer
|
||||
|
||||
|
||||
@@ -49,6 +49,7 @@ class Tracker {
|
||||
// Creating a new tracker entry
|
||||
entry->type = type;
|
||||
entry->agent = agent;
|
||||
entry->dev_index = 0; //hsa_rsrc->GetAgentInfo(agent)->dev_index;
|
||||
entry->orig = signal;
|
||||
entry->dispatch = hsa_rsrc->TimestampNs();
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_INIT, std::memory_order_release);
|
||||
@@ -83,6 +84,7 @@ class Tracker {
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_dispatch_time");
|
||||
entry->begin = hsa_rsrc->SysclockToNs(dispatch_time.start);
|
||||
entry->end = hsa_rsrc->SysclockToNs(dispatch_time.end);
|
||||
entry->dev_index = (hsa_rsrc->GetAgentInfo(entry->agent))->dev_index;
|
||||
}
|
||||
|
||||
entry->complete = hsa_rsrc->TimestampNs();
|
||||
|
||||
@@ -50,6 +50,8 @@ THE SOFTWARE.
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define onload_debug false
|
||||
|
||||
typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
|
||||
hsa_rt_utils::Timer* timer = NULL;
|
||||
thread_local timestamp_t hsa_begin_timestamp = 0;
|
||||
@@ -367,7 +369,7 @@ FILE* open_output_file(const char* prefix, const char* name) {
|
||||
// HSA-runtime tool on-load method
|
||||
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
// printf("TOOL OnLoad\n"); fflush(stdout);
|
||||
if (onload_debug) printf("TOOL OnLoad\n"); fflush(stdout);
|
||||
timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn);
|
||||
|
||||
// API traces switches
|
||||
@@ -498,13 +500,13 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback);
|
||||
}
|
||||
|
||||
// printf("TOOL OnLoad end\n"); fflush(stdout);
|
||||
if (onload_debug) printf("TOOL OnLoad end\n"); fflush(stdout);
|
||||
return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names);
|
||||
}
|
||||
|
||||
// tool unload method
|
||||
void tool_unload(bool destruct) {
|
||||
// printf("TOOL tool_unload\n"); fflush(stdout);
|
||||
if (onload_debug) printf("TOOL tool_unload\n"); fflush(stdout);
|
||||
static bool is_unloaded = false;
|
||||
if (is_unloaded) {
|
||||
return;
|
||||
@@ -515,7 +517,7 @@ void tool_unload(bool destruct) {
|
||||
if (trace_hsa_api) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
|
||||
|
||||
if (destruct == false) hsa_api_trace_buffer.Flush();
|
||||
// if (destruct == false) hsa_api_trace_buffer.Flush();
|
||||
|
||||
fclose(hsa_api_file_handle);
|
||||
}
|
||||
@@ -531,26 +533,26 @@ void tool_unload(bool destruct) {
|
||||
ROCTRACER_CALL(roctracer_flush_activity());
|
||||
ROCTRACER_CALL(roctracer_close_pool());
|
||||
|
||||
if (destruct == false) hip_api_trace_buffer.Flush();
|
||||
// if (destruct == false) hip_api_trace_buffer.Flush();
|
||||
|
||||
if (hip_api_file_handle != stdout) fclose(hip_api_file_handle);
|
||||
if (hcc_activity_file_handle != stdout) fclose(hcc_activity_file_handle);
|
||||
}
|
||||
// printf("TOOL tool_unload end\n"); fflush(stdout);
|
||||
if (onload_debug) printf("TOOL tool_unload end\n"); fflush(stdout);
|
||||
}
|
||||
|
||||
// HSA-runtime on-unload method
|
||||
extern "C" PUBLIC_API void OnUnload() {
|
||||
// printf("TOOL OnUnload\n"); fflush(stdout);
|
||||
if (onload_debug) printf("TOOL OnUnload\n"); fflush(stdout);
|
||||
tool_unload(false);
|
||||
// printf("TOOL OnUnload end\n"); fflush(stdout);
|
||||
if (onload_debug) printf("TOOL OnUnload end\n"); fflush(stdout);
|
||||
}
|
||||
|
||||
extern "C" CONSTRUCTOR_API void constructor() {
|
||||
// printf("TOOL constructor ...end\n"); fflush(stdout);
|
||||
if (onload_debug) printf("TOOL constructor ...end\n"); fflush(stdout);
|
||||
}
|
||||
extern "C" DESTRUCTOR_API void destructor() {
|
||||
// printf("TOOL destructor\n"); fflush(stdout);
|
||||
if (onload_debug) printf("TOOL destructor\n"); fflush(stdout);
|
||||
tool_unload(true);
|
||||
// printf("TOOL destructor end\n"); fflush(stdout);
|
||||
if (onload_debug) printf("TOOL destructor end\n"); fflush(stdout);
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user