diff --git a/projects/roctracer/README.md b/projects/roctracer/README.md index 9cf511b4cd..1c911fb9d9 100644 --- a/projects/roctracer/README.md +++ b/projects/roctracer/README.md @@ -39,6 +39,7 @@ asyncronous activity records pool support. export CMAKE_PREFIX_PATH=/opt/rocm - Build ROCtracer + export CMAKE_BUILD_TYPE= # release by default cd /roctracer && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX=/opt/rocm .. && make -j - To build and run test diff --git a/projects/roctracer/inc/roctracer_hsa.h b/projects/roctracer/inc/roctracer_hsa.h index 924d6a6238..f7944d2a8f 100644 --- a/projects/roctracer/inc/roctracer_hsa.h +++ b/projects/roctracer/inc/roctracer_hsa.h @@ -132,23 +132,23 @@ struct output_streamer { template<> struct output_streamer { - inline static std::ostream& put(std::ostream& out, bool* v) { out << std::hex << ""; return out; } + inline static std::ostream& put(std::ostream& out, bool* v) { out << std::hex << ""; return out; } }; template<> struct output_streamer { - inline static std::ostream& put(std::ostream& out, uint8_t* v) { out << std::hex << ""; return out; } + inline static std::ostream& put(std::ostream& out, uint8_t* v) { out << std::hex << ""; return out; } }; template<> struct output_streamer { - inline static std::ostream& put(std::ostream& out, uint16_t* v) { out << std::hex << ""; return out; } + inline static std::ostream& put(std::ostream& out, uint16_t* v) { out << std::hex << ""; return out; } }; template<> struct output_streamer { - inline static std::ostream& put(std::ostream& out, uint32_t* v) { out << std::hex << ""; return out; } + inline static std::ostream& put(std::ostream& out, uint32_t* v) { out << std::hex << ""; return out; } }; template<> struct output_streamer { - inline static std::ostream& put(std::ostream& out, uint64_t* v) { out << std::hex << ""; return out; } + inline static std::ostream& put(std::ostream& out, uint64_t* v) { out << std::hex << ""; return out; } }; template<> diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index 26e5ddbcc5..d9629b9282 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -88,6 +88,8 @@ THE SOFTWARE. (void)err; \ return X; +#define onload_debug false + /////////////////////////////////////////////////////////////////////////////////////////////////// // Mark callback // @@ -470,7 +472,8 @@ void hsa_kernel_handler(::proxy::Tracker::entry_t* entry) { } fprintf(kernel_file_handle, "dispatch[%lu], gpu-id(%u), tid(%u), kernel-name(\"%s\"), time(%lu,%lu,%lu,%lu)\n", index, - ::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index, + //::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index, + entry->dev_index, entry->kernel.tid, entry->kernel.name, entry->dispatch, @@ -985,51 +988,51 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( // HSA-runtime tool on-load method PUBLIC_API bool roctracer_load(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { -// printf("LIB roctracer_load\n"); fflush(stdout); + if (onload_debug) printf("LIB roctracer_load\n"); fflush(stdout); static bool is_loaded = false; if (is_loaded) return true; is_loaded = true; -// printf("LIB roctracer_load end\n"); fflush(stdout); + if (onload_debug) printf("LIB roctracer_load end\n"); fflush(stdout); return true; } PUBLIC_API void roctracer_unload(bool destruct) { -// printf("LIB roctracer_unload\n"); fflush(stdout); + if (onload_debug) printf("LIB roctracer_unload (%d)\n", (int)destruct); fflush(stdout); static bool is_unloaded = false; if (is_unloaded) return; is_unloaded = true; - if (destruct == false) roctracer::trace_buffer.Flush(); + //if (destruct == false) roctracer::trace_buffer.Flush(); if ((roctracer::hsa_support::output_prefix != NULL) && (roctracer::kernel_file_handle != NULL)) fclose(roctracer::kernel_file_handle); -// printf("LIB roctracer_unload end\n"); fflush(stdout); + if (onload_debug) printf("LIB roctracer_unload (%d) end\n", (int)destruct); fflush(stdout); } PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { -// printf("LIB OnLoad\n"); fflush(stdout); + if (onload_debug) printf("LIB OnLoad\n"); fflush(stdout); const bool ret = roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); -// printf("LIB OnLoad end\n"); fflush(stdout); + if (onload_debug) printf("LIB OnLoad end\n"); fflush(stdout); return ret; } PUBLIC_API void OnUnload() { -// printf("LIB OnUnload\n"); fflush(stdout); + if (onload_debug) printf("LIB OnUnload\n"); fflush(stdout); roctracer_unload(false); -// printf("LIB OnUnload end\n"); fflush(stdout); + if (onload_debug) printf("LIB OnUnload end\n"); fflush(stdout); } CONSTRUCTOR_API void constructor() { -// printf("LIB constructor\n"); fflush(stdout); + if (onload_debug) printf("LIB constructor\n"); fflush(stdout); roctracer::util::Logger::Create(); -// printf("LIB constructor end\n"); fflush(stdout); + if (onload_debug) printf("LIB constructor end\n"); fflush(stdout); } DESTRUCTOR_API void destructor() { -// printf("LIB destructor\n"); fflush(stdout); + if (onload_debug) printf("LIB destructor\n"); fflush(stdout); roctracer_unload(true); util::HsaRsrcFactory::Destroy(); roctracer::util::Logger::Destroy(); -// printf("LIB destructor end\n"); fflush(stdout); + if (onload_debug) printf("LIB destructor end\n"); fflush(stdout); } } // extern "C" diff --git a/projects/roctracer/src/core/trace_buffer.h b/projects/roctracer/src/core/trace_buffer.h index 2f987218a8..fc4c32e5ed 100644 --- a/projects/roctracer/src/core/trace_buffer.h +++ b/projects/roctracer/src/core/trace_buffer.h @@ -5,6 +5,7 @@ #include #include #include +#include #define PTHREAD_CALL(call) \ do { \ @@ -37,6 +38,7 @@ struct trace_entry_t { uint64_t end; // kernel end timestamp, ns uint64_t complete; hsa_agent_t agent; + uint32_t dev_index; hsa_signal_t orig; hsa_signal_t signal; union { @@ -56,6 +58,7 @@ class TraceBuffer { typedef void (*callback_t)(Entry*); typedef TraceBuffer Obj; typedef uint64_t pointer_t; + typedef std::mutex mutex_t; struct flush_prm_t { uint32_t type; @@ -87,17 +90,19 @@ class TraceBuffer { PTHREAD_CALL(pthread_join(work_thread_, &res)); if (res != PTHREAD_CANCELED) abort_run("~TraceBuffer: consumer thread wasn't stopped correctly"); - Flush(); + flush_buf(); } Entry* GetEntry() { const pointer_t pointer = read_pointer_.fetch_add(1); if (pointer >= end_pointer_) wrap_buffer(pointer); - return data_ + pointer; + if (pointer >= end_pointer_) abort_run("pointer >= end_pointer_ after buffer wrap"); + return data_ + (pointer + size_ - end_pointer_); } void Flush() { + std::lock_guard lck(mutex_); flush_buf(); } @@ -148,6 +153,7 @@ class TraceBuffer { } void wrap_buffer(const pointer_t pointer) { + std::lock_guard lck(mutex_); PTHREAD_CALL(pthread_mutex_lock(&work_mutex_)); if (pointer >= end_pointer_) { data_ = next_; @@ -170,8 +176,8 @@ class TraceBuffer { uint32_t size_; Entry* data_; Entry* next_; - std::atomic read_pointer_; - pointer_t end_pointer_; + volatile std::atomic read_pointer_; + volatile std::atomic end_pointer_; std::list buf_list_; flush_prm_t* flush_prm_arr_; @@ -181,6 +187,8 @@ class TraceBuffer { pthread_t work_thread_; pthread_mutex_t work_mutex_; pthread_cond_t work_cond_; + + mutex_t mutex_; }; } // namespace roctracer diff --git a/projects/roctracer/src/proxy/tracker.h b/projects/roctracer/src/proxy/tracker.h index 267f811a2f..edb223b064 100644 --- a/projects/roctracer/src/proxy/tracker.h +++ b/projects/roctracer/src/proxy/tracker.h @@ -49,6 +49,7 @@ class Tracker { // Creating a new tracker entry entry->type = type; entry->agent = agent; + entry->dev_index = 0; //hsa_rsrc->GetAgentInfo(agent)->dev_index; entry->orig = signal; entry->dispatch = hsa_rsrc->TimestampNs(); entry->valid.store(roctracer::TRACE_ENTRY_INIT, std::memory_order_release); @@ -83,6 +84,7 @@ class Tracker { if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_dispatch_time"); entry->begin = hsa_rsrc->SysclockToNs(dispatch_time.start); entry->end = hsa_rsrc->SysclockToNs(dispatch_time.end); + entry->dev_index = (hsa_rsrc->GetAgentInfo(entry->agent))->dev_index; } entry->complete = hsa_rsrc->TimestampNs(); diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index 14e3dfc4e8..8f1fc11ab0 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -50,6 +50,8 @@ THE SOFTWARE. } \ } while (0) +#define onload_debug false + typedef hsa_rt_utils::Timer::timestamp_t timestamp_t; hsa_rt_utils::Timer* timer = NULL; thread_local timestamp_t hsa_begin_timestamp = 0; @@ -367,7 +369,7 @@ FILE* open_output_file(const char* prefix, const char* name) { // HSA-runtime tool on-load method extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { -// printf("TOOL OnLoad\n"); fflush(stdout); + if (onload_debug) printf("TOOL OnLoad\n"); fflush(stdout); timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn); // API traces switches @@ -498,13 +500,13 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback); } -// printf("TOOL OnLoad end\n"); fflush(stdout); + if (onload_debug) printf("TOOL OnLoad end\n"); fflush(stdout); return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); } // tool unload method void tool_unload(bool destruct) { -// printf("TOOL tool_unload\n"); fflush(stdout); + if (onload_debug) printf("TOOL tool_unload\n"); fflush(stdout); static bool is_unloaded = false; if (is_unloaded) { return; @@ -515,7 +517,7 @@ void tool_unload(bool destruct) { if (trace_hsa_api) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); - if (destruct == false) hsa_api_trace_buffer.Flush(); + // if (destruct == false) hsa_api_trace_buffer.Flush(); fclose(hsa_api_file_handle); } @@ -531,26 +533,26 @@ void tool_unload(bool destruct) { ROCTRACER_CALL(roctracer_flush_activity()); ROCTRACER_CALL(roctracer_close_pool()); - if (destruct == false) hip_api_trace_buffer.Flush(); + // if (destruct == false) hip_api_trace_buffer.Flush(); if (hip_api_file_handle != stdout) fclose(hip_api_file_handle); if (hcc_activity_file_handle != stdout) fclose(hcc_activity_file_handle); } -// printf("TOOL tool_unload end\n"); fflush(stdout); + if (onload_debug) printf("TOOL tool_unload end\n"); fflush(stdout); } // HSA-runtime on-unload method extern "C" PUBLIC_API void OnUnload() { -// printf("TOOL OnUnload\n"); fflush(stdout); + if (onload_debug) printf("TOOL OnUnload\n"); fflush(stdout); tool_unload(false); -// printf("TOOL OnUnload end\n"); fflush(stdout); + if (onload_debug) printf("TOOL OnUnload end\n"); fflush(stdout); } extern "C" CONSTRUCTOR_API void constructor() { -// printf("TOOL constructor ...end\n"); fflush(stdout); + if (onload_debug) printf("TOOL constructor ...end\n"); fflush(stdout); } extern "C" DESTRUCTOR_API void destructor() { -// printf("TOOL destructor\n"); fflush(stdout); + if (onload_debug) printf("TOOL destructor\n"); fflush(stdout); tool_unload(true); -// printf("TOOL destructor end\n"); fflush(stdout); + if (onload_debug) printf("TOOL destructor end\n"); fflush(stdout); }