From 5b2c27cccd84024d0ff008bf28f534b83f30fcc8 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Tue, 24 May 2022 18:35:33 -0500 Subject: [PATCH] Minor updates for transpose, timemory submodule, roctracer, and omnitrace exe (#4) * transpose usage message * timemory submodule update * roctracer updates - Changes to verbosity of roctracer::shutdown - protect_flush_activity prevents deadlock when error in callback * Removed linking to timemory-cxx in omnitrace - omnitrace exe does not link to `timemory-cxx` target --- examples/transpose/transpose.cpp | 12 ++++++ external/timemory | 2 +- source/bin/omnitrace/CMakeLists.txt | 2 +- source/bin/omnitrace/omnitrace.cpp | 2 + .../library/components/roctracer.cpp | 43 +++++++++++++++++-- .../library/components/roctracer.hpp | 5 +++ .../components/roctracer_callbacks.cpp | 22 +++++++--- 7 files changed, 76 insertions(+), 12 deletions(-) diff --git a/examples/transpose/transpose.cpp b/examples/transpose/transpose.cpp index e5a82f9752..6eb161a931 100644 --- a/examples/transpose/transpose.cpp +++ b/examples/transpose/transpose.cpp @@ -187,6 +187,18 @@ main(int argc, char** argv) int nthreads = 2; int nitr = 5000; size_t nsync = 10; + for(int i = 1; i < argc; ++i) + { + auto _arg = std::string{ argv[i] }; + if(_arg == "?" 
|| _arg == "-h" || _arg == "--help") + { + fprintf(stderr, + "usage: transpose [NUM_THREADS (%i)] [NUM_ITERATION (%i)] " + "[SYNC_EVERY_N_ITERATIONS (%zu)]\n", + nthreads, nitr, nsync); + exit(EXIT_SUCCESS); + } + } if(argc > 1) nthreads = atoi(argv[1]); if(argc > 2) nitr = atoi(argv[2]); if(argc > 3) nsync = atoll(argv[3]); diff --git a/external/timemory b/external/timemory index 08313b1bf0..40a150cc84 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit 08313b1bf0766dae6adc4c5015c28f17dd898464 +Subproject commit 40a150cc847c1b2bd2f842b3b56ca0829900ed8e diff --git a/source/bin/omnitrace/CMakeLists.txt b/source/bin/omnitrace/CMakeLists.txt index 0cddab6b58..80e0589483 100644 --- a/source/bin/omnitrace/CMakeLists.txt +++ b/source/bin/omnitrace/CMakeLists.txt @@ -22,9 +22,9 @@ target_link_libraries( omnitrace-exe PRIVATE omnitrace::omnitrace-headers omnitrace::omnitrace-dyninst - omnitrace::omnitrace-timemory omnitrace::omnitrace-compile-options omnitrace::omnitrace-compile-definitions + timemory::timemory-headers $,omnitrace::omnitrace-sanitizer,>) set_target_properties( diff --git a/source/bin/omnitrace/omnitrace.cpp b/source/bin/omnitrace/omnitrace.cpp index 7c6ccfd804..40d115ef60 100644 --- a/source/bin/omnitrace/omnitrace.cpp +++ b/source/bin/omnitrace/omnitrace.cpp @@ -24,6 +24,8 @@ #include "fwd.hpp" #include +#include +#include #include #include diff --git a/source/lib/omnitrace/library/components/roctracer.cpp b/source/lib/omnitrace/library/components/roctracer.cpp index 9bc8098649..a7566a055f 100644 --- a/source/lib/omnitrace/library/components/roctracer.cpp +++ b/source/lib/omnitrace/library/components/roctracer.cpp @@ -24,6 +24,7 @@ #include "library/components/pthread_gotcha.hpp" #include "library/components/roctracer_callbacks.hpp" #include "library/config.hpp" +#include "library/debug.hpp" #include "library/defines.hpp" #include "library/redirect.hpp" #include "library/sampling.hpp" @@ -35,6 +36,16 @@ namespace tim { 
namespace component { +namespace +{ +auto& +roctracer_activity_count() +{ + static std::atomic _v{ 0 }; + return _v; +} +} // namespace + void roctracer::preinit() { @@ -157,13 +168,15 @@ roctracer::shutdown() OMNITRACE_VERBOSE_F(1, "shutting down roctracer...\n"); - OMNITRACE_DEBUG_F("executing hip_exec_activity_callbacks\n"); + OMNITRACE_VERBOSE_F(2, "executing hip_exec_activity_callbacks(0..%zu)\n", + max_supported_threads); // make sure all async operations are executed for(size_t i = 0; i < max_supported_threads; ++i) hip_exec_activity_callbacks(i); // callback for hsa - OMNITRACE_DEBUG_F("executing roctracer_shutdown_routines...\n"); + OMNITRACE_VERBOSE_F(2, "executing %zu roctracer_shutdown_routines...\n", + roctracer_shutdown_routines().size()); for(auto& itr : roctracer_shutdown_routines()) itr.second(); @@ -178,12 +191,36 @@ roctracer::shutdown() #endif // ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); + OMNITRACE_VERBOSE_F( + 2, "executing roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)...\n"); ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); + + OMNITRACE_VERBOSE_F( + 2, "executing roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS)...\n"); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS)); - ROCTRACER_CALL(roctracer_flush_activity()); + + if(roctracer_activity_count() == 0) + { + OMNITRACE_VERBOSE_F(2, "executing roctracer_flush_activity()...\n"); + ROCTRACER_CALL(roctracer_flush_activity()); + } + else + { + OMNITRACE_CI_FAIL(true, + "roctracer_activity_count() != 0 (== %li). 
" + "roctracer::shutdown() most likely called during abort", + roctracer_activity_count().load()); + } OMNITRACE_VERBOSE_F(1, "roctracer is shutdown\n"); } + +scope::transient_destructor +roctracer::protect_flush_activity() +{ + return scope::transient_destructor([]() { --roctracer_activity_count(); }, + []() { ++roctracer_activity_count(); }); +} } // namespace component } // namespace tim diff --git a/source/lib/omnitrace/library/components/roctracer.hpp b/source/lib/omnitrace/library/components/roctracer.hpp index 5a7f783da1..3d50dec3df 100644 --- a/source/lib/omnitrace/library/components/roctracer.hpp +++ b/source/lib/omnitrace/library/components/roctracer.hpp @@ -33,6 +33,7 @@ #include #include #include +#include namespace tim { @@ -64,6 +65,10 @@ struct roctracer void start(); void stop(); + + // this function protects roctracer_flush_activity from being called + // when omnitrace exits during a callback + [[nodiscard]] static scope::transient_destructor protect_flush_activity(); }; #if !defined(OMNITRACE_USE_ROCTRACER) diff --git a/source/lib/omnitrace/library/components/roctracer_callbacks.cpp b/source/lib/omnitrace/library/components/roctracer_callbacks.cpp index c06e19ba5f..c27ac9bbab 100644 --- a/source/lib/omnitrace/library/components/roctracer_callbacks.cpp +++ b/source/lib/omnitrace/library/components/roctracer_callbacks.cpp @@ -285,16 +285,20 @@ hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg) OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - sampling::block_signals(); + static thread_local std::once_flag _once{}; + std::call_once(_once, []() { + sampling::block_signals(); + threading::set_thread_name("omni.roctracer"); + }); + + auto&& _protect = comp::roctracer::protect_flush_activity(); + (void) _protect; static const char* copy_op_name = "hsa_async_copy"; static const char* dispatch_op_name = "hsa_dispatch"; static const char* barrier_op_name = "hsa_barrier"; const char** _name = nullptr; - static thread_local auto 
_once = (threading::set_thread_name("omni.roctracer"), true); - (void) _once; - switch(op) { case HSA_OP_ID_DISPATCH: _name = &dispatch_op_name; break; @@ -629,10 +633,14 @@ hip_activity_callback(const char* begin, const char* end, void*) OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - sampling::block_signals(); + static thread_local std::once_flag _once{}; + std::call_once(_once, []() { + sampling::block_signals(); + threading::set_thread_name("omni.roctracer"); + }); - static thread_local auto _once = (threading::set_thread_name("omni.roctracer"), true); - (void) _once; + auto&& _protect = comp::roctracer::protect_flush_activity(); + (void) _protect; using Device = critical_trace::Device; using Phase = critical_trace::Phase;