From c869df7272332e83dcef7cc86b04a2dbd21b0fd1 Mon Sep 17 00:00:00 2001 From: Giovanni LB Date: Tue, 15 Aug 2023 14:50:29 -0300 Subject: [PATCH] DCGPUBU-44: Added arbitrary envvars to file/dir names. Squashed some fixes. - Added arbitrary env-vars - Fixed some UBs with atomic variables - Fixed loading of file plugin - ATT vs. kernel-trace off by one Change-Id: I69c75f66f722e4085b5279f41afd05813628846d [ROCm/rocprofiler commit: 157eacd2bbd9a46cffd8289002dc66dd9a57dc34] --- projects/rocprofiler/README.md | 9 +++- projects/rocprofiler/bin/rocprofv2 | 39 ++++------------ projects/rocprofiler/plugin/cli/cli.cpp | 24 ---------- projects/rocprofiler/plugin/file/file.cpp | 26 +---------- .../rocprofiler/plugin/perfetto/perfetto.cpp | 28 +---------- .../src/core/memory/generic_buffer.cpp | 6 +-- .../rocprofiler/src/core/session/session.cpp | 34 +++++++------- .../rocprofiler/src/core/session/spm/spm.cpp | 8 ++-- .../src/core/session/tracer/tracer.cpp | 8 ++-- projects/rocprofiler/src/tools/tool.cpp | 46 ++++++++++++++++++- .../featuretests/profiler/profiler_gtest.cpp | 6 +-- 11 files changed, 94 insertions(+), 140 deletions(-) diff --git a/projects/rocprofiler/README.md b/projects/rocprofiler/README.md index a00a55f8d4..a9fa1d698e 100644 --- a/projects/rocprofiler/README.md +++ b/projects/rocprofiler/README.md @@ -243,6 +243,13 @@ The user has two options for building: ./rocprofv2 --plugin plugin_name -i samples/input.txt -d output_dir # -d is optional, but can be used to define the directory output for output results ``` + Both the output directory and the output file name support simple environment-variable substitution: every occurrence of `%q{var}` is replaced with the value of `$var` (or with an empty string if `var` is not set), e.g.: + ```bash + export var="FOO" + rocprofv2 --plugin perfetto -o file_%q{var}_name + # Generates file names: file_FOO_name[...].pftrace + ``` + - #### (ATT) Advanced Thread Trace Tool used to collect fine-grained hardware metrics. Provides ISA-level instruction hotspot analysis via hardware tracing. 
@@ -313,7 +320,7 @@ The user has two options for building: - att: TARGET_CU=1 //or some other CU [0,15] - WGP for Navi [0,8] - SE_MASK=0x1 // bitmask of shader engines. The fewer, the easier on the hardware. Default enables 1 out of 4 shader engines. - SIMD_MASK=0xF // GFX9: bitmask of SIMDs. Navi: SIMD Index [0-3]. - - DISPATCH=ID,RN // collect trace only for the given dispatch_ID and MPI rank RN. RN is optional and ignored for single processes. Multiple lines with varying combinations of RN and ID can be added. + - DISPATCH=ID,RN // collect trace only for the given dispatch_ID (from --kernel-trace) and MPI rank RN. RN is optional and ignored for single processes. Multiple lines with varying combinations of RN and ID can be added. - KERNEL=kernname // Profile only kernels containing the string kernname (c++ mangled name). Multiple lines can be added. - PERFCOUNTERS_COL_PERIOD=0x3 // Multiplier period for counter collection [0~31]. 0=fastest (usually once every 16 cycles). GFX9 only. Counters will be shown in a graph over time in the browser UI. - PERFCOUNTER=counter_name // Add a SQ counter to be collected with ATT; period defined by PERFCOUNTERS_COL_PERIOD. GFX9 only. diff --git a/projects/rocprofiler/bin/rocprofv2 b/projects/rocprofiler/bin/rocprofv2 index 24704e2dc2..41772bbef1 100755 --- a/projects/rocprofiler/bin/rocprofv2 +++ b/projects/rocprofiler/bin/rocprofv2 @@ -58,6 +58,8 @@ if [ -z "$1" ]; then exit 1 fi +OUTPUT_PATH_INTERNAL="." 
+ while [ 1 ]; do if [[ "$1" = "-h" || "$1" = "--help" ]]; then usage @@ -98,19 +100,6 @@ while [ 1 ]; do elif [[ "$1" = "-d" || "$1" = "--output-directory" ]]; then if [ $2 ]; then OUTPUT_PATH_INTERNAL=$2 - MPI_RANK_INTERNAL="" - if [ -n "$MPI_RANK" ]; then - MPI_RANK_INTERNAL=$MPI_RANK - elif [ -n "$OMPI_COMM_WORLD_RANK" ]; then - MPI_RANK_INTERNAL=$OMPI_COMM_WORLD_RANK - elif [ -n "$MV2_COMM_WORLD_RANK" ]; then - MPI_RANK_INTERNAL=$MV2_COMM_WORLD_RANK - fi - if [ -n "$MPI_RANK_INTERNAL" ]; then - OUTPUT_PATH_INTERNAL=${OUTPUT_PATH_INTERNAL//"%rank"/$MPI_RANK_INTERNAL} - fi - - mkdir -p $OUTPUT_PATH_INTERNAL export OUTPUT_PATH=$OUTPUT_PATH_INTERNAL else usage @@ -239,32 +228,22 @@ if [ -n "$COUNTERS_PATH" ]; then done <$input fi -COUNTERS_PMC_DIRS="" -if [ -n "$PMC_LINES" ]; then +if [ -n "$PMC_LINES" ] && [ ! -n "$ATT_ARGV" ]; then COUNTER=1 for i in ${!PMC_LINES[@]}; do export ROCPROFILER_COUNTERS="${PMC_LINES[$i]}" #Skipping lines without pmc when not in att mode - if [[ ! ${PMC_LINES[$i]} =~ "pmc" && ! ${PMC_LINES[$i]} =~ "att" ]]; then + if [[ ! ${PMC_LINES[$i]} =~ "pmc" ]]; then continue fi - if [ -n "$OUTPUT_PATH" ]; then - if [ ! -n "$ATT_ARGV" ]; then - FINAL_PATH="$OUTPUT_PATH_INTERNAL/pmc_$COUNTER" - COUNTERS_PMC_DIRS="$COUNTERS_PMC_DIRS $FINAL_PATH" - else - FINAL_PATH="$OUTPUT_PATH" - fi - echo -e "\nThe output path for the following counters: $FINAL_PATH" - mkdir -p $FINAL_PATH - echo $ROCPROFILER_COUNTERS >$FINAL_PATH/pmc.txt - export OUTPUT_PATH=$FINAL_PATH - let COUNTER=COUNTER+1 - fi + FINAL_PATH="$OUTPUT_PATH_INTERNAL/pmc_$COUNTER" + export OUTPUT_PATH=$FINAL_PATH + let COUNTER=COUNTER+1 LD_PRELOAD=$LD_PRELOAD:$ROCM_DIR/lib/rocprofiler/librocprofiler_tool.so $* + echo -e "\nThe output path for the following counters: $OUTPUT_PATH" done -elif [ ! 
-n "$ATT_ARGV" ]; then +else LD_PRELOAD=$LD_PRELOAD:$ROCM_DIR/lib/rocprofiler/librocprofiler_tool.so $* fi diff --git a/projects/rocprofiler/plugin/cli/cli.cpp b/projects/rocprofiler/plugin/cli/cli.cpp index d698109aa1..fd96aa4688 100644 --- a/projects/rocprofiler/plugin/cli/cli.cpp +++ b/projects/rocprofiler/plugin/cli/cli.cpp @@ -95,8 +95,6 @@ class file_plugin_t { } std::stringstream ss; - output_file_name = replace_MPI_macros(output_file_name); - ss << output_file_name << GetPid() << "_" << name_; stream_.open(output_prefix / ss.str()); } @@ -104,28 +102,6 @@ class file_plugin_t { bool is_open() const { return stream_.is_open(); } bool fail() const { return stream_.fail(); } - // Returns a string with the MPI %macro replaced with the corresponding envvar - std::string replace_MPI_macros(std::string output_file_name) { - std::unordered_map MPI_BUILTINS = { - {"MPI_RANK", "%rank"}, - {"OMPI_COMM_WORLD_RANK", "%rank"}, - {"MV2_COMM_WORLD_RANK", "%rank"}}; - - for (const auto& [envvar, key] : MPI_BUILTINS) { - size_t key_find = output_file_name.rfind(key); - if (key_find == std::string::npos) continue; // Does not contain a %?rank var - - const char* env_var_set = getenv(envvar); - if (env_var_set == nullptr) continue; // MPI_COMM_WORLD_x var is does not exist - - int rank = atoi(env_var_set); - output_file_name = output_file_name.substr(0, key_find) + std::to_string(rank) + - output_file_name.substr(key_find + std::string(key).size()); - } - - return output_file_name; - } - private: const std::string name_; std::ofstream stream_; diff --git a/projects/rocprofiler/plugin/file/file.cpp b/projects/rocprofiler/plugin/file/file.cpp index 145c471445..5e341f5868 100644 --- a/projects/rocprofiler/plugin/file/file.cpp +++ b/projects/rocprofiler/plugin/file/file.cpp @@ -111,7 +111,7 @@ class file_plugin_t { const char* output_dir = getenv("OUTPUT_PATH"); output_file_name = getenv("OUT_FILE_NAME") ? 
std::string(getenv("OUT_FILE_NAME")) : ""; - if (output_dir == nullptr && getenv("OUT_FILE_NAME") == nullptr) { + if (output_dir == nullptr && output_file_name.size() == 0) { stream_.copyfmt(std::cout); stream_.clear(std::cout.rdstate()); stream_.basic_ios::rdbuf(std::cout.rdbuf()); @@ -127,8 +127,6 @@ class file_plugin_t { return; } - output_file_name = replace_MPI_macros(output_file_name); - std::stringstream ss; ss << name_ << "_" << ((output_file_name.empty()) ? std::to_string(GetPid()) : "") << output_file_name << ".csv"; @@ -140,28 +138,6 @@ class file_plugin_t { bool fail() const { return stream_.fail(); } bool isStdOut() const { return bPrintToStdout; } - // Returns a string with the MPI %macro replaced with the corresponding envvar - std::string replace_MPI_macros(std::string output_file_name) { - std::unordered_map MPI_BUILTINS = { - {"MPI_RANK", "%rank"}, - {"OMPI_COMM_WORLD_RANK", "%rank"}, - {"MV2_COMM_WORLD_RANK", "%rank"}}; - - for (const auto& [envvar, key] : MPI_BUILTINS) { - size_t key_find = output_file_name.rfind(key); - if (key_find == std::string::npos) continue; // Does not contain a %?rank var - - const char* env_var_set = getenv(envvar); - if (env_var_set == nullptr) continue; // MPI_COMM_WORLD_x var is does not exist - - int rank = atoi(env_var_set); - output_file_name = output_file_name.substr(0, key_find) + std::to_string(rank) + - output_file_name.substr(key_find + std::string(key).size()); - } - - return output_file_name; - } - private: const std::string name_; std::ofstream stream_; diff --git a/projects/rocprofiler/plugin/perfetto/perfetto.cpp b/projects/rocprofiler/plugin/perfetto/perfetto.cpp index cb2053c91d..c7c3cfe7e1 100644 --- a/projects/rocprofiler/plugin/perfetto/perfetto.cpp +++ b/projects/rocprofiler/plugin/perfetto/perfetto.cpp @@ -144,7 +144,6 @@ class perfetto_plugin_t { data_source_cfg->set_name("track_event"); data_source_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString()); - output_file_name = 
replace_MPI_macros(output_file_name); output_prefix_.append(output_file_name + std::to_string(GetPid()) + "_output.pftrace"); file_descriptor_ = open(output_prefix_.string().c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600); if (file_descriptor_ == -1) rocprofiler::warning("Can't open output file\n"); @@ -156,8 +155,7 @@ class perfetto_plugin_t { // Give a custom name for the traced process. perfetto::ProcessTrack process_track = perfetto::ProcessTrack::Current(); perfetto::protos::gen::TrackDescriptor desc = process_track.Serialize(); - desc.mutable_process()->set_process_name("Node: " + std::string(hostname_) + " Rank " + - std::to_string(MPI_rank)); + desc.mutable_process()->set_process_name("Node: " + std::string(hostname_)); perfetto::TrackEvent::SetTrackDescriptor(process_track, desc); is_valid_ = true; @@ -170,28 +168,6 @@ class perfetto_plugin_t { } } - std::string replace_MPI_macros(std::string output_file_name) { - std::vector MPI_BUILTINS = {"MPI_RANK", "OMPI_COMM_WORLD_RANK", - "MV2_COMM_WORLD_RANK"}; - bIsMPI = false; - - for (const char* envvar : MPI_BUILTINS) { - const char* rank_env_var = getenv(envvar); - if (rank_env_var == nullptr) continue; // MPI var is does not exist - - MPI_rank = atoi(rank_env_var); - bIsMPI = true; - break; - } - - size_t key_find = output_file_name.rfind("%rank"); - if (key_find != std::string::npos) { // Contains a %?rank string - output_file_name = output_file_name.substr(0, key_find) + std::to_string(MPI_rank) + - output_file_name.substr(key_find + std::string("%rank").size()); - } - return output_file_name; - } - const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) { switch (domain) { case ACTIVITY_DOMAIN_ROCTX: @@ -631,8 +607,6 @@ class perfetto_plugin_t { std::unique_ptr tracing_session_; int file_descriptor_; bool is_valid_{false}; - bool bIsMPI = false; - int MPI_rank = 0; size_t roctx_track_entries_{0}; // Correlate stream id(s) with correlation id(s) to identify the stream id of every HIP activity diff 
--git a/projects/rocprofiler/src/core/memory/generic_buffer.cpp b/projects/rocprofiler/src/core/memory/generic_buffer.cpp index 5efe059ca6..445335fdf2 100644 --- a/projects/rocprofiler/src/core/memory/generic_buffer.cpp +++ b/projects/rocprofiler/src/core/memory/generic_buffer.cpp @@ -37,7 +37,7 @@ GenericBuffer::GenericBuffer(rocprofiler_session_id_t session_id, rocprofiler_bu id_(id), flush_function_(flush_function), session_id_(session_id) { - if (!is_valid_.load(std::memory_order_release)) { + if (!is_valid_.load(std::memory_order_acquire)) { // Pool definition: The memory pool is split in 2 buffers of equal size. When // first initialized, the write pointer points to the first element of the // first buffer. When a buffer is full, or when Flush() is called, the write @@ -67,7 +67,7 @@ GenericBuffer::GenericBuffer(rocprofiler_session_id_t session_id, rocprofiler_bu } GenericBuffer::~GenericBuffer() { - if (is_valid_.load(std::memory_order_release)) { + if (is_valid_.load(std::memory_order_acquire)) { std::lock_guard lock(buffer_lock_); // if (rocprofiler::GetROCProfiler_Singleton()->GetSession(session_id_)) // rocprofiler::GetROCProfiler_Singleton()->GetSession(session_id_)->DisableTools(id_); @@ -175,7 +175,7 @@ rocprofiler_session_id_t GenericBuffer::GetSessionId() { return rocprofiler_session_id_t{0}; } -bool GenericBuffer::IsValid() { return is_valid_.load(std::memory_order_release); } +bool GenericBuffer::IsValid() { return is_valid_.load(std::memory_order_acquire); } rocprofiler_buffer_id_t GenericBuffer::GetId() { if (is_valid_) return id_; diff --git a/projects/rocprofiler/src/core/session/session.cpp b/projects/rocprofiler/src/core/session/session.cpp index d3596b24bd..ea4fd28212 100644 --- a/projects/rocprofiler/src/core/session/session.cpp +++ b/projects/rocprofiler/src/core/session/session.cpp @@ -51,34 +51,34 @@ Session::~Session() { { std::lock_guard lock(session_lock_); if (FindFilterWithKind(ROCPROFILER_SPM_COLLECTION) && spmcounter_ && - 
spm_started_.load(std::memory_order_release)) { + spm_started_.load(std::memory_order_acquire)) { delete spmcounter_; } if (FindFilterWithKind(ROCPROFILER_API_TRACE) && tracer_ && - tracer_started_.load(std::memory_order_release)) { + tracer_started_.load(std::memory_order_acquire)) { delete tracer_; tracer_started_.exchange(false, std::memory_order_release); } if (FindFilterWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION) && pc_sampler_ && - pc_sampler_started_.load(std::memory_order_release)) { + pc_sampler_started_.load(std::memory_order_acquire)) { delete pc_sampler_; pc_sampler_started_.exchange(false, std::memory_order_release); } if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER) && counters_sampler_ && - counters_sampler_started_.load(std::memory_order_release)) { + counters_sampler_started_.load(std::memory_order_acquire)) { delete counters_sampler_; counters_sampler_started_.exchange(false, std::memory_order_release); } if ((FindFilterWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION) || FindFilterWithKind(ROCPROFILER_COUNTERS_COLLECTION)) && - profiler_ && profiler_started_.load(std::memory_order_release)) { + profiler_ && profiler_started_.load(std::memory_order_acquire)) { rocprofiler::queue::ResetSessionID(); delete profiler_; profiler_started_.exchange(false, std::memory_order_release); } if (FindFilterWithKind(ROCPROFILER_ATT_TRACE_COLLECTION) && att_tracer_ && - att_tracer_started_.load(std::memory_order_release)) { + att_tracer_started_.load(std::memory_order_acquire)) { delete att_tracer_; att_tracer_started_.exchange(false, std::memory_order_release); } @@ -107,7 +107,7 @@ void Session::DisableTools(rocprofiler_buffer_id_t buffer_id) { if (FindFilterWithKind(ROCPROFILER_API_TRACE) && GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetBufferId().value == buffer_id.value) { - if (tracer_started_.load(std::memory_order_release)) { + if (tracer_started_.load(std::memory_order_acquire)) { tracer_->DisableRoctracer(); } } @@ -116,7 +116,7 @@ 
void Session::DisableTools(rocprofiler_buffer_id_t buffer_id) { void Session::Start() { std::lock_guard lock(session_lock_); if (!is_active_) { - if (!profiler_started_.load(std::memory_order_release)) { + if (!profiler_started_.load(std::memory_order_acquire)) { if (FindFilterWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION)) { profiler_ = new profiler::Profiler( GetFilter(GetFilterIdWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION)) @@ -136,7 +136,7 @@ void Session::Start() { rocprofiler::queue::ResetSessionID(session_id_); } if (FindFilterWithKind(ROCPROFILER_ATT_TRACE_COLLECTION)) { - if (!att_tracer_started_.load(std::memory_order_release)) { + if (!att_tracer_started_.load(std::memory_order_acquire)) { att_tracer_ = new att::AttTracer( GetFilter(GetFilterIdWithKind(ROCPROFILER_ATT_TRACE_COLLECTION))->GetBufferId(), GetFilter(GetFilterIdWithKind(ROCPROFILER_ATT_TRACE_COLLECTION))->GetId(), session_id_); @@ -145,7 +145,7 @@ void Session::Start() { } if (FindFilterWithKind(ROCPROFILER_SPM_COLLECTION)) { - if (!spm_started_.load(std::memory_order_release)) { + if (!spm_started_.load(std::memory_order_acquire)) { rocprofiler_spm_parameter_t* spmparameter = GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetSpmParameterData(); spmcounter_ = new spm::SpmCounters( @@ -153,7 +153,7 @@ void Session::Start() { GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetId(), spmparameter, session_id_); } - if (!profiler_started_.load(std::memory_order_release)) { + if (!profiler_started_.load(std::memory_order_acquire)) { profiler_ = new profiler::Profiler( GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetBufferId(), GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetId(), session_id_); @@ -165,7 +165,7 @@ void Session::Start() { if (FindFilterWithKind(ROCPROFILER_API_TRACE)) { std::vector domains = GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetTraceData(); - if (!tracer_started_.load(std::memory_order_release)) 
{ + if (!tracer_started_.load(std::memory_order_acquire)) { tracer_ = new tracer::Tracer( session_id_, (GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->HasCallback() @@ -178,7 +178,7 @@ void Session::Start() { } if (FindFilterWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION)) { - if (!pc_sampler_started_.load(std::memory_order_release)) { + if (!pc_sampler_started_.load(std::memory_order_acquire)) { pc_sampler_ = new pc_sampler::PCSampler( GetFilter(GetFilterIdWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION))->GetBufferId(), GetFilter(GetFilterIdWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION))->GetId(), @@ -189,7 +189,7 @@ void Session::Start() { } if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER)) { - if (!counters_sampler_started_.load(std::memory_order_release)) { + if (!counters_sampler_started_.load(std::memory_order_acquire)) { counters_sampler_ = new CountersSampler( GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_SAMPLER))->GetBufferId(), GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_SAMPLER))->GetId(), session_id_); @@ -214,18 +214,18 @@ void Session::Terminate() { if (FindFilterWithKind(ROCPROFILER_API_TRACE)) { std::vector domains = GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetTraceData(); - if (tracer_started_.load(std::memory_order_release)) { + if (tracer_started_.load(std::memory_order_acquire)) { tracer_->StopRoctracer(); } } if (FindFilterWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION)) { - if (pc_sampler_started_.load(std::memory_order_release)) { + if (pc_sampler_started_.load(std::memory_order_acquire)) { pc_sampler_->Stop(); } } if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER)) { - if (counters_sampler_started_.load(std::memory_order_release)) { + if (counters_sampler_started_.load(std::memory_order_acquire)) { counters_sampler_->Stop(); } } diff --git a/projects/rocprofiler/src/core/session/spm/spm.cpp b/projects/rocprofiler/src/core/session/spm/spm.cpp index 6d58cc14a3..b8edd53a5e 100644 --- 
a/projects/rocprofiler/src/core/session/spm/spm.cpp +++ b/projects/rocprofiler/src/core/session/spm/spm.cpp @@ -67,7 +67,7 @@ std::mutex processQueueLock; // rocprofiler_status_t SetDestBuffer(hsa_agent_t GPUNode, uint32_t size, uint32_t timeout) { // rocprofiler_status_t ret; -// uint32_t idx = currIndex.load(std::memory_order_release); +// uint32_t idx = currIndex.load(std::memory_order_acquire); // if (size) { // // Check if user buffer in using // if (spm_buffer_params[idx].addr != NULL) { @@ -97,7 +97,7 @@ std::mutex processQueueLock; // } // if (spm_buffer_params[idx].data_loss) std::cout << "Data Loss" << std::endl; // if (spm_buffer_params[idx].len) { -// uint32_t pidx = preIndex.load(std::memory_order_release); +// uint32_t pidx = preIndex.load(std::memory_order_acquire); // if (spm_buffer_params[idx].len == spm_buffer_params[pidx].size) { // std::cout << "Buffer completely filled with bytes" << spm_buffer_params[idx].len << // std::endl; fd = fopen("SPM_rocprofiler_data.txt", "wb"); size_t retele = @@ -121,8 +121,8 @@ std::mutex processQueueLock; // void spmBufferSetup(hsa_agent_t GPUNode) { // rocprofiler_status_t ret; -// if (is_started.load(std::memory_order_release)) { -// uint32_t idx = currIndex.load(std::memory_order_release); +// if (is_started.load(std::memory_order_acquire)) { +// uint32_t idx = currIndex.load(std::memory_order_acquire); // ret = SetDestBuffer(GPUNode, spm_buffer_params[idx].size, spm_buffer_params[idx].timeout); // if (ret != ROCPROFILER_STATUS_SUCCESS) { // std::cout << "Fail to set Dest Buf 2 " diff --git a/projects/rocprofiler/src/core/session/tracer/tracer.cpp b/projects/rocprofiler/src/core/session/tracer/tracer.cpp index 914a161f65..ce723dfe7f 100644 --- a/projects/rocprofiler/src/core/session/tracer/tracer.cpp +++ b/projects/rocprofiler/src/core/session/tracer/tracer.cpp @@ -46,14 +46,14 @@ Tracer::Tracer(rocprofiler_session_id_t session_id, rocprofiler_sync_callback_t rocprofiler_buffer_id_t buffer_id, std::vector 
domains) : domains_(domains), callback_(callback), buffer_id_(buffer_id), session_id_(session_id) { - assert(!is_active_.load(std::memory_order_release) && "Error: The tracer was initialized!"); + assert(!is_active_.load(std::memory_order_acquire) && "Error: The tracer was initialized!"); std::lock_guard lock(tracer_lock_); callback_data_ = api_callback_data_t{callback, session_id}; is_active_.exchange(true, std::memory_order_release); } void Tracer::StartRoctracer() { - if (!roctracer_initiated_.load(std::memory_order_release)) { + if (!roctracer_initiated_.load(std::memory_order_acquire)) { std::map domains_filteration_map; // TODO(aelwazir): get filter property and parse it here for (auto& domain : domains_) { @@ -68,7 +68,7 @@ void Tracer::StartRoctracer() { } void Tracer::StopRoctracer() { - if (roctracer_initiated_.load(std::memory_order_release)) roctracer_stop(); + if (roctracer_initiated_.load(std::memory_order_acquire)) roctracer_stop(); } void Tracer::DisableRoctracer() { @@ -105,7 +105,7 @@ void Tracer::DisableRoctracer() { } Tracer::~Tracer() { - assert(is_active_.load(std::memory_order_release) && "Error: The tracer was not initialized!"); + assert(is_active_.load(std::memory_order_acquire) && "Error: The tracer was not initialized!"); std::lock_guard lock(tracer_lock_); is_active_.exchange(false, std::memory_order_release); diff --git a/projects/rocprofiler/src/tools/tool.cpp b/projects/rocprofiler/src/tools/tool.cpp index a7926c2012..3c7f535b66 100644 --- a/projects/rocprofiler/src/tools/tool.cpp +++ b/projects/rocprofiler/src/tools/tool.cpp @@ -328,7 +328,7 @@ att_parsed_input_t GetATTParams() { int rank = (comma < line.size() - 1) ? 
stoi(line.substr(comma + 1)) : 0; if (MPI_RANK < 0 || rank == MPI_RANK) // Only add ID if rank matches the one in input.txt - dispatch_ids.push_back(id); + dispatch_ids.push_back(std::max(id-1,0)); // off by 1 in relation to kernel-trace continue; } // param_value is a number @@ -416,17 +416,59 @@ void finish() { } } +static bool env_var_search(std::string& s) { + std::smatch m; + std::regex e ("(.*)\\%\\q\\{([^}]+)\\}(.*)"); + std::regex_match(s, m, e); + + if (m.size() != 4) return false; + + while (m.size() == 4) { + const char* envvar = getenv(m[2].str().c_str()); + if (!envvar) envvar = ""; + s = m[1].str()+envvar+m[3].str(); + std::regex_match(s, m, e); + }; + + return true; +} + +static void env_var_replace(const char* env_name) { + if (!env_name) return; + const char* env = getenv(env_name); + if (!env) return; + + std::string new_env(env); + if (env_var_search(new_env)) setenv(env_name, new_env.c_str(), 1); +} + // load plugins void plugins_load() { // Load output plugin if (Dl_info dl_info; dladdr((void*)plugins_load, &dl_info) != 0) { const char* plugin_name = getenv("ROCPROFILER_PLUGIN_LIB"); if (plugin_name == nullptr) { - if (getenv("OUTPUT_PATH")) + if (getenv("OUTPUT_PATH") || getenv("OUT_FILE_NAME")) plugin_name = "libfile_plugin.so"; else plugin_name = "libcli_plugin.so"; } + env_var_replace("OUTPUT_PATH"); + env_var_replace("OUT_FILE_NAME"); + + std::string out_path = getenv("OUTPUT_PATH") ? getenv("OUTPUT_PATH") : ""; + + if (out_path.size()) { + try { + std::experimental::filesystem::create_directories(out_path); + } catch (...) 
{} + out_path = out_path + '/'; + } + if (getenv("ROCPROFILER_COUNTERS")) { + std::ofstream(out_path+"pmc.txt", std::ios::app) + << std::string(getenv("ROCPROFILER_COUNTERS")) << '\n'; + } + if (!plugin.emplace(fs::path(dl_info.dli_fname).replace_filename(plugin_name)).is_valid()) { plugin.reset(); } diff --git a/projects/rocprofiler/tests-v2/featuretests/profiler/profiler_gtest.cpp b/projects/rocprofiler/tests-v2/featuretests/profiler/profiler_gtest.cpp index dc128aef6d..aa3c5c44b2 100644 --- a/projects/rocprofiler/tests-v2/featuretests/profiler/profiler_gtest.cpp +++ b/projects/rocprofiler/tests-v2/featuretests/profiler/profiler_gtest.cpp @@ -1213,7 +1213,7 @@ class VectorAddFilenameMPITest : public FilePluginTest { protected: virtual void SetUp() { setenv("MPI_RANK", "7", true); - RunApplication("hip_vectoradd", " --hip-activity -d /tmp/tests-v2/file/ -o test_%rank_"); + RunApplication("hip_vectoradd", " --hip-activity -d /tmp/tests-v2/file/ -o test_%q{MPI_RANK}_"); } virtual void TearDown() { std::experimental::filesystem::remove_all("/tmp/tests-v2/file/"); @@ -1239,7 +1239,7 @@ class VectorAddPerfettoMPITest : public PerfettoPluginTest { protected: virtual void SetUp() { setenv("MPI_RANK", "7", true); - RunApplication("hip_vectoradd", " -d /tmp/tests-v2/perfetto/ -o test_%rank_ --plugin perfetto"); + RunApplication("hip_vectoradd", " -d /tmp/tests-v2/perfetto/ -o test_%q{MPI_RANK}_ --plugin perfetto"); } virtual void TearDown() { std::experimental::filesystem::remove_all("/tmp/tests-v2/perfetto/"); @@ -1274,7 +1274,7 @@ class VectorAddCTFMPITest : public CTFPluginTest { protected: virtual void SetUp() { setenv("MPI_RANK", "7", true); - RunApplication("hip_vectoradd", " -d /tmp/tests-v2/ctf_%rank --plugin ctf"); + RunApplication("hip_vectoradd", " -d /tmp/tests-v2/ctf_%q{MPI_RANK} --plugin ctf"); } virtual void TearDown() { std::experimental::filesystem::remove_all("/tmp/tests-v2/");