DCGPUBU-44: Added arbitrary envvars to file/dir names. Squashed some fixes.
- Added arbitrary env-vars
- Fixed some UBs with atomic variables
- Fixed loading of file plugin
- ATT vs. kernel-trace off by one
Change-Id: I69c75f66f722e4085b5279f41afd05813628846d
[ROCm/rocprofiler commit: 157eacd2bb]
This commit is contained in:
@@ -243,6 +243,13 @@ The user has two options for building:
|
||||
./rocprofv2 --plugin plugin_name -i samples/input.txt -d output_dir <app_relative_path> # -d is optional, but can be used to define the directory output for output results
|
||||
```
|
||||
|
||||
Both the output directory and the output file name support simple environment-variable substitution: the special syntax `%q{var}` is replaced with the value of `$var` (or with nothing if `var` is not set). For example:
|
||||
```bash
|
||||
export var="FOO"
|
||||
rocprofv2 --plugin perfetto -o file_%q{var}_name
|
||||
# Generates file names: file_FOO_name[...].pftrace
|
||||
```
|
||||
|
||||
- #### (ATT) Advanced Thread Trace
|
||||
Tool used to collect fine-grained hardware metrics. Provides ISA-level instruction hotspot analysis via hardware tracing.
|
||||
|
||||
@@ -313,7 +320,7 @@ The user has two options for building:
|
||||
- att: TARGET_CU=1 //or some other CU [0,15] - WGP for Navi [0,8]
|
||||
- SE_MASK=0x1 // bitmask of shader engines. The fewer, the easier on the hardware. Default enables 1 out of 4 shader engines.
|
||||
- SIMD_MASK=0xF // GFX9: bitmask of SIMDs. Navi: SIMD Index [0-3].
|
||||
- DISPATCH=ID,RN // collect trace only for the given dispatch_ID and MPI rank RN. RN is optional and ignored for single processes. Multiple lines with varying combinations of RN and ID can be added.
|
||||
- DISPATCH=ID,RN // collect trace only for the given dispatch_ID (from --kernel-trace) and MPI rank RN. RN is optional and ignored for single processes. Multiple lines with varying combinations of RN and ID can be added.
|
||||
- KERNEL=kernname // Profile only kernels containing the string kernname (c++ mangled name). Multiple lines can be added.
|
||||
- PERFCOUNTERS_COL_PERIOD=0x3 // Multiplier period for counter collection [0~31]. 0=fastest (usually once every 16 cycles). GFX9 only. Counters will be shown in a graph over time in the browser UI.
|
||||
- PERFCOUNTER=counter_name // Add a SQ counter to be collected with ATT; period defined by PERFCOUNTERS_COL_PERIOD. GFX9 only.
|
||||
|
||||
@@ -58,6 +58,8 @@ if [ -z "$1" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
OUTPUT_PATH_INTERNAL="."
|
||||
|
||||
while [ 1 ]; do
|
||||
if [[ "$1" = "-h" || "$1" = "--help" ]]; then
|
||||
usage
|
||||
@@ -98,19 +100,6 @@ while [ 1 ]; do
|
||||
elif [[ "$1" = "-d" || "$1" = "--output-directory" ]]; then
|
||||
if [ $2 ]; then
|
||||
OUTPUT_PATH_INTERNAL=$2
|
||||
MPI_RANK_INTERNAL=""
|
||||
if [ -n "$MPI_RANK" ]; then
|
||||
MPI_RANK_INTERNAL=$MPI_RANK
|
||||
elif [ -n "$OMPI_COMM_WORLD_RANK" ]; then
|
||||
MPI_RANK_INTERNAL=$OMPI_COMM_WORLD_RANK
|
||||
elif [ -n "$MV2_COMM_WORLD_RANK" ]; then
|
||||
MPI_RANK_INTERNAL=$MV2_COMM_WORLD_RANK
|
||||
fi
|
||||
if [ -n "$MPI_RANK_INTERNAL" ]; then
|
||||
OUTPUT_PATH_INTERNAL=${OUTPUT_PATH_INTERNAL//"%rank"/$MPI_RANK_INTERNAL}
|
||||
fi
|
||||
|
||||
mkdir -p $OUTPUT_PATH_INTERNAL
|
||||
export OUTPUT_PATH=$OUTPUT_PATH_INTERNAL
|
||||
else
|
||||
usage
|
||||
@@ -239,32 +228,22 @@ if [ -n "$COUNTERS_PATH" ]; then
|
||||
done <$input
|
||||
fi
|
||||
|
||||
COUNTERS_PMC_DIRS=""
|
||||
if [ -n "$PMC_LINES" ]; then
|
||||
if [ -n "$PMC_LINES" ] && [ ! -n "$ATT_ARGV" ]; then
|
||||
COUNTER=1
|
||||
for i in ${!PMC_LINES[@]}; do
|
||||
export ROCPROFILER_COUNTERS="${PMC_LINES[$i]}"
|
||||
#Skipping lines without pmc when not in att mode
|
||||
if [[ ! ${PMC_LINES[$i]} =~ "pmc" && ! ${PMC_LINES[$i]} =~ "att" ]]; then
|
||||
if [[ ! ${PMC_LINES[$i]} =~ "pmc" ]]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
if [ -n "$OUTPUT_PATH" ]; then
|
||||
if [ ! -n "$ATT_ARGV" ]; then
|
||||
FINAL_PATH="$OUTPUT_PATH_INTERNAL/pmc_$COUNTER"
|
||||
COUNTERS_PMC_DIRS="$COUNTERS_PMC_DIRS $FINAL_PATH"
|
||||
else
|
||||
FINAL_PATH="$OUTPUT_PATH"
|
||||
fi
|
||||
echo -e "\nThe output path for the following counters: $FINAL_PATH"
|
||||
mkdir -p $FINAL_PATH
|
||||
echo $ROCPROFILER_COUNTERS >$FINAL_PATH/pmc.txt
|
||||
export OUTPUT_PATH=$FINAL_PATH
|
||||
let COUNTER=COUNTER+1
|
||||
fi
|
||||
FINAL_PATH="$OUTPUT_PATH_INTERNAL/pmc_$COUNTER"
|
||||
export OUTPUT_PATH=$FINAL_PATH
|
||||
let COUNTER=COUNTER+1
|
||||
LD_PRELOAD=$LD_PRELOAD:$ROCM_DIR/lib/rocprofiler/librocprofiler_tool.so $*
|
||||
echo -e "\nThe output path for the following counters: $OUTPUT_PATH"
|
||||
done
|
||||
elif [ ! -n "$ATT_ARGV" ]; then
|
||||
else
|
||||
LD_PRELOAD=$LD_PRELOAD:$ROCM_DIR/lib/rocprofiler/librocprofiler_tool.so $*
|
||||
fi
|
||||
|
||||
|
||||
@@ -95,8 +95,6 @@ class file_plugin_t {
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
output_file_name = replace_MPI_macros(output_file_name);
|
||||
|
||||
ss << output_file_name << GetPid() << "_" << name_;
|
||||
stream_.open(output_prefix / ss.str());
|
||||
}
|
||||
@@ -104,28 +102,6 @@ class file_plugin_t {
|
||||
bool is_open() const { return stream_.is_open(); }
|
||||
bool fail() const { return stream_.fail(); }
|
||||
|
||||
// Returns output_file_name with every "%rank" macro replaced by the MPI rank
// of this process.
//
// The rank is read from the first of MPI_RANK, OMPI_COMM_WORLD_RANK and
// MV2_COMM_WORLD_RANK that is set in the environment (checked in that fixed
// order, so the result is deterministic when several of them are set). The
// value is parsed with atoi(), so a non-numeric value yields rank 0. If none
// of the variables is set the name is returned unchanged.
std::string replace_MPI_macros(std::string output_file_name) {
  static constexpr const char* kRankEnvVars[] = {"MPI_RANK", "OMPI_COMM_WORLD_RANK",
                                                 "MV2_COMM_WORLD_RANK"};
  const std::string rank_macro = "%rank";

  // Pick the first rank variable that exists.
  const char* rank_env = nullptr;
  for (const char* envvar : kRankEnvVars) {
    rank_env = getenv(envvar);
    if (rank_env != nullptr) break;
  }
  if (rank_env == nullptr) return output_file_name;  // Not running under a known MPI launcher

  const std::string rank = std::to_string(atoi(rank_env));

  // Replace every occurrence; resume the search after the inserted text so the
  // replacement itself is never re-examined.
  for (size_t pos = output_file_name.find(rank_macro); pos != std::string::npos;
       pos = output_file_name.find(rank_macro, pos + rank.size())) {
    output_file_name.replace(pos, rank_macro.size(), rank);
  }

  return output_file_name;
}
|
||||
|
||||
private:
|
||||
const std::string name_;
|
||||
std::ofstream stream_;
|
||||
|
||||
@@ -111,7 +111,7 @@ class file_plugin_t {
|
||||
const char* output_dir = getenv("OUTPUT_PATH");
|
||||
output_file_name = getenv("OUT_FILE_NAME") ? std::string(getenv("OUT_FILE_NAME")) : "";
|
||||
|
||||
if (output_dir == nullptr && getenv("OUT_FILE_NAME") == nullptr) {
|
||||
if (output_dir == nullptr && output_file_name.size() == 0) {
|
||||
stream_.copyfmt(std::cout);
|
||||
stream_.clear(std::cout.rdstate());
|
||||
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
|
||||
@@ -127,8 +127,6 @@ class file_plugin_t {
|
||||
return;
|
||||
}
|
||||
|
||||
output_file_name = replace_MPI_macros(output_file_name);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << name_ << "_" << ((output_file_name.empty()) ? std::to_string(GetPid()) : "")
|
||||
<< output_file_name << ".csv";
|
||||
@@ -140,28 +138,6 @@ class file_plugin_t {
|
||||
bool fail() const { return stream_.fail(); }
|
||||
bool isStdOut() const { return bPrintToStdout; }
|
||||
|
||||
// Returns output_file_name with every "%rank" macro replaced by the MPI rank
// of this process.
//
// The rank is read from the first of MPI_RANK, OMPI_COMM_WORLD_RANK and
// MV2_COMM_WORLD_RANK that is set in the environment (checked in that fixed
// order, so the result is deterministic when several of them are set). The
// value is parsed with atoi(), so a non-numeric value yields rank 0. If none
// of the variables is set the name is returned unchanged.
std::string replace_MPI_macros(std::string output_file_name) {
  static constexpr const char* kRankEnvVars[] = {"MPI_RANK", "OMPI_COMM_WORLD_RANK",
                                                 "MV2_COMM_WORLD_RANK"};
  const std::string rank_macro = "%rank";

  // Pick the first rank variable that exists.
  const char* rank_env = nullptr;
  for (const char* envvar : kRankEnvVars) {
    rank_env = getenv(envvar);
    if (rank_env != nullptr) break;
  }
  if (rank_env == nullptr) return output_file_name;  // Not running under a known MPI launcher

  const std::string rank = std::to_string(atoi(rank_env));

  // Replace every occurrence; resume the search after the inserted text so the
  // replacement itself is never re-examined.
  for (size_t pos = output_file_name.find(rank_macro); pos != std::string::npos;
       pos = output_file_name.find(rank_macro, pos + rank.size())) {
    output_file_name.replace(pos, rank_macro.size(), rank);
  }

  return output_file_name;
}
|
||||
|
||||
private:
|
||||
const std::string name_;
|
||||
std::ofstream stream_;
|
||||
|
||||
@@ -144,7 +144,6 @@ class perfetto_plugin_t {
|
||||
data_source_cfg->set_name("track_event");
|
||||
data_source_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
|
||||
output_file_name = replace_MPI_macros(output_file_name);
|
||||
output_prefix_.append(output_file_name + std::to_string(GetPid()) + "_output.pftrace");
|
||||
file_descriptor_ = open(output_prefix_.string().c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
if (file_descriptor_ == -1) rocprofiler::warning("Can't open output file\n");
|
||||
@@ -156,8 +155,7 @@ class perfetto_plugin_t {
|
||||
// Give a custom name for the traced process.
|
||||
perfetto::ProcessTrack process_track = perfetto::ProcessTrack::Current();
|
||||
perfetto::protos::gen::TrackDescriptor desc = process_track.Serialize();
|
||||
desc.mutable_process()->set_process_name("Node: " + std::string(hostname_) + " Rank " +
|
||||
std::to_string(MPI_rank));
|
||||
desc.mutable_process()->set_process_name("Node: " + std::string(hostname_));
|
||||
perfetto::TrackEvent::SetTrackDescriptor(process_track, desc);
|
||||
|
||||
is_valid_ = true;
|
||||
@@ -170,28 +168,6 @@ class perfetto_plugin_t {
|
||||
}
|
||||
}
|
||||
|
||||
std::string replace_MPI_macros(std::string output_file_name) {
|
||||
std::vector<const char*> MPI_BUILTINS = {"MPI_RANK", "OMPI_COMM_WORLD_RANK",
|
||||
"MV2_COMM_WORLD_RANK"};
|
||||
bIsMPI = false;
|
||||
|
||||
for (const char* envvar : MPI_BUILTINS) {
|
||||
const char* rank_env_var = getenv(envvar);
|
||||
if (rank_env_var == nullptr) continue; // MPI var is does not exist
|
||||
|
||||
MPI_rank = atoi(rank_env_var);
|
||||
bIsMPI = true;
|
||||
break;
|
||||
}
|
||||
|
||||
size_t key_find = output_file_name.rfind("%rank");
|
||||
if (key_find != std::string::npos) { // Contains a %?rank string
|
||||
output_file_name = output_file_name.substr(0, key_find) + std::to_string(MPI_rank) +
|
||||
output_file_name.substr(key_find + std::string("%rank").size());
|
||||
}
|
||||
return output_file_name;
|
||||
}
|
||||
|
||||
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
@@ -631,8 +607,6 @@ class perfetto_plugin_t {
|
||||
std::unique_ptr<perfetto::TracingSession> tracing_session_;
|
||||
int file_descriptor_;
|
||||
bool is_valid_{false};
|
||||
bool bIsMPI = false;
|
||||
int MPI_rank = 0;
|
||||
size_t roctx_track_entries_{0};
|
||||
|
||||
// Correlate stream id(s) with correlation id(s) to identify the stream id of every HIP activity
|
||||
|
||||
@@ -37,7 +37,7 @@ GenericBuffer::GenericBuffer(rocprofiler_session_id_t session_id, rocprofiler_bu
|
||||
id_(id),
|
||||
flush_function_(flush_function),
|
||||
session_id_(session_id) {
|
||||
if (!is_valid_.load(std::memory_order_release)) {
|
||||
if (!is_valid_.load(std::memory_order_acquire)) {
|
||||
// Pool definition: The memory pool is split in 2 buffers of equal size. When
|
||||
// first initialized, the write pointer points to the first element of the
|
||||
// first buffer. When a buffer is full, or when Flush() is called, the write
|
||||
@@ -67,7 +67,7 @@ GenericBuffer::GenericBuffer(rocprofiler_session_id_t session_id, rocprofiler_bu
|
||||
}
|
||||
|
||||
GenericBuffer::~GenericBuffer() {
|
||||
if (is_valid_.load(std::memory_order_release)) {
|
||||
if (is_valid_.load(std::memory_order_acquire)) {
|
||||
std::lock_guard lock(buffer_lock_);
|
||||
// if (rocprofiler::GetROCProfiler_Singleton()->GetSession(session_id_))
|
||||
// rocprofiler::GetROCProfiler_Singleton()->GetSession(session_id_)->DisableTools(id_);
|
||||
@@ -175,7 +175,7 @@ rocprofiler_session_id_t GenericBuffer::GetSessionId() {
|
||||
return rocprofiler_session_id_t{0};
|
||||
}
|
||||
|
||||
bool GenericBuffer::IsValid() { return is_valid_.load(std::memory_order_release); }
|
||||
bool GenericBuffer::IsValid() { return is_valid_.load(std::memory_order_acquire); }
|
||||
|
||||
rocprofiler_buffer_id_t GenericBuffer::GetId() {
|
||||
if (is_valid_) return id_;
|
||||
|
||||
@@ -51,34 +51,34 @@ Session::~Session() {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(session_lock_);
|
||||
if (FindFilterWithKind(ROCPROFILER_SPM_COLLECTION) && spmcounter_ &&
|
||||
spm_started_.load(std::memory_order_release)) {
|
||||
spm_started_.load(std::memory_order_acquire)) {
|
||||
delete spmcounter_;
|
||||
}
|
||||
if (FindFilterWithKind(ROCPROFILER_API_TRACE) && tracer_ &&
|
||||
tracer_started_.load(std::memory_order_release)) {
|
||||
tracer_started_.load(std::memory_order_acquire)) {
|
||||
delete tracer_;
|
||||
tracer_started_.exchange(false, std::memory_order_release);
|
||||
}
|
||||
if (FindFilterWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION) && pc_sampler_ &&
|
||||
pc_sampler_started_.load(std::memory_order_release)) {
|
||||
pc_sampler_started_.load(std::memory_order_acquire)) {
|
||||
delete pc_sampler_;
|
||||
pc_sampler_started_.exchange(false, std::memory_order_release);
|
||||
}
|
||||
|
||||
if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER) && counters_sampler_ &&
|
||||
counters_sampler_started_.load(std::memory_order_release)) {
|
||||
counters_sampler_started_.load(std::memory_order_acquire)) {
|
||||
delete counters_sampler_;
|
||||
counters_sampler_started_.exchange(false, std::memory_order_release);
|
||||
}
|
||||
if ((FindFilterWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION) ||
|
||||
FindFilterWithKind(ROCPROFILER_COUNTERS_COLLECTION)) &&
|
||||
profiler_ && profiler_started_.load(std::memory_order_release)) {
|
||||
profiler_ && profiler_started_.load(std::memory_order_acquire)) {
|
||||
rocprofiler::queue::ResetSessionID();
|
||||
delete profiler_;
|
||||
profiler_started_.exchange(false, std::memory_order_release);
|
||||
}
|
||||
if (FindFilterWithKind(ROCPROFILER_ATT_TRACE_COLLECTION) && att_tracer_ &&
|
||||
att_tracer_started_.load(std::memory_order_release)) {
|
||||
att_tracer_started_.load(std::memory_order_acquire)) {
|
||||
delete att_tracer_;
|
||||
att_tracer_started_.exchange(false, std::memory_order_release);
|
||||
}
|
||||
@@ -107,7 +107,7 @@ void Session::DisableTools(rocprofiler_buffer_id_t buffer_id) {
|
||||
if (FindFilterWithKind(ROCPROFILER_API_TRACE) &&
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetBufferId().value ==
|
||||
buffer_id.value) {
|
||||
if (tracer_started_.load(std::memory_order_release)) {
|
||||
if (tracer_started_.load(std::memory_order_acquire)) {
|
||||
tracer_->DisableRoctracer();
|
||||
}
|
||||
}
|
||||
@@ -116,7 +116,7 @@ void Session::DisableTools(rocprofiler_buffer_id_t buffer_id) {
|
||||
void Session::Start() {
|
||||
std::lock_guard<std::mutex> lock(session_lock_);
|
||||
if (!is_active_) {
|
||||
if (!profiler_started_.load(std::memory_order_release)) {
|
||||
if (!profiler_started_.load(std::memory_order_acquire)) {
|
||||
if (FindFilterWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION)) {
|
||||
profiler_ = new profiler::Profiler(
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION))
|
||||
@@ -136,7 +136,7 @@ void Session::Start() {
|
||||
rocprofiler::queue::ResetSessionID(session_id_);
|
||||
}
|
||||
if (FindFilterWithKind(ROCPROFILER_ATT_TRACE_COLLECTION)) {
|
||||
if (!att_tracer_started_.load(std::memory_order_release)) {
|
||||
if (!att_tracer_started_.load(std::memory_order_acquire)) {
|
||||
att_tracer_ = new att::AttTracer(
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_ATT_TRACE_COLLECTION))->GetBufferId(),
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_ATT_TRACE_COLLECTION))->GetId(), session_id_);
|
||||
@@ -145,7 +145,7 @@ void Session::Start() {
|
||||
}
|
||||
|
||||
if (FindFilterWithKind(ROCPROFILER_SPM_COLLECTION)) {
|
||||
if (!spm_started_.load(std::memory_order_release)) {
|
||||
if (!spm_started_.load(std::memory_order_acquire)) {
|
||||
rocprofiler_spm_parameter_t* spmparameter =
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetSpmParameterData();
|
||||
spmcounter_ = new spm::SpmCounters(
|
||||
@@ -153,7 +153,7 @@ void Session::Start() {
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetId(), spmparameter,
|
||||
session_id_);
|
||||
}
|
||||
if (!profiler_started_.load(std::memory_order_release)) {
|
||||
if (!profiler_started_.load(std::memory_order_acquire)) {
|
||||
profiler_ = new profiler::Profiler(
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetBufferId(),
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION))->GetId(), session_id_);
|
||||
@@ -165,7 +165,7 @@ void Session::Start() {
|
||||
if (FindFilterWithKind(ROCPROFILER_API_TRACE)) {
|
||||
std::vector<rocprofiler_tracer_activity_domain_t> domains =
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetTraceData();
|
||||
if (!tracer_started_.load(std::memory_order_release)) {
|
||||
if (!tracer_started_.load(std::memory_order_acquire)) {
|
||||
tracer_ = new tracer::Tracer(
|
||||
session_id_,
|
||||
(GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->HasCallback()
|
||||
@@ -178,7 +178,7 @@ void Session::Start() {
|
||||
}
|
||||
|
||||
if (FindFilterWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION)) {
|
||||
if (!pc_sampler_started_.load(std::memory_order_release)) {
|
||||
if (!pc_sampler_started_.load(std::memory_order_acquire)) {
|
||||
pc_sampler_ = new pc_sampler::PCSampler(
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION))->GetBufferId(),
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION))->GetId(),
|
||||
@@ -189,7 +189,7 @@ void Session::Start() {
|
||||
}
|
||||
|
||||
if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER)) {
|
||||
if (!counters_sampler_started_.load(std::memory_order_release)) {
|
||||
if (!counters_sampler_started_.load(std::memory_order_acquire)) {
|
||||
counters_sampler_ = new CountersSampler(
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_SAMPLER))->GetBufferId(),
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_SAMPLER))->GetId(), session_id_);
|
||||
@@ -214,18 +214,18 @@ void Session::Terminate() {
|
||||
if (FindFilterWithKind(ROCPROFILER_API_TRACE)) {
|
||||
std::vector<rocprofiler_tracer_activity_domain_t> domains =
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetTraceData();
|
||||
if (tracer_started_.load(std::memory_order_release)) {
|
||||
if (tracer_started_.load(std::memory_order_acquire)) {
|
||||
tracer_->StopRoctracer();
|
||||
}
|
||||
}
|
||||
if (FindFilterWithKind(ROCPROFILER_PC_SAMPLING_COLLECTION)) {
|
||||
if (pc_sampler_started_.load(std::memory_order_release)) {
|
||||
if (pc_sampler_started_.load(std::memory_order_acquire)) {
|
||||
pc_sampler_->Stop();
|
||||
}
|
||||
}
|
||||
|
||||
if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER)) {
|
||||
if (counters_sampler_started_.load(std::memory_order_release)) {
|
||||
if (counters_sampler_started_.load(std::memory_order_acquire)) {
|
||||
counters_sampler_->Stop();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,7 +67,7 @@ std::mutex processQueueLock;
|
||||
|
||||
// rocprofiler_status_t SetDestBuffer(hsa_agent_t GPUNode, uint32_t size, uint32_t timeout) {
|
||||
// rocprofiler_status_t ret;
|
||||
// uint32_t idx = currIndex.load(std::memory_order_release);
|
||||
// uint32_t idx = currIndex.load(std::memory_order_acquire);
|
||||
// if (size) {
|
||||
// // Check if user buffer in using
|
||||
// if (spm_buffer_params[idx].addr != NULL) {
|
||||
@@ -97,7 +97,7 @@ std::mutex processQueueLock;
|
||||
// }
|
||||
// if (spm_buffer_params[idx].data_loss) std::cout << "Data Loss" << std::endl;
|
||||
// if (spm_buffer_params[idx].len) {
|
||||
// uint32_t pidx = preIndex.load(std::memory_order_release);
|
||||
// uint32_t pidx = preIndex.load(std::memory_order_acquire);
|
||||
// if (spm_buffer_params[idx].len == spm_buffer_params[pidx].size) {
|
||||
// std::cout << "Buffer completely filled with bytes" << spm_buffer_params[idx].len <<
|
||||
// std::endl; fd = fopen("SPM_rocprofiler_data.txt", "wb"); size_t retele =
|
||||
@@ -121,8 +121,8 @@ std::mutex processQueueLock;
|
||||
|
||||
// void spmBufferSetup(hsa_agent_t GPUNode) {
|
||||
// rocprofiler_status_t ret;
|
||||
// if (is_started.load(std::memory_order_release)) {
|
||||
// uint32_t idx = currIndex.load(std::memory_order_release);
|
||||
// if (is_started.load(std::memory_order_acquire)) {
|
||||
// uint32_t idx = currIndex.load(std::memory_order_acquire);
|
||||
// ret = SetDestBuffer(GPUNode, spm_buffer_params[idx].size, spm_buffer_params[idx].timeout);
|
||||
// if (ret != ROCPROFILER_STATUS_SUCCESS) {
|
||||
// std::cout << "Fail to set Dest Buf 2 "
|
||||
|
||||
@@ -46,14 +46,14 @@ Tracer::Tracer(rocprofiler_session_id_t session_id, rocprofiler_sync_callback_t
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
std::vector<rocprofiler_tracer_activity_domain_t> domains)
|
||||
: domains_(domains), callback_(callback), buffer_id_(buffer_id), session_id_(session_id) {
|
||||
assert(!is_active_.load(std::memory_order_release) && "Error: The tracer was initialized!");
|
||||
assert(!is_active_.load(std::memory_order_acquire) && "Error: The tracer was initialized!");
|
||||
std::lock_guard<std::mutex> lock(tracer_lock_);
|
||||
callback_data_ = api_callback_data_t{callback, session_id};
|
||||
is_active_.exchange(true, std::memory_order_release);
|
||||
}
|
||||
|
||||
void Tracer::StartRoctracer() {
|
||||
if (!roctracer_initiated_.load(std::memory_order_release)) {
|
||||
if (!roctracer_initiated_.load(std::memory_order_acquire)) {
|
||||
std::map<rocprofiler_tracer_activity_domain_t, is_filtered_domain_t> domains_filteration_map;
|
||||
// TODO(aelwazir): get filter property and parse it here
|
||||
for (auto& domain : domains_) {
|
||||
@@ -68,7 +68,7 @@ void Tracer::StartRoctracer() {
|
||||
}
|
||||
|
||||
void Tracer::StopRoctracer() {
|
||||
if (roctracer_initiated_.load(std::memory_order_release)) roctracer_stop();
|
||||
if (roctracer_initiated_.load(std::memory_order_acquire)) roctracer_stop();
|
||||
}
|
||||
|
||||
void Tracer::DisableRoctracer() {
|
||||
@@ -105,7 +105,7 @@ void Tracer::DisableRoctracer() {
|
||||
}
|
||||
|
||||
Tracer::~Tracer() {
|
||||
assert(is_active_.load(std::memory_order_release) && "Error: The tracer was not initialized!");
|
||||
assert(is_active_.load(std::memory_order_acquire) && "Error: The tracer was not initialized!");
|
||||
std::lock_guard<std::mutex> lock(tracer_lock_);
|
||||
|
||||
is_active_.exchange(false, std::memory_order_release);
|
||||
|
||||
@@ -328,7 +328,7 @@ att_parsed_input_t GetATTParams() {
|
||||
int rank = (comma < line.size() - 1) ? stoi(line.substr(comma + 1)) : 0;
|
||||
|
||||
if (MPI_RANK < 0 || rank == MPI_RANK) // Only add ID if rank matches the one in input.txt
|
||||
dispatch_ids.push_back(id);
|
||||
dispatch_ids.push_back(std::max(id-1,0)); // off by 1 in relation to kernel-trace
|
||||
continue;
|
||||
}
|
||||
// param_value is a number
|
||||
@@ -416,17 +416,59 @@ void finish() {
|
||||
}
|
||||
}
|
||||
|
||||
// Expands every "%q{NAME}" reference in `s` in place, replacing it with the
// value of the environment variable NAME (or with nothing if NAME is unset).
//
// A reference needs a non-empty NAME and a closing '}'; anything else is left
// untouched. Substituted text is never scanned again, so a variable whose
// value itself contains "%q{...}" cannot cause unbounded re-expansion.
//
// Returns true if at least one reference was substituted, false otherwise.
static bool env_var_search(std::string& s) {
  // Plain string scanning is used instead of std::regex: it has no dependence
  // on how a regex engine treats escapes such as "\q" or line terminators.
  const std::string open_token = "%q{";

  bool substituted = false;
  size_t limit = s.size();               // Text at or beyond `limit` is final
  size_t search_from = std::string::npos;  // Right-most start position still to try

  // Walk the references from right to left so earlier offsets stay valid
  // while the tail of the string is being rewritten.
  while (true) {
    const size_t open = s.rfind(open_token, search_from);
    if (open == std::string::npos) break;

    const size_t name_begin = open + open_token.size();
    const size_t close = s.find('}', name_begin);

    // Accept only a non-empty name that ends before already-substituted text.
    if (close != std::string::npos && close < limit && close > name_begin) {
      const std::string name = s.substr(name_begin, close - name_begin);
      const char* value = getenv(name.c_str());
      s.replace(open, close + 1 - open, value ? value : "");
      substituted = true;
      limit = open;
    }

    if (open == 0) break;
    search_from = open - 1;
  }

  return substituted;
}
|
||||
|
||||
static void env_var_replace(const char* env_name) {
|
||||
if (!env_name) return;
|
||||
const char* env = getenv(env_name);
|
||||
if (!env) return;
|
||||
|
||||
std::string new_env(env);
|
||||
if (env_var_search(new_env)) setenv(env_name, new_env.c_str(), 1);
|
||||
}
|
||||
|
||||
// load plugins
|
||||
void plugins_load() {
|
||||
// Load output plugin
|
||||
if (Dl_info dl_info; dladdr((void*)plugins_load, &dl_info) != 0) {
|
||||
const char* plugin_name = getenv("ROCPROFILER_PLUGIN_LIB");
|
||||
if (plugin_name == nullptr) {
|
||||
if (getenv("OUTPUT_PATH"))
|
||||
if (getenv("OUTPUT_PATH") || getenv("OUT_FILE_NAME"))
|
||||
plugin_name = "libfile_plugin.so";
|
||||
else
|
||||
plugin_name = "libcli_plugin.so";
|
||||
}
|
||||
env_var_replace("OUTPUT_PATH");
|
||||
env_var_replace("OUT_FILE_NAME");
|
||||
|
||||
std::string out_path = getenv("OUTPUT_PATH") ? getenv("OUTPUT_PATH") : "";
|
||||
|
||||
if (out_path.size()) {
|
||||
try {
|
||||
std::experimental::filesystem::create_directories(out_path);
|
||||
} catch (...) {}
|
||||
out_path = out_path + '/';
|
||||
}
|
||||
if (getenv("ROCPROFILER_COUNTERS")) {
|
||||
std::ofstream(out_path+"pmc.txt", std::ios::app)
|
||||
<< std::string(getenv("ROCPROFILER_COUNTERS")) << '\n';
|
||||
}
|
||||
|
||||
if (!plugin.emplace(fs::path(dl_info.dli_fname).replace_filename(plugin_name)).is_valid()) {
|
||||
plugin.reset();
|
||||
}
|
||||
|
||||
@@ -1213,7 +1213,7 @@ class VectorAddFilenameMPITest : public FilePluginTest {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
setenv("MPI_RANK", "7", true);
|
||||
RunApplication("hip_vectoradd", " --hip-activity -d /tmp/tests-v2/file/ -o test_%rank_");
|
||||
RunApplication("hip_vectoradd", " --hip-activity -d /tmp/tests-v2/file/ -o test_%q{MPI_RANK}_");
|
||||
}
|
||||
virtual void TearDown() {
|
||||
std::experimental::filesystem::remove_all("/tmp/tests-v2/file/");
|
||||
@@ -1239,7 +1239,7 @@ class VectorAddPerfettoMPITest : public PerfettoPluginTest {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
setenv("MPI_RANK", "7", true);
|
||||
RunApplication("hip_vectoradd", " -d /tmp/tests-v2/perfetto/ -o test_%rank_ --plugin perfetto");
|
||||
RunApplication("hip_vectoradd", " -d /tmp/tests-v2/perfetto/ -o test_%q{MPI_RANK}_ --plugin perfetto");
|
||||
}
|
||||
virtual void TearDown() {
|
||||
std::experimental::filesystem::remove_all("/tmp/tests-v2/perfetto/");
|
||||
@@ -1274,7 +1274,7 @@ class VectorAddCTFMPITest : public CTFPluginTest {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
setenv("MPI_RANK", "7", true);
|
||||
RunApplication("hip_vectoradd", " -d /tmp/tests-v2/ctf_%rank --plugin ctf");
|
||||
RunApplication("hip_vectoradd", " -d /tmp/tests-v2/ctf_%q{MPI_RANK} --plugin ctf");
|
||||
}
|
||||
virtual void TearDown() {
|
||||
std::experimental::filesystem::remove_all("/tmp/tests-v2/");
|
||||
|
||||
Reference in New Issue
Block a user