2
0
Ficheiros
rocm-systems/projects/rocprofiler/plugin/file/file.cpp
T
Ammar ELWazir 6eb06cf201 Pull from Github
Squashed commit of the following:

commit f029195705a15700380c6f832ba5d15d46fd6de7
Author: Jonathan R. Madsen <jrmadsen@users.noreply.github.com>
Date:   Thu Jul 13 14:38:56 2023 -0500

    Formatting workflows for source (clang-format) and cmake (cmake-format) (#4)

    * Add .cmake-format.yaml file

    * Add formatting workflow

    * provide base input for creating PR

    * Update scheme for extracting branch name

    - disable running formatting on push to amd-staging branch

    * patch .cmake-format.yaml for find_package signature

    - apparently cmake-format doesn't format the full signature of find_package

    * run formatting (clang-format v11) (#7)

    Co-authored-by: jrmadsen <jrmadsen@users.noreply.github.com>

    * run cmake formatting (cmake-format) (#6)

    Co-authored-by: jrmadsen <jrmadsen@users.noreply.github.com>

    ---------

    Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

commit bc4d135fdd8a1a9e51235f18a5d575fd2b3735e6
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Thu Jul 13 12:55:17 2023 -0500

    Removing Build cache for potential issues with auto-generated header files (#5)

    Change-Id: I9e2319f4335e2f88585ffa6fac2bd88a1c952e6e

commit ce86dea6a311d44d880fa684eb78f3329295e2a4
Author: Jonathan R. Madsen <jrmadsen@users.noreply.github.com>
Date:   Thu Jul 13 11:08:58 2023 -0500

    Fix decltype(<hsa-function>) function pointer usage (#3)

    - the following is done in several places:
        decltype(hsa_memory_allocate)* hsa_memory_allocate
    - above can cause compiler errors
    - replace decltype(<hsa-function>) with decltype(::<hsa-function>)
      - this ensures that the type within the decltype is recognized as the global scope HSA function, not the variable
    - in many places, the variable has a "_fn" suffix to prevent this issue but added '::' anyway for consistency

commit ac49fdd92a72e9c99394253a02da413a6c2e3b3a
Merge: a07946a 03a0855
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Wed Jul 12 11:36:24 2023 -0500

    Merge pull request #2 from ROCm-Developer-Tools/gerrit-amd-staging

    Pull from gerrit

commit 03a085588cffe863e8f466de67be1cfb205b675a
Merge: c26b32b a07946a
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Wed Jul 12 10:57:30 2023 -0500

    Merge branch 'amd-staging' into gerrit-amd-staging

commit a07946a5cd4c670c83c27ad1a076a9d4567ce6d7
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Wed Jul 12 15:46:04 2023 +0000

    Enabling Cached Builds

commit 525e494a7f13941077a8fd4ad6840904db4d27d4
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Wed Jul 12 04:53:54 2023 +0000

    Updating missed GPU Targets

commit 42c75862f628c9bee7cfb7dc04dff2619430efbc
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Wed Jul 12 04:43:02 2023 +0000

    Adding V1 Testing

commit 9d72fd4aee85e4b0c12e717060d2730fa5b73be1
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Wed Jul 12 03:34:31 2023 +0000

    Fixing Artifacts directory path

commit f4000cc558b3b2e4676f7994f7ce8c8e6f94518e
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Wed Jul 12 03:27:26 2023 +0000

    Fixing CMake for test build job

commit 2ce8115d4c33948c3c8f957f545a95a04e1d6cd2
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Wed Jul 12 03:16:18 2023 +0000

    Fixing Ubuntu CMake for ubuntu test build

commit 6d0ed439191be900748d0c025157f9d689a73ec7
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Wed Jul 12 01:28:41 2023 +0000

    Removing Navi21

commit e349a7642e5ae5eb03ab9fcd0a0f74f09f78cab5
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Wed Jul 12 01:14:14 2023 +0000

    Removing Navi21

commit fefd02fe68d2a4bca7ec2e381960ad004ee9fc5b
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Wed Jul 12 00:42:48 2023 +0000

    Fixing CMake Job

commit 2ea46abf7bf92643efa8c549fa70346ffbd79d65
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Wed Jul 12 00:35:13 2023 +0000

    Fixing CMake Job

commit d99d681ed1999c5fcf291dc678b11a77205fb0f3
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Wed Jul 12 00:32:13 2023 +0000

    Fixing Pull Latest Dockers and CMake Jobs

commit dfc4498072d13b4a1df3a63047d34c682c3d9a29
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Tue Jul 11 23:54:21 2023 +0000

    Fixing CMake job

commit 919efe04de707f7c702031be15c3e2c5f8442cbb
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Tue Jul 11 23:52:13 2023 +0000

    Adding Pull Last dockers job

commit be1b1256e8b0e05308e8f7e7e69bee3acca55281
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Tue Jul 11 18:25:40 2023 -0500

    Update cmake.yml

commit 212299fa4355ae6ec18f9aaacbb79c51ea6c6f97
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Tue Jul 11 18:23:35 2023 -0500

    Update cmake.yml

commit 7c2c1327086a61466cc6cac39f70865c051a8bc7
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Tue Jul 11 18:18:53 2023 -0500

    Update cmake.yml

commit 191b5ce007e612e814c1d7a3afb4ad398f3852e1
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Tue Jul 11 16:03:22 2023 -0500

    Update cmake.yml

commit 8824113d95f3e13c7ce4d0af8e0d9d8f522a6c4a
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Tue Jul 11 16:28:09 2023 +0000

    Fixing Pull from Gerrit job name

    Change-Id: I9e7ed9a27a13ca49d62c93bdadb30f0057e4d385

commit cc3d5e4b02ffb439e8cc2b3efa53527c376f9982
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Tue Jul 11 16:21:43 2023 +0000

    Adding Staging sync job

    Change-Id: I0551f43878b0678ce4b3e74e27d62357cf95ad95

commit b9be2eee71380a2e6dd34d520e92d0c4209277a0
Author: Ammar ELWazir <Ammar.ELWazir@amd.com>
Date:   Tue Jul 11 15:57:11 2023 +0000

    Fixing build.sh

    Change-Id: Ia987b0244f0875370d5fe69907b3f5e9cea914de

commit 9eee33a95a1abd656a7ac5ca10a9f245e9825431
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 21:39:46 2023 -0500

    Update cmake.yml

commit 7093b85a78497140e8b52632ca2a002bdaeacd62
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 21:33:29 2023 -0500

    Update cmake.yml

commit f54697172c72a67740f9fdfa0c217b6ea6931576
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 21:01:26 2023 -0500

    Update cmake.yml

commit 1b6620e16f8940386b0f4f04e69e2410d21c0e26
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 20:21:02 2023 -0500

    Update cmake.yml

commit a94bec740c6b42c4b79c87bca20fa87b99bf060d
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 19:46:35 2023 -0500

    Update cmake.yml

commit 85d6b29d4375a69d575c18ece8542c50f2ddfcc3
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 19:34:39 2023 -0500

    Update cmake.yml

commit 8c004887cf1435f1a6214c3d2455299a8a27bd4c
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 19:31:17 2023 -0500

    Update cmake.yml

commit a14a9168e17d9348a53c6e9c9a47ba1edb4c4509
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 19:25:46 2023 -0500

    Update cmake.yml

commit 000f2f40b84e6a2f7d4becdbf5aed01436ca4c83
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 19:08:18 2023 -0500

    Update cmake.yml

commit a28a53d56731cad848fa9133d1c4dbaa8fc7afa7
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 19:03:39 2023 -0500

    Update cmake.yml

commit a6a2db01027f0b01fdfbb5997ddb772c7f51b649
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 18:21:53 2023 -0500

    Update cmake.yml

commit 118ef2a88b2d44e3207c31c343da3e5e5ec6f176
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 17:55:57 2023 -0500

    Update cmake.yml

commit 03c4c232396440cd0be6d2dd7baf4ceea1c2589d
Author: Ammar ELWazir <aelwazir@amd.com>
Date:   Mon Jul 10 17:48:49 2023 -0500

    Create cmake.yml

Change-Id: I77992f15694e77cbae49c56f9ff02f4f9079235d


[ROCm/rocprofiler commit: d4a33cf33a]
2023-07-13 20:54:30 -04:00

518 linhas
20 KiB
C++

/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <cxxabi.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <experimental/filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <mutex>
#include <optional>
#include <ostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

#include <hsa/hsa.h>

#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "../utils.h"
namespace fs = std::experimental::filesystem;
namespace {
// Parses the counter names listed in the ROCPROFILER_COUNTERS environment
// variable.
//
// The value is treated as one space-separated line whose first token is a
// prefix (for example "pmc:") that is not itself a counter. Returns an empty
// list when the variable is unset, when the line is commented out with '#',
// when it does not contain "pmc", or when nothing follows the first token.
// Empty tokens (repeated spaces) and a lone ":" token are ignored.
std::vector<std::string> GetCounterNames() {
  std::vector<std::string> counters;
  const char* line_c_str = getenv("ROCPROFILER_COUNTERS");
  if (line_c_str == nullptr) return counters;

  const std::string line = line_c_str;

  // Skip blank and commented lines.
  const std::string::size_type first_char = line.find_first_not_of(" \t");
  if (first_char == std::string::npos) return counters;
  if (line[first_char] == '#') return counters;
  if (line.find("pmc") == std::string::npos) return counters;

  const char separator = ' ';

  // The first token is the prefix; counters start after the separator that
  // ends it. Searching from first_char keeps leading whitespace from turning
  // the prefix itself into a counter name.
  std::string::size_type token_begin = line.find(separator, first_char);
  if (token_begin == std::string::npos) return counters;  // prefix only
  ++token_begin;

  while (token_begin <= line.size()) {
    std::string::size_type token_end = line.find(separator, token_begin);
    if (token_end == std::string::npos) token_end = line.size();
    const std::string token = line.substr(token_begin, token_end - token_begin);
    if (!token.empty() && token != ":") counters.push_back(token);
    token_begin = token_end + 1;
  }
  return counters;
}
// File-name suffix taken from the OUT_FILE_NAME environment variable.
// output_file_t::open() reassigns it on every call: first from the environment
// and then, when a file is about to be created, with the result of
// replace_MPI_macros(). It is shared by all output files in this plugin.
static std::string output_file_name;
class file_plugin_t {
private:
// Category of record being written; get_output_file() uses it (together with
// the tracer domain for TRACER) to select the destination file.
enum class output_type_t { COUNTER, TRACER, PC_SAMPLING };
class output_file_t {
public:
output_file_t(std::string name, bool bOpenOnInit = false) : name_(std::move(name)) {
if (bOpenOnInit) open();
}
// Returns a copy of the base name given at construction.
std::string name() const { return name_; }
// Inserts `value` into the output, calling open() first when the file stream
// is not open. The returned reference is the underlying std::ostream, so the
// remaining insertions of a chained expression go directly to that stream and
// do not pass through this operator again.
// NOTE(review): in stdout mode open() only redirects the stream buffer, so
// is_open() presumably stays false and open() runs again on every insertion
// that starts with an output_file_t -- confirm before relying on it.
template <typename T> std::ostream& operator<<(T&& value) {
if (!is_open()) open();
return stream_ << std::forward<T>(value);
}
// Same as above for stream manipulators such as std::endl.
std::ostream& operator<<(std::ostream& (*func)(std::ostream&)) {
if (!is_open()) open();
return stream_ << func;
}
void open() {
// If the stream is already in the failed state, there's no need to try
// to open the file.
if (fail()) return;
const char* output_dir = getenv("OUTPUT_PATH");
output_file_name = getenv("OUT_FILE_NAME") ? std::string(getenv("OUT_FILE_NAME")) : "";
if (output_dir == nullptr && getenv("OUT_FILE_NAME") == nullptr) {
stream_.copyfmt(std::cout);
stream_.clear(std::cout.rdstate());
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
bPrintToStdout = true;
return;
}
if (output_dir == nullptr) output_dir = ".";
fs::path output_prefix(output_dir);
if (!fs::is_directory(fs::status(output_prefix))) {
if (!stream_.fail()) rocprofiler::warning("Cannot open output directory '%s'", output_dir);
stream_.setstate(std::ios_base::failbit);
return;
}
output_file_name = replace_MPI_macros(output_file_name);
std::stringstream ss;
ss << name_ << "_" << ((output_file_name.empty()) ? std::to_string(GetPid()) : "")
<< output_file_name << ".csv";
std::cout << "Results File: " << output_prefix / ss.str() << std::endl;
stream_.open(output_prefix / ss.str());
}
// True when the underlying file stream reports an open file.
bool is_open() const { return stream_.is_open(); }
// True when the underlying stream is in the failed state (for example after
// open() could not find the output directory).
bool fail() const { return stream_.fail(); }
// True once open() has redirected this output to std::cout.
bool isStdOut() const { return bPrintToStdout; }
// Returns `output_file_name` with every "%rank" macro replaced by the MPI rank
// of this process.
//
// The rank is read from the first environment variable that is set, checked in
// this fixed order: MPI_RANK, OMPI_COMM_WORLD_RANK, MV2_COMM_WORLD_RANK. The
// value is parsed with atoi(), so "007" becomes "7" and a non-numeric value
// becomes "0". The name is returned unchanged when it contains no "%rank" or
// when none of the variables is set.
std::string replace_MPI_macros(std::string output_file_name) {
  static const char* const kRankEnvVars[] = {"MPI_RANK", "OMPI_COMM_WORLD_RANK",
                                             "MV2_COMM_WORLD_RANK"};
  const std::string macro = "%rank";

  if (output_file_name.find(macro) == std::string::npos) return output_file_name;

  // A fixed lookup order keeps the result deterministic when several MPI
  // runtimes export a rank variable at the same time.
  const char* rank_env = nullptr;
  for (const char* envvar : kRankEnvVars) {
    rank_env = getenv(envvar);
    if (rank_env != nullptr) break;
  }
  if (rank_env == nullptr) return output_file_name;

  const std::string rank = std::to_string(atoi(rank_env));
  for (std::string::size_type pos = output_file_name.find(macro); pos != std::string::npos;
       pos = output_file_name.find(macro, pos + rank.size())) {
    output_file_name.replace(pos, macro.size(), rank);
  }
  return output_file_name;
}
private:
// Base name of the output; open() uses it as the file-name prefix.
const std::string name_;
// Stream all insertions go to; backed by a file or, in stdout mode, by
// std::cout's buffer.
std::ofstream stream_;
// Set by open() when the output was redirected to std::cout.
bool bPrintToStdout = false;
};
// Maps a record category to the output file it is written to.
//
// COUNTER and PC_SAMPLING each have one file and ignore `domain`. TRACER
// records are routed by activity domain. An unsupported tracer domain trips
// the assertion; when assertions are compiled out the function returns
// nullptr instead.
output_file_t* get_output_file(output_type_t output_type, uint32_t domain = 0) {
  if (output_type == output_type_t::COUNTER) return &output_file_;
  if (output_type == output_type_t::PC_SAMPLING) return &pc_sample_file_;
  if (output_type != output_type_t::TRACER) return nullptr;

  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      return &roctx_file_;
    case ACTIVITY_DOMAIN_HSA_API:
      return &hsa_api_file_;
    case ACTIVITY_DOMAIN_HIP_API:
      return &hip_api_file_;
    case ACTIVITY_DOMAIN_HIP_OPS:
      return &hip_activity_file_;
    case ACTIVITY_DOMAIN_HSA_OPS:
      return &hsa_async_copy_file_;
    default:
      assert(!"domain/op not supported!");
      return nullptr;
  }
}
public:
// Builds the plugin. The counter names are read from the environment only
// when the caller passes a non-null `data` pointer; the pointer itself is not
// otherwise used here. The plugin is marked valid unconditionally.
file_plugin_t(void* data) {
  if (data != nullptr) {
    counter_names_ = GetCounterNames();
  }
  valid_ = true;
}
// Writes the CSV column header of the output selected by `domain` / `type`,
// at most once per output.
//
// The five tracer domains are matched on `domain` regardless of `type`. Any
// other domain falls back to `type`: COUNTER writes the kernel-dispatch header
// followed by one column per configured counter, PC_SAMPLING writes the
// PC-sample header, and every other combination is a no-op. Each header is
// followed by an empty line, and a per-output flag records that it was written.
void WriteHeader(output_type_t type, rocprofiler_tracer_activity_domain_t domain) {
  std::atomic<bool>* header_written = nullptr;
  output_type_t file_kind = output_type_t::TRACER;
  const char* columns = nullptr;
  bool with_counter_columns = false;

  // Step 1: pick the flag, destination kind and column list.
  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_API:
      header_written = &hsa_api_header_written_;
      columns = "Domain,Function,Start_Timestamp,End_Timestamp,Correlation_ID";
      break;
    case ACTIVITY_DOMAIN_HIP_API:
      header_written = &hip_api_header_written_;
      columns = "Domain,Function,Start_Timestamp,End_Timestamp,Correlation_ID";
      break;
    case ACTIVITY_DOMAIN_ROCTX:
      header_written = &roctx_header_written_;
      columns = "Domain,ROCTX_ID,Message,Timestamp";
      break;
    case ACTIVITY_DOMAIN_HSA_OPS:
      header_written = &hsa_async_copy_header_written_;
      columns = "Domain,Operation,Start_Timestamp,Stop_Timestamp,Correlation_ID";
      break;
    case ACTIVITY_DOMAIN_HIP_OPS:
      header_written = &hip_activity_header_written_;
      columns =
          "Domain,Operation,Kernel_Name,Start_Timestamp,Stop_Timestamp,"
          "Correlation_ID";
      break;
    default:
      if (type == output_type_t::COUNTER) {
        header_written = &kernel_dispatches_header_written_;
        file_kind = output_type_t::COUNTER;
        columns =
            "Dispatch_ID,GPU_ID,Queue_ID,Queue_Index,PID,TID,GRD,WGR,LDS,SCR,Arch_VGPR,"
            "ACCUM_VGPR,SGPR,Wave_Size,SIG,OBJ,Kernel_Name,Start_Timestamp,End_Timestamp,"
            "Correlation_ID";
        with_counter_columns = true;
      } else if (type == output_type_t::PC_SAMPLING) {
        header_written = &pc_sample_header_written_;
        file_kind = output_type_t::PC_SAMPLING;
        columns = "Dispatch_ID,Timestamp,GPU_ID,PC_Sample,Shader_Engines";
      } else {
        return;  // nothing to write for this combination
      }
      break;
  }

  // Step 2: write the header unless it is already there.
  if (header_written->load(std::memory_order_relaxed)) return;

  output_file_t* output_file = get_output_file(file_kind, domain);
  *output_file << columns;
  if (with_counter_columns) {
    for (const std::string& counter_name : counter_names_) {
      *output_file << "," << counter_name;
    }
  }
  *output_file << std::endl;
  *output_file << std::endl;
  header_written->exchange(true, std::memory_order_release);
}
// Held by FlushTracerRecord() and FlushProfilerRecord() for the whole time
// they write a record.
std::mutex writing_lock;
// Returns the label written in the "Domain" column for `domain`, or an empty
// string for a domain without a label.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  const char* label = "";
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      label = "ROCTX_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HIP_API:
      label = "HIP_API_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HIP_OPS:
      label = "HIP_OPS_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      label = "HSA_API_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HSA_OPS:
      label = "HSA_OPS_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HSA_EVT:
      label = "HSA_EVT_DOMAIN";
      break;
    default:
      break;
  }
  return label;
}
// Writes one tracer record as a CSV row to the trace file of its domain,
// writing that file's header first if needed. Holds writing_lock throughout.
//
// Row layout:
//   ROCTX:  domain[,external id],"message" (or an empty field),begin timestamp
//   others: domain,"operation"[,"demangled name"],begin,end,correlation id
//           (HIP_OPS rows without a name get an empty name field)
//
// Records outside the ROCTX domain whose end timestamp is not positive are
// skipped, and so are records whose domain has no output file. `session_id`
// and `buffer_id` are accepted for interface compatibility and are not used.
void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
                       rocprofiler_session_id_t session_id,
                       rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) {
  std::lock_guard<std::mutex> lock(writing_lock);
  const bool is_roctx = (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX);
  if (tracer_record.timestamps.end.value <= 0 && !is_roctx) return;

  // get_output_file() returns nullptr for an unsupported domain when
  // assertions are compiled out; bail out instead of dereferencing it.
  output_file_t* output_file = get_output_file(output_type_t::TRACER, tracer_record.domain);
  if (output_file == nullptr) return;

  WriteHeader(output_type_t::TRACER, tracer_record.domain);

  std::string roctx_message;
  if (is_roctx && tracer_record.name) {
    roctx_message = tracer_record.name;
  }

  const char* operation_name_c = nullptr;
  // ROCTX domain Operation ID doesn't have a name
  // It depends on the user input of the roctx functions.
  // ROCTX message is the tracer_record.name
  if (!is_roctx) {
    CHECK_ROCPROFILER(rocprofiler_query_tracer_operation_name(
        tracer_record.domain, tracer_record.operation_id, &operation_name_c));
  }

  *output_file << GetDomainName(tracer_record.domain);
  if (is_roctx && tracer_record.external_id.id >= 0)
    *output_file << "," << tracer_record.external_id.id;
  if (is_roctx) {
    // NOTE(review): a one-character message is written as an empty field
    // because of the "> 1" test; kept as is -- confirm whether that is wanted.
    if (roctx_message.size() > 1)
      *output_file << ",\"" << roctx_message << "\"";
    else
      *output_file << ",";
  }
  if (operation_name_c) *output_file << ",\"" << operation_name_c << "\"";
  if (tracer_record.name && !is_roctx) {
    *output_file << ",\"" << rocprofiler::cxx_demangle(tracer_record.name) << "\"";
  } else if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS) {
    *output_file << ",";
  }
  if (!is_roctx) {
    *output_file << "," << tracer_record.timestamps.begin.value << ","
                 << tracer_record.timestamps.end.value;
    *output_file << "," << tracer_record.correlation_id.value;
  } else {
    *output_file << "," << tracer_record.timestamps.begin.value;
  }
  *output_file << std::endl;
}
void FlushProfilerRecord(const rocprofiler_record_profiler_t* profiler_record,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
std::lock_guard<std::mutex> lock(writing_lock);
WriteHeader(output_type_t::COUNTER, ACTIVITY_DOMAIN_NUMBER);
size_t name_length = 0;
output_file_t* output_file{nullptr};
output_file = get_output_file(output_type_t::COUNTER);
CHECK_ROCPROFILER(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
profiler_record->kernel_id, &name_length));
// Taken from rocprofiler: The size hasn't changed in recent past
static const uint32_t lds_block_size = 128 * 4;
const char* kernel_name_c = nullptr;
if (name_length > 1) {
CHECK_ROCPROFILER(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME,
profiler_record->kernel_id, &kernel_name_c));
}
*output_file << std::to_string(profiler_record->header.id.handle) << ","
<< std::to_string(profiler_record->gpu_id.handle) << ","
<< std::to_string(profiler_record->queue_id.handle) << ","
<< std::to_string(profiler_record->queue_idx.value) << ","
<< std::to_string(GetPid()) << ","
<< std::to_string(profiler_record->thread_id.value);
*output_file << "," << std::to_string(profiler_record->kernel_properties.grid_size) << ","
<< std::to_string(profiler_record->kernel_properties.workgroup_size) << ","
<< std::to_string(
((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) &
~(lds_block_size - 1)))
<< "," << std::to_string(profiler_record->kernel_properties.scratch_size) << ","
<< std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << ","
<< std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << ","
<< std::to_string(profiler_record->kernel_properties.sgpr_count) << ","
<< std::to_string(profiler_record->kernel_properties.wave_size) << ","
<< std::to_string(profiler_record->kernel_properties.signal_handle);
std::string kernel_name = "";
if (name_length > 1) {
kernel_name = rocprofiler::cxx_demangle(kernel_name_c);
std::string key = "\"";
std::size_t found = kernel_name.rfind(key);
while (found != std::string::npos) {
kernel_name.replace(found, key.length(), "'");
found = kernel_name.rfind(key, found - 1);
}
}
*output_file << "," << std::to_string(profiler_record->kernel_id.handle) << ",\"" << kernel_name
<< "\"," << std::to_string(profiler_record->timestamps.begin.value) << ","
<< std::to_string(profiler_record->timestamps.end.value) << ","
<< std::to_string(profiler_record->correlation_id.value);
// For Counters
if (profiler_record->counters) {
for (uint64_t i = 0; i < profiler_record->counters_count.value; i++) {
if (profiler_record->counters[i].counter_handler.handle > 0) {
*output_file << "," << std::to_string(profiler_record->counters[i].value.value);
}
}
}
*output_file << '\n';
if (kernel_name_c) {
free(const_cast<char*>(kernel_name_c));
}
}
void FlushPCSamplingRecord(const rocprofiler_record_pc_sample_t* pc_sampling_record) {
WriteHeader(output_type_t::PC_SAMPLING, ACTIVITY_DOMAIN_NUMBER);
output_file_t* output_file{nullptr};
output_file = get_output_file(output_type_t::PC_SAMPLING);
const auto& sample = pc_sampling_record->pc_sample;
*output_file << sample.dispatch_id.value << "," << sample.timestamp.value << ","
<< sample.gpu_id.handle << "," << std::hex << std::showbase << sample.pc << ","
<< sample.se << std::endl;
}
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* end,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
while (begin < end) {
if (!begin) return 0;
switch (begin->kind) {
case ROCPROFILER_PROFILER_RECORD: {
const rocprofiler_record_profiler_t* profiler_record =
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin);
FlushProfilerRecord(profiler_record, session_id, buffer_id);
break;
}
case ROCPROFILER_TRACER_RECORD: {
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
FlushTracerRecord(*tracer_record, session_id, buffer_id);
break;
}
case ROCPROFILER_ATT_TRACER_RECORD: {
break;
}
case ROCPROFILER_PC_SAMPLING_RECORD: {
const rocprofiler_record_pc_sample_t* pc_sampling_record =
reinterpret_cast<const rocprofiler_record_pc_sample_t*>(begin);
FlushPCSamplingRecord(pc_sampling_record);
break;
}
default:
break;
}
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
}
return 0;
}
// True once the constructor has finished setting the plugin up.
bool is_valid() const { return valid_; }
private:
// Set to true at the end of the constructor.
bool valid_{false};
// Counter names from GetCounterNames(); appended as extra columns to the
// kernel-dispatch header.
std::vector<std::string> counter_names_;
// One flag per output, set by WriteHeader() after that output's header has
// been written.
std::atomic<bool> roctx_header_written_{false}, hsa_api_header_written_{false},
hip_api_header_written_{false}, hip_activity_header_written_{false},
hsa_async_copy_header_written_{false}, pc_sample_header_written_{false},
kernel_dispatches_header_written_{false};
// One output per tracer domain, plus the PC-sample output and the counter
// results output; the string is the file-name prefix used by open().
output_file_t roctx_file_{"roctx_trace"}, hsa_api_file_{"hsa_api_trace"},
hip_api_file_{"hip_api_trace"}, hip_activity_file_{"hcc_ops_trace"},
hsa_async_copy_file_{"async_copy_trace"}, pc_sample_file_{"pcs_trace"},
output_file_{"results"};
};
// The single plugin instance: created by rocprofiler_plugin_initialize(),
// destroyed by rocprofiler_plugin_finalize(), and null in between.
file_plugin_t* file_plugin = nullptr;
} // namespace
// Plugin entry point: creates the file plugin instance.
//
// Returns 0 on success. Returns -1 when the caller's major version differs
// from the one this plugin was built against, when its minor version is older,
// when the plugin is already initialized, or when the new instance reports
// that it is not valid (in which case it is destroyed again).
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
                                                     uint32_t rocprofiler_minor_version,
                                                     void* data) {
  const bool version_ok = rocprofiler_major_version == ROCPROFILER_VERSION_MAJOR &&
                          rocprofiler_minor_version >= ROCPROFILER_VERSION_MINOR;
  if (!version_ok) return -1;

  if (file_plugin != nullptr) return -1;

  file_plugin = new file_plugin_t(data);
  if (!file_plugin->is_valid()) {
    // The plugin failed to initialize, destroy it and return an error.
    delete file_plugin;
    file_plugin = nullptr;
    return -1;
  }
  return 0;
}
// Plugin exit point: destroys the plugin instance if there is one and resets
// the global pointer. Does nothing when the plugin was never initialized.
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
  if (file_plugin != nullptr) {
    delete file_plugin;
    file_plugin = nullptr;
  }
}
// Forwards a range of buffered records to the plugin instance.
// Returns -1 when the plugin is missing or not valid, otherwise the result of
// file_plugin_t::WriteBufferRecords().
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  const bool usable = (file_plugin != nullptr) && file_plugin->is_valid();
  if (!usable) {
    return -1;
  }
  return file_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
}
// Writes a single tracer record through the plugin instance, using session id
// 0 and buffer id 0.
// Returns -1 when the plugin is missing or not valid. Records whose header id
// handle is 0 are ignored; in that case, and after a write, the result is 0.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record) {
  if (file_plugin == nullptr || !file_plugin->is_valid()) {
    return -1;
  }
  if (record.header.id.handle != 0) {
    file_plugin->FlushTracerRecord(record, rocprofiler_session_id_t{0},
                                   rocprofiler_buffer_id_t{0});
  }
  return 0;
}