Dateien
rocm-systems/projects/rocprofiler/plugin/ctf/plugin.cpp
T
Ammar ELWazir de4abd0d0f Adding rocprofilerv2
Change-Id: Ic0cc280ba207d2b8f6ccae1cd4ac3184152fc1ad


[ROCm/rocprofiler commit: 8032adb64f]
2023-03-09 13:20:33 +00:00

870 Zeilen
31 KiB
C++

/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <cassert>
#include <cstdlib>
#include <sstream>
#include <stdexcept>
#include <iostream>
#include <utility>
#include <string>
#include <memory>
#include <limits>
#include <fstream>
#include <experimental/filesystem>
#include <time.h>
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include "hsa_prof_str.h"
#include <hip/hip_runtime.h>
#include <hip/amd_detail/hip_prof_str.h>
#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "../utils.h"
#include "barectf.h"
#include "barectf_event_record.h"
#include "barectf_tracer.h"
#include "plugin.h"
namespace fs = std::experimental::filesystem;
namespace rocm_ctf {
namespace {
// Abstract tracer event record using the barectf context type `CtxT`.
template <typename CtxT> class TracerEventRecord : public BarectfEventRecord<CtxT> {
protected:
explicit TracerEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
: BarectfEventRecord<CtxT>{clock_val},
op_{record.operation_id.id},
thread_id_{record.thread_id.value},
queue_id_{record.queue_id.handle},
agent_id_{record.agent_id.handle},
correlation_id_{record.correlation_id.value} {}
std::uint32_t GetOp() const noexcept { return op_; }
std::uint32_t GetThreadId() const noexcept { return thread_id_; }
std::uint64_t GetQueueId() const noexcept { return queue_id_; }
std::uint64_t GetAgentId() const noexcept { return agent_id_; }
std::uint64_t GetCorrelationId() const noexcept { return correlation_id_; }
private:
std::uint32_t op_;
std::uint32_t thread_id_;
std::uint64_t queue_id_;
std::uint64_t agent_id_;
std::uint64_t correlation_id_;
};
// Returns the beginning clock value of the tracer or profiler record
// `record`.
template <typename RecordT> std::uint64_t GetRecordBeginClockVal(const RecordT& record) {
return record.timestamps.begin.value;
}
// Returns the end clock value of the tracer or profiler record
// `record`.
template <typename RecordT> std::uint64_t GetRecordEndClockVal(const RecordT& record) {
return record.timestamps.end.value;
}
// Queries allocated string data using the size query function
// `query_size_func` and the data query function `query_data_func`,
// returning the corresponding string and freeing temporary allocated
// memory.
//
// Returns an empty string if anything goes wrong.
template <typename QuerySizeFuncT, typename QueryDataFuncT>
std::string QueryAllocStr(QuerySizeFuncT&& query_size_func, QueryDataFuncT&& query_data_func) {
// Query size first.
std::size_t size = 0;
[[maybe_unused]] auto ret = query_size_func(&size);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query size");
if (size == 0) {
// No size: return empty string.
return {};
}
// Query data (allocated by query_data_func()).
char* alloc_str = nullptr;
ret = query_data_func(&alloc_str);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query data");
if (!alloc_str) {
// No data: return empty string.
return {};
}
// Allocate return value.
std::string str_ret{alloc_str};
// Free allocated data.
std::free(alloc_str);
// Return string object.
return str_ret;
}
// rocTX event record.
class RocTxEventRecord final : public TracerEventRecord<barectf_roctx_ctx> {
public:
explicit RocTxEventRecord(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: TracerEventRecord<barectf_roctx_ctx>{record, GetRecordBeginClockVal(record)},
id_{QueryId(record, session_id)},
msg_{QueryMsg(record, session_id)} {}
void Write(barectf_roctx_ctx& barectf_ctx) const override {
barectf_roctx_trace_roctx(&barectf_ctx, GetThreadId(), id_, msg_.c_str());
}
private:
// Queries and returns the rocTX message of the record `record` and
// session ID `session_id`.
//
// Returns an empty string if not available.
static std::string QueryMsg(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id) {
// Query size first.
std::size_t msg_size = 0;
[[maybe_unused]] auto ret = rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id,
&msg_size);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message size");
if (msg_size == 0) {
// No size: return empty string.
return {};
}
// Query data (borrowed from the record: no need to free).
char* msg = nullptr;
ret = rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id, &msg);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message");
if (!msg) {
// No data: return empty string.
return {};
}
return rocmtools::cxx_demangle(msg);
}
// Queries and returns the rocTX ID of the record `record` and the
// session ID `session_id`.
//
// Returns 0 if anything goes wrong.
static std::uint64_t QueryId(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id) {
try {
return std::stoull(QueryAllocStr(
[&record, session_id](const auto size) {
return rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, size);
},
[&record, session_id](const auto str) {
return rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, str);
}));
} catch (...) {
return 0;
}
}
std::uint64_t id_;
std::string msg_;
};
// Abstract HSA API event record.
class HsaApiEventRecord : public TracerEventRecord<barectf_hsa_api_ctx> {
protected:
explicit HsaApiEventRecord(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id, const std::uint64_t clock_val)
: TracerEventRecord<barectf_hsa_api_ctx>{record, clock_val},
api_data_{QueryApiData(record, session_id)} {}
const hsa_api_data_t& GetApiData() const noexcept { return api_data_; }
private:
// Queries and returns the API data of the record `record` and session
// ID `session_id`.
static const hsa_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id) {
// Query size first (only for assertions).
[[maybe_unused]] std::size_t size = 0;
[[maybe_unused]] auto ret = rocprofiler_query_hsa_tracer_api_data_info_size(
session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &size);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data size");
assert(size > 0);
// Query data (borrowed from the record).
char* data = nullptr;
ret = rocprofiler_query_hsa_tracer_api_data_info(
session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &data);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data");
assert(data);
// Reinterpret as an HSA API data pointer.
return *reinterpret_cast<const hsa_api_data_t*>(data);
}
hsa_api_data_t api_data_;
};
// HSA API event record (beginning).
class HsaApiEventRecordBegin final : public HsaApiEventRecord {
public:
explicit HsaApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HsaApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}
void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hsa_begin.cpp.i"
}
};
// HSA API event record (end).
class HsaApiEventRecordEnd final : public HsaApiEventRecord {
public:
explicit HsaApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HsaApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}
void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hsa_end.cpp.i"
}
};
// Abstract HIP API event record.
class HipApiEventRecord : public TracerEventRecord<barectf_hip_api_ctx> {
protected:
explicit HipApiEventRecord(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id, const std::uint64_t clock_val)
: TracerEventRecord<barectf_hip_api_ctx>{record, clock_val},
api_data_{QueryApiData(record, session_id)},
kernel_name_{QueryKernelName(record, session_id)} {}
const hip_api_data_t& GetApiData() const noexcept { return api_data_; }
const std::string& GetKernelName() const noexcept { return kernel_name_; }
private:
// Queries and returns the API data of the record `record` and session
// ID `session_id`.
static const hip_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id) {
// Query size first (only for assertions).
[[maybe_unused]] std::size_t size = 0;
[[maybe_unused]] auto ret = rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &size);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data size");
assert(size > 0);
// Query data (borrowed from the record).
char* data = nullptr;
ret = rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &data);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data");
assert(data);
// Reinterpret as an HIP API data pointer.
return *reinterpret_cast<const hip_api_data_t*>(data);
}
// Queries and returns the kernel name of the record `record` and
// session ID `session_id`.
//
// Returns an empty string if not available.
static std::string QueryKernelName(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id) {
const auto kernel_name = QueryAllocStr(
[&record, session_id](const auto size) {
return rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_KERNEL_NAME, record.api_data_handle, record.operation_id,
size);
},
[&record, session_id](const auto str) {
return rocprofiler_query_hip_tracer_api_data_info(session_id, ROCPROFILER_HIP_KERNEL_NAME,
record.api_data_handle,
record.operation_id, str);
});
if (kernel_name.size() > 1) {
// Return demangled version.
return rocmtools::cxx_demangle(kernel_name);
}
return kernel_name;
}
hip_api_data_t api_data_;
std::string kernel_name_;
};
// HIP API event record (beginning).
class HipApiEventRecordBegin final : public HipApiEventRecord {
public:
explicit HipApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HipApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}
void Write(barectf_hip_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hip_begin.cpp.i"
}
};
// HIP API event record (end).
class HipApiEventRecordEnd final : public HipApiEventRecord {
public:
explicit HipApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HipApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}
void Write(barectf_hip_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hip_end.cpp.i"
}
};
// HSA API handle type event record.
class HsaHandleTypeEventRecord final : public BarectfEventRecord<barectf_hsa_handles_ctx> {
public:
enum class Type {
CPU = 0,
GPU = 1,
};
explicit HsaHandleTypeEventRecord(const std::uint64_t handle, const Type type)
: BarectfEventRecord<barectf_hsa_handles_ctx>{0}, handle_{handle}, type_{type} {}
void Write(barectf_hsa_handles_ctx& barectf_ctx) const override {
barectf_hsa_handles_trace_hsa_handle_type(&barectf_ctx, handle_,
static_cast<std::uint8_t>(type_));
}
private:
std::uint64_t handle_;
Type type_;
};
// Abstract API operation event record.
class ApiOpEventRecord : public TracerEventRecord<barectf_api_ops_ctx> {
protected:
explicit ApiOpEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
: TracerEventRecord<barectf_api_ops_ctx>{record, clock_val} {}
};
// HSA API operation event record (beginning).
class HsaOpEventRecordBegin final : public ApiOpEventRecord {
public:
explicit HsaOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
: ApiOpEventRecord{record, GetRecordBeginClockVal(record)} {}
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
barectf_api_ops_trace_hsa_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
GetCorrelationId());
}
};
// HSA API operation event record (end).
class HsaOpEventRecordEnd final : public ApiOpEventRecord {
public:
explicit HsaOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
barectf_api_ops_trace_hsa_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
GetCorrelationId());
}
};
// HIP API operation event record (beginning).
class HipOpEventRecordBegin final : public ApiOpEventRecord {
public:
explicit HipOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
: ApiOpEventRecord{record, GetRecordBeginClockVal(record)},
kernel_name_{QueryKernelName(record)} {}
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
barectf_api_ops_trace_hip_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
GetCorrelationId(), kernel_name_.c_str());
}
private:
// Queries and returns the kernel name of the record `record`.
//
// Returns an empty string if not available.
static std::string QueryKernelName(const rocprofiler_record_tracer_t& record) {
if (record.operation_id.id == 0) {
if (const auto api_handle = record.api_data_handle.handle) {
const auto str = reinterpret_cast<const char*>(api_handle);
if (std::strlen(str) > 1) {
// Return demangled version.
return rocmtools::cxx_demangle(str);
}
}
}
return {};
}
std::string kernel_name_;
};
// HIP API operation event record (end).
class HipOpEventRecordEnd final : public ApiOpEventRecord {
public:
explicit HipOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
barectf_api_ops_trace_hip_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
GetCorrelationId());
}
};
// Profiler record base.
class ProfilerEventRecord : public BarectfEventRecord<barectf_profiler_ctx> {
public:
explicit ProfilerEventRecord(const rocprofiler_record_profiler_t& record,
const rocprofiler_session_id_t session_id)
: BarectfEventRecord<barectf_profiler_ctx>{GetRecordBeginClockVal(record)},
dispatch_{record.header.id.handle},
gpu_id_{record.gpu_id.handle},
queue_id_{record.queue_id.handle},
queue_index_{record.queue_idx.value},
process_id_{GetPid()},
thread_id_{record.thread_id.value},
kernel_id_{record.kernel_id.handle},
kernel_name_{QueryKernelName(record)},
counter_infos_{QueryCounterInfos(record, session_id)} {}
void Write(barectf_profiler_ctx& barectf_ctx) const override {
barectf_profiler_trace_profiler_record(
&barectf_ctx, dispatch_, gpu_id_, queue_id_, queue_index_, process_id_, thread_id_,
kernel_id_, kernel_name_.c_str(), counter_infos_.names.size(), counter_infos_.names.data(),
counter_infos_.values.size(), counter_infos_.values.data());
}
protected:
// Counter infos.
//
// `names[i]` names the counter value `values[i]`.
struct CounterInfos final {
// `names_storage` owns the strings while the elements of `names`
// point to the internal C strings of `names_storage`.
//
// This is needed because barectf expects an array of contiguous
// C string pointers.
std::vector<std::string> names_storage;
std::vector<const char*> names;
// Counter values.
std::vector<std::uint64_t> values;
};
std::uint64_t GetDispatch() const noexcept { return dispatch_; }
std::uint64_t GetGpuId() const noexcept { return gpu_id_; }
std::uint64_t GetQueueId() const noexcept { return queue_id_; }
std::uint64_t GetQueueIndex() const noexcept { return queue_index_; }
std::uint32_t GetProcessId() const noexcept { return process_id_; }
std::uint32_t GetThreadId() const noexcept { return thread_id_; }
std::uint64_t GetKernelId() const noexcept { return kernel_id_; }
const std::string& GetKernelName() const noexcept { return kernel_name_; }
const CounterInfos& GetCounterInfos() const noexcept { return counter_infos_; }
private:
// Queries and returns the kernel name of the record `record`.
//
// Returns an empty string if not available.
static std::string QueryKernelName(const rocprofiler_record_profiler_t& record) {
const auto kernel_name = QueryAllocStr(
[&record](const auto size) {
return rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, record.kernel_id, size);
},
[&record](const auto str) {
return rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, record.kernel_id,
const_cast<const char**>(str));
});
if (kernel_name.size() <= 1) {
return {};
}
// Return truncated and demangled version.
return rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name));
}
// Queries and returns the counter infos of the record `record` and
// session ID `session_id`.
static CounterInfos QueryCounterInfos(const rocprofiler_record_profiler_t& record,
const rocprofiler_session_id_t session_id) {
if (!record.counters) {
// No counters.
return {};
}
CounterInfos infos;
for (std::size_t i = 0; i < record.counters_count.value; ++i) {
auto& counter = record.counters[i];
if (counter.counter_handler.handle == 0) {
// Not available: continue.
continue;
}
// Query counter name size first
std::size_t counter_name_size = 0;
[[maybe_unused]] auto ret = rocprofiler_query_counter_info_size(
session_id, ROCPROFILER_COUNTER_NAME, counter.counter_handler, &counter_name_size);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name size");
if (counter_name_size == 0) {
// No size: continue.
continue;
}
// Query counter name (borrowed from `record`: no need to free).
const char* counter_name = nullptr;
ret = rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
counter.counter_handler, &counter_name);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name");
if (!counter_name) {
// Not available: continue.
continue;
}
// Push back infos.
infos.names_storage.emplace_back(counter_name);
infos.names.push_back(infos.names_storage.back().c_str());
infos.values.push_back(counter.value.value);
}
return infos;
}
std::uint64_t dispatch_;
std::uint64_t gpu_id_;
std::uint64_t queue_id_;
std::uint64_t queue_index_;
std::uint32_t process_id_;
std::uint32_t thread_id_;
std::uint64_t kernel_id_;
std::string kernel_name_;
CounterInfos counter_infos_;
};
// Profiler record base.
class ProfilerWithKernelPropsEventRecord final : public ProfilerEventRecord {
private:
// According to `plugin/file/file.cpp`:
//
// > Taken from rocprofiler: The size hasn't changed in recent past
static constexpr std::uint32_t lds_block_size_ = 128 * 4;
public:
explicit ProfilerWithKernelPropsEventRecord(const rocprofiler_record_profiler_t& record,
const rocprofiler_session_id_t session_id)
: ProfilerEventRecord{record, session_id},
grid_size_{record.kernel_properties.grid_size},
workgroup_size_{record.kernel_properties.workgroup_size},
lds_size_{
((record.kernel_properties.lds_size + (lds_block_size_ - 1)) & ~(lds_block_size_ - 1))},
scratch_size_{record.kernel_properties.scratch_size},
arch_vgpr_count_{record.kernel_properties.arch_vgpr_count},
accum_vgpr_count_{record.kernel_properties.accum_vgpr_count},
sgpr_count_{record.kernel_properties.sgpr_count},
wave_size_{record.kernel_properties.wave_size},
signal_handle_{record.kernel_properties.signal_handle} {}
void Write(barectf_profiler_ctx& barectf_ctx) const override {
barectf_profiler_trace_profiler_record_with_kernel_properties(
&barectf_ctx, GetDispatch(), GetGpuId(), GetQueueId(), GetQueueIndex(), GetProcessId(),
GetThreadId(), GetKernelId(), GetKernelName().c_str(), GetCounterInfos().names.size(),
GetCounterInfos().names.data(), GetCounterInfos().values.size(),
GetCounterInfos().values.data(), grid_size_, workgroup_size_, lds_size_, scratch_size_,
arch_vgpr_count_, accum_vgpr_count_, sgpr_count_, wave_size_, signal_handle_);
}
private:
std::uint64_t grid_size_;
std::uint64_t workgroup_size_;
std::uint64_t lds_size_;
std::uint64_t scratch_size_;
std::uint64_t arch_vgpr_count_;
std::uint64_t accum_vgpr_count_;
std::uint64_t sgpr_count_;
std::uint64_t wave_size_;
std::uint64_t signal_handle_;
};
} // namespace
Plugin::Plugin(const std::size_t packet_size, const fs::path& trace_dir,
const fs::path& metadata_stream_path)
: roctx_tracer_{packet_size, trace_dir, "roctx_"},
hsa_api_tracer_{packet_size, trace_dir, "hsa_api_"},
hip_api_tracer_{packet_size, trace_dir, "hip_api_"},
api_ops_tracer_{packet_size, trace_dir, "api_ops_"},
hsa_handles_tracer_{packet_size, trace_dir, "hsa_handles_"},
profiler_tracer_{packet_size, trace_dir, "profiler_"} {
// Make sure the trace directory doesn't exist.
if (fs::exists(trace_dir)) {
std::ostringstream ss;
ss << "CTF trace directory `" << trace_dir.string() << "` already exists";
throw std::runtime_error{ss.str()};
}
// Make sure the metadata stream file exists.
if (!fs::exists(metadata_stream_path)) {
std::ostringstream ss;
ss << "CTF metadata stream file `" << metadata_stream_path.string() << "` doesn't exist";
throw std::runtime_error{ss.str()};
}
// Create trace directory.
if (!fs::create_directory(trace_dir)) {
std::ostringstream ss;
ss << "Cannot create the CTF trace directory `" << trace_dir.string() << "`";
throw std::runtime_error{ss.str()};
}
// Copy adjusted metadata stream file to trace directory.
try {
CopyAdjustedMetadataStreamFile(metadata_stream_path, trace_dir);
} catch (const std::exception& exc) {
std::ostringstream ss;
ss << "Cannot adjust and copy metadata stream file `" << metadata_stream_path.string()
<< "` to the CTF trace directory `" << trace_dir.string() << "`: " << exc.what();
throw std::runtime_error{ss.str()};
}
// Write HSA handle type event records.
WriteHsaHandleTypes();
}
void Plugin::HandleTracerRecord(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id) {
std::lock_guard<std::mutex> lock{lock_};
// Depending on the domain, create and add an event record to the
// corresponding tracer.
switch (record.domain) {
case ACTIVITY_DOMAIN_ROCTX:
roctx_tracer_.AddEventRecord(std::make_shared<const RocTxEventRecord>(record, session_id));
break;
case ACTIVITY_DOMAIN_HSA_API: {
hsa_api_tracer_.AddEventRecord(
std::make_shared<const HsaApiEventRecordBegin>(record, session_id));
hsa_api_tracer_.AddEventRecord(
std::make_shared<const HsaApiEventRecordEnd>(record, session_id));
break;
}
case ACTIVITY_DOMAIN_HIP_API: {
hip_api_tracer_.AddEventRecord(
std::make_shared<const HipApiEventRecordBegin>(record, session_id));
hip_api_tracer_.AddEventRecord(
std::make_shared<const HipApiEventRecordEnd>(record, session_id));
break;
}
case ACTIVITY_DOMAIN_HSA_OPS:
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordBegin>(record));
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordEnd>(record));
break;
case ACTIVITY_DOMAIN_HIP_OPS:
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordBegin>(record));
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordEnd>(record));
break;
default:
// Warn
std::cerr << "rocm_ctf::Plugin::HandleTracerRecord(): "
<< "ignoring record for unknown domain #" << record.domain << std::endl;
break;
}
}
void Plugin::HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
const rocprofiler_session_id_t session_id) {
std::lock_guard<std::mutex> lock{lock_};
profiler_tracer_.AddEventRecord(
std::make_shared<const ProfilerWithKernelPropsEventRecord>(record, session_id));
}
void Plugin::HandleBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* const end,
const rocprofiler_session_id_t session_id,
const rocprofiler_buffer_id_t buffer_id) {
while (begin && begin < end) {
if (begin->kind == ROCPROFILER_TRACER_RECORD) {
HandleTracerRecord(*reinterpret_cast<const rocprofiler_record_tracer_t*>(begin), session_id);
} else {
assert(begin->kind == ROCPROFILER_PROFILER_RECORD);
HandleProfilerRecord(*reinterpret_cast<const rocprofiler_record_profiler_t*>(begin),
session_id);
}
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
}
}
void Plugin::WriteHsaHandleTypes() {
[[maybe_unused]] const auto status = hsa_iterate_agents(
[](const auto agent, const auto user_data) {
auto& tracer = *static_cast<HsaHandlesTracer*>(user_data);
hsa_device_type_t type;
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS) {
return HSA_STATUS_ERROR;
}
using Type = HsaHandleTypeEventRecord::Type;
auto event_record = std::make_shared<HsaHandleTypeEventRecord>(
agent.handle, type == HSA_DEVICE_TYPE_CPU ? Type::CPU : Type::GPU);
tracer.AddEventRecord(std::move(event_record));
return HSA_STATUS_SUCCESS;
},
&hsa_handles_tracer_);
assert(status == HSA_STATUS_SUCCESS && "Iterate HSA agents");
}
namespace {
constexpr std::uint64_t ns_per_s = 1'000'000'000ULL;
// Samples the ROCMTools clock and returns the value.
std::uint64_t GetClkVal() {
rocprofiler_timestamp_t ts;
[[maybe_unused]] const auto ret = rocprofiler_get_timestamp(&ts);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Get timestamp");
return ts.value;
}
// Updates `offset` and `delta`, if needed, to a more accurate clock
// class offset and a smaller ROCMTools clock value delta.
//
// This function samples the ROCMTools clock twice, also sampling the
// real-time clock in between, and uses the average ROCMTools clock
// value to approximate the actual clock class offset.
//
// This strategy is based on the measure_single_clock_offset() function
// of the LTTng-tools project <https://lttng.org/>.
void UpdateClkClsOffsetAndDelta(std::uint64_t& offset, std::uint64_t& delta) {
// Sample ROCMTools clock (first time).
const auto rocm_clk_val1 = GetClkVal();
// Sample real-time clock.
timespec realtime_spec = {0, 0};
[[maybe_unused]] const auto ret = clock_gettime(CLOCK_REALTIME, &realtime_spec);
assert(ret == 0);
// Sample ROCMTools clock (second time).
const auto rocm_clk_val2 = GetClkVal();
// Compute the current ROCMTools clock value delta.
const auto this_delta = rocm_clk_val2 - rocm_clk_val1;
if (this_delta > delta) {
// Discard larger delta.
return;
}
// Compute the average ROCMTools clock value.
const auto rocm_clk_val_avg = (rocm_clk_val1 + rocm_clk_val2) >> 1;
// Compute the real-time clock value in nanoseconds.
const auto realtime_ns =
(static_cast<std::uint64_t>(realtime_spec.tv_sec) * ns_per_s) + realtime_spec.tv_nsec;
// Update clock class offset and delta.
assert(rocm_clk_val_avg < realtime_ns);
offset = realtime_ns - rocm_clk_val_avg;
delta = this_delta;
}
// Computes and returns the most possible accurate clock class offset.
std::uint64_t GetMetadataClkClsOffset() {
std::uint64_t offset = 0;
std::uint64_t delta = std::numeric_limits<std::uint64_t>::max();
// Best effort to find the most accurate offset.
for (auto i = 0U; i < 50U; ++i) {
UpdateClkClsOffsetAndDelta(offset, delta);
}
return offset;
}
} // namespace
void Plugin::CopyAdjustedMetadataStreamFile(const fs::path& metadata_stream_path,
const fs::path& trace_dir) {
// Load installed metadata stream file contents.
std::string metadata;
std::getline(std::ifstream{metadata_stream_path}, metadata, '\0');
// Replace the original `offset` property.
{
static constexpr auto offset_term = "offset = 0;";
std::ostringstream ss;
ss << "offset = " << GetMetadataClkClsOffset() << ';';
metadata.replace(metadata.find(offset_term), std::strlen(offset_term), ss.str());
}
// Write adjusted metadata stream to trace directory.
{
std::ofstream output{trace_dir / "metadata"};
output.write(metadata.data(), metadata.size());
}
}
} // namespace rocm_ctf