Adding rocprofilerv2
Change-Id: Ic0cc280ba207d2b8f6ccae1cd4ac3184152fc1ad
[ROCm/rocprofiler commit: 8032adb64f]
Este cometimento está contido em:
@@ -0,0 +1,869 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <limits>
|
||||
#include <fstream>
|
||||
#include <experimental/filesystem>
|
||||
#include <time.h>
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include "hsa_prof_str.h"
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/amd_detail/hip_prof_str.h>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
#include "../utils.h"
|
||||
|
||||
#include "barectf.h"
|
||||
#include "barectf_event_record.h"
|
||||
#include "barectf_tracer.h"
|
||||
#include "plugin.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace rocm_ctf {
|
||||
namespace {
|
||||
|
||||
// Abstract tracer event record using the barectf context type `CtxT`.
|
||||
template <typename CtxT> class TracerEventRecord : public BarectfEventRecord<CtxT> {
|
||||
protected:
|
||||
explicit TracerEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
|
||||
: BarectfEventRecord<CtxT>{clock_val},
|
||||
op_{record.operation_id.id},
|
||||
thread_id_{record.thread_id.value},
|
||||
queue_id_{record.queue_id.handle},
|
||||
agent_id_{record.agent_id.handle},
|
||||
correlation_id_{record.correlation_id.value} {}
|
||||
|
||||
std::uint32_t GetOp() const noexcept { return op_; }
|
||||
std::uint32_t GetThreadId() const noexcept { return thread_id_; }
|
||||
std::uint64_t GetQueueId() const noexcept { return queue_id_; }
|
||||
std::uint64_t GetAgentId() const noexcept { return agent_id_; }
|
||||
std::uint64_t GetCorrelationId() const noexcept { return correlation_id_; }
|
||||
|
||||
private:
|
||||
std::uint32_t op_;
|
||||
std::uint32_t thread_id_;
|
||||
std::uint64_t queue_id_;
|
||||
std::uint64_t agent_id_;
|
||||
std::uint64_t correlation_id_;
|
||||
};
|
||||
|
||||
// Extracts the clock value at which the tracer or profiler record
// `record` begins, that is, `record.timestamps.begin.value`.
template <typename RecordT> std::uint64_t GetRecordBeginClockVal(const RecordT& record) {
  const auto& begin_ts = record.timestamps.begin;

  return begin_ts.value;
}
|
||||
|
||||
// Extracts the clock value at which the tracer or profiler record
// `record` ends, that is, `record.timestamps.end.value`.
template <typename RecordT> std::uint64_t GetRecordEndClockVal(const RecordT& record) {
  const auto& end_ts = record.timestamps.end;

  return end_ts.value;
}
|
||||
|
||||
// Queries allocated string data using the size query function
|
||||
// `query_size_func` and the data query function `query_data_func`,
|
||||
// returning the corresponding string and freeing temporary allocated
|
||||
// memory.
|
||||
//
|
||||
// Returns an empty string if anything goes wrong.
|
||||
template <typename QuerySizeFuncT, typename QueryDataFuncT>
|
||||
std::string QueryAllocStr(QuerySizeFuncT&& query_size_func, QueryDataFuncT&& query_data_func) {
|
||||
// Query size first.
|
||||
std::size_t size = 0;
|
||||
[[maybe_unused]] auto ret = query_size_func(&size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query size");
|
||||
|
||||
if (size == 0) {
|
||||
// No size: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
// Query data (allocated by query_data_func()).
|
||||
char* alloc_str = nullptr;
|
||||
|
||||
ret = query_data_func(&alloc_str);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query data");
|
||||
|
||||
if (!alloc_str) {
|
||||
// No data: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
// Allocate return value.
|
||||
std::string str_ret{alloc_str};
|
||||
|
||||
// Free allocated data.
|
||||
std::free(alloc_str);
|
||||
|
||||
// Return string object.
|
||||
return str_ret;
|
||||
}
|
||||
|
||||
// rocTX event record.
|
||||
class RocTxEventRecord final : public TracerEventRecord<barectf_roctx_ctx> {
|
||||
public:
|
||||
explicit RocTxEventRecord(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id)
|
||||
: TracerEventRecord<barectf_roctx_ctx>{record, GetRecordBeginClockVal(record)},
|
||||
id_{QueryId(record, session_id)},
|
||||
msg_{QueryMsg(record, session_id)} {}
|
||||
|
||||
void Write(barectf_roctx_ctx& barectf_ctx) const override {
|
||||
barectf_roctx_trace_roctx(&barectf_ctx, GetThreadId(), id_, msg_.c_str());
|
||||
}
|
||||
|
||||
private:
|
||||
// Queries and returns the rocTX message of the record `record` and
|
||||
// session ID `session_id`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryMsg(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
// Query size first.
|
||||
std::size_t msg_size = 0;
|
||||
[[maybe_unused]] auto ret = rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id,
|
||||
&msg_size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message size");
|
||||
|
||||
if (msg_size == 0) {
|
||||
// No size: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
// Query data (borrowed from the record: no need to free).
|
||||
char* msg = nullptr;
|
||||
|
||||
ret = rocprofiler_query_roctx_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id, &msg);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message");
|
||||
|
||||
if (!msg) {
|
||||
// No data: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
return rocmtools::cxx_demangle(msg);
|
||||
}
|
||||
|
||||
// Queries and returns the rocTX ID of the record `record` and the
|
||||
// session ID `session_id`.
|
||||
//
|
||||
// Returns 0 if anything goes wrong.
|
||||
static std::uint64_t QueryId(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
try {
|
||||
return std::stoull(QueryAllocStr(
|
||||
[&record, session_id](const auto size) {
|
||||
return rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, size);
|
||||
},
|
||||
[&record, session_id](const auto str) {
|
||||
return rocprofiler_query_roctx_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, str);
|
||||
}));
|
||||
} catch (...) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
std::uint64_t id_;
|
||||
std::string msg_;
|
||||
};
|
||||
|
||||
// Abstract HSA API event record.
|
||||
class HsaApiEventRecord : public TracerEventRecord<barectf_hsa_api_ctx> {
|
||||
protected:
|
||||
explicit HsaApiEventRecord(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id, const std::uint64_t clock_val)
|
||||
: TracerEventRecord<barectf_hsa_api_ctx>{record, clock_val},
|
||||
api_data_{QueryApiData(record, session_id)} {}
|
||||
|
||||
const hsa_api_data_t& GetApiData() const noexcept { return api_data_; }
|
||||
|
||||
private:
|
||||
// Queries and returns the API data of the record `record` and session
|
||||
// ID `session_id`.
|
||||
static const hsa_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
// Query size first (only for assertions).
|
||||
[[maybe_unused]] std::size_t size = 0;
|
||||
[[maybe_unused]] auto ret = rocprofiler_query_hsa_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data size");
|
||||
assert(size > 0);
|
||||
|
||||
// Query data (borrowed from the record).
|
||||
char* data = nullptr;
|
||||
ret = rocprofiler_query_hsa_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &data);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data");
|
||||
assert(data);
|
||||
|
||||
// Reinterpret as an HSA API data pointer.
|
||||
return *reinterpret_cast<const hsa_api_data_t*>(data);
|
||||
}
|
||||
|
||||
hsa_api_data_t api_data_;
|
||||
};
|
||||
|
||||
// Event record marking the beginning of an HSA API call.
//
// Its clock value is the beginning timestamp of the tracer record.
class HsaApiEventRecordBegin final : public HsaApiEventRecord {
 public:
  explicit HsaApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
                                  const rocprofiler_session_id_t session_id)
      : HsaApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}

  // Emits the event into `barectf_ctx`.
  //
  // The body is the generated switch statement of `hsa_begin.cpp.i`,
  // which expects the parameter to be named `barectf_ctx`.
  void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
#include "hsa_begin.cpp.i"
  }
};
|
||||
|
||||
// Event record marking the end of an HSA API call.
//
// Its clock value is the end timestamp of the tracer record.
class HsaApiEventRecordEnd final : public HsaApiEventRecord {
 public:
  explicit HsaApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
                                const rocprofiler_session_id_t session_id)
      : HsaApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}

  // Emits the event into `barectf_ctx`.
  //
  // The body is the generated switch statement of `hsa_end.cpp.i`,
  // which expects the parameter to be named `barectf_ctx`.
  void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
#include "hsa_end.cpp.i"
  }
};
|
||||
|
||||
// Abstract HIP API event record.
|
||||
class HipApiEventRecord : public TracerEventRecord<barectf_hip_api_ctx> {
|
||||
protected:
|
||||
explicit HipApiEventRecord(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id, const std::uint64_t clock_val)
|
||||
: TracerEventRecord<barectf_hip_api_ctx>{record, clock_val},
|
||||
api_data_{QueryApiData(record, session_id)},
|
||||
kernel_name_{QueryKernelName(record, session_id)} {}
|
||||
|
||||
const hip_api_data_t& GetApiData() const noexcept { return api_data_; }
|
||||
const std::string& GetKernelName() const noexcept { return kernel_name_; }
|
||||
|
||||
private:
|
||||
// Queries and returns the API data of the record `record` and session
|
||||
// ID `session_id`.
|
||||
static const hip_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
// Query size first (only for assertions).
|
||||
[[maybe_unused]] std::size_t size = 0;
|
||||
[[maybe_unused]] auto ret = rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data size");
|
||||
assert(size > 0);
|
||||
|
||||
// Query data (borrowed from the record).
|
||||
char* data = nullptr;
|
||||
|
||||
ret = rocprofiler_query_hip_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &data);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data");
|
||||
assert(data);
|
||||
|
||||
// Reinterpret as an HIP API data pointer.
|
||||
return *reinterpret_cast<const hip_api_data_t*>(data);
|
||||
}
|
||||
|
||||
// Queries and returns the kernel name of the record `record` and
|
||||
// session ID `session_id`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryKernelName(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
const auto kernel_name = QueryAllocStr(
|
||||
[&record, session_id](const auto size) {
|
||||
return rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_KERNEL_NAME, record.api_data_handle, record.operation_id,
|
||||
size);
|
||||
},
|
||||
[&record, session_id](const auto str) {
|
||||
return rocprofiler_query_hip_tracer_api_data_info(session_id, ROCPROFILER_HIP_KERNEL_NAME,
|
||||
record.api_data_handle,
|
||||
record.operation_id, str);
|
||||
});
|
||||
|
||||
if (kernel_name.size() > 1) {
|
||||
// Return demangled version.
|
||||
return rocmtools::cxx_demangle(kernel_name);
|
||||
}
|
||||
|
||||
return kernel_name;
|
||||
}
|
||||
|
||||
hip_api_data_t api_data_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
|
||||
// Event record marking the beginning of a HIP API call.
//
// Its clock value is the beginning timestamp of the tracer record.
class HipApiEventRecordBegin final : public HipApiEventRecord {
 public:
  explicit HipApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
                                  const rocprofiler_session_id_t session_id)
      : HipApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}

  // Emits the event into `barectf_ctx`.
  //
  // The body is the generated switch statement of `hip_begin.cpp.i`,
  // which expects the parameter to be named `barectf_ctx`.
  void Write(barectf_hip_api_ctx& barectf_ctx) const override {
#include "hip_begin.cpp.i"
  }
};
|
||||
|
||||
// Event record marking the end of a HIP API call.
//
// Its clock value is the end timestamp of the tracer record.
class HipApiEventRecordEnd final : public HipApiEventRecord {
 public:
  explicit HipApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
                                const rocprofiler_session_id_t session_id)
      : HipApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}

  // Emits the event into `barectf_ctx`.
  //
  // The body is the generated switch statement of `hip_end.cpp.i`,
  // which expects the parameter to be named `barectf_ctx`.
  void Write(barectf_hip_api_ctx& barectf_ctx) const override {
#include "hip_end.cpp.i"
  }
};
|
||||
|
||||
// HSA API handle type event record.
|
||||
class HsaHandleTypeEventRecord final : public BarectfEventRecord<barectf_hsa_handles_ctx> {
|
||||
public:
|
||||
enum class Type {
|
||||
CPU = 0,
|
||||
GPU = 1,
|
||||
};
|
||||
|
||||
explicit HsaHandleTypeEventRecord(const std::uint64_t handle, const Type type)
|
||||
: BarectfEventRecord<barectf_hsa_handles_ctx>{0}, handle_{handle}, type_{type} {}
|
||||
|
||||
void Write(barectf_hsa_handles_ctx& barectf_ctx) const override {
|
||||
barectf_hsa_handles_trace_hsa_handle_type(&barectf_ctx, handle_,
|
||||
static_cast<std::uint8_t>(type_));
|
||||
}
|
||||
|
||||
private:
|
||||
std::uint64_t handle_;
|
||||
Type type_;
|
||||
};
|
||||
|
||||
// Abstract API operation event record.
|
||||
class ApiOpEventRecord : public TracerEventRecord<barectf_api_ops_ctx> {
|
||||
protected:
|
||||
explicit ApiOpEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
|
||||
: TracerEventRecord<barectf_api_ops_ctx>{record, clock_val} {}
|
||||
};
|
||||
|
||||
// HSA API operation event record (beginning).
|
||||
class HsaOpEventRecordBegin final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HsaOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordBeginClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hsa_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// HSA API operation event record (end).
|
||||
class HsaOpEventRecordEnd final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HsaOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hsa_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// HIP API operation event record (beginning).
|
||||
class HipOpEventRecordBegin final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HipOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordBeginClockVal(record)},
|
||||
kernel_name_{QueryKernelName(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hip_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId(), kernel_name_.c_str());
|
||||
}
|
||||
|
||||
private:
|
||||
// Queries and returns the kernel name of the record `record`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryKernelName(const rocprofiler_record_tracer_t& record) {
|
||||
if (record.operation_id.id == 0) {
|
||||
if (const auto api_handle = record.api_data_handle.handle) {
|
||||
const auto str = reinterpret_cast<const char*>(api_handle);
|
||||
|
||||
if (std::strlen(str) > 1) {
|
||||
// Return demangled version.
|
||||
return rocmtools::cxx_demangle(str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string kernel_name_;
|
||||
};
|
||||
|
||||
// HIP API operation event record (end).
|
||||
class HipOpEventRecordEnd final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HipOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hip_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// Profiler record base.
|
||||
class ProfilerEventRecord : public BarectfEventRecord<barectf_profiler_ctx> {
|
||||
public:
|
||||
explicit ProfilerEventRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id)
|
||||
: BarectfEventRecord<barectf_profiler_ctx>{GetRecordBeginClockVal(record)},
|
||||
dispatch_{record.header.id.handle},
|
||||
gpu_id_{record.gpu_id.handle},
|
||||
queue_id_{record.queue_id.handle},
|
||||
queue_index_{record.queue_idx.value},
|
||||
process_id_{GetPid()},
|
||||
thread_id_{record.thread_id.value},
|
||||
kernel_id_{record.kernel_id.handle},
|
||||
kernel_name_{QueryKernelName(record)},
|
||||
counter_infos_{QueryCounterInfos(record, session_id)} {}
|
||||
|
||||
void Write(barectf_profiler_ctx& barectf_ctx) const override {
|
||||
barectf_profiler_trace_profiler_record(
|
||||
&barectf_ctx, dispatch_, gpu_id_, queue_id_, queue_index_, process_id_, thread_id_,
|
||||
kernel_id_, kernel_name_.c_str(), counter_infos_.names.size(), counter_infos_.names.data(),
|
||||
counter_infos_.values.size(), counter_infos_.values.data());
|
||||
}
|
||||
|
||||
protected:
|
||||
// Counter infos.
|
||||
//
|
||||
// `names[i]` names the counter value `values[i]`.
|
||||
struct CounterInfos final {
|
||||
// `names_storage` owns the strings while the elements of `names`
|
||||
// point to the internal C strings of `names_storage`.
|
||||
//
|
||||
// This is needed because barectf expects an array of contiguous
|
||||
// C string pointers.
|
||||
std::vector<std::string> names_storage;
|
||||
std::vector<const char*> names;
|
||||
|
||||
// Counter values.
|
||||
std::vector<std::uint64_t> values;
|
||||
};
|
||||
|
||||
std::uint64_t GetDispatch() const noexcept { return dispatch_; }
|
||||
std::uint64_t GetGpuId() const noexcept { return gpu_id_; }
|
||||
std::uint64_t GetQueueId() const noexcept { return queue_id_; }
|
||||
std::uint64_t GetQueueIndex() const noexcept { return queue_index_; }
|
||||
std::uint32_t GetProcessId() const noexcept { return process_id_; }
|
||||
std::uint32_t GetThreadId() const noexcept { return thread_id_; }
|
||||
std::uint64_t GetKernelId() const noexcept { return kernel_id_; }
|
||||
const std::string& GetKernelName() const noexcept { return kernel_name_; }
|
||||
const CounterInfos& GetCounterInfos() const noexcept { return counter_infos_; }
|
||||
|
||||
private:
|
||||
// Queries and returns the kernel name of the record `record`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryKernelName(const rocprofiler_record_profiler_t& record) {
|
||||
const auto kernel_name = QueryAllocStr(
|
||||
[&record](const auto size) {
|
||||
return rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, record.kernel_id, size);
|
||||
},
|
||||
[&record](const auto str) {
|
||||
return rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, record.kernel_id,
|
||||
const_cast<const char**>(str));
|
||||
});
|
||||
|
||||
if (kernel_name.size() <= 1) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Return truncated and demangled version.
|
||||
return rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name));
|
||||
}
|
||||
|
||||
// Queries and returns the counter infos of the record `record` and
|
||||
// session ID `session_id`.
|
||||
static CounterInfos QueryCounterInfos(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
if (!record.counters) {
|
||||
// No counters.
|
||||
return {};
|
||||
}
|
||||
|
||||
CounterInfos infos;
|
||||
|
||||
for (std::size_t i = 0; i < record.counters_count.value; ++i) {
|
||||
auto& counter = record.counters[i];
|
||||
|
||||
if (counter.counter_handler.handle == 0) {
|
||||
// Not available: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Query counter name size first
|
||||
std::size_t counter_name_size = 0;
|
||||
[[maybe_unused]] auto ret = rocprofiler_query_counter_info_size(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, counter.counter_handler, &counter_name_size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name size");
|
||||
|
||||
if (counter_name_size == 0) {
|
||||
// No size: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Query counter name (borrowed from `record`: no need to free).
|
||||
const char* counter_name = nullptr;
|
||||
|
||||
ret = rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
|
||||
counter.counter_handler, &counter_name);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name");
|
||||
|
||||
if (!counter_name) {
|
||||
// Not available: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Push back infos.
|
||||
infos.names_storage.emplace_back(counter_name);
|
||||
infos.names.push_back(infos.names_storage.back().c_str());
|
||||
infos.values.push_back(counter.value.value);
|
||||
}
|
||||
|
||||
return infos;
|
||||
}
|
||||
|
||||
std::uint64_t dispatch_;
|
||||
std::uint64_t gpu_id_;
|
||||
std::uint64_t queue_id_;
|
||||
std::uint64_t queue_index_;
|
||||
std::uint32_t process_id_;
|
||||
std::uint32_t thread_id_;
|
||||
std::uint64_t kernel_id_;
|
||||
std::string kernel_name_;
|
||||
CounterInfos counter_infos_;
|
||||
};
|
||||
|
||||
// Profiler record base.
|
||||
class ProfilerWithKernelPropsEventRecord final : public ProfilerEventRecord {
|
||||
private:
|
||||
// According to `plugin/file/file.cpp`:
|
||||
//
|
||||
// > Taken from rocprofiler: The size hasn't changed in recent past
|
||||
static constexpr std::uint32_t lds_block_size_ = 128 * 4;
|
||||
|
||||
public:
|
||||
explicit ProfilerWithKernelPropsEventRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id)
|
||||
: ProfilerEventRecord{record, session_id},
|
||||
grid_size_{record.kernel_properties.grid_size},
|
||||
workgroup_size_{record.kernel_properties.workgroup_size},
|
||||
lds_size_{
|
||||
((record.kernel_properties.lds_size + (lds_block_size_ - 1)) & ~(lds_block_size_ - 1))},
|
||||
scratch_size_{record.kernel_properties.scratch_size},
|
||||
arch_vgpr_count_{record.kernel_properties.arch_vgpr_count},
|
||||
accum_vgpr_count_{record.kernel_properties.accum_vgpr_count},
|
||||
sgpr_count_{record.kernel_properties.sgpr_count},
|
||||
wave_size_{record.kernel_properties.wave_size},
|
||||
signal_handle_{record.kernel_properties.signal_handle} {}
|
||||
|
||||
void Write(barectf_profiler_ctx& barectf_ctx) const override {
|
||||
barectf_profiler_trace_profiler_record_with_kernel_properties(
|
||||
&barectf_ctx, GetDispatch(), GetGpuId(), GetQueueId(), GetQueueIndex(), GetProcessId(),
|
||||
GetThreadId(), GetKernelId(), GetKernelName().c_str(), GetCounterInfos().names.size(),
|
||||
GetCounterInfos().names.data(), GetCounterInfos().values.size(),
|
||||
GetCounterInfos().values.data(), grid_size_, workgroup_size_, lds_size_, scratch_size_,
|
||||
arch_vgpr_count_, accum_vgpr_count_, sgpr_count_, wave_size_, signal_handle_);
|
||||
}
|
||||
|
||||
private:
|
||||
std::uint64_t grid_size_;
|
||||
std::uint64_t workgroup_size_;
|
||||
std::uint64_t lds_size_;
|
||||
std::uint64_t scratch_size_;
|
||||
std::uint64_t arch_vgpr_count_;
|
||||
std::uint64_t accum_vgpr_count_;
|
||||
std::uint64_t sgpr_count_;
|
||||
std::uint64_t wave_size_;
|
||||
std::uint64_t signal_handle_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
Plugin::Plugin(const std::size_t packet_size, const fs::path& trace_dir,
|
||||
const fs::path& metadata_stream_path)
|
||||
: roctx_tracer_{packet_size, trace_dir, "roctx_"},
|
||||
hsa_api_tracer_{packet_size, trace_dir, "hsa_api_"},
|
||||
hip_api_tracer_{packet_size, trace_dir, "hip_api_"},
|
||||
api_ops_tracer_{packet_size, trace_dir, "api_ops_"},
|
||||
hsa_handles_tracer_{packet_size, trace_dir, "hsa_handles_"},
|
||||
profiler_tracer_{packet_size, trace_dir, "profiler_"} {
|
||||
// Make sure the trace directory doesn't exist.
|
||||
if (fs::exists(trace_dir)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "CTF trace directory `" << trace_dir.string() << "` already exists";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Make sure the metadata stream file exists.
|
||||
if (!fs::exists(metadata_stream_path)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "CTF metadata stream file `" << metadata_stream_path.string() << "` doesn't exist";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Create trace directory.
|
||||
if (!fs::create_directory(trace_dir)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "Cannot create the CTF trace directory `" << trace_dir.string() << "`";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Copy adjusted metadata stream file to trace directory.
|
||||
try {
|
||||
CopyAdjustedMetadataStreamFile(metadata_stream_path, trace_dir);
|
||||
} catch (const std::exception& exc) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "Cannot adjust and copy metadata stream file `" << metadata_stream_path.string()
|
||||
<< "` to the CTF trace directory `" << trace_dir.string() << "`: " << exc.what();
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Write HSA handle type event records.
|
||||
WriteHsaHandleTypes();
|
||||
}
|
||||
|
||||
void Plugin::HandleTracerRecord(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
std::lock_guard<std::mutex> lock{lock_};
|
||||
|
||||
// Depending on the domain, create and add an event record to the
|
||||
// corresponding tracer.
|
||||
switch (record.domain) {
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
roctx_tracer_.AddEventRecord(std::make_shared<const RocTxEventRecord>(record, session_id));
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
hsa_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HsaApiEventRecordBegin>(record, session_id));
|
||||
hsa_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HsaApiEventRecordEnd>(record, session_id));
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
hip_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HipApiEventRecordBegin>(record, session_id));
|
||||
hip_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HipApiEventRecordEnd>(record, session_id));
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordBegin>(record));
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordEnd>(record));
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordBegin>(record));
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordEnd>(record));
|
||||
break;
|
||||
default:
|
||||
// Warn
|
||||
std::cerr << "rocm_ctf::Plugin::HandleTracerRecord(): "
|
||||
<< "ignoring record for unknown domain #" << record.domain << std::endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Plugin::HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
std::lock_guard<std::mutex> lock{lock_};
|
||||
profiler_tracer_.AddEventRecord(
|
||||
std::make_shared<const ProfilerWithKernelPropsEventRecord>(record, session_id));
|
||||
}
|
||||
|
||||
void Plugin::HandleBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* const end,
|
||||
const rocprofiler_session_id_t session_id,
|
||||
const rocprofiler_buffer_id_t buffer_id) {
|
||||
while (begin && begin < end) {
|
||||
if (begin->kind == ROCPROFILER_TRACER_RECORD) {
|
||||
HandleTracerRecord(*reinterpret_cast<const rocprofiler_record_tracer_t*>(begin), session_id);
|
||||
} else {
|
||||
assert(begin->kind == ROCPROFILER_PROFILER_RECORD);
|
||||
HandleProfilerRecord(*reinterpret_cast<const rocprofiler_record_profiler_t*>(begin),
|
||||
session_id);
|
||||
}
|
||||
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
}
|
||||
|
||||
void Plugin::WriteHsaHandleTypes() {
|
||||
[[maybe_unused]] const auto status = hsa_iterate_agents(
|
||||
[](const auto agent, const auto user_data) {
|
||||
auto& tracer = *static_cast<HsaHandlesTracer*>(user_data);
|
||||
hsa_device_type_t type;
|
||||
|
||||
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
using Type = HsaHandleTypeEventRecord::Type;
|
||||
|
||||
auto event_record = std::make_shared<HsaHandleTypeEventRecord>(
|
||||
agent.handle, type == HSA_DEVICE_TYPE_CPU ? Type::CPU : Type::GPU);
|
||||
|
||||
tracer.AddEventRecord(std::move(event_record));
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&hsa_handles_tracer_);
|
||||
|
||||
assert(status == HSA_STATUS_SUCCESS && "Iterate HSA agents");
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::uint64_t ns_per_s = 1'000'000'000ULL;
|
||||
|
||||
// Samples the ROCMTools clock and returns the value.
|
||||
std::uint64_t GetClkVal() {
|
||||
rocprofiler_timestamp_t ts;
|
||||
[[maybe_unused]] const auto ret = rocprofiler_get_timestamp(&ts);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Get timestamp");
|
||||
return ts.value;
|
||||
}
|
||||
|
||||
// Updates `offset` and `delta`, if needed, to a more accurate clock
|
||||
// class offset and a smaller ROCMTools clock value delta.
|
||||
//
|
||||
// This function samples the ROCMTools clock twice, also sampling the
|
||||
// real-time clock in between, and uses the average ROCMTools clock
|
||||
// value to approximate the actual clock class offset.
|
||||
//
|
||||
// This strategy is based on the measure_single_clock_offset() function
|
||||
// of the LTTng-tools project <https://lttng.org/>.
|
||||
void UpdateClkClsOffsetAndDelta(std::uint64_t& offset, std::uint64_t& delta) {
|
||||
// Sample ROCMTools clock (first time).
|
||||
const auto rocm_clk_val1 = GetClkVal();
|
||||
|
||||
// Sample real-time clock.
|
||||
timespec realtime_spec = {0, 0};
|
||||
[[maybe_unused]] const auto ret = clock_gettime(CLOCK_REALTIME, &realtime_spec);
|
||||
|
||||
assert(ret == 0);
|
||||
|
||||
// Sample ROCMTools clock (second time).
|
||||
const auto rocm_clk_val2 = GetClkVal();
|
||||
|
||||
// Compute the current ROCMTools clock value delta.
|
||||
const auto this_delta = rocm_clk_val2 - rocm_clk_val1;
|
||||
|
||||
if (this_delta > delta) {
|
||||
// Discard larger delta.
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute the average ROCMTools clock value.
|
||||
const auto rocm_clk_val_avg = (rocm_clk_val1 + rocm_clk_val2) >> 1;
|
||||
|
||||
// Compute the real-time clock value in nanoseconds.
|
||||
const auto realtime_ns =
|
||||
(static_cast<std::uint64_t>(realtime_spec.tv_sec) * ns_per_s) + realtime_spec.tv_nsec;
|
||||
|
||||
// Update clock class offset and delta.
|
||||
assert(rocm_clk_val_avg < realtime_ns);
|
||||
offset = realtime_ns - rocm_clk_val_avg;
|
||||
delta = this_delta;
|
||||
}
|
||||
|
||||
// Computes and returns the most possible accurate clock class offset.
|
||||
std::uint64_t GetMetadataClkClsOffset() {
|
||||
std::uint64_t offset = 0;
|
||||
std::uint64_t delta = std::numeric_limits<std::uint64_t>::max();
|
||||
|
||||
// Best effort to find the most accurate offset.
|
||||
for (auto i = 0U; i < 50U; ++i) {
|
||||
UpdateClkClsOffsetAndDelta(offset, delta);
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Writes an adjusted copy of the metadata stream file
// `metadata_stream_path` to the file `metadata` of the directory
// `trace_dir`.
//
// The adjustment replaces the first `offset = 0;` property with the
// measured clock class offset (see GetMetadataClkClsOffset()).
//
// Throws `std::runtime_error` if the source file can't be opened, if it
// doesn't contain the `offset = 0;` property, or if the destination
// file can't be opened or fully written.
void Plugin::CopyAdjustedMetadataStreamFile(const fs::path& metadata_stream_path,
                                            const fs::path& trace_dir) {
  // Load installed metadata stream file contents (up to the first null
  // character, if any).
  std::string metadata;

  {
    std::ifstream input{metadata_stream_path};

    if (!input) {
      throw std::runtime_error{"cannot open the metadata stream file for reading"};
    }

    std::getline(input, metadata, '\0');
  }

  // Replace the original `offset` property.
  {
    const std::string offset_term{"offset = 0;"};
    const auto offset_pos = metadata.find(offset_term);

    if (offset_pos == std::string::npos) {
      // Without this check, replace() would throw an obscure
      // `std::out_of_range`.
      throw std::runtime_error{"cannot find the `offset = 0;` property in the metadata stream"};
    }

    metadata.replace(offset_pos, offset_term.size(),
                     "offset = " + std::to_string(GetMetadataClkClsOffset()) + ';');
  }

  // Write adjusted metadata stream to trace directory.
  {
    std::ofstream output{trace_dir / "metadata"};

    if (!output) {
      throw std::runtime_error{"cannot open the adjusted metadata stream file for writing"};
    }

    output.write(metadata.data(), static_cast<std::streamsize>(metadata.size()));

    // Close explicitly so that a failure to flush is reported too.
    output.close();

    if (output.fail()) {
      throw std::runtime_error{"cannot write the adjusted metadata stream file"};
    }
  }
}
|
||||
|
||||
} // namespace rocm_ctf
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador