Add TraceBuffer entry construction/destruction

Change-Id: I354f36b0d7a0baea0efb75d5e81f169b5f969542


[ROCm/roctracer commit: 2513f9f51f]
This commit is contained in:
Laurent Morichetti
2022-07-26 16:04:20 -07:00
rodzic b2b252ba31
commit 7f47eb9d02
3 zmienionych plików z 111 dodań i 100 usunięć
@@ -145,6 +145,44 @@ class TraceBuffer : protected TraceBufferBase {
}
}
// Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are
// monotonically increasing indices, with read_pointer % size always indexing inside the first
// buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with
// the next invocation after changing its state to 'complete'.
void Flush() override {
std::lock_guard lock(write_mutex_);
auto write_index = write_index_.load(std::memory_order_relaxed);
for (auto it = buffer_list_.begin(); it != buffer_list_.end();) {
auto end_of_buffer = read_index_ - read_index_ % size_ + size_;
while (read_index_ < std::min(write_index.index, end_of_buffer)) {
Entry* entry = &(*it)[read_index_ % size_];
// The entry is not yet complete, stop flushing here.
if (entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return;
flush_callback_(entry);
entry->~Entry();
++read_index_;
}
// The buffer is still in use or the read pointer did not reach the end of the buffer.
if (*it == write_index.buffer || read_index_ != end_of_buffer) return;
// All entries in the current buffer are now processed. Destroy the buffer and move onto the
// next buffer in the list.
allocator_.deallocate(*it, size_);
it = buffer_list_.erase(it);
}
}
template <typename... Args> Entry& Emplace(Args... args) {
return *new (GetEntry()) Entry(std::forward<Args>(args)...);
}
private:
Entry* GetEntry() {
auto current = write_index_.load(std::memory_order_relaxed);
@@ -194,38 +232,6 @@ class TraceBuffer : protected TraceBufferBase {
}
}
// Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are
// monotonically increasing indices, with read_pointer % size always indexing inside the first
// buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with
// the next invocation after changing its state to 'complete'.
void Flush() override {
std::lock_guard lock(write_mutex_);
auto write_index = write_index_.load(std::memory_order_relaxed);
for (auto it = buffer_list_.begin(); it != buffer_list_.end();) {
auto end_of_buffer = read_index_ - read_index_ % size_ + size_;
while (read_index_ < std::min(write_index.index, end_of_buffer)) {
Entry* entry = &(*it)[read_index_ % size_];
// The entry is not yet complete, stop flushing here.
if (entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return;
flush_callback_(entry);
++read_index_;
}
// The buffer is still in use or the read pointer did not reach the end of the buffer.
if (*it == write_index.buffer || read_index_ != end_of_buffer) return;
// All entries in the current buffer are now processed. Destroy the buffer and move onto the
// next buffer in the list.
allocator_.deallocate(*it, size_);
it = buffer_list_.erase(it);
}
}
private:
void AllocateFreeBuffer() {
assert(free_buffer_ == nullptr);
@@ -238,6 +238,19 @@ struct roctx_trace_entry_t {
uint32_t tid;
roctx_range_id_t rid;
const char* message;
roctx_trace_entry_t(uint32_t cid_, roctracer_timestamp_t time_, uint32_t pid_, uint32_t tid_,
roctx_range_id_t rid_, const char* message_)
: valid(roctracer::TRACE_ENTRY_INIT),
cid(cid_),
time(time_),
pid(pid_),
tid(tid_),
rid(rid_),
message(message_ != nullptr ? strdup(message_) : nullptr) {}
~roctx_trace_entry_t() {
if (message != nullptr) free(const_cast<char*>(message));
}
};
// rocTX buffer flush function
@@ -261,16 +274,9 @@ void roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data
void* /* user_arg */) {
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry();
entry->cid = cid;
entry->time = util::timestamp_ns();
entry->pid = GetPid();
entry->tid = GetTid();
entry->rid = data->args.id;
entry->message = (data->args.message != NULL)
? strdup(data->args.message) /* FIXME: Who frees the message? */
: NULL;
entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
roctx_trace_entry_t& entry = roctx_trace_buffer.Emplace(
cid, util::timestamp_ns(), GetPid(), GetTid(), data->args.id, data->args.message);
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -284,6 +290,17 @@ struct hsa_api_trace_entry_t {
uint32_t pid;
uint32_t tid;
hsa_api_data_t data;
hsa_api_trace_entry_t(uint32_t cid_, roctracer_timestamp_t begin_, roctracer_timestamp_t end_,
uint32_t pid_, uint32_t tid_, const hsa_api_data_t& data_)
: valid(roctracer::TRACE_ENTRY_INIT),
cid(cid_),
begin(begin_),
end(end_),
pid(pid_),
tid(tid_),
data(data_) {}
~hsa_api_trace_entry_t() {}
};
void hsa_api_flush_cb(hsa_api_trace_entry_t* entry) {
@@ -307,14 +324,9 @@ void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
} else {
const roctracer_timestamp_t end_timestamp =
(cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : util::timestamp_ns();
hsa_api_trace_entry_t* entry = hsa_api_trace_buffer.GetEntry();
entry->cid = cid;
entry->begin = hsa_begin_timestamp;
entry->end = end_timestamp;
entry->pid = GetPid();
entry->tid = GetTid();
entry->data = *data;
entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
hsa_api_trace_entry_t& entry = hsa_api_trace_buffer.Emplace(
cid, hsa_begin_timestamp, end_timestamp, GetPid(), GetTid(), *data);
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
}
@@ -323,7 +335,7 @@ void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
struct hip_api_trace_entry_t {
std::atomic<uint32_t> valid;
uint32_t domain;
activity_domain_t domain;
uint32_t cid;
roctracer_timestamp_t begin;
roctracer_timestamp_t end;
@@ -332,6 +344,24 @@ struct hip_api_trace_entry_t {
hip_api_data_t data;
const char* name;
void* ptr;
hip_api_trace_entry_t(activity_domain_t domain_, uint32_t cid_, roctracer_timestamp_t begin_,
roctracer_timestamp_t end_, uint32_t pid_, uint32_t tid_,
const hip_api_data_t& data_, const char* name_, void* ptr_)
: valid(roctracer::TRACE_ENTRY_INIT),
domain(domain_),
cid(cid_),
begin(begin_),
end(end_),
pid(pid_),
tid(tid_),
data(data_),
name(name_ != nullptr ? strdup(name_) : nullptr),
ptr(ptr_) {}
~hip_api_trace_entry_t() {
if (name != nullptr) free(const_cast<char*>(name));
}
};
static inline bool is_hip_kernel_launch_api(const uint32_t& cid) {
@@ -395,7 +425,6 @@ void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
(void)arg;
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
const roctracer_timestamp_t timestamp = util::timestamp_ns();
hip_api_trace_entry_t* entry = NULL;
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
hip_begin_timestamp = timestamp;
@@ -403,68 +432,58 @@ void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
// Post init of HIP APU args
hipApiArgsInit((hip_api_id_t)cid, const_cast<hip_api_data_t*>(data));
entry = hip_api_trace_buffer.GetEntry();
entry->cid = cid;
entry->domain = domain;
entry->begin = hip_begin_timestamp;
entry->end = timestamp;
entry->pid = GetPid();
entry->tid = GetTid();
entry->data = *data;
entry->name = NULL;
entry->ptr = NULL;
std::string kernel_name;
if (cid == HIP_API_ID_hipMalloc) {
entry->ptr = *(data->args.hipMalloc.ptr);
} else if (is_hip_kernel_launch_api(cid)) {
if (is_hip_kernel_launch_api(cid)) {
switch (cid) {
case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice:
case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: {
const hipLaunchParams* listKernels =
data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList;
std::string name_str = "";
for (int i = 0; i < data->args.hipLaunchCooperativeKernelMultiDevice.numDevices; ++i) {
std::stringstream ss;
const hipLaunchParams& lp = listKernels[i];
if (lp.func != NULL) {
const char* kernel_name =
roctracer::HipLoader::Instance().KernelNameRefByPtr(lp.func, lp.stream);
const int device_id = roctracer::HipLoader::Instance().GetStreamDeviceId(lp.stream);
name_str += std::string(kernel_name) + ":" + std::to_string(device_id) + ";";
if (lp.func != nullptr) {
ss << roctracer::HipLoader::Instance().KernelNameRefByPtr(lp.func, lp.stream) << ":"
<< roctracer::HipLoader::Instance().GetStreamDeviceId(lp.stream) << ";";
}
kernel_name = ss.str();
}
entry->name = strdup(name_str.c_str());
break;
}
case HIP_API_ID_hipLaunchKernel:
case HIP_API_ID_hipLaunchCooperativeKernel: {
const void* f = data->args.hipLaunchKernel.function_address;
hipStream_t stream = data->args.hipLaunchKernel.stream;
if (f != NULL)
entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream));
if (f != nullptr)
kernel_name = roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream);
break;
}
case HIP_API_ID_hipExtLaunchKernel: {
const void* f = data->args.hipExtLaunchKernel.function_address;
hipStream_t stream = data->args.hipExtLaunchKernel.stream;
if (f != NULL)
entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream));
if (f != nullptr)
kernel_name = roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream);
break;
}
default: {
const hipFunction_t f = data->args.hipModuleLaunchKernel.f;
if (f != NULL) entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRef(f));
if (f != nullptr) kernel_name = roctracer::HipLoader::Instance().KernelNameRef(f);
}
}
}
entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
hip_api_trace_entry_t& entry = hip_api_trace_buffer.Emplace(
static_cast<activity_domain_t>(domain), cid, hip_begin_timestamp, timestamp, GetPid(),
GetTid(), *data, kernel_name.c_str(),
cid == HIP_API_ID_hipMalloc ? data->args.hipMalloc.ptr : nullptr);
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
DEBUG_TRACE(
"hip_api_callback(\"%s\") phase(%d): cid(%u) data(%p) entry(%p) name(\"%s\") "
"correlation_id(%lu) timestamp(%lu)\n",
roctracer_op_string(domain, cid, 0), data->phase, cid, data, entry,
(entry) ? entry->name : NULL, data->correlation_id, timestamp);
(entry.name != nullptr) ? entry.name : "", data->correlation_id, timestamp);
}
void mark_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
@@ -472,29 +491,15 @@ void mark_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
const char* name = reinterpret_cast<const char*>(callback_data);
const roctracer_timestamp_t timestamp = util::timestamp_ns();
hip_api_trace_entry_t* entry = hip_api_trace_buffer.GetEntry();
entry->cid = 0;
entry->domain = domain;
entry->begin = timestamp;
entry->end = timestamp + 1;
entry->pid = GetPid();
entry->tid = GetTid();
entry->data = {};
entry->name = strdup(name);
entry->ptr = NULL;
entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
hip_api_trace_entry_t& entry = hip_api_trace_buffer.Emplace(
static_cast<activity_domain_t>(domain), cid, timestamp, timestamp + 1, GetPid(), GetTid(),
hip_api_data_t{}, name, nullptr);
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// HSA API tracing
struct hip_act_trace_entry_t {
std::atomic<uint32_t> valid;
uint32_t kind;
roctracer_timestamp_t dur;
uint64_t correlation_id;
};
// Activity tracing callback
// hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067)
void pool_activity_callback(const char* begin, const char* end, void* arg) {
@@ -62,8 +62,8 @@ int main() {
for (auto&& thread : threads) {
thread = std::thread([&trace_buffer]() {
for (std::size_t j = 0; j < num_iterations; ++j) {
auto* entry = trace_buffer.GetEntry();
entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
auto& entry = trace_buffer.Emplace();
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
});
}