Remove EvtStats from the tracer_tool

The same information can be generated from the hcc_ops_trace.txt file,
so a later commit will add a stage to the tblextr.py script to
generate the .csv files when ROCP_STATS_OPT=1.

Change-Id: I3d1575e096bedf98c66068d9a4ca141421e5bb9d


[ROCm/roctracer commit: 836bab37d3]
Este commit está contenido en:
Laurent Morichetti
2022-05-23 23:58:06 -07:00
padre 4af9cdc409
commit c3e991dfb1
Se han modificado 6 ficheros con 295 adiciones y 220 borrados
+4 -1
Ver fichero
@@ -85,7 +85,10 @@ typedef struct activity_record_s {
activity_correlation_id_t external_id; /* external correlation id */
};
};
size_t bytes; /* data size bytes */
union {
size_t bytes; /* data size bytes */
const char* kernel_name;
};
} activity_record_t;
/* Activity sync callback type */
+16 -6
Ver fichero
@@ -408,13 +408,23 @@ void HIP_ActivityIdCallback(activity_correlation_id_t correlation_id) {
CorrelationIdRegister(correlation_id);
}
void HIP_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
void HIP_AsyncActivityCallback(uint32_t op_id, void* record_ptr, void* arg) {
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
roctracer_record_t* record_ptr = reinterpret_cast<roctracer_record_t*>(record);
record_ptr->domain = ACTIVITY_DOMAIN_HIP_OPS;
record_ptr->correlation_id = CorrelationIdLookup(record_ptr->correlation_id);
if (record_ptr->correlation_id == 0) return;
pool->Write(*record_ptr);
roctracer_record_t record = *reinterpret_cast<roctracer_record_t*>(record_ptr);
record.domain = ACTIVITY_DOMAIN_HIP_OPS;
record.correlation_id = CorrelationIdLookup(record.correlation_id);
if (record.correlation_id == 0) return;
// If the record is for a kernel dispatch, write the kernel name in the pool's data,
// and make the record point to it. Older HIP runtimes do not provide a kernel
// name, so record.kernel_name might be null.
if (record.op == HIP_OP_ID_DISPATCH && record.kernel_name != nullptr)
pool->Write(record, record.kernel_name, strlen(record.kernel_name) + 1,
[](auto& record, const void* data) {
record.kernel_name = static_cast<const char*>(data);
});
else
pool->Write(record);
DEBUG_TRACE(
"HIP_AsyncActivityCallback(\"%s\"): op(%u) kind(%u) record(%p) pool(%p) correlation_id(%d) "
@@ -1,115 +0,0 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef EVT_STATS_H_
#define EVT_STATS_H_
#include <stdint.h>
#include <stdio.h>

#include <map>
#include <mutex>
#include <set>
#include <sstream>
#include <string>
#include <utility>
// Thread-safe accumulator of per-event statistics (occurrence count and running
// average weight) that is written out as CSV rows by dump().
//
// evt_id_t     - key identifying an event (e.g. a numeric id or a name).
// evt_weight_t - type of the value averaged per event (e.g. a duration).
template <class evt_id_t, class evt_weight_t> class EvtStatsT {
 public:
  typedef std::mutex mutex_t;
  typedef uint64_t evt_count_t;
  typedef double evt_avr_t;

  // Per-event accumulator: number of samples and their running average.
  struct evt_record_t {
    uint64_t count;
    evt_avr_t avr;
    evt_record_t() : count(0), avr(0) {}
  };

  typedef typename std::map<evt_id_t, evt_record_t> map_t;
  typedef typename std::map<evt_id_t, const char*> labels_t;

  // Orders (id, record) pairs by ascending average; ties are broken by id so
  // that distinct events are never treated as equivalent.
  struct cmpfun {
    template <typename T> bool operator()(const T& a, const T& b) const {
      return (a.second.avr != b.second.avr) ? a.second.avr < b.second.avr : a.first < b.first;
    }
  };

  // Records one occurrence of event `id` with the given `weight`, updating the
  // event's count and running average under the mutex.
  inline void add_event(evt_id_t id, evt_weight_t weight) {
    std::lock_guard<mutex_t> lck(mutex_);
    evt_record_t& rec = map_[id];
    const evt_count_t prev_count = rec.count;
    const evt_count_t new_count = prev_count + 1;
    const evt_avr_t prev_avr = rec.avr;
    const evt_avr_t new_avr = ((prev_avr * prev_count) + weight) / new_count;
    rec.count = new_count;
    rec.avr = new_avr;
  }

  // Writes one CSV row per event ("index,"label",count,avr,total"), sorted by
  // ascending average, then closes the output file. Calling dump() again after
  // the file has been closed is a no-op.
  void dump() {
    std::lock_guard<mutex_t> lck(mutex_);
    // The file is closed at the end of the first dump; never touch it again.
    if (fdes_ == NULL) return;

    fprintf(stdout, "Dumping %s\n", path_);
    fflush(stdout);

    typedef typename std::set<std::pair<evt_id_t, evt_record_t>, cmpfun> set_t;
    set_t s_(map_.begin(), map_.end());

    uint64_t index = 0;
    for (auto& e : s_) {
      const evt_id_t& id = e.first;
      const char* label = get_label(id);
      // An id without a registered label yields NULL; streaming a null
      // `const char*` is undefined behavior, so emit an empty name instead.
      if (label == NULL) label = "";
      std::ostringstream oss;
      oss << index << ",\"" << label << "\"," << e.second.count << "," << (uint64_t)(e.second.avr)
          << "," << (uint64_t)(e.second.count * e.second.avr);
      fprintf(fdes_, "%s\n", oss.str().c_str());
      index += 1;
    }

    fclose(fdes_);
    fdes_ = NULL;
  }

  // Returns the label registered for a numeric id, or NULL when none was set.
  // The lookup does not modify the label table.
  const char* get_label(const uint32_t& id) {
    auto it = labels_.find(id);
    return (it != labels_.end()) ? it->second : NULL;
  }
  // String-like ids are their own label.
  const char* get_label(const char* id) { return id; }
  const char* get_label(const std::string& id) { return id.c_str(); }

  // Associates `label` with `id`. The pointer is stored as-is, so the string
  // must outlive this object. Not synchronized with the mutex.
  void set_label(evt_id_t id, const char* label) { labels_[id] = label; }

  // `f` is the already-open output file (it is closed by dump()); `path` is
  // only used for the progress message printed by dump(). Writes the CSV header.
  EvtStatsT(FILE* f, const char* path) : fdes_(f), path_(path) {
    fprintf(fdes_, "Index,Name,Count,Avr,Total\n");
  }

 private:
  mutex_t mutex_;
  map_t map_;
  labels_t labels_;
  FILE* fdes_;
  const char* path_;
};

typedef EvtStatsT<uint32_t, uint64_t> EvtStats;
#endif // EVT_STATS_H_
+11 -98
Ver fichero
@@ -43,7 +43,6 @@
#include "util/xml.h"
#include "loader.h"
#include "trace_buffer.h"
#include "evt_stats.h"
// Macro to check ROC-tracer calls status
#define CHECK_ROCTRACER(call) \
@@ -110,12 +109,6 @@ std::vector<std::string> hip_api_vec;
LOADER_INSTANTIATE();
TRACE_BUFFER_INSTANTIATE();
typedef EvtStatsT<const std::string, uint32_t> EvtStatsA;
// HIP stats
EvtStats* hip_api_stats = NULL;
EvtStatsA* hip_kernel_stats = NULL;
EvtStatsA* hip_memcpy_stats = NULL;
// Global output file handle
FILE* begin_ts_file_handle = NULL;
FILE* roctx_file_handle = NULL;
@@ -299,10 +292,6 @@ struct hip_api_trace_entry_t {
void* ptr;
};
typedef std::map<uint64_t, const char*> hip_kernel_map_t;
hip_kernel_map_t* hip_kernel_map = NULL;
std::mutex hip_kernel_mutex;
static inline bool is_hip_kernel_launch_api(const uint32_t& cid) {
bool ret = (cid == HIP_API_ID_hipLaunchKernel) || (cid == HIP_API_ID_hipExtLaunchKernel) ||
(cid == HIP_API_ID_hipLaunchCooperativeKernel) ||
@@ -336,23 +325,14 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
entry->name, correlation_id, begin_timestamp, end_timestamp);
if (domain == ACTIVITY_DOMAIN_HIP_API) {
if (hip_api_stats != NULL) {
hip_api_stats->add_event(cid, end_timestamp - begin_timestamp);
if (is_hip_kernel_launch_api(cid)) {
hip_kernel_mutex.lock();
(*hip_kernel_map)[correlation_id] = entry->name;
hip_kernel_mutex.unlock();
}
} else {
const char* str = hipApiString((hip_api_id_t)cid, data);
rec_ss << " " << str;
if (is_hip_kernel_launch_api(cid) && entry->name) {
const char* kernel_name = cxx_demangle(entry->name);
rec_ss << " kernel=" << kernel_name;
}
rec_ss << " :" << correlation_id;
fprintf(hip_api_file_handle, "%s\n", rec_ss.str().c_str());
const char* str = hipApiString((hip_api_id_t)cid, data);
rec_ss << " " << str;
if (is_hip_kernel_launch_api(cid) && entry->name) {
const char* kernel_name = cxx_demangle(entry->name);
rec_ss << " kernel=" << kernel_name;
}
rec_ss << " :" << correlation_id;
fprintf(hip_api_file_handle, "%s\n", rec_ss.str().c_str());
} else {
fprintf(hip_api_file_handle, "%s(name(%s))\n", oss.str().c_str(), entry->name);
}
@@ -467,36 +447,6 @@ struct hip_act_trace_entry_t {
uint64_t correlation_id;
};
// HIP ACT trace buffer flush callback.
// Resolves the activity name for the entry's kind and adds the entry's duration
// to the kernel statistics (keyed by demangled kernel name) when the activity
// name starts with "Kernel", or to the memcpy statistics otherwise.
void hip_act_flush_cb(hip_act_trace_entry_t* entry) {
  const uint32_t domain = ACTIVITY_DOMAIN_HIP_OPS;
  const uint32_t op = 0;
  const char* name = roctracer_op_string(domain, op, entry->kind);
  if (name == NULL) {
    printf("hip_act_flush_cb name is NULL\n");
    fflush(stdout);
    abort();
  }

  if (strncmp("Kernel", name, 6) == 0) {
    {
      // Scoped lock: released on every exit from this block, including an
      // exception thrown while the map inserts a missing correlation id.
      std::lock_guard<std::mutex> lock(hip_kernel_mutex);
      if (hip_kernel_stats == NULL) {
        printf("hip_act_flush_cb hip_kernel_stats is NULL\n");
        fflush(stdout);
        abort();
      }
      // NOTE(review): operator[] default-inserts a null name for a correlation
      // id that was never registered - confirm cxx_demangle tolerates that.
      name = (*hip_kernel_map)[entry->correlation_id];
    }
    const char* kernel_name = cxx_demangle(name);
    hip_kernel_stats->add_event(kernel_name, entry->dur);
  } else {
    hip_memcpy_stats->add_event(name, entry->dur);
  }
}
roctracer::TraceBuffer<hip_act_trace_entry_t> hip_act_trace_buffer("HIP ACT", 0x200000,
hip_act_flush_cb, 1);
// Activity tracing callback
// hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067)
void pool_activity_callback(const char* begin, const char* end, void* arg) {
@@ -513,18 +463,10 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) {
switch (record->domain) {
case ACTIVITY_DOMAIN_HIP_OPS:
if (hip_memcpy_stats != NULL) {
hip_act_trace_entry_t* entry = hip_act_trace_buffer.GetEntry();
entry->kind = record->kind;
entry->dur = record->end_ns - record->begin_ns;
entry->correlation_id = record->correlation_id;
entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
} else {
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n", record->begin_ns,
record->end_ns, record->device_id, record->queue_id, name, record->correlation_id,
my_pid);
fflush(hcc_activity_file_handle);
}
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n", record->begin_ns,
record->end_ns, record->device_id, record->queue_id, name, record->correlation_id,
my_pid);
fflush(hcc_activity_file_handle);
break;
case ACTIVITY_DOMAIN_HSA_OPS:
if (record->op == HSA_OP_ID_COPY) {
@@ -693,10 +635,6 @@ void tool_unload() {
close_file_handles();
if (hip_api_stats) hip_api_stats->dump();
if (hip_kernel_stats) hip_kernel_stats->dump();
if (hip_memcpy_stats) hip_memcpy_stats->dump();
ONLOAD_TRACE_END();
}
@@ -951,12 +889,6 @@ extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_ver
// Allocating tracing pool
open_tracing_pool();
// Check for optimized stats
const bool is_stats_opt = (getenv("ROCP_STATS_OPT") != NULL);
// HIP kernel map instantiation
if (is_stats_opt) hip_kernel_map = new hip_kernel_map_t;
// Enable tracing
if (trace_hip_api) {
hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt");
@@ -973,30 +905,11 @@ extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_ver
CHECK_ROCTRACER(
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL));
}
if (is_stats_opt) {
const char* path = NULL;
FILE* f = open_output_file(output_prefix, "hip_api_stats.csv", &path);
hip_api_stats = new EvtStats(f, path);
for (uint32_t id = HIP_API_ID_FIRST; id <= HIP_API_ID_LAST; id += 1) {
const char* label = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, id, 0);
hip_api_stats->set_label(id, label);
}
}
}
if (trace_hip_activity) {
hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt");
CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
if (is_stats_opt) {
FILE* f = NULL;
const char* path = NULL;
f = open_output_file(output_prefix, "hip_kernel_stats.csv", &path);
hip_kernel_stats = new EvtStatsA(f, path);
f = open_output_file(output_prefix, "hip_memcpy_stats.csv", &path);
hip_memcpy_stats = new EvtStatsA(f, path);
}
}
}
+5
Ver fichero
@@ -141,3 +141,8 @@ file(GLOB files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "golden_traces/tests_trace_
foreach(file ${files})
configure_file(${file} ${PROJECT_BINARY_DIR}/test/${file} COPYONLY)
endforeach()
# Build the hip_stats tool as a shared library from hip_stats/hip_stats.cpp.
# EXCLUDE_FROM_ALL keeps it out of the default "all" target.
add_library(hip_stats SHARED EXCLUDE_FROM_ALL hip_stats/hip_stats.cpp)
# Define __HIP_PLATFORM_AMD__ when compiling the hip_stats sources only.
target_compile_definitions(hip_stats PRIVATE __HIP_PLATFORM_AMD__)
# Link hip_stats against roctracer.
target_link_libraries(hip_stats roctracer)
# Building the `mytest` target also builds hip_stats.
add_dependencies(mytest hip_stats)
@@ -0,0 +1,259 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "roctracer.h"
#include "roctracer_hip.h"
#include <cstdint>
#include <cstdlib>
#include <experimental/filesystem>
#include <iomanip>
#include <iostream>
#include <fstream>
#include <numeric>
#include <set>
#include <string>
#include <sstream>
#include <unordered_map>
#include <utility>
// Evaluates the roctracer call `call` once; if it does not return
// ROCTRACER_STATUS_SUCCESS, prints roctracer_error_string() to stderr and
// aborts the process. Wrapped in do/while(false) so it behaves as a single
// statement.
#define CHECK_ROCTRACER(call) \
do { \
roctracer_status_t status = call; \
if (status != ROCTRACER_STATUS_SUCCESS) { \
std::cerr << roctracer_error_string() << std::endl; \
abort(); \
} \
} while (false)
namespace {
// Rounds `v` up to the nearest power of two. Powers of two map to themselves,
// 0 maps to 1, and values above 2^63 wrap around to 0.
constexpr uint64_t NextPowerOf2(uint64_t v) {
  // Treat 0 like 1, then step back by one so exact powers of two are preserved.
  uint64_t mask = (v == 0 ? 1 : v) - 1;
  // Smear the highest set bit into every lower position (shifts 1, 2, ..., 32).
  for (unsigned shift = 1; shift < 64; shift *= 2) mask |= mask >> shift;
  return mask + 1;
}
// Binary size units, in bytes.
constexpr size_t KiB = 1024;
constexpr size_t MiB = KiB * KiB;
constexpr size_t GiB = KiB * KiB * KiB;

// Formats a byte count for display. Sizes below 1 KiB are printed as a plain
// integer; larger sizes are divided by the largest unit not exceeding them
// (KiB, MiB or GiB) and printed in fixed notation with `precision` decimals,
// followed by "K", "M" or "G".
std::string HumanReadableSize(size_t size, int precision) {
  std::stringstream text;
  if (size < KiB) {
    text << size;
    return text.str();
  }

  // Pick the unit: GiB unless the size falls into a smaller bracket.
  size_t unit = GiB;
  const char* suffix = "G";
  if (size < MiB) {
    unit = KiB;
    suffix = "K";
  } else if (size < GiB) {
    unit = MiB;
    suffix = "M";
  }

  text << std::fixed << std::setprecision(precision) << static_cast<double>(size) / unit << suffix;
  return text.str();
}
// Accumulated timing for one function or kernel: total elapsed time and number
// of samples. Members are zero-initialized so a default-constructed instance is
// immediately usable, not only one value-initialized by a container.
struct FunctionStats {
  uint64_t total_time_ns = 0;
  uint64_t count = 0;

  // Adds one sample of `time_ns` nanoseconds.
  void Accumulate(uint64_t time_ns) {
    total_time_ns += time_ns;
    ++count;
  }
};
// Accumulated statistics for one class of memory copies: total elapsed time,
// total bytes transferred and number of samples. Members are zero-initialized
// so a default-constructed instance is immediately usable, not only one
// value-initialized by a container.
struct MemCopyStats {
  uint64_t total_time_ns = 0;
  uint64_t total_byte_size = 0;
  uint64_t count = 0;

  // Adds one copy of `byte_size` bytes that took `time_ns` nanoseconds.
  void Accumulate(uint64_t time_ns, uint64_t byte_size) {
    total_time_ns += time_ns;
    total_byte_size += byte_size;
    ++count;
  }
};
// Hash functor for std::pair, usable as the Hash parameter of unordered
// containers. The two member hashes are mixed asymmetrically: a plain XOR maps
// (a, b) and (b, a) to the same value and collapses every pair whose halves
// hash equally to 0.
struct pair_hash {
  template <typename T1, typename T2> std::size_t operator()(const std::pair<T1, T2>& pair) const {
    const std::size_t seed = std::hash<T1>()(pair.first);
    const std::size_t other = std::hash<T2>()(pair.second);
    // Golden-ratio constant plus shifted seed, as in the widely used hash_combine recipe.
    constexpr std::size_t kGoldenRatio = static_cast<std::size_t>(0x9e3779b97f4a7c15ULL);
    return seed ^ (other + kGoldenRatio + (seed << 6) + (seed >> 2));
  }
};
// Statistics tables filled by CollectStatistics() and printed by DumpStatistics().
// HIP API call timings, keyed by the record's `op`.
std::unordered_map<decltype(roctracer_record_t::op), FunctionStats> hip_api_stats;
// Kernel dispatch timings, keyed by kernel name.
std::unordered_map<std::string, FunctionStats> kernel_stats;
// Memory copy timings, keyed by (record kind, copy size rounded up to a power of two).
std::unordered_map<std::pair<decltype(roctracer_record_t::kind), size_t>, MemCopyStats, pair_hash>
memcpy_stats;
// Buffer callback: walks the activity records in [begin, end) and folds each
// record's elapsed time into the matching statistics table.
//   - HIP_OPS dispatch records -> kernel_stats, keyed by kernel name
//     ("Unknown kernels" when the record carries no name)
//   - HIP_OPS copy records     -> memcpy_stats, keyed by (kind, size bucket)
//   - HIP_API records          -> hip_api_stats, keyed by op
// Any other record is skipped.
void CollectStatistics(const char* begin, const char* end, void* /* user_arg */) {
  const auto* const last = reinterpret_cast<const roctracer_record_t*>(end);
  for (const auto* record = reinterpret_cast<const roctracer_record_t*>(begin); record < last;) {
    const auto duration_ns = record->end_ns - record->begin_ns;

    if (record->domain == ACTIVITY_DOMAIN_HIP_OPS) {
      if (record->op == HIP_OP_ID_DISPATCH) {
        const char* kernel_name =
            record->kernel_name != nullptr ? record->kernel_name : "Unknown kernels";
        kernel_stats[kernel_name].Accumulate(duration_ns);
      } else if (record->op == HIP_OP_ID_COPY) {
        // Copies are bucketed by their size rounded up to a power of two.
        memcpy_stats[std::make_pair(record->kind, NextPowerOf2(record->bytes))].Accumulate(
            duration_ns, record->bytes);
      }
    } else if (record->domain == ACTIVITY_DOMAIN_HIP_API) {
      hip_api_stats[record->op].Accumulate(duration_ns);
    }

    // Advance to the following record in the buffer.
    CHECK_ROCTRACER(roctracer_next_record(record, &record));
  }
}
namespace fs = std::experimental::filesystem;

// Closes the roctracer pool and prints the collected HIP API, kernel dispatch
// and memory copy statistics as CSV, each table sorted by descending total time.
//
// When the ROCP_OUTPUT_DIR environment variable is set, it must name an
// existing directory; one file per non-empty table is written there
// (hip_api_stats.csv, hip_kernel_stats.csv, hip_copy_stats.csv). Otherwise all
// tables are printed to stdout.
void DumpStatistics() {
  // NOTE(review): presumably closing the pool delivers any still-buffered
  // records to CollectStatistics - confirm against the roctracer API docs.
  CHECK_ROCTRACER(roctracer_close_pool());

  const fs::path output_dir = []() {
    const char* env_var = getenv("ROCP_OUTPUT_DIR");
    return env_var != nullptr ? env_var : "";
  }();

  std::ofstream out;
  if (output_dir.empty()) {
    // If an output directory was not specified, then print the statistics to stdout
    // by making `out` share std::cout's formatting, state and stream buffer.
    out.copyfmt(std::cout);
    out.clear(std::cout.rdstate());
    out.basic_ios<char>::rdbuf(std::cout.rdbuf());
  } else {
    if (auto status = fs::status(output_dir); !fs::exists(status) || !fs::is_directory(status)) {
      std::cerr << "error: ROCP_OUTPUT_DIR=" << output_dir << " is not a directory" << std::endl;
      return;
    }
  }

  // Orders table entries by descending total time. It compares the time only,
  // so the sorted views below use std::multiset: a std::set would treat entries
  // with equal totals as duplicates and silently drop all but one of them.
  auto compare = [](const auto& x, const auto& y) {
    return x.second.total_time_ns > y.second.total_time_ns;
  };

  // Sums the total time over a whole table. The initial value is an explicit
  // uint64_t: std::accumulate accumulates in the type of its initial value, so
  // a plain `0` would truncate the running sum to int on every step.
  const auto total_time = [](const auto& table) {
    return std::accumulate(table.begin(), table.end(), uint64_t{0},
                           [](uint64_t sum_ns, const auto& entry) {
                             return sum_ns + entry.second.total_time_ns;
                           });
  };

  // Share of `part_ns` in `whole_ns` as a percentage; 0 when the whole is 0 so
  // the CSV never contains "nan".
  const auto percentage = [](uint64_t part_ns, uint64_t whole_ns) {
    return whole_ns == 0 ? 0.0 : static_cast<double>(part_ns) / whole_ns * 100;
  };

  // Print the HIP API statistics sorted by descending total inclusive time.
  if (!hip_api_stats.empty()) {
    const char* filename = "hip_api_stats.csv";
    if (!output_dir.empty()) out = std::ofstream(output_dir / filename);

    if (out.good()) {
      std::cout << "Dumping HIP API statistics." << std::endl;
      const uint64_t total_hip_api_time_ns = total_time(hip_api_stats);

      out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl;
      for (auto&& [op, stats] :
           std::multiset<decltype(hip_api_stats)::value_type, decltype(compare)>(
               hip_api_stats.begin(), hip_api_stats.end(), compare))
        out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op, 0) << "\"," << stats.count
            << "," << stats.total_time_ns << "," << stats.total_time_ns / stats.count << ","
            << std::fixed << std::setprecision(4)
            << percentage(stats.total_time_ns, total_hip_api_time_ns) << std::endl;
    } else {
      std::cerr << "warning: could not open " << output_dir / filename << std::endl;
    }
  }

  // Print the HIP kernel dispatch statistics sorted by descending execution time.
  if (!kernel_stats.empty()) {
    const char* filename = "hip_kernel_stats.csv";
    if (!output_dir.empty()) out = std::ofstream(output_dir / filename);

    if (out.good()) {
      std::cout << "Dumping HIP kernel dispatch statistics." << std::endl;
      const uint64_t total_kernel_time_ns = total_time(kernel_stats);

      out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl;
      for (auto&& [name, stats] :
           std::multiset<decltype(kernel_stats)::value_type, decltype(compare)>(
               kernel_stats.begin(), kernel_stats.end(), compare))
        out << "\"" << name << "\"," << stats.count << "," << stats.total_time_ns << ","
            << stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4)
            << percentage(stats.total_time_ns, total_kernel_time_ns) << std::endl;
    } else {
      std::cerr << "warning: could not open " << output_dir / filename << std::endl;
    }
  }

  // Print the HIP memory copy statistics sorted by descending transfer time.
  if (!memcpy_stats.empty()) {
    const char* filename = "hip_copy_stats.csv";
    if (!output_dir.empty()) out = std::ofstream(output_dir / filename);

    if (out.good()) {
      std::cout << "Dumping HIP memory copy statistics." << std::endl;
      const uint64_t total_memory_copy_time_ns = total_time(memcpy_stats);

      out << "\"Name\",\"Calls\",\"TotalBytes\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\""
          << std::endl;
      // `kind` is the (record kind, power-of-two size bucket) key; the label
      // shows the bucket as "(half the bucket size - bucket size)".
      for (auto&& [kind, stats] :
           std::multiset<decltype(memcpy_stats)::value_type, decltype(compare)>(
               memcpy_stats.begin(), memcpy_stats.end(), compare))
        out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY, kind.first)
            << "(" << HumanReadableSize(kind.second >> 1, 0) << "-"
            << HumanReadableSize(kind.second, 0) << ")"
            << "\"," << stats.count << "," << stats.total_byte_size << "," << stats.total_time_ns
            << "," << stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4)
            << percentage(stats.total_time_ns, total_memory_copy_time_ns) << std::endl;
    } else {
      std::cerr << "warning: could not open " << output_dir / filename << std::endl;
    }
  }
}
} // namespace
#include <hsa_api_trace.h>
// Tool entry point. Opens a roctracer pool whose buffer holds 10000 records and
// is drained by CollectStatistics, enables activity collection for the HIP API
// domain and for HIP dispatch/copy operations, and registers DumpStatistics to
// run at process exit. All parameters are unused. Always returns true.
extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* /* table */, uint64_t /* runtime_version */,
                                        uint64_t /* failed_tool_count */,
                                        const char* const* /* failed_tool_names */) {
  constexpr size_t kRecordsPerBuffer = 10000;

  roctracer_properties_t pool_properties{};
  pool_properties.buffer_callback_arg = nullptr;
  pool_properties.buffer_callback_fun = CollectStatistics;
  pool_properties.buffer_size = kRecordsPerBuffer * sizeof(roctracer_record_t);
  CHECK_ROCTRACER(roctracer_open_pool(&pool_properties));

  CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
  CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_DISPATCH));
  CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY));

  // The statistics are printed when the process exits.
  std::atexit(DumpStatistics);
  return true;
}
// Tool unload hook. Intentionally empty: the statistics are dumped by the
// handler that OnLoad registers with std::atexit.
extern "C" ROCTRACER_EXPORT void OnUnload() {}