From c3e991dfb167c0c88cee25c6fdec1a3a86791712 Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Mon, 23 May 2022 23:58:06 -0700 Subject: [PATCH] Remove EvtStats from the tracer_tool The same information can be generated from the hcc_ops_trace.txt file, so in a later commit, will add a stage to the tblextr.py script to generate the .csv files when ROCP_STATS_OPT=1. Change-Id: I3d1575e096bedf98c66068d9a4ca141421e5bb9d [ROCm/roctracer commit: 836bab37d3fcdc12a51de419e381b1ac523199c3] --- projects/roctracer/inc/ext/prof_protocol.h | 5 +- .../roctracer/src/roctracer/roctracer.cpp | 22 +- .../roctracer/src/tracer_tool/evt_stats.h | 115 -------- .../roctracer/src/tracer_tool/tracer_tool.cpp | 109 +------- projects/roctracer/test/CMakeLists.txt | 5 + .../roctracer/test/hip_stats/hip_stats.cpp | 259 ++++++++++++++++++ 6 files changed, 295 insertions(+), 220 deletions(-) delete mode 100644 projects/roctracer/src/tracer_tool/evt_stats.h create mode 100644 projects/roctracer/test/hip_stats/hip_stats.cpp diff --git a/projects/roctracer/inc/ext/prof_protocol.h b/projects/roctracer/inc/ext/prof_protocol.h index d9aff1fb9d..6a824d551e 100644 --- a/projects/roctracer/inc/ext/prof_protocol.h +++ b/projects/roctracer/inc/ext/prof_protocol.h @@ -85,7 +85,10 @@ typedef struct activity_record_s { activity_correlation_id_t external_id; /* external correlation id */ }; }; - size_t bytes; /* data size bytes */ + union { + size_t bytes; /* data size bytes */ + const char* kernel_name; + }; } activity_record_t; /* Activity sync callback type */ diff --git a/projects/roctracer/src/roctracer/roctracer.cpp b/projects/roctracer/src/roctracer/roctracer.cpp index 5e132382fd..97f7669759 100644 --- a/projects/roctracer/src/roctracer/roctracer.cpp +++ b/projects/roctracer/src/roctracer/roctracer.cpp @@ -408,13 +408,23 @@ void HIP_ActivityIdCallback(activity_correlation_id_t correlation_id) { CorrelationIdRegister(correlation_id); } -void HIP_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) { +void HIP_AsyncActivityCallback(uint32_t op_id, void* record_ptr, void* arg) { MemoryPool* pool = reinterpret_cast(arg); - roctracer_record_t* record_ptr = reinterpret_cast(record); - record_ptr->domain = ACTIVITY_DOMAIN_HIP_OPS; - record_ptr->correlation_id = CorrelationIdLookup(record_ptr->correlation_id); - if (record_ptr->correlation_id == 0) return; - pool->Write(*record_ptr); + roctracer_record_t record = *reinterpret_cast(record_ptr); + record.domain = ACTIVITY_DOMAIN_HIP_OPS; + record.correlation_id = CorrelationIdLookup(record.correlation_id); + if (record.correlation_id == 0) return; + + // If the record is for a kernel dispatch, write the kernel name in the pool's data, + // and make the record point to it. Older HIP runtimes do not provide a kernel + // name, so record.kernel_name might be null. + if (record.op == HIP_OP_ID_DISPATCH && record.kernel_name != nullptr) + pool->Write(record, record.kernel_name, strlen(record.kernel_name) + 1, + [](auto& record, const void* data) { + record.kernel_name = static_cast(data); + }); + else + pool->Write(record); DEBUG_TRACE( "HIP_AsyncActivityCallback(\"%s\"): op(%u) kind(%u) record(%p) pool(%p) correlation_id(%d) " diff --git a/projects/roctracer/src/tracer_tool/evt_stats.h b/projects/roctracer/src/tracer_tool/evt_stats.h deleted file mode 100644 index 4731cd1bc4..0000000000 --- a/projects/roctracer/src/tracer_tool/evt_stats.h +++ /dev/null @@ -1,115 +0,0 @@ -/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef EVT_STATS_H_ -#define EVT_STATS_H_ - -#include - -#include -#include -#include -#include - -template class EvtStatsT { - public: - typedef std::mutex mutex_t; - typedef uint64_t evt_count_t; - typedef double evt_avr_t; - struct evt_record_t { - uint64_t count; - evt_avr_t avr; - evt_record_t() : count(0), avr(0) {} - }; - typedef typename std::map map_t; - typedef typename std::map labels_t; - - // Comparison function - struct cmpfun { - template bool operator()(const T& a, const T& b) const { - return (a.second.avr != b.second.avr) ? a.second.avr < b.second.avr : a.first < b.first; - } - }; - - inline void add_event(evt_id_t id, evt_weight_t weight) { - std::lock_guard lck(mutex_); - // printf("EvtStats %p ::add_event %u %lu\n", this, id, weight); fflush(stdout); - - evt_record_t& rec = map_[id]; - const evt_count_t prev_count = rec.count; - const evt_count_t new_count = prev_count + 1; - const evt_avr_t prev_avr = rec.avr; - const evt_avr_t new_avr = ((prev_avr * prev_count) + weight) / new_count; - - rec.count = new_count; - rec.avr = new_avr; - } - - void dump() { - std::lock_guard lck(mutex_); - fprintf(stdout, "Dumping %s\n", path_); - fflush(stdout); - - typedef typename std::set, cmpfun> set_t; - set_t s_(map_.begin(), map_.end()); - - uint64_t index = 0; - for (auto& e : s_) { - const evt_id_t id = e.first; - const char* label = get_label(id); - std::ostringstream oss; - oss << index << ",\"" << label << "\"," << e.second.count << "," << (uint64_t)(e.second.avr) - << "," << (uint64_t)(e.second.count * e.second.avr); - fprintf(fdes_, "%s\n", oss.str().c_str()); - index += 1; - } - - fclose(fdes_); - } - - const char* get_label(const uint32_t& id) { - auto ret = labels_.insert({id, NULL}); - const char* label = ret.first->second; - return label; - } - const char* get_label(const char* id) { return id; } - const char* get_label(const std::string& id) { return id.c_str(); } - - void set_label(evt_id_t id, const char* label) { - // printf("EvtStats %p ::set_label %u %s\n", this, id, label); fflush(stdout); - labels_[id] = label; - } - - EvtStatsT(FILE* f, const char* path) : fdes_(f), path_(path) { - // printf("EvtStats %p ::EvtStatsT()\n", this); fflush(stdout); - fprintf(fdes_, "Index,Name,Count,Avr,Total\n"); - } - - private: - mutex_t mutex_; - map_t map_; - labels_t labels_; - FILE* fdes_; - const char* path_; -}; - -typedef EvtStatsT EvtStats; - -#endif // EVT_STATS_H_ diff --git a/projects/roctracer/src/tracer_tool/tracer_tool.cpp b/projects/roctracer/src/tracer_tool/tracer_tool.cpp index 72745212a7..9f1712f662 100644 --- a/projects/roctracer/src/tracer_tool/tracer_tool.cpp +++ b/projects/roctracer/src/tracer_tool/tracer_tool.cpp @@ -43,7 +43,6 @@ #include "util/xml.h" #include "loader.h" #include "trace_buffer.h" -#include "evt_stats.h" // Macro to check ROC-tracer calls status #define CHECK_ROCTRACER(call) \ @@ -110,12 +109,6 @@ std::vector hip_api_vec; LOADER_INSTANTIATE(); TRACE_BUFFER_INSTANTIATE(); -typedef EvtStatsT EvtStatsA; -// HIP stats -EvtStats* hip_api_stats = NULL; -EvtStatsA* hip_kernel_stats = NULL; -EvtStatsA* hip_memcpy_stats = NULL; - // Global output file handle FILE* begin_ts_file_handle = NULL; FILE* roctx_file_handle = NULL; @@ -299,10 +292,6 @@ struct hip_api_trace_entry_t { void* ptr; }; -typedef std::map hip_kernel_map_t; -hip_kernel_map_t* hip_kernel_map = NULL; -std::mutex hip_kernel_mutex; - static inline bool is_hip_kernel_launch_api(const uint32_t& cid) { bool ret = (cid == HIP_API_ID_hipLaunchKernel) || (cid == HIP_API_ID_hipExtLaunchKernel) || (cid == HIP_API_ID_hipLaunchCooperativeKernel) || @@ -336,23 +325,14 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) { entry->name, correlation_id, begin_timestamp, end_timestamp); if (domain == ACTIVITY_DOMAIN_HIP_API) { - if (hip_api_stats != NULL) { - hip_api_stats->add_event(cid, end_timestamp - begin_timestamp); - if (is_hip_kernel_launch_api(cid)) { - hip_kernel_mutex.lock(); - (*hip_kernel_map)[correlation_id] = entry->name; - hip_kernel_mutex.unlock(); - } - } else { - const char* str = hipApiString((hip_api_id_t)cid, data); - rec_ss << " " << str; - if (is_hip_kernel_launch_api(cid) && entry->name) { - const char* kernel_name = cxx_demangle(entry->name); - rec_ss << " kernel=" << kernel_name; - } - rec_ss << " :" << correlation_id; - fprintf(hip_api_file_handle, "%s\n", rec_ss.str().c_str()); + const char* str = hipApiString((hip_api_id_t)cid, data); + rec_ss << " " << str; + if (is_hip_kernel_launch_api(cid) && entry->name) { + const char* kernel_name = cxx_demangle(entry->name); + rec_ss << " kernel=" << kernel_name; } + rec_ss << " :" << correlation_id; + fprintf(hip_api_file_handle, "%s\n", rec_ss.str().c_str()); } else { fprintf(hip_api_file_handle, "%s(name(%s))\n", oss.str().c_str(), entry->name); } @@ -467,36 +447,6 @@ struct hip_act_trace_entry_t { uint64_t correlation_id; }; -// HIP ACT trace buffer flush callback -void hip_act_flush_cb(hip_act_trace_entry_t* entry) { - const uint32_t domain = ACTIVITY_DOMAIN_HIP_OPS; - const uint32_t op = 0; - const char* name = roctracer_op_string(domain, op, entry->kind); - if (name == NULL) { - printf("hip_act_flush_cb name is NULL\n"); - fflush(stdout); - abort(); - } - - if (strncmp("Kernel", name, 6) == 0) { - hip_kernel_mutex.lock(); - if (hip_kernel_stats == NULL) { - printf("hip_act_flush_cb hip_kernel_stats is NULL\n"); - fflush(stdout); - abort(); - } - name = (*hip_kernel_map)[entry->correlation_id]; - hip_kernel_mutex.unlock(); - const char* kernel_name = cxx_demangle(name); - hip_kernel_stats->add_event(kernel_name, entry->dur); - } else { - hip_memcpy_stats->add_event(name, entry->dur); - } -} - -roctracer::TraceBuffer hip_act_trace_buffer("HIP ACT", 0x200000, - hip_act_flush_cb, 1); - // Activity tracing callback // hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067) void pool_activity_callback(const char* begin, const char* end, void* arg) { @@ -513,18 +463,10 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) { switch (record->domain) { case ACTIVITY_DOMAIN_HIP_OPS: - if (hip_memcpy_stats != NULL) { - hip_act_trace_entry_t* entry = hip_act_trace_buffer.GetEntry(); - entry->kind = record->kind; - entry->dur = record->end_ns - record->begin_ns; - entry->correlation_id = record->correlation_id; - entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); - } else { - fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n", record->begin_ns, - record->end_ns, record->device_id, record->queue_id, name, record->correlation_id, - my_pid); - fflush(hcc_activity_file_handle); - } + fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n", record->begin_ns, + record->end_ns, record->device_id, record->queue_id, name, record->correlation_id, + my_pid); + fflush(hcc_activity_file_handle); break; case ACTIVITY_DOMAIN_HSA_OPS: if (record->op == HSA_OP_ID_COPY) { @@ -693,10 +635,6 @@ void tool_unload() { close_file_handles(); - if (hip_api_stats) hip_api_stats->dump(); - if (hip_kernel_stats) hip_kernel_stats->dump(); - if (hip_memcpy_stats) hip_memcpy_stats->dump(); - ONLOAD_TRACE_END(); } @@ -951,12 +889,6 @@ extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_ver // Allocating tracing pool open_tracing_pool(); - // Check for optimized stats - const bool is_stats_opt = (getenv("ROCP_STATS_OPT") != NULL); - - // HIP kernel map instantiation - if (is_stats_opt) hip_kernel_map = new hip_kernel_map_t; - // Enable tracing if (trace_hip_api) { hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt"); @@ -973,30 +905,11 @@ extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_ver CHECK_ROCTRACER( roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); } - - if (is_stats_opt) { - const char* path = NULL; - FILE* f = open_output_file(output_prefix, "hip_api_stats.csv", &path); - hip_api_stats = new EvtStats(f, path); - for (uint32_t id = HIP_API_ID_FIRST; id <= HIP_API_ID_LAST; id += 1) { - const char* label = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, id, 0); - hip_api_stats->set_label(id, label); - } - } } if (trace_hip_activity) { hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt"); CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS)); - - if (is_stats_opt) { - FILE* f = NULL; - const char* path = NULL; - f = open_output_file(output_prefix, "hip_kernel_stats.csv", &path); - hip_kernel_stats = new EvtStatsA(f, path); - f = open_output_file(output_prefix, "hip_memcpy_stats.csv", &path); - hip_memcpy_stats = new EvtStatsA(f, path); - } } } diff --git a/projects/roctracer/test/CMakeLists.txt b/projects/roctracer/test/CMakeLists.txt index 8074ddd0f0..4665117fdc 100644 --- a/projects/roctracer/test/CMakeLists.txt +++ b/projects/roctracer/test/CMakeLists.txt @@ -141,3 +141,8 @@ file(GLOB files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "golden_traces/tests_trace_ foreach(file ${files}) configure_file(${file} ${PROJECT_BINARY_DIR}/test/${file} COPYONLY) endforeach() + +add_library(hip_stats SHARED EXCLUDE_FROM_ALL hip_stats/hip_stats.cpp) +target_compile_definitions(hip_stats PRIVATE __HIP_PLATFORM_AMD__) +target_link_libraries(hip_stats roctracer) +add_dependencies(mytest hip_stats) diff --git a/projects/roctracer/test/hip_stats/hip_stats.cpp b/projects/roctracer/test/hip_stats/hip_stats.cpp new file mode 100644 index 0000000000..4cfc140bd4 --- /dev/null +++ b/projects/roctracer/test/hip_stats/hip_stats.cpp @@ -0,0 +1,259 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "roctracer.h" +#include "roctracer_hip.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CHECK_ROCTRACER(call) \ + do { \ + roctracer_status_t status = call; \ + if (status != ROCTRACER_STATUS_SUCCESS) { \ + std::cerr << roctracer_error_string() << std::endl; \ + abort(); \ + } \ + } while (false) + +namespace { + +constexpr uint64_t NextPowerOf2(uint64_t v) { + v += (v == 0); + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + return ++v; +} + +constexpr size_t KiB = 1024; +constexpr size_t MiB = KiB * KiB; +constexpr size_t GiB = KiB * KiB * KiB; + +std::string HumanReadableSize(size_t size, int precision) { + std::stringstream ss; + if (size < KiB) + ss << size; + else if (size < MiB) + ss << std::fixed << std::setprecision(precision) << (double)size / KiB << "K"; + else if (size < GiB) + ss << std::fixed << std::setprecision(precision) << (double)size / MiB << "M"; + else + ss << std::fixed << std::setprecision(precision) << (double)size / GiB << "G"; + return ss.str(); +} + +struct FunctionStats { + uint64_t total_time_ns; + uint64_t count; + void Accumulate(uint64_t time_ns) { + total_time_ns += time_ns; + ++count; + } +}; + +struct MemCopyStats { + uint64_t total_time_ns; + uint64_t total_byte_size; + uint64_t count; + void Accumulate(uint64_t time_ns, uint64_t byte_size) { + total_time_ns += time_ns; + total_byte_size += byte_size; + ++count; + } +}; + +struct pair_hash { + template std::size_t operator()(const std::pair& pair) const { + return std::hash()(pair.first) ^ std::hash()(pair.second); + } +}; + +std::unordered_map hip_api_stats; +std::unordered_map kernel_stats; +std::unordered_map, MemCopyStats, pair_hash> + memcpy_stats; + +void CollectStatistics(const char* begin, const char* end, void* /* user_arg */) { + const auto* record = reinterpret_cast(begin); + while (record < reinterpret_cast(end)) { + auto elapsed_time_ns = record->end_ns - record->begin_ns; + + if (record->domain == ACTIVITY_DOMAIN_HIP_OPS && record->op == HIP_OP_ID_DISPATCH) { + const char* kernel_name = record->kernel_name; + if (kernel_name == nullptr) kernel_name = "Unknown kernels"; + kernel_stats[kernel_name].Accumulate(elapsed_time_ns); + } else if (record->domain == ACTIVITY_DOMAIN_HIP_OPS && record->op == HIP_OP_ID_COPY) + memcpy_stats[std::make_pair(record->kind, NextPowerOf2(record->bytes))].Accumulate( + elapsed_time_ns, record->bytes); + else if (record->domain == ACTIVITY_DOMAIN_HIP_API) + hip_api_stats[record->op].Accumulate(elapsed_time_ns); + + CHECK_ROCTRACER(roctracer_next_record(record, &record)); + } +} + +namespace fs = std::experimental::filesystem; + +void DumpStatistics() { + CHECK_ROCTRACER(roctracer_close_pool()); + + fs::path output_dir = []() { + const char* env_var = getenv("ROCP_OUTPUT_DIR"); + return env_var != nullptr ? env_var : ""; + }(); + + std::ofstream out; + + if (output_dir.empty()) { + // If an output directory was not specified, then print the statistics to stdout. + out.copyfmt(std::cout); + out.clear(std::cout.rdstate()); + out.basic_ios::rdbuf(std::cout.rdbuf()); + } else { + if (auto status = fs::status(output_dir); !fs::exists(status) || !fs::is_directory(status)) { + std::cerr << "error: ROCP_OUTPUT_DIR=" << output_dir << " is not a directory" << std::endl; + return; + } + } + + auto compare = [](const auto& x, const auto& y) { + return x.second.total_time_ns > y.second.total_time_ns; + }; + + // Print the HIP API statistics sorted by descending total inclusive time. + if (!hip_api_stats.empty()) { + const char* filename = "hip_api_stats.csv"; + if (!output_dir.empty()) out = std::ofstream(output_dir / filename); + + if (out.good()) { + std::cout << "Dumping HIP API statistics." << std::endl; + + uint64_t total_hip_api_time_ns = + std::accumulate(hip_api_stats.begin(), hip_api_stats.end(), 0, + [](uint64_t total_time_ns, const auto& stats) { + return total_time_ns + stats.second.total_time_ns; + }); + + out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl; + for (auto&& [op, stats] : std::set( + hip_api_stats.begin(), hip_api_stats.end(), compare)) + out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op, 0) << "\"," << stats.count + << "," << stats.total_time_ns << "," << stats.total_time_ns / stats.count << "," + << std::fixed << std::setprecision(4) + << (double)stats.total_time_ns / total_hip_api_time_ns * 100 << std::endl; + } else { + std::cerr << "warning: could not open " << output_dir / filename << std::endl; + } + } + + // Print the HIP kernel dispatch statistics sorted by descending execution time. + + if (!kernel_stats.empty()) { + const char* filename = "hip_kernel_stats.csv"; + if (!output_dir.empty()) out = std::ofstream(output_dir / filename); + + if (out.good()) { + std::cout << "Dumping HIP kernel dispatch statistics." << std::endl; + + uint64_t total_kernel_time_ns = + std::accumulate(kernel_stats.begin(), kernel_stats.end(), 0, + [](uint64_t total_time_ns, const auto& stats) { + return total_time_ns + stats.second.total_time_ns; + }); + + out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl; + for (auto&& [name, stats] : std::set( + kernel_stats.begin(), kernel_stats.end(), compare)) + out << "\"" << name << "\"," << stats.count << "," << stats.total_time_ns << "," + << stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4) + << (double)stats.total_time_ns / total_kernel_time_ns * 100 << std::endl; + } else { + std::cerr << "warning: could not open " << output_dir / filename << std::endl; + } + } + + // Print the HIP memory copy statistics sorted by descending transfer time. + + if (!memcpy_stats.empty()) { + const char* filename = "hip_copy_stats.csv"; + if (!output_dir.empty()) out = std::ofstream(output_dir / filename); + + if (out.good()) { + std::cout << "Dumping HIP memory copy statistics." << std::endl; + + uint64_t total_memory_copy_time_ns = + std::accumulate(memcpy_stats.begin(), memcpy_stats.end(), 0, + [](uint64_t total_time_ns, const auto& stats) { + return total_time_ns + stats.second.total_time_ns; + }); + + out << "\"Name\",\"Calls\",\"TotalBytes\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" + << std::endl; + for (auto&& [kind, stats] : std::set( + memcpy_stats.begin(), memcpy_stats.end(), compare)) + out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY, kind.first) + << "(" << HumanReadableSize(kind.second >> 1, 0) << "-" + << HumanReadableSize(kind.second, 0) << ")" + << "\"," << stats.count << "," << stats.total_byte_size << "," << stats.total_time_ns + << "," << stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4) + << (double)stats.total_time_ns / total_memory_copy_time_ns * 100 << std::endl; + } else { + std::cerr << "warning: could not open " << output_dir / filename << std::endl; + } + } +} + +} // namespace + +#include + +extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* /* table */, uint64_t /* runtime_version */, + uint64_t /* failed_tool_count */, + const char* const* /* failed_tool_names */) { + roctracer_properties_t properties{}; + properties.buffer_size = sizeof(roctracer_record_t) * 10000; + properties.buffer_callback_fun = CollectStatistics; + properties.buffer_callback_arg = nullptr; + + CHECK_ROCTRACER(roctracer_open_pool(&properties)); + CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); + CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_DISPATCH)); + CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY)); + + std::atexit([]() { DumpStatistics(); }); + return true; +} + +extern "C" ROCTRACER_EXPORT void OnUnload() {}