Remove EvtStats from the tracer_tool
The same information can be generated from the hcc_ops_trace.txt file, so a later commit will add a stage to the tblextr.py script that generates the .csv files when ROCP_STATS_OPT=1. Change-Id: I3d1575e096bedf98c66068d9a4ca141421e5bb9d
Este cometimento está contido em:
@@ -85,7 +85,10 @@ typedef struct activity_record_s {
|
||||
activity_correlation_id_t external_id; /* external correlation id */
|
||||
};
|
||||
};
|
||||
size_t bytes; /* data size bytes */
|
||||
union {
|
||||
size_t bytes; /* data size bytes */
|
||||
const char* kernel_name;
|
||||
};
|
||||
} activity_record_t;
|
||||
|
||||
/* Activity sync callback type */
|
||||
|
||||
@@ -408,13 +408,23 @@ void HIP_ActivityIdCallback(activity_correlation_id_t correlation_id) {
|
||||
CorrelationIdRegister(correlation_id);
|
||||
}
|
||||
|
||||
void HIP_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
|
||||
void HIP_AsyncActivityCallback(uint32_t op_id, void* record_ptr, void* arg) {
|
||||
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
|
||||
roctracer_record_t* record_ptr = reinterpret_cast<roctracer_record_t*>(record);
|
||||
record_ptr->domain = ACTIVITY_DOMAIN_HIP_OPS;
|
||||
record_ptr->correlation_id = CorrelationIdLookup(record_ptr->correlation_id);
|
||||
if (record_ptr->correlation_id == 0) return;
|
||||
pool->Write(*record_ptr);
|
||||
roctracer_record_t record = *reinterpret_cast<roctracer_record_t*>(record_ptr);
|
||||
record.domain = ACTIVITY_DOMAIN_HIP_OPS;
|
||||
record.correlation_id = CorrelationIdLookup(record.correlation_id);
|
||||
if (record.correlation_id == 0) return;
|
||||
|
||||
// If the record is for a kernel dispatch, write the kernel name in the pool's data,
|
||||
// and make the record point to it. Older HIP runtimes do not provide a kernel
|
||||
// name, so record.kernel_name might be null.
|
||||
if (record.op == HIP_OP_ID_DISPATCH && record.kernel_name != nullptr)
|
||||
pool->Write(record, record.kernel_name, strlen(record.kernel_name) + 1,
|
||||
[](auto& record, const void* data) {
|
||||
record.kernel_name = static_cast<const char*>(data);
|
||||
});
|
||||
else
|
||||
pool->Write(record);
|
||||
|
||||
DEBUG_TRACE(
|
||||
"HIP_AsyncActivityCallback(\"%s\"): op(%u) kind(%u) record(%p) pool(%p) correlation_id(%d) "
|
||||
|
||||
@@ -1,115 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef EVT_STATS_H_
|
||||
#define EVT_STATS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
// Mutex-protected accumulator of per-event statistics (call count and running average
// weight) that is written out as CSV rows to a caller-supplied FILE*.
//
// evt_id_t     - key identifying an event (an API id, a kernel name, ...).
// evt_weight_t - type of the value accumulated for each event (e.g. a duration).
template <class evt_id_t, class evt_weight_t> class EvtStatsT {
 public:
  typedef std::mutex mutex_t;
  typedef uint64_t evt_count_t;
  typedef double evt_avr_t;

  // Statistics kept for one event id.
  struct evt_record_t {
    uint64_t count;  // number of add_event() calls for this id
    evt_avr_t avr;   // running average of the weights
    evt_record_t() : count(0), avr(0) {}
  };
  typedef typename std::map<evt_id_t, evt_record_t> map_t;
  typedef typename std::map<evt_id_t, const char*> labels_t;

  // Orders (id, record) pairs by ascending average; equal averages are ordered by id.
  struct cmpfun {
    template <typename T> bool operator()(const T& a, const T& b) const {
      return (a.second.avr != b.second.avr) ? a.second.avr < b.second.avr : a.first < b.first;
    }
  };

  // Records one occurrence of event `id` with the given weight, updating its count and
  // running average.
  inline void add_event(evt_id_t id, evt_weight_t weight) {
    std::lock_guard<mutex_t> lck(mutex_);

    evt_record_t& rec = map_[id];
    const evt_count_t prev_count = rec.count;
    const evt_count_t new_count = prev_count + 1;
    const evt_avr_t prev_avr = rec.avr;
    const evt_avr_t new_avr = ((prev_avr * prev_count) + weight) / new_count;

    rec.count = new_count;
    rec.avr = new_avr;
  }

  // Writes one CSV row per event (index, label, count, average, total), ordered by cmpfun,
  // then closes the output file. Calling dump() again, or on an object built without a
  // file, does nothing.
  void dump() {
    std::lock_guard<mutex_t> lck(mutex_);
    if (fdes_ == NULL) return;

    fprintf(stdout, "Dumping %s\n", (path_ != NULL) ? path_ : "(null)");
    fflush(stdout);

    typedef typename std::set<std::pair<evt_id_t, evt_record_t>, cmpfun> set_t;
    set_t s_(map_.begin(), map_.end());

    uint64_t index = 0;
    for (auto& e : s_) {
      const evt_id_t id = e.first;
      const char* label = get_label(id);
      // Streaming a null char pointer is undefined behaviour, so substitute a placeholder.
      if (label == NULL) label = "<unknown>";
      std::ostringstream oss;
      // The total is reconstructed from the running average, so it is subject to
      // floating-point rounding.
      oss << index << ",\"" << label << "\"," << e.second.count << "," << (uint64_t)(e.second.avr)
          << "," << (uint64_t)(e.second.count * e.second.avr);
      fprintf(fdes_, "%s\n", oss.str().c_str());
      index += 1;
    }

    fclose(fdes_);
    fdes_ = NULL;  // prevents a second fclose() on the same handle
  }

  // Returns the label registered for a numeric id, or NULL when none was registered.
  const char* get_label(const uint32_t& id) {
    auto it = labels_.find(id);
    return (it != labels_.end()) ? it->second : NULL;
  }
  // String ids are their own label.
  const char* get_label(const char* id) { return id; }
  const char* get_label(const std::string& id) { return id.c_str(); }

  // Associates `label` with `id`; the pointer is stored, not copied.
  void set_label(evt_id_t id, const char* label) { labels_[id] = label; }

  // f    - output file; the CSV header is written to it immediately (skipped when NULL).
  // path - name printed by dump(); the pointer is stored, not copied.
  EvtStatsT(FILE* f, const char* path) : fdes_(f), path_(path) {
    if (fdes_ != NULL) fprintf(fdes_, "Index,Name,Count,Avr,Total\n");
  }

 private:
  mutex_t mutex_;
  map_t map_;
  labels_t labels_;
  FILE* fdes_;
  const char* path_;
};
|
||||
|
||||
typedef EvtStatsT<uint32_t, uint64_t> EvtStats;
|
||||
|
||||
#endif // EVT_STATS_H_
|
||||
@@ -43,7 +43,6 @@
|
||||
#include "util/xml.h"
|
||||
#include "loader.h"
|
||||
#include "trace_buffer.h"
|
||||
#include "evt_stats.h"
|
||||
|
||||
// Macro to check ROC-tracer calls status
|
||||
#define CHECK_ROCTRACER(call) \
|
||||
@@ -110,12 +109,6 @@ std::vector<std::string> hip_api_vec;
|
||||
LOADER_INSTANTIATE();
|
||||
TRACE_BUFFER_INSTANTIATE();
|
||||
|
||||
typedef EvtStatsT<const std::string, uint32_t> EvtStatsA;
|
||||
// HIP stats
|
||||
EvtStats* hip_api_stats = NULL;
|
||||
EvtStatsA* hip_kernel_stats = NULL;
|
||||
EvtStatsA* hip_memcpy_stats = NULL;
|
||||
|
||||
// Global output file handle
|
||||
FILE* begin_ts_file_handle = NULL;
|
||||
FILE* roctx_file_handle = NULL;
|
||||
@@ -299,10 +292,6 @@ struct hip_api_trace_entry_t {
|
||||
void* ptr;
|
||||
};
|
||||
|
||||
typedef std::map<uint64_t, const char*> hip_kernel_map_t;
|
||||
hip_kernel_map_t* hip_kernel_map = NULL;
|
||||
std::mutex hip_kernel_mutex;
|
||||
|
||||
static inline bool is_hip_kernel_launch_api(const uint32_t& cid) {
|
||||
bool ret = (cid == HIP_API_ID_hipLaunchKernel) || (cid == HIP_API_ID_hipExtLaunchKernel) ||
|
||||
(cid == HIP_API_ID_hipLaunchCooperativeKernel) ||
|
||||
@@ -336,23 +325,14 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
|
||||
entry->name, correlation_id, begin_timestamp, end_timestamp);
|
||||
|
||||
if (domain == ACTIVITY_DOMAIN_HIP_API) {
|
||||
if (hip_api_stats != NULL) {
|
||||
hip_api_stats->add_event(cid, end_timestamp - begin_timestamp);
|
||||
if (is_hip_kernel_launch_api(cid)) {
|
||||
hip_kernel_mutex.lock();
|
||||
(*hip_kernel_map)[correlation_id] = entry->name;
|
||||
hip_kernel_mutex.unlock();
|
||||
}
|
||||
} else {
|
||||
const char* str = hipApiString((hip_api_id_t)cid, data);
|
||||
rec_ss << " " << str;
|
||||
if (is_hip_kernel_launch_api(cid) && entry->name) {
|
||||
const char* kernel_name = cxx_demangle(entry->name);
|
||||
rec_ss << " kernel=" << kernel_name;
|
||||
}
|
||||
rec_ss << " :" << correlation_id;
|
||||
fprintf(hip_api_file_handle, "%s\n", rec_ss.str().c_str());
|
||||
const char* str = hipApiString((hip_api_id_t)cid, data);
|
||||
rec_ss << " " << str;
|
||||
if (is_hip_kernel_launch_api(cid) && entry->name) {
|
||||
const char* kernel_name = cxx_demangle(entry->name);
|
||||
rec_ss << " kernel=" << kernel_name;
|
||||
}
|
||||
rec_ss << " :" << correlation_id;
|
||||
fprintf(hip_api_file_handle, "%s\n", rec_ss.str().c_str());
|
||||
} else {
|
||||
fprintf(hip_api_file_handle, "%s(name(%s))\n", oss.str().c_str(), entry->name);
|
||||
}
|
||||
@@ -467,36 +447,6 @@ struct hip_act_trace_entry_t {
|
||||
uint64_t correlation_id;
|
||||
};
|
||||
|
||||
// HIP ACT trace buffer flush callback
|
||||
void hip_act_flush_cb(hip_act_trace_entry_t* entry) {
|
||||
const uint32_t domain = ACTIVITY_DOMAIN_HIP_OPS;
|
||||
const uint32_t op = 0;
|
||||
const char* name = roctracer_op_string(domain, op, entry->kind);
|
||||
if (name == NULL) {
|
||||
printf("hip_act_flush_cb name is NULL\n");
|
||||
fflush(stdout);
|
||||
abort();
|
||||
}
|
||||
|
||||
if (strncmp("Kernel", name, 6) == 0) {
|
||||
hip_kernel_mutex.lock();
|
||||
if (hip_kernel_stats == NULL) {
|
||||
printf("hip_act_flush_cb hip_kernel_stats is NULL\n");
|
||||
fflush(stdout);
|
||||
abort();
|
||||
}
|
||||
name = (*hip_kernel_map)[entry->correlation_id];
|
||||
hip_kernel_mutex.unlock();
|
||||
const char* kernel_name = cxx_demangle(name);
|
||||
hip_kernel_stats->add_event(kernel_name, entry->dur);
|
||||
} else {
|
||||
hip_memcpy_stats->add_event(name, entry->dur);
|
||||
}
|
||||
}
|
||||
|
||||
roctracer::TraceBuffer<hip_act_trace_entry_t> hip_act_trace_buffer("HIP ACT", 0x200000,
|
||||
hip_act_flush_cb, 1);
|
||||
|
||||
// Activity tracing callback
|
||||
// hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067)
|
||||
void pool_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
@@ -513,18 +463,10 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
|
||||
switch (record->domain) {
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
if (hip_memcpy_stats != NULL) {
|
||||
hip_act_trace_entry_t* entry = hip_act_trace_buffer.GetEntry();
|
||||
entry->kind = record->kind;
|
||||
entry->dur = record->end_ns - record->begin_ns;
|
||||
entry->correlation_id = record->correlation_id;
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
|
||||
} else {
|
||||
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n", record->begin_ns,
|
||||
record->end_ns, record->device_id, record->queue_id, name, record->correlation_id,
|
||||
my_pid);
|
||||
fflush(hcc_activity_file_handle);
|
||||
}
|
||||
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n", record->begin_ns,
|
||||
record->end_ns, record->device_id, record->queue_id, name, record->correlation_id,
|
||||
my_pid);
|
||||
fflush(hcc_activity_file_handle);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
if (record->op == HSA_OP_ID_COPY) {
|
||||
@@ -693,10 +635,6 @@ void tool_unload() {
|
||||
|
||||
close_file_handles();
|
||||
|
||||
if (hip_api_stats) hip_api_stats->dump();
|
||||
if (hip_kernel_stats) hip_kernel_stats->dump();
|
||||
if (hip_memcpy_stats) hip_memcpy_stats->dump();
|
||||
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
|
||||
@@ -951,12 +889,6 @@ extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_ver
|
||||
// Allocating tracing pool
|
||||
open_tracing_pool();
|
||||
|
||||
// Check for optimized stats
|
||||
const bool is_stats_opt = (getenv("ROCP_STATS_OPT") != NULL);
|
||||
|
||||
// HIP kernel map instantiation
|
||||
if (is_stats_opt) hip_kernel_map = new hip_kernel_map_t;
|
||||
|
||||
// Enable tracing
|
||||
if (trace_hip_api) {
|
||||
hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt");
|
||||
@@ -973,30 +905,11 @@ extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_ver
|
||||
CHECK_ROCTRACER(
|
||||
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL));
|
||||
}
|
||||
|
||||
if (is_stats_opt) {
|
||||
const char* path = NULL;
|
||||
FILE* f = open_output_file(output_prefix, "hip_api_stats.csv", &path);
|
||||
hip_api_stats = new EvtStats(f, path);
|
||||
for (uint32_t id = HIP_API_ID_FIRST; id <= HIP_API_ID_LAST; id += 1) {
|
||||
const char* label = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, id, 0);
|
||||
hip_api_stats->set_label(id, label);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (trace_hip_activity) {
|
||||
hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt");
|
||||
CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
|
||||
|
||||
if (is_stats_opt) {
|
||||
FILE* f = NULL;
|
||||
const char* path = NULL;
|
||||
f = open_output_file(output_prefix, "hip_kernel_stats.csv", &path);
|
||||
hip_kernel_stats = new EvtStatsA(f, path);
|
||||
f = open_output_file(output_prefix, "hip_memcpy_stats.csv", &path);
|
||||
hip_memcpy_stats = new EvtStatsA(f, path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -141,3 +141,8 @@ file(GLOB files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "golden_traces/tests_trace_
|
||||
foreach(file ${files})
|
||||
configure_file(${file} ${PROJECT_BINARY_DIR}/test/${file} COPYONLY)
|
||||
endforeach()
|
||||
|
||||
add_library(hip_stats SHARED EXCLUDE_FROM_ALL hip_stats/hip_stats.cpp)
|
||||
target_compile_definitions(hip_stats PRIVATE __HIP_PLATFORM_AMD__)
|
||||
target_link_libraries(hip_stats roctracer)
|
||||
add_dependencies(mytest hip_stats)
|
||||
|
||||
@@ -0,0 +1,259 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "roctracer.h"
|
||||
#include "roctracer_hip.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <experimental/filesystem>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
// Evaluates a roctracer call once; if it does not return ROCTRACER_STATUS_SUCCESS, prints
// roctracer_error_string() to stderr and aborts. The argument is parenthesized and the
// local uses a reserved-looking name so it cannot shadow an identifier used in `call`.
#define CHECK_ROCTRACER(call)                                    \
  do {                                                           \
    const roctracer_status_t check_roctracer_status_ = (call);   \
    if (check_roctracer_status_ != ROCTRACER_STATUS_SUCCESS) {   \
      std::cerr << roctracer_error_string() << std::endl;        \
      abort();                                                   \
    }                                                            \
  } while (false)
|
||||
|
||||
namespace {
|
||||
|
||||
// Rounds v up to a power of two: powers of two map to themselves and 0 maps to 1.
// Values above 2^63 wrap around to 0 (unsigned arithmetic).
constexpr uint64_t NextPowerOf2(uint64_t v) {
  if (v == 0) v = 1;
  --v;
  // Smear the highest set bit into every lower position (shifts of 1, 2, 4, ... 32).
  for (unsigned shift = 1; shift < 64; shift <<= 1) v |= v >> shift;
  return v + 1;
}
|
||||
|
||||
// Binary size units, in bytes.
constexpr size_t KiB = 1024;
constexpr size_t MiB = KiB * KiB;
constexpr size_t GiB = KiB * KiB * KiB;

// Formats a byte count for display. Sizes below 1 KiB are printed as a plain integer with
// no suffix; larger sizes are divided by the largest unit among K, M, G that does not exceed
// them and printed in fixed notation with `precision` decimals followed by that suffix.
std::string HumanReadableSize(size_t size, int precision) {
  std::stringstream text;
  if (size < KiB) {
    text << size;
    return text.str();
  }

  size_t unit = GiB;
  const char* suffix = "G";
  if (size < MiB) {
    unit = KiB;
    suffix = "K";
  } else if (size < GiB) {
    unit = MiB;
    suffix = "M";
  }

  text << std::fixed << std::setprecision(precision) << (double)size / unit << suffix;
  return text.str();
}
|
||||
|
||||
// Accumulated duration and call count for one function (a HIP API or a kernel).
// The counters carry default initializers so that a default-initialized instance starts
// from zero instead of holding indeterminate values.
struct FunctionStats {
  uint64_t total_time_ns = 0;  // sum of all accumulated durations, in nanoseconds
  uint64_t count = 0;          // number of Accumulate() calls

  // Adds one occurrence that lasted `time_ns` nanoseconds.
  void Accumulate(uint64_t time_ns) {
    total_time_ns += time_ns;
    ++count;
  }
};
|
||||
|
||||
// Accumulated duration, byte count and number of copies for one class of memory copy.
// The counters carry default initializers so that a default-initialized instance starts
// from zero instead of holding indeterminate values.
struct MemCopyStats {
  uint64_t total_time_ns = 0;    // sum of all accumulated durations, in nanoseconds
  uint64_t total_byte_size = 0;  // sum of all accumulated copy sizes, in bytes
  uint64_t count = 0;            // number of Accumulate() calls

  // Adds one copy of `byte_size` bytes that lasted `time_ns` nanoseconds.
  void Accumulate(uint64_t time_ns, uint64_t byte_size) {
    total_time_ns += time_ns;
    total_byte_size += byte_size;
    ++count;
  }
};
|
||||
|
||||
// Hash functor for std::pair keys in unordered containers.
// The two component hashes are mixed with an order-dependent combine step rather than a
// plain XOR, which is symmetric ((a, b) and (b, a) collide) and maps every pair whose
// components hash equally to 0.
struct pair_hash {
  template <typename T1, typename T2> std::size_t operator()(const std::pair<T1, T2>& pair) const {
    std::size_t seed = std::hash<T1>()(pair.first);
    seed ^= std::hash<T2>()(pair.second) + static_cast<std::size_t>(0x9e3779b97f4a7c15ULL) +
        (seed << 6) + (seed >> 2);
    return seed;
  }
};
|
||||
|
||||
std::unordered_map<decltype(roctracer_record_t::op), FunctionStats> hip_api_stats;
|
||||
std::unordered_map<std::string, FunctionStats> kernel_stats;
|
||||
std::unordered_map<std::pair<decltype(roctracer_record_t::kind), size_t>, MemCopyStats, pair_hash>
|
||||
memcpy_stats;
|
||||
|
||||
void CollectStatistics(const char* begin, const char* end, void* /* user_arg */) {
|
||||
const auto* record = reinterpret_cast<const roctracer_record_t*>(begin);
|
||||
while (record < reinterpret_cast<const roctracer_record_t*>(end)) {
|
||||
auto elapsed_time_ns = record->end_ns - record->begin_ns;
|
||||
|
||||
if (record->domain == ACTIVITY_DOMAIN_HIP_OPS && record->op == HIP_OP_ID_DISPATCH) {
|
||||
const char* kernel_name = record->kernel_name;
|
||||
if (kernel_name == nullptr) kernel_name = "Unknown kernels";
|
||||
kernel_stats[kernel_name].Accumulate(elapsed_time_ns);
|
||||
} else if (record->domain == ACTIVITY_DOMAIN_HIP_OPS && record->op == HIP_OP_ID_COPY)
|
||||
memcpy_stats[std::make_pair(record->kind, NextPowerOf2(record->bytes))].Accumulate(
|
||||
elapsed_time_ns, record->bytes);
|
||||
else if (record->domain == ACTIVITY_DOMAIN_HIP_API)
|
||||
hip_api_stats[record->op].Accumulate(elapsed_time_ns);
|
||||
|
||||
CHECK_ROCTRACER(roctracer_next_record(record, &record));
|
||||
}
|
||||
}
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
void DumpStatistics() {
|
||||
CHECK_ROCTRACER(roctracer_close_pool());
|
||||
|
||||
fs::path output_dir = []() {
|
||||
const char* env_var = getenv("ROCP_OUTPUT_DIR");
|
||||
return env_var != nullptr ? env_var : "";
|
||||
}();
|
||||
|
||||
std::ofstream out;
|
||||
|
||||
if (output_dir.empty()) {
|
||||
// If an output directory was not specified, then print the statistics to stdout.
|
||||
out.copyfmt(std::cout);
|
||||
out.clear(std::cout.rdstate());
|
||||
out.basic_ios<char>::rdbuf(std::cout.rdbuf());
|
||||
} else {
|
||||
if (auto status = fs::status(output_dir); !fs::exists(status) || !fs::is_directory(status)) {
|
||||
std::cerr << "error: ROCP_OUTPUT_DIR=" << output_dir << " is not a directory" << std::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
auto compare = [](const auto& x, const auto& y) {
|
||||
return x.second.total_time_ns > y.second.total_time_ns;
|
||||
};
|
||||
|
||||
// Print the HIP API statistics sorted by descending total inclusive time.
|
||||
if (!hip_api_stats.empty()) {
|
||||
const char* filename = "hip_api_stats.csv";
|
||||
if (!output_dir.empty()) out = std::ofstream(output_dir / filename);
|
||||
|
||||
if (out.good()) {
|
||||
std::cout << "Dumping HIP API statistics." << std::endl;
|
||||
|
||||
uint64_t total_hip_api_time_ns =
|
||||
std::accumulate(hip_api_stats.begin(), hip_api_stats.end(), 0,
|
||||
[](uint64_t total_time_ns, const auto& stats) {
|
||||
return total_time_ns + stats.second.total_time_ns;
|
||||
});
|
||||
|
||||
out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl;
|
||||
for (auto&& [op, stats] : std::set<decltype(hip_api_stats)::value_type, decltype(compare)>(
|
||||
hip_api_stats.begin(), hip_api_stats.end(), compare))
|
||||
out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op, 0) << "\"," << stats.count
|
||||
<< "," << stats.total_time_ns << "," << stats.total_time_ns / stats.count << ","
|
||||
<< std::fixed << std::setprecision(4)
|
||||
<< (double)stats.total_time_ns / total_hip_api_time_ns * 100 << std::endl;
|
||||
} else {
|
||||
std::cerr << "warning: could not open " << output_dir / filename << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Print the HIP kernel dispatch statistics sorted by descending execution time.
|
||||
|
||||
if (!kernel_stats.empty()) {
|
||||
const char* filename = "hip_kernel_stats.csv";
|
||||
if (!output_dir.empty()) out = std::ofstream(output_dir / filename);
|
||||
|
||||
if (out.good()) {
|
||||
std::cout << "Dumping HIP kernel dispatch statistics." << std::endl;
|
||||
|
||||
uint64_t total_kernel_time_ns =
|
||||
std::accumulate(kernel_stats.begin(), kernel_stats.end(), 0,
|
||||
[](uint64_t total_time_ns, const auto& stats) {
|
||||
return total_time_ns + stats.second.total_time_ns;
|
||||
});
|
||||
|
||||
out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl;
|
||||
for (auto&& [name, stats] : std::set<decltype(kernel_stats)::value_type, decltype(compare)>(
|
||||
kernel_stats.begin(), kernel_stats.end(), compare))
|
||||
out << "\"" << name << "\"," << stats.count << "," << stats.total_time_ns << ","
|
||||
<< stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4)
|
||||
<< (double)stats.total_time_ns / total_kernel_time_ns * 100 << std::endl;
|
||||
} else {
|
||||
std::cerr << "warning: could not open " << output_dir / filename << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Print the HIP memory copy statistics sorted by descending transfer time.
|
||||
|
||||
if (!memcpy_stats.empty()) {
|
||||
const char* filename = "hip_copy_stats.csv";
|
||||
if (!output_dir.empty()) out = std::ofstream(output_dir / filename);
|
||||
|
||||
if (out.good()) {
|
||||
std::cout << "Dumping HIP memory copy statistics." << std::endl;
|
||||
|
||||
uint64_t total_memory_copy_time_ns =
|
||||
std::accumulate(memcpy_stats.begin(), memcpy_stats.end(), 0,
|
||||
[](uint64_t total_time_ns, const auto& stats) {
|
||||
return total_time_ns + stats.second.total_time_ns;
|
||||
});
|
||||
|
||||
out << "\"Name\",\"Calls\",\"TotalBytes\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\""
|
||||
<< std::endl;
|
||||
for (auto&& [kind, stats] : std::set<decltype(memcpy_stats)::value_type, decltype(compare)>(
|
||||
memcpy_stats.begin(), memcpy_stats.end(), compare))
|
||||
out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY, kind.first)
|
||||
<< "(" << HumanReadableSize(kind.second >> 1, 0) << "-"
|
||||
<< HumanReadableSize(kind.second, 0) << ")"
|
||||
<< "\"," << stats.count << "," << stats.total_byte_size << "," << stats.total_time_ns
|
||||
<< "," << stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4)
|
||||
<< (double)stats.total_time_ns / total_memory_copy_time_ns * 100 << std::endl;
|
||||
} else {
|
||||
std::cerr << "warning: could not open " << output_dir / filename << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#include <hsa_api_trace.h>
|
||||
|
||||
// Tool entry point. Opens a roctracer pool whose buffer callback is CollectStatistics,
// enables activity records for the HIP API domain and for HIP kernel dispatch and copy
// operations, and registers DumpStatistics to run at process exit. All parameters are
// unused. Always returns true; a failing roctracer call aborts via CHECK_ROCTRACER.
extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* /* table */, uint64_t /* runtime_version */,
                                        uint64_t /* failed_tool_count */,
                                        const char* const* /* failed_tool_names */) {
  roctracer_properties_t properties{};
  properties.buffer_size = sizeof(roctracer_record_t) * 10000;  // room for 10000 records
  properties.buffer_callback_fun = CollectStatistics;
  properties.buffer_callback_arg = nullptr;

  CHECK_ROCTRACER(roctracer_open_pool(&properties));
  CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
  CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_DISPATCH));
  CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY));

  // std::atexit returns non-zero when the handler could not be registered; without the
  // handler no statistics would ever be written, so report it instead of failing silently.
  if (std::atexit([]() { DumpStatistics(); }) != 0)
    std::cerr << "warning: could not register the statistics dump to run at exit" << std::endl;
  return true;
}
|
||||
|
||||
// Tool unload hook. It does nothing: DumpStatistics is registered with std::atexit in OnLoad.
extern "C" ROCTRACER_EXPORT void OnUnload() {}
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador