rocprofv3: Updates to counter collection optimizations (#24)
* Updates to counter collection optimizations
* Fix logic error
---------
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
[ROCm/rocprofiler-sdk commit: a09eda05b2]
このコミットが含まれているのは:
@@ -40,21 +40,22 @@ namespace tool
|
||||
{
|
||||
constexpr auto type = domain_type::COUNTER_VALUES;
|
||||
|
||||
// Returns the counter values stored for this record in the temporary file registered for
// domain_type::COUNTER_VALUES.
// NOTE(review): this span is a rendered diff hunk; it appears to interleave the replaced
// getRecords() lines (records.offset / records.count) with the added read() lines
// (record.fpos) -- confirm against the repository before treating it as one function.
std::vector<tool_counter_value_t>
|
||||
tool_counter_record_t::getRecords() const
|
||||
tool_counter_record_t::container_type
|
||||
tool_counter_record_t::read() const
|
||||
{
|
||||
auto& _tmp_file = get_tmp_file_buffer<tool_counter_value_t>(type)->file;
|
||||
// no file position has been recorded for this record, so there is nothing to load
if(!record.fpos) return container_type{};
|
||||
|
||||
return _tmp_file.read<tool_counter_value_t>(records.offset, records.count);
|
||||
// the buffer pointer is passed through CHECK_NOTNULL before its file member is used
auto& _tmp_file = CHECK_NOTNULL(get_tmp_file_buffer<tool_counter_value_t>(type))->file;
|
||||
// hand the recorded file position to tmp_file::read and return whatever it yields
return _tmp_file.read<tool_counter_value_t>(*record.fpos);
|
||||
}
|
||||
|
||||
// Stores a batch of counter values in the temporary file registered for
// domain_type::COUNTER_VALUES and remembers where they were written.
// NOTE(review): this span is a rendered diff hunk; it appears to interleave the replaced
// writeRecord(ptr, num_records) lines (records.offset / records.count) with the added
// write(container) lines (record.fpos) -- confirm against the repository.
void
|
||||
tool_counter_record_t::writeRecord(const tool_counter_value_t* ptr, size_t num_records)
|
||||
tool_counter_record_t::write(const tool_counter_record_t::container_type& _data)
|
||||
{
|
||||
auto& _tmp_file = get_tmp_file_buffer<tool_counter_value_t>(type)->file;
|
||||
// nothing to store: record.fpos is left untouched
if(_data.empty()) return;
|
||||
|
||||
records.offset = _tmp_file.write<tool_counter_value_t>(ptr, num_records);
|
||||
records.count = num_records;
|
||||
// the buffer pointer is passed through CHECK_NOTNULL before its file member is used
auto& _tmp_file = CHECK_NOTNULL(get_tmp_file_buffer<tool_counter_value_t>(type))->file;
|
||||
// keep the stream position returned by tmp_file::write so read() can find the values
record.fpos = _tmp_file.write<tool_counter_value_t>(_data.data(), _data.size());
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -86,29 +86,30 @@ struct tool_counter_value_t
|
||||
|
||||
// Locator for the counter values that a tool_counter_record_t keeps in the temporary file.
// NOTE(review): this span is a rendered diff hunk; offset/count look like the replaced
// fields and fpos like the added one -- confirm against the repository.
struct serialized_counter_record_t
|
||||
{
|
||||
// element offset passed to the two-argument tmp_file::read by getRecords()
size_t offset = 0;
|
||||
// number of values passed to the two-argument tmp_file::read by getRecords()
size_t count = 0;
|
||||
// stream position of the stored values; std::nullopt until a write has recorded one
std::optional<std::streampos> fpos = std::nullopt;
|
||||
};
|
||||
|
||||
// One counter-collection record: the originating thread, the dispatch data, and a locator
// for the counter values, which are kept in a temporary file rather than in this struct.
// NOTE(review): this span is a rendered diff hunk; records/getRecords()/writeRecord() look
// like the replaced members and record/read()/write() like the added ones -- confirm
// against the repository.
struct tool_counter_record_t
|
||||
{
|
||||
// container used to exchange counter values with read() and write()
using container_type = std::vector<tool_counter_value_t>;
|
||||
|
||||
uint64_t thread_id = 0;
|
||||
rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
|
||||
serialized_counter_record_t records = {};
|
||||
serialized_counter_record_t record = {};
|
||||
|
||||
// Serializes thread_id, dispatch_data and the counter values (loaded back from the
// temporary file) as the name/value pairs "thread_id", "dispatch_data" and "records".
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
// should be removed when moving to buffered tracing
|
||||
auto tmp = getRecords();
|
||||
auto tmp = read();
|
||||
|
||||
ar(cereal::make_nvp("thread_id", thread_id));
|
||||
ar(cereal::make_nvp("dispatch_data", dispatch_data));
|
||||
ar(cereal::make_nvp("records", tmp));
|
||||
}
|
||||
|
||||
std::vector<tool_counter_value_t> getRecords() const;
|
||||
void writeRecord(const tool_counter_value_t* ptr, size_t num_records);
|
||||
// loads the stored counter values; defined out of line
container_type read() const;
|
||||
// stores the given counter values; defined out of line
void write(const container_type& data);
|
||||
};
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -579,7 +579,9 @@ generate_csv(const output_config& cfg,
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
|
||||
auto counter_id_to_name = std::map<rocprofiler_counter_id_t, std::string>{};
|
||||
auto counter_id_to_name = std::unordered_map<rocprofiler_counter_id_t, std::string_view>{};
|
||||
for(const auto& itr : tool_metadata.get_counter_info())
|
||||
counter_id_to_name.emplace(itr.id, itr.name);
|
||||
|
||||
for(auto ditr : data)
|
||||
{
|
||||
@@ -587,7 +589,7 @@ generate_csv(const output_config& cfg,
|
||||
{
|
||||
auto kernel_id = record.dispatch_data.dispatch_info.kernel_id;
|
||||
auto counter_id_value = std::map<rocprofiler_counter_id_t, double>{};
|
||||
auto record_vector = record.getRecords();
|
||||
auto record_vector = record.read();
|
||||
|
||||
// Accumulate counters based on ID
|
||||
for(auto& count : record_vector)
|
||||
@@ -595,13 +597,6 @@ generate_csv(const output_config& cfg,
|
||||
counter_id_value[count.id] += count.value;
|
||||
}
|
||||
|
||||
// Query counter names for all IDs
|
||||
for(auto& [id, _] : counter_id_value)
|
||||
{
|
||||
if(counter_id_to_name.find(id) == counter_id_to_name.end())
|
||||
counter_id_to_name[id] = tool_metadata.get_counter_info(id)->name;
|
||||
}
|
||||
|
||||
const auto& correlation_id = record.dispatch_data.correlation_id;
|
||||
const auto* kernel_info = tool_metadata.get_kernel_symbol(kernel_id);
|
||||
auto lds_block_size_v =
|
||||
|
||||
@@ -22,12 +22,16 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include "lib/common/logging.hpp"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <ios>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
struct tmp_file
|
||||
@@ -43,38 +47,14 @@ struct tmp_file
|
||||
|
||||
explicit operator bool() const;
|
||||
|
||||
template <typename Type>
|
||||
size_t write(const Type* data, size_t num_records)
|
||||
{
|
||||
// Assert we are not mixing types with tool_counter_value_t
|
||||
static_assert(sizeof(Type) == 16);
|
||||
size_t allocated = offset.fetch_add(num_records);
|
||||
template <typename Tp>
|
||||
std::streampos write(const Tp* data, size_t num_records);
|
||||
|
||||
std::unique_lock<std::mutex> lk(file_mutex);
|
||||
if(!stream.is_open()) open();
|
||||
stream.seekp(allocated * sizeof(Type));
|
||||
stream.write((char*) data, num_records * sizeof(Type));
|
||||
return allocated;
|
||||
};
|
||||
template <typename Tp>
|
||||
std::streampos write(const Tp& data);
|
||||
|
||||
template <typename Type>
|
||||
std::vector<Type> read(size_t seekpos, size_t num_elements)
|
||||
{
|
||||
// Assert we are not mixing types with tool_counter_value_t
|
||||
static_assert(sizeof(Type) == 16);
|
||||
|
||||
std::vector<Type> ret;
|
||||
ret.resize(num_elements);
|
||||
|
||||
std::unique_lock<std::mutex> lk(file_mutex);
|
||||
if(!stream.is_open()) open();
|
||||
|
||||
stream.seekg(seekpos * sizeof(Type));
|
||||
stream.read((char*) ret.data(), num_elements * sizeof(Type));
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::atomic<size_t> offset{0};
|
||||
template <typename Tp>
|
||||
std::vector<Tp> read(std::streampos seekpos);
|
||||
|
||||
std::string filename = {};
|
||||
std::string subdirectory = {};
|
||||
@@ -84,3 +64,57 @@ struct tmp_file
|
||||
std::set<std::streampos> file_pos = {};
|
||||
std::mutex file_mutex = {};
|
||||
};
|
||||
|
||||
template <typename Tp>
|
||||
std::streampos
|
||||
tmp_file::write(const Tp* data, size_t num_records)
|
||||
{
|
||||
auto lk = std::unique_lock<std::mutex>{file_mutex};
|
||||
|
||||
if(!stream.is_open()) open();
|
||||
ROCP_CI_LOG_IF(WARNING, stream.tellg() != stream.tellp()) // this should always be true
|
||||
<< "tellg=" << stream.tellg() << ", tellp=" << stream.tellp();
|
||||
|
||||
auto pos = stream.tellp();
|
||||
stream.write(reinterpret_cast<const char*>(&num_records), sizeof(size_t));
|
||||
stream.write(reinterpret_cast<const char*>(data), num_records * sizeof(Tp));
|
||||
return pos;
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
std::streampos
|
||||
tmp_file::write(const Tp& data)
|
||||
{
|
||||
static_assert(std::is_standard_layout<Tp>::value, "only supports standard layout types");
|
||||
static_assert(!std::is_pointer<Tp>::value, "only supports non-pointer types");
|
||||
|
||||
auto lk = std::unique_lock<std::mutex>{file_mutex};
|
||||
|
||||
if(!stream.is_open()) open();
|
||||
ROCP_CI_LOG_IF(WARNING, stream.tellg() != stream.tellp())
|
||||
<< fmt::format("tellg={}, tellp={}", stream.tellg(), stream.tellp());
|
||||
|
||||
auto pos = stream.tellp();
|
||||
size_t num_records = 1;
|
||||
stream.write(reinterpret_cast<const char*>(&num_records), sizeof(size_t));
|
||||
stream.write(reinterpret_cast<const char*>(&data), num_records * sizeof(Tp));
|
||||
return pos;
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
std::vector<Tp>
|
||||
tmp_file::read(std::streampos seekpos)
|
||||
{
|
||||
auto lk = std::unique_lock<std::mutex>{file_mutex};
|
||||
if(!stream.is_open()) open();
|
||||
|
||||
stream.seekg(seekpos);
|
||||
size_t num_elements = 0;
|
||||
stream.read(reinterpret_cast<char*>(&num_elements), sizeof(size_t));
|
||||
|
||||
auto ret = std::vector<Tp>{};
|
||||
ret.resize(num_elements, Tp{});
|
||||
stream.read(reinterpret_cast<char*>(ret.data()), num_elements * sizeof(Tp));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -106,10 +106,16 @@ offload_buffer(domain_type type)
|
||||
auto _lk = std::lock_guard<std::mutex>(filebuf->file.file_mutex);
|
||||
[[maybe_unused]] static auto _success = filebuf->file.open();
|
||||
auto& _fs = filebuf->file.stream;
|
||||
filebuf->file.file_pos.emplace(_fs.tellg());
|
||||
|
||||
ROCP_CI_LOG_IF(WARNING, _fs.tellg() != _fs.tellp()) // this should always be true
|
||||
<< "tellg=" << _fs.tellg() << ", tellp=" << _fs.tellp();
|
||||
|
||||
filebuf->file.file_pos.emplace(_fs.tellp());
|
||||
filebuf->buffer.save(_fs);
|
||||
filebuf->buffer.clear();
|
||||
CHECK(filebuf->buffer.is_empty() == true);
|
||||
|
||||
ROCP_CI_LOG_IF(ERROR, !filebuf->buffer.is_empty())
|
||||
<< "buffer is not empty after offload: count=" << filebuf->buffer.count();
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
|
||||
@@ -958,19 +958,23 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da
|
||||
counter_record.dispatch_data = dispatch_data;
|
||||
counter_record.thread_id = user_data.value;
|
||||
|
||||
std::vector<rocprofiler::tool::tool_counter_value_t> serialized_records;
|
||||
serialized_records.resize(record_count);
|
||||
auto serialized_records = std::vector<tool::tool_counter_value_t>{};
|
||||
serialized_records.reserve(record_count);
|
||||
|
||||
for(size_t count = 0; count < record_count; count++)
|
||||
for(size_t count = 0; count < record_count; ++count)
|
||||
{
|
||||
auto _counter_id = rocprofiler_counter_id_t{};
|
||||
ROCPROFILER_CALL(rocprofiler_query_record_counter_id(record_data[count].id, &_counter_id),
|
||||
"query record counter id");
|
||||
serialized_records[count] = {_counter_id, record_data[count].counter_value};
|
||||
serialized_records.emplace_back(
|
||||
tool::tool_counter_value_t{_counter_id, record_data[count].counter_value});
|
||||
}
|
||||
|
||||
counter_record.writeRecord(serialized_records.data(), serialized_records.size());
|
||||
tool::write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION);
|
||||
if(!serialized_records.empty())
|
||||
{
|
||||
counter_record.write(serialized_records);
|
||||
tool::write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION);
|
||||
}
|
||||
}
|
||||
|
||||
rocprofiler_client_finalize_t client_finalizer = nullptr;
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする