rocprofv3: Updates to counter collection optimizations (#24)

* Updates to counter collection optimizations

* Fix logic error

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>

[ROCm/rocprofiler-sdk commit: a09eda05b2]
このコミットが含まれているのは:
Madsen, Jonathan
2024-12-06 17:29:12 -06:00
committed by GitHub
コミット 0ed4441ca7
6個のファイルの変更103行の追加62行の削除
+9 -8
ファイルの表示
@@ -40,21 +40,22 @@ namespace tool
{
constexpr auto type = domain_type::COUNTER_VALUES;
std::vector<tool_counter_value_t>
tool_counter_record_t::getRecords() const
tool_counter_record_t::container_type
tool_counter_record_t::read() const
{
auto& _tmp_file = get_tmp_file_buffer<tool_counter_value_t>(type)->file;
if(!record.fpos) return container_type{};
return _tmp_file.read<tool_counter_value_t>(records.offset, records.count);
auto& _tmp_file = CHECK_NOTNULL(get_tmp_file_buffer<tool_counter_value_t>(type))->file;
return _tmp_file.read<tool_counter_value_t>(*record.fpos);
}
void
tool_counter_record_t::writeRecord(const tool_counter_value_t* ptr, size_t num_records)
tool_counter_record_t::write(const tool_counter_record_t::container_type& _data)
{
auto& _tmp_file = get_tmp_file_buffer<tool_counter_value_t>(type)->file;
if(_data.empty()) return;
records.offset = _tmp_file.write<tool_counter_value_t>(ptr, num_records);
records.count = num_records;
auto& _tmp_file = CHECK_NOTNULL(get_tmp_file_buffer<tool_counter_value_t>(type))->file;
record.fpos = _tmp_file.write<tool_counter_value_t>(_data.data(), _data.size());
}
} // namespace tool
} // namespace rocprofiler
+7 -6
ファイルの表示
@@ -86,29 +86,30 @@ struct tool_counter_value_t
struct serialized_counter_record_t
{
size_t offset = 0;
size_t count = 0;
std::optional<std::streampos> fpos = std::nullopt;
};
struct tool_counter_record_t
{
using container_type = std::vector<tool_counter_value_t>;
uint64_t thread_id = 0;
rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
serialized_counter_record_t records = {};
serialized_counter_record_t record = {};
template <typename ArchiveT>
void save(ArchiveT& ar) const
{
// should be removed when moving to buffered tracing
auto tmp = getRecords();
auto tmp = read();
ar(cereal::make_nvp("thread_id", thread_id));
ar(cereal::make_nvp("dispatch_data", dispatch_data));
ar(cereal::make_nvp("records", tmp));
}
std::vector<tool_counter_value_t> getRecords() const;
void writeRecord(const tool_counter_value_t* ptr, size_t num_records);
container_type read() const;
void write(const container_type& data);
};
} // namespace tool
} // namespace rocprofiler
+4 -9
ファイルの表示
@@ -579,7 +579,9 @@ generate_csv(const output_config& cfg,
"Start_Timestamp",
"End_Timestamp"}};
auto counter_id_to_name = std::map<rocprofiler_counter_id_t, std::string>{};
auto counter_id_to_name = std::unordered_map<rocprofiler_counter_id_t, std::string_view>{};
for(const auto& itr : tool_metadata.get_counter_info())
counter_id_to_name.emplace(itr.id, itr.name);
for(auto ditr : data)
{
@@ -587,7 +589,7 @@ generate_csv(const output_config& cfg,
{
auto kernel_id = record.dispatch_data.dispatch_info.kernel_id;
auto counter_id_value = std::map<rocprofiler_counter_id_t, double>{};
auto record_vector = record.getRecords();
auto record_vector = record.read();
// Accumulate counters based on ID
for(auto& count : record_vector)
@@ -595,13 +597,6 @@ generate_csv(const output_config& cfg,
counter_id_value[count.id] += count.value;
}
// Query counter names for all IDs
for(auto& [id, _] : counter_id_value)
{
if(counter_id_to_name.find(id) == counter_id_to_name.end())
counter_id_to_name[id] = tool_metadata.get_counter_info(id)->name;
}
const auto& correlation_id = record.dispatch_data.correlation_id;
const auto* kernel_info = tool_metadata.get_kernel_symbol(kernel_id);
auto lds_block_size_v =
+65 -31
ファイルの表示
@@ -22,12 +22,16 @@
#pragma once
#include <atomic>
#include "lib/common/logging.hpp"
#include <fmt/format.h>
#include <fstream>
#include <ios>
#include <mutex>
#include <set>
#include <string>
#include <type_traits>
#include <vector>
struct tmp_file
@@ -43,38 +47,14 @@ struct tmp_file
explicit operator bool() const;
template <typename Type>
size_t write(const Type* data, size_t num_records)
{
// Assert we are not mixing types with tool_counter_value_t
static_assert(sizeof(Type) == 16);
size_t allocated = offset.fetch_add(num_records);
template <typename Tp>
std::streampos write(const Tp* data, size_t num_records);
std::unique_lock<std::mutex> lk(file_mutex);
if(!stream.is_open()) open();
stream.seekp(allocated * sizeof(Type));
stream.write((char*) data, num_records * sizeof(Type));
return allocated;
};
template <typename Tp>
std::streampos write(const Tp& data);
template <typename Type>
std::vector<Type> read(size_t seekpos, size_t num_elements)
{
// Assert we are not mixing types with tool_counter_value_t
static_assert(sizeof(Type) == 16);
std::vector<Type> ret;
ret.resize(num_elements);
std::unique_lock<std::mutex> lk(file_mutex);
if(!stream.is_open()) open();
stream.seekg(seekpos * sizeof(Type));
stream.read((char*) ret.data(), num_elements * sizeof(Type));
return ret;
}
std::atomic<size_t> offset{0};
template <typename Tp>
std::vector<Tp> read(std::streampos seekpos);
std::string filename = {};
std::string subdirectory = {};
@@ -84,3 +64,57 @@ struct tmp_file
std::set<std::streampos> file_pos = {};
std::mutex file_mutex = {};
};
// Appends one record made of `num_records` elements of `Tp` to the temporary file.
//
// Layout written: a `size_t` element count followed by the raw bytes of the elements,
// so `Tp` must be safe to copy byte-wise. The whole operation runs under `file_mutex`
// and opens the stream first if it is not open yet.
//
// data         pointer to the first element; a null pointer is stored as an empty record
// num_records  number of elements available at `data`
// returns      stream position of the count header, i.e. the value `read<Tp>()` expects
template <typename Tp>
std::streampos
tmp_file::write(const Tp* data, size_t num_records)
{
    auto lk = std::unique_lock<std::mutex>{file_mutex};
    if(!stream.is_open()) open();
    ROCP_CI_LOG_IF(WARNING, stream.tellg() != stream.tellp())  // this should always be true
        << "tellg=" << stream.tellg() << ", tellp=" << stream.tellp();

    // A file stream keeps a single position shared by reads and writes, so a preceding
    // read() leaves it inside older data. Move to the end so records are only ever
    // appended and the returned offset really is where this record starts.
    stream.seekp(0, std::ios_base::end);
    auto pos = stream.tellp();

    // Never dereference a null pointer: store it as a record with zero elements.
    const size_t count = (data != nullptr) ? num_records : 0;
    stream.write(reinterpret_cast<const char*>(&count), sizeof(size_t));
    if(count > 0) stream.write(reinterpret_cast<const char*>(data), count * sizeof(Tp));
    return pos;
}
// Appends a single value of `Tp` to the temporary file as a one-element record.
//
// The record uses the same layout as the pointer/count overload: a `size_t` element
// count (always 1 here) followed by the raw bytes of `data`. Runs under `file_mutex`
// and opens the stream first if it is not open yet.
//
// data     value to store; must be a standard-layout, non-pointer type
// returns  stream position of the count header, i.e. the value `read<Tp>()` expects
template <typename Tp>
std::streampos
tmp_file::write(const Tp& data)
{
    static_assert(std::is_standard_layout<Tp>::value, "only supports standard layout types");
    static_assert(!std::is_pointer<Tp>::value, "only supports non-pointer types");

    auto lk = std::unique_lock<std::mutex>{file_mutex};
    if(!stream.is_open()) open();

    // std::streampos is a class type; convert to the plain integer offset before
    // formatting so this does not depend on fmt being able to format it directly.
    ROCP_CI_LOG_IF(WARNING, stream.tellg() != stream.tellp())
        << fmt::format("tellg={}, tellp={}",
                       static_cast<std::streamoff>(stream.tellg()),
                       static_cast<std::streamoff>(stream.tellp()));

    // A file stream keeps a single position shared by reads and writes, so a preceding
    // read() leaves it inside older data. Move to the end so the record is appended
    // and the returned offset really is where this record starts.
    stream.seekp(0, std::ios_base::end);
    auto pos = stream.tellp();

    const size_t num_records = 1;
    stream.write(reinterpret_cast<const char*>(&num_records), sizeof(size_t));
    stream.write(reinterpret_cast<const char*>(&data), num_records * sizeof(Tp));
    return pos;
}
template <typename Tp>
std::vector<Tp>
tmp_file::read(std::streampos seekpos)
{
auto lk = std::unique_lock<std::mutex>{file_mutex};
if(!stream.is_open()) open();
stream.seekg(seekpos);
size_t num_elements = 0;
stream.read(reinterpret_cast<char*>(&num_elements), sizeof(size_t));
auto ret = std::vector<Tp>{};
ret.resize(num_elements, Tp{});
stream.read(reinterpret_cast<char*>(ret.data()), num_elements * sizeof(Tp));
return ret;
}
+8 -2
ファイルの表示
@@ -106,10 +106,16 @@ offload_buffer(domain_type type)
auto _lk = std::lock_guard<std::mutex>(filebuf->file.file_mutex);
[[maybe_unused]] static auto _success = filebuf->file.open();
auto& _fs = filebuf->file.stream;
filebuf->file.file_pos.emplace(_fs.tellg());
ROCP_CI_LOG_IF(WARNING, _fs.tellg() != _fs.tellp()) // this should always be true
<< "tellg=" << _fs.tellg() << ", tellp=" << _fs.tellp();
filebuf->file.file_pos.emplace(_fs.tellp());
filebuf->buffer.save(_fs);
filebuf->buffer.clear();
CHECK(filebuf->buffer.is_empty() == true);
ROCP_CI_LOG_IF(ERROR, !filebuf->buffer.is_empty())
<< "buffer is not empty after offload: count=" << filebuf->buffer.count();
}
template <typename Tp>
+10 -6
ファイルの表示
@@ -958,19 +958,23 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da
counter_record.dispatch_data = dispatch_data;
counter_record.thread_id = user_data.value;
std::vector<rocprofiler::tool::tool_counter_value_t> serialized_records;
serialized_records.resize(record_count);
auto serialized_records = std::vector<tool::tool_counter_value_t>{};
serialized_records.reserve(record_count);
for(size_t count = 0; count < record_count; count++)
for(size_t count = 0; count < record_count; ++count)
{
auto _counter_id = rocprofiler_counter_id_t{};
ROCPROFILER_CALL(rocprofiler_query_record_counter_id(record_data[count].id, &_counter_id),
"query record counter id");
serialized_records[count] = {_counter_id, record_data[count].counter_value};
serialized_records.emplace_back(
tool::tool_counter_value_t{_counter_id, record_data[count].counter_value});
}
counter_record.writeRecord(serialized_records.data(), serialized_records.size());
tool::write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION);
if(!serialized_records.empty())
{
counter_record.write(serialized_records);
tool::write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION);
}
}
rocprofiler_client_finalize_t client_finalizer = nullptr;