From 0ed4441ca78e90f7644a8bc19b3d8a852fd0f06d Mon Sep 17 00:00:00 2001
From: "Madsen, Jonathan"
Date: Fri, 6 Dec 2024 17:29:12 -0600
Subject: [PATCH] rocprofv3: Updates to counter collection optimizations (#24)

* Updates to counter collection optimizations

* Fix logic error

---------

Co-authored-by: Jonathan R. Madsen

[ROCm/rocprofiler-sdk commit: a09eda05b2081bae9907d5f8f717a9c701d9d236]
---
 .../source/lib/output/counter_info.cpp        | 17 ++--
 .../source/lib/output/counter_info.hpp        | 13 +--
 .../source/lib/output/generateCSV.cpp         | 13 +--
 .../source/lib/output/tmp_file.hpp            | 96 +++++++++++++------
 .../source/lib/output/tmp_file_buffer.hpp     | 10 +-
 .../source/lib/rocprofiler-sdk-tool/tool.cpp  | 16 ++--
 6 files changed, 103 insertions(+), 62 deletions(-)

diff --git a/projects/rocprofiler-sdk/source/lib/output/counter_info.cpp b/projects/rocprofiler-sdk/source/lib/output/counter_info.cpp
index 6fb949e021..d31dbf6a20 100644
--- a/projects/rocprofiler-sdk/source/lib/output/counter_info.cpp
+++ b/projects/rocprofiler-sdk/source/lib/output/counter_info.cpp
@@ -40,21 +40,22 @@ namespace tool
 {
 constexpr auto type = domain_type::COUNTER_VALUES;
 
-std::vector<tool_counter_value_t>
-tool_counter_record_t::getRecords() const
+tool_counter_record_t::container_type
+tool_counter_record_t::read() const
 {
-    auto& _tmp_file = get_tmp_file_buffer<tool_counter_value_t>(type)->file;
+    if(!record.fpos) return container_type{};
 
-    return _tmp_file.read<tool_counter_value_t>(records.offset, records.count);
+    auto& _tmp_file = CHECK_NOTNULL(get_tmp_file_buffer<tool_counter_value_t>(type))->file;
+    return _tmp_file.read<tool_counter_value_t>(*record.fpos);
 }
 
 void
-tool_counter_record_t::writeRecord(const tool_counter_value_t* ptr, size_t num_records)
+tool_counter_record_t::write(const tool_counter_record_t::container_type& _data)
 {
-    auto& _tmp_file = get_tmp_file_buffer<tool_counter_value_t>(type)->file;
+    if(_data.empty()) return;
 
-    records.offset = _tmp_file.write(ptr, num_records);
-    records.count  = num_records;
+    auto& _tmp_file = CHECK_NOTNULL(get_tmp_file_buffer<tool_counter_value_t>(type))->file;
+    record.fpos     = _tmp_file.write(_data.data(), _data.size());
 }
 }  // namespace tool
 }  // namespace rocprofiler
diff --git a/projects/rocprofiler-sdk/source/lib/output/counter_info.hpp b/projects/rocprofiler-sdk/source/lib/output/counter_info.hpp
index 8af62ef35c..b4861f89e0 100644
--- a/projects/rocprofiler-sdk/source/lib/output/counter_info.hpp
+++ b/projects/rocprofiler-sdk/source/lib/output/counter_info.hpp
@@ -86,29 +86,30 @@ struct tool_counter_value_t
 
 struct serialized_counter_record_t
 {
-    size_t offset = 0;
-    size_t count  = 0;
+    std::optional<std::streampos> fpos = std::nullopt;
 };
 
 struct tool_counter_record_t
 {
+    using container_type = std::vector<tool_counter_value_t>;
+
     uint64_t                                     thread_id     = 0;
     rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
-    serialized_counter_record_t                  records       = {};
+    serialized_counter_record_t                  record        = {};
 
     template <typename ArchiveT>
     void save(ArchiveT& ar) const
     {
         // should be removed when moving to buffered tracing
-        auto tmp = getRecords();
+        auto tmp = read();
 
         ar(cereal::make_nvp("thread_id", thread_id));
         ar(cereal::make_nvp("dispatch_data", dispatch_data));
         ar(cereal::make_nvp("records", tmp));
     }
 
-    std::vector<tool_counter_value_t> getRecords() const;
-    void                              writeRecord(const tool_counter_value_t* ptr, size_t num_records);
+    container_type read() const;
+    void           write(const container_type& data);
 };
 }  // namespace tool
 }  // namespace rocprofiler
diff --git a/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp b/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp
index 12a308a2c9..ad7080083a 100644
--- a/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp
+++ b/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp
@@ -579,7 +579,9 @@ generate_csv(const output_config& cfg,
                                       "Start_Timestamp",
                                       "End_Timestamp"}};
 
-    auto counter_id_to_name = std::map<rocprofiler_counter_id_t, std::string>{};
+    auto counter_id_to_name = std::unordered_map<rocprofiler_counter_id_t, std::string>{};
+    for(const auto& itr : tool_metadata.get_counter_info())
+        counter_id_to_name.emplace(itr.id, itr.name);
 
     for(auto ditr : data)
     {
@@ -587,7 +589,7 @@ generate_csv(const output_config& cfg,
         {
             auto kernel_id        = record.dispatch_data.dispatch_info.kernel_id;
             auto counter_id_value = std::map<rocprofiler_counter_id_t, double>{};
-            auto record_vector    = record.getRecords();
+            auto record_vector    = record.read();
 
             // Accumulate counters based on ID
             for(auto& count : record_vector)
@@ -595,13 +597,6 @@ generate_csv(const output_config& cfg,
                 counter_id_value[count.id] += count.value;
             }
 
-            // Query counter names for all IDs
-            for(auto& [id, _] : counter_id_value)
-            {
-                if(counter_id_to_name.find(id) == counter_id_to_name.end())
-                    counter_id_to_name[id] = tool_metadata.get_counter_info(id)->name;
-            }
-
             const auto& correlation_id = record.dispatch_data.correlation_id;
             const auto* kernel_info    = tool_metadata.get_kernel_symbol(kernel_id);
             auto        lds_block_size_v =
diff --git a/projects/rocprofiler-sdk/source/lib/output/tmp_file.hpp b/projects/rocprofiler-sdk/source/lib/output/tmp_file.hpp
index 748f8dfc4d..f4134e5458 100644
--- a/projects/rocprofiler-sdk/source/lib/output/tmp_file.hpp
+++ b/projects/rocprofiler-sdk/source/lib/output/tmp_file.hpp
@@ -22,12 +22,16 @@
 
 #pragma once
 
-#include <atomic>
+#include "lib/common/logging.hpp"
+
+#include <fmt/format.h>
+
 #include <fstream>
 #include <ios>
 #include <mutex>
 #include <set>
 #include <string>
+#include <type_traits>
 #include <vector>
 
 struct tmp_file
@@ -43,38 +47,14 @@ struct tmp_file
 
     explicit operator bool() const;
 
-    template <typename Type>
-    size_t write(const Type* data, size_t num_records)
-    {
-        // Assert we are not mixing types with tool_counter_value_t
-        static_assert(sizeof(Type) == 16);
-        size_t allocated = offset.fetch_add(num_records);
+    template <typename Tp>
+    std::streampos write(const Tp* data, size_t num_records);
 
-        std::unique_lock lk(file_mutex);
-        if(!stream.is_open()) open();
-        stream.seekp(allocated * sizeof(Type));
-        stream.write((char*) data, num_records * sizeof(Type));
-        return allocated;
-    };
+    template <typename Tp>
+    std::streampos write(const Tp& data);
 
-    template <typename Type>
-    std::vector<Type> read(size_t seekpos, size_t num_elements)
-    {
-        // Assert we are not mixing types with tool_counter_value_t
-        static_assert(sizeof(Type) == 16);
-
-        std::vector<Type> ret;
-        ret.resize(num_elements);
-
-        std::unique_lock lk(file_mutex);
-        if(!stream.is_open()) open();
-
-        stream.seekg(seekpos * sizeof(Type));
-        stream.read((char*) ret.data(), num_elements * sizeof(Type));
-        return ret;
-    }
-
-    std::atomic<size_t> offset{0};
+    template <typename Tp>
+    std::vector<Tp> read(std::streampos seekpos);
 
     std::string              filename     = {};
     std::string              subdirectory = {};
@@ -84,3 +64,57 @@ struct tmp_file
     std::set<std::streampos> file_pos     = {};
     std::mutex               file_mutex   = {};
 };
+
+template <typename Tp>
+std::streampos
+tmp_file::write(const Tp* data, size_t num_records)
+{
+    auto lk = std::unique_lock{file_mutex};
+
+    if(!stream.is_open()) open();
+    ROCP_CI_LOG_IF(WARNING, stream.tellg() != stream.tellp())  // this should always be true
+        << "tellg=" << stream.tellg() << ", tellp=" << stream.tellp();
+
+    auto pos = stream.tellp();
+    stream.write(reinterpret_cast<const char*>(&num_records), sizeof(size_t));
+    stream.write(reinterpret_cast<const char*>(data), num_records * sizeof(Tp));
+    return pos;
+}
+
+template <typename Tp>
+std::streampos
+tmp_file::write(const Tp& data)
+{
+    static_assert(std::is_standard_layout<Tp>::value, "only supports standard layout types");
+    static_assert(!std::is_pointer<Tp>::value, "only supports non-pointer types");
+
+    auto lk = std::unique_lock{file_mutex};
+
+    if(!stream.is_open()) open();
+    ROCP_CI_LOG_IF(WARNING, stream.tellg() != stream.tellp())
+        << fmt::format("tellg={}, tellp={}", stream.tellg(), stream.tellp());
+
+    auto   pos         = stream.tellp();
+    size_t num_records = 1;
+    stream.write(reinterpret_cast<const char*>(&num_records), sizeof(size_t));
+    stream.write(reinterpret_cast<const char*>(&data), num_records * sizeof(Tp));
+    return pos;
+}
+
+template <typename Tp>
+std::vector<Tp>
+tmp_file::read(std::streampos seekpos)
+{
+    auto lk = std::unique_lock{file_mutex};
+    if(!stream.is_open()) open();
+
+    stream.seekg(seekpos);
+    size_t num_elements = 0;
+    stream.read(reinterpret_cast<char*>(&num_elements), sizeof(size_t));
+
+    auto ret = std::vector<Tp>{};
+    ret.resize(num_elements, Tp{});
+    stream.read(reinterpret_cast<char*>(ret.data()), num_elements * sizeof(Tp));
+
+    return ret;
+}
diff --git a/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp b/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp
index bbe189a254..aa79e69067 100644
--- a/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp
+++ b/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp
@@ -106,10 +106,16 @@ offload_buffer(domain_type type)
     auto                         _lk      = std::lock_guard(filebuf->file.file_mutex);
     [[maybe_unused]] static auto _success = filebuf->file.open();
     auto&                        _fs      = filebuf->file.stream;
-    filebuf->file.file_pos.emplace(_fs.tellg());
+
+    ROCP_CI_LOG_IF(WARNING, _fs.tellg() != _fs.tellp())  // this should always be true
+        << "tellg=" << _fs.tellg() << ", tellp=" << _fs.tellp();
+
+    filebuf->file.file_pos.emplace(_fs.tellp());
     filebuf->buffer.save(_fs);
     filebuf->buffer.clear();
-    CHECK(filebuf->buffer.is_empty() == true);
+
+    ROCP_CI_LOG_IF(ERROR, !filebuf->buffer.is_empty())
+        << "buffer is not empty after offload: count=" << filebuf->buffer.count();
 }
 
 template <typename Tp>
diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp
index c118709d68..b2299111a6 100644
--- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp
+++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp
@@ -958,19 +958,23 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da
     counter_record.dispatch_data = dispatch_data;
     counter_record.thread_id     = user_data.value;
 
-    std::vector<tool::tool_counter_value_t> serialized_records;
-    serialized_records.resize(record_count);
+    auto serialized_records = std::vector<tool::tool_counter_value_t>{};
+    serialized_records.reserve(record_count);
 
-    for(size_t count = 0; count < record_count; count++)
+    for(size_t count = 0; count < record_count; ++count)
     {
         auto _counter_id = rocprofiler_counter_id_t{};
         ROCPROFILER_CALL(rocprofiler_query_record_counter_id(record_data[count].id, &_counter_id),
                          "query record counter id");
-        serialized_records[count] = {_counter_id, record_data[count].counter_value};
+        serialized_records.emplace_back(
+            tool::tool_counter_value_t{_counter_id, record_data[count].counter_value});
     }
 
-    counter_record.writeRecord(serialized_records.data(), serialized_records.size());
-    tool::write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION);
+    if(!serialized_records.empty())
+    {
+        counter_record.write(serialized_records);
+        tool::write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION);
+    }
 }
 
 rocprofiler_client_finalize_t client_finalizer = nullptr;