Arquivos
rocm-systems/source/lib/output/generateStats.cpp
T
Jonathan R. Madsen 5eb8c2658c rocprofv3: refactor and reorganize rocprofiler-sdk-tool library (#1138)
* Add rocprofv3-multi-node.md to source/lib/rocprofiler-sdk-tool

* Initial source re-organization

- create "output" static library

* Update include/rocprofiler-sdk/cxx/serialization.hpp

- add GPR count fields to kernel symbol serialization

* Add source/scripts/generate-rocpd.py

- reads one or more JSON output files from rocprofv3 and writes rocpd SQLite3 database
- Note: preliminary implementation

* More reorganization b/t lib/rocprofiler-sdk-tool and lib/output

* Updates to generate-rocpd.py

- add SQL views
- option: --absolute-timestamps -> --normalize-timestamps
- option: --generic-markers
- misc fixes with regards to getting the views working
- support marker names

* Update generate-rocpd.py

- Add --marker-mode option

* Update generate-rocpd.py

- Improve debugging of bad bulk SQLite statements

* Update rocprofv3-multi-node.md

- cleanup of proposed SQL schema

* lib/output/format_path.{hpp,cpp}

- rename format to format_path (in config.hpp and config.cpp)
- move format_path functionality to format_path.{hpp,cpp}

* Rework lib/output/tmp_file_buffer.{hpp,cpp}

* Update output_key.cpp

- support %cwd%, %launch_date%

* Rework lib/output/buffered_output.hpp

* Support csv_output_file constructed via domain_type

* Update lib/output/domain_type.{hpp,cpp}

- get_domain_trace_file_name
- get_domain_stats_file_name

* Update lib/rocprofiler-sdk-tool/tool.cpp

- tweak headers

* Update lib/output/generate*.cpp

- remove include of helpers.hpp
- CSV uses domain_type for filenames

* Update samples/counter_collection/per_dev_serialization.cpp

- make wait_on volatile

* Remove tool_table from lib/output and lib/rocprofiler-sdk-tool

- Also split various structs into their own files
  - lib/output/agent_info
  - lib/output/metadata
  - lib/output/kernel_symbol_info
  - lib/output/counter_info
- Implemented rocprofiler::tool::metadata

* Optimize rocprofiler_tool_counter_collection_record_t

- reduce the size of the struct from 24784 bytes to 8376 bytes

* Introduced output_config

- split subset of config (from tools library) into output_config to be able to configure the output generating functions separately from the tool library
- this is a significant step towards the output generating functions not relying on static global memory

* Stream chunks of data into output instead of loading all info memory

* Remove duplicate group_segment_size in rocprofiler_kernel_dispatch_info_t serialization

* Adding Q&A to rocprofv3-multi-node.md

* Remove all remaining include lib/rocprofiler-sdk-tool from lib/output

- migrated a fair amount of code from lib/rocprofiler-sdk-tool/helper.hpp to lib/output

* Update Q&A of rocprofv3-multi-node.md

* Fix minor compilation errors + minor cleanup

* Update hsa/async_copy.cpp

- when ROCPROFILER_CI_STRICT_TIMESTAMPS > 0, reduce the active_signal sync wait time

* Update profiling_time.hpp

- fix log messages for when start/end time is less/greater than enqueue/current CPU time

* Fix generate_stats for tool_counter_record_t

* Dictionary optimization for generate-rocpd.py

---------

Co-authored-by: SrirakshaNag <104580803+SrirakshaNag@users.noreply.github.com>
2024-11-07 01:15:19 -06:00

398 linhas
13 KiB
C++

// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "generateStats.hpp"
#include "domain_type.hpp"
#include "output_stream.hpp"
#include "statistics.hpp"
#include "timestamps.hpp"
#include "lib/common/logging.hpp"
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/marker/api_id.h>
#include <fmt/format.h>
#include <unistd.h>
#include <cstdint>
#include <iomanip>
#include <regex>
#include <sstream>
#include <string_view>
#include <utility>
namespace rocprofiler
{
namespace tool
{
namespace
{
stats_entry_t
get_stats(const stats_map_t& data_v)
{
auto _stats = stats_entry_t{};
for(const auto& [id, value] : data_v)
{
_stats.entries.emplace_back(id, value);
_stats.total += value;
}
return _stats.sort();
}
} // namespace
stats_entry_t
generate_stats(const output_config& /*cfg*/,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data)
{
auto kernel_stats = stats_map_t{};
for(auto ditr : data)
{
for(auto record : data.get(ditr))
{
auto kernel_name = tool_metadata.get_kernel_name(record.dispatch_info.kernel_id,
record.correlation_id.external.value);
kernel_stats[kernel_name] += (record.end_timestamp - record.start_timestamp);
}
}
return get_stats(kernel_stats);
}
stats_entry_t
generate_stats(const output_config& /*cfg*/,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& data)
{
auto hip_stats = stats_map_t{};
for(auto ditr : data)
{
for(auto record : data.get(ditr))
{
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
hip_stats[api_name] += (record.end_timestamp - record.start_timestamp);
}
}
return get_stats(hip_stats);
}
stats_entry_t
generate_stats(const output_config& /*cfg*/,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& data)
{
auto hsa_stats = stats_map_t{};
for(auto ditr : data)
{
for(auto record : data.get(ditr))
{
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
hsa_stats[api_name] += (record.end_timestamp - record.start_timestamp);
}
}
return get_stats(hsa_stats);
}
stats_entry_t
generate_stats(const output_config& /*cfg*/,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data)
{
auto memory_copy_stats = stats_map_t{};
for(auto ditr : data)
{
for(auto record : data.get(ditr))
{
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
memory_copy_stats[api_name] += (record.end_timestamp - record.start_timestamp);
}
}
return get_stats(memory_copy_stats);
}
stats_entry_t
generate_stats(const output_config& /*cfg*/,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& data)
{
auto marker_stats = stats_map_t{};
for(auto ditr : data)
{
for(auto record : data.get(ditr))
{
auto _name = std::string_view{};
if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA ||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA ||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA))
{
_name = tool_metadata.get_marker_message(record.correlation_id.internal);
}
else
{
_name = tool_metadata.get_operation_name(record.kind, record.operation);
}
marker_stats[_name] += (record.end_timestamp - record.start_timestamp);
}
}
return get_stats(marker_stats);
}
stats_entry_t
generate_stats(const output_config& /*cfg*/,
const metadata& /*tool_metadata*/,
const generator<tool_counter_record_t>& /*data*/)
{
return stats_entry_t{};
}
stats_entry_t
generate_stats(const output_config& /*cfg*/,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& data)
{
auto scratch_memory_stats = stats_map_t{};
for(auto ditr : data)
{
for(auto record : data.get(ditr))
{
auto op_name = tool_metadata.get_operation_name(record.kind, record.operation);
scratch_memory_stats[op_name] += (record.end_timestamp - record.start_timestamp);
}
}
return get_stats(scratch_memory_stats);
}
stats_entry_t
generate_stats(const output_config& /*cfg*/,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& data)
{
auto rccl_stats = stats_map_t{};
for(auto ditr : data)
{
for(auto record : data.get(ditr))
{
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
rccl_stats[api_name] += (record.end_timestamp - record.start_timestamp);
}
}
return get_stats(rccl_stats);
}
namespace
{
void
generate_stats(const output_config& cfg,
output_stream& os,
std::string_view label,
const domain_stats_vec_t& data_v,
std::string_view indent_v)
{
auto _data = stats_entry_t{};
auto _cols = std::unordered_map<std::string_view, domain_type>{};
auto _get_entry = [&_data, &_cols](domain_type _domain,
std::string_view _key) -> stats_data_t* {
for(auto& itr : _data.entries)
{
if(itr.first == _key) return &itr.second;
}
_cols.emplace(_key, _domain);
auto& itr = _data.entries.emplace_back(_key, stats_data_t{});
return &itr.second;
};
uint64_t name_width = 40;
uint64_t domain_width = 12;
for(const auto& itr : data_v)
{
for(const auto& eitr : itr.second.entries)
{
_data.total += eitr.second;
auto* _entry = _get_entry(itr.first, eitr.first);
*CHECK_NOTNULL(_entry) += eitr.second;
name_width = std::max(name_width, eitr.first.length());
}
domain_width = std::max(domain_width, get_domain_column_name(itr.first).length());
}
if(!_data) return;
std::sort(_data.entries.begin(), _data.entries.end(), [](const auto& lhs, const auto& rhs) {
return (lhs.second.get_sum() > rhs.second.get_sum());
});
const float_type _total_duration = _data.total.get_sum();
os << fmt::format("\n{}ROCPROFV3 {}:\n\n", indent_v, label) << std::flush;
{
auto _header = fmt::format(
"| {:^{}} | {:^{}} | {:^15} | {:^15} | {:^15} | {:^13} | {:^15} | {:^15} | {:^15} |",
"NAME",
name_width,
"DOMAIN",
domain_width,
"CALLS",
fmt::format("DURATION ({})", cfg.stats_summary_unit),
fmt::format("AVERAGE ({})", cfg.stats_summary_unit),
"PERCENT (INC)",
fmt::format("MIN ({})", cfg.stats_summary_unit),
fmt::format("MAX ({})", cfg.stats_summary_unit),
"STDDEV");
(*os.stream) << indent_v << _header << "\n" << std::flush;
auto _div =
fmt::format("|-{0:-^{1}}-|-{0:-^{2}}-|-{0:-^15}-|-{0:-^15}-|-{0:-^15}-|-{0:-^13}"
"-|-{0:-^15}-|-{0:-^15}-|-{0:-^15}-|",
"",
name_width,
domain_width);
(*os.stream) << indent_v << _div << "\n" << std::flush;
}
for(const auto& [type, value] : _data.entries)
{
auto name = type;
auto duration_ns = value.get_sum();
auto calls = value.get_count();
auto avg_ns = value.get_mean();
auto percent_v = value.get_percent(_total_duration);
auto percent = std::to_string(percent_v);
auto _row = std::string{};
if(cfg.stats_summary_unit_value > 1)
{
auto _unit_div = static_cast<double>(cfg.stats_summary_unit_value);
_row = fmt::format("{}| {:<{}} | {:<{}} | {:15} | {:15} | {:15.3e} | {:>13} | {:15} | "
"{:15} | {:15.3e} |",
indent_v,
name,
name_width,
get_domain_column_name(_cols.at(name)),
domain_width,
calls,
duration_ns / _unit_div,
avg_ns / _unit_div,
percent,
value.get_min() / _unit_div,
value.get_max() / _unit_div,
value.get_stddev() / _unit_div);
}
else
{
_row = fmt::format("{}| {:<{}} | {:<{}} | {:15} | {:15} | {:15.3e} | {:>13} | {:15} | "
"{:15} | {:15.3e} |",
indent_v,
name,
name_width,
get_domain_column_name(_cols.at(name)),
domain_width,
calls,
duration_ns,
avg_ns,
percent,
value.get_min(),
value.get_max(),
value.get_stddev());
}
(*os.stream) << _row << "\n" << std::flush;
}
(*os.stream) << "\n" << std::flush;
}
} // namespace
void
generate_stats(const output_config& cfg,
const metadata& /*tool_metadata*/,
const domain_stats_vec_t& inp_data)
{
auto data_v = inp_data;
std::sort(data_v.begin(), data_v.end(), [](const auto& lhs, const auto& rhs) {
return lhs.first < rhs.first;
});
output_stream _os = get_output_stream(cfg, cfg.stats_summary_file, ".txt");
auto _indent = (_os.writes_to_file()) ? std::string_view{} : std::string_view{" "};
if(cfg.stats_summary_per_domain)
{
for(const auto& itr : data_v)
{
if(!itr.second) continue;
auto _name = fmt::format("{} SUMMARY", get_domain_column_name(itr.first));
auto _tmp = domain_stats_vec_t{};
_tmp.emplace_back(itr.first, itr.second);
generate_stats(cfg, _os, _name, _tmp, _indent);
}
}
if(!cfg.stats_summary_groups.empty())
{
auto domain_groups = std::vector<domain_stats_vec_t>{};
for(const auto& itr : cfg.stats_summary_groups)
{
auto _names = std::vector<std::string>{};
auto _tmp = domain_stats_vec_t{};
for(const auto& ditr : data_v)
{
auto _col_name = get_domain_column_name(ditr.first);
if(std::regex_match(_col_name.data(), std::regex{itr}))
{
if(!ditr.second) continue;
_names.emplace_back(_col_name);
_tmp.emplace_back(ditr.first, ditr.second);
}
}
ROCP_CI_LOG_IF(WARNING, _names.empty())
<< "summary group regex '" << itr << "' matched with zero domain groups";
auto _name = fmt::format("{} SUMMARY", fmt::join(_names.begin(), _names.end(), " + "));
generate_stats(cfg, _os, _name, _tmp, _indent);
}
}
if(cfg.stats_summary) generate_stats(cfg, _os, "SUMMARY", data_v, _indent);
}
} // namespace tool
} // namespace rocprofiler