diff --git a/samples/advanced_thread_trace/CMakeLists.txt b/samples/advanced_thread_trace/CMakeLists.txt index e92cddc652..694bbabde2 100644 --- a/samples/advanced_thread_trace/CMakeLists.txt +++ b/samples/advanced_thread_trace/CMakeLists.txt @@ -49,7 +49,7 @@ target_link_libraries( PRIVATE rocprofiler-sdk::rocprofiler-sdk amd_comgr rocprofiler-sdk::samples-common-library rocprofiler-sdk::samples-build-flags) -rocprofiler_samples_get_preload_env(PRELOAD_ENV advanced-thread-trace) +rocprofiler_samples_get_preload_env(PRELOAD_ENV) add_test(NAME advanced-thread-trace COMMAND $) diff --git a/samples/code_object_isa_decode/CMakeLists.txt b/samples/code_object_isa_decode/CMakeLists.txt index cfdb889451..f3ae85abfb 100644 --- a/samples/code_object_isa_decode/CMakeLists.txt +++ b/samples/code_object_isa_decode/CMakeLists.txt @@ -106,7 +106,7 @@ target_link_libraries( PRIVATE rocprofiler-sdk::samples-common-library rocprofiler-sdk::rocprofiler-sdk amd_comgr rocprofiler-sdk::samples-build-flags) -rocprofiler_samples_get_preload_env(PRELOAD_ENV code-object-isa-decode) +rocprofiler_samples_get_preload_env(PRELOAD_ENV) add_test(NAME code-object-isa-decode COMMAND $) diff --git a/samples/counter_collection/per_dev_serialization.cpp b/samples/counter_collection/per_dev_serialization.cpp index fb14018e90..0400c6757d 100644 --- a/samples/counter_collection/per_dev_serialization.cpp +++ b/samples/counter_collection/per_dev_serialization.cpp @@ -36,13 +36,16 @@ } while(0) __global__ void -kernelA(int* wait_on, int value, int* no_opt) +kernelA(int devid, volatile int* wait_on, int value, int* no_opt) { + printf("[device=%i][begin] Wait on %i: %i (%i)\n", devid, value, *wait_on, *no_opt); while(*wait_on != value) { (*no_opt)++; }; + printf("[device=%i][break] Wait on %i: %i (%i)\n", devid, value, *wait_on, *no_opt); (*wait_on)--; + printf("[device=%i][return] Wait on %i: %i (%i)\n", devid, value, *wait_on, *no_opt); } int @@ -53,17 +56,23 @@ main(int, char**) if(ntotdevice < 2) return 
0; start(); - int* check_value = nullptr; - int* no_opt = nullptr; + volatile int* check_value = nullptr; + int* no_opt_0 = nullptr; + int* no_opt_1 = nullptr; HIP_CALL(hipMallocManaged(&check_value, sizeof(*check_value))); - HIP_CALL(hipMallocManaged(&no_opt, sizeof(*no_opt))); - *no_opt = 0; + HIP_CALL(hipMallocManaged(&no_opt_0, sizeof(*no_opt_0))); + HIP_CALL(hipMallocManaged(&no_opt_1, sizeof(*no_opt_1))); + *no_opt_0 = 0; + *no_opt_1 = 0; *check_value = 1; + // Will hang if per-device serialization is not functional HIP_CALL(hipSetDevice(0)); - hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0, check_value, 0, no_opt); + hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0, 0, check_value, 0, no_opt_0); + HIP_CALL(hipSetDevice(1)); - hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0, check_value, 1, no_opt); + hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0, 1, check_value, 1, no_opt_1); + HIP_CALL(hipSetDevice(0)); HIP_CALL(hipDeviceSynchronize()); diff --git a/source/include/rocprofiler-sdk/cxx/serialization.hpp b/source/include/rocprofiler-sdk/cxx/serialization.hpp index ea7f94636c..a06893e16b 100644 --- a/source/include/rocprofiler-sdk/cxx/serialization.hpp +++ b/source/include/rocprofiler-sdk/cxx/serialization.hpp @@ -172,6 +172,9 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_kernel_symbol_regist ROCP_SDK_SAVE_DATA_FIELD(kernarg_segment_alignment); ROCP_SDK_SAVE_DATA_FIELD(group_segment_size); ROCP_SDK_SAVE_DATA_FIELD(private_segment_size); + ROCP_SDK_SAVE_DATA_FIELD(sgpr_count); + ROCP_SDK_SAVE_DATA_FIELD(arch_vgpr_count); + ROCP_SDK_SAVE_DATA_FIELD(accum_vgpr_count); } template @@ -301,7 +304,6 @@ save(ArchiveT& ar, rocprofiler_kernel_dispatch_info_t data) ROCP_SDK_SAVE_DATA_FIELD(private_segment_size); ROCP_SDK_SAVE_DATA_FIELD(group_segment_size); ROCP_SDK_SAVE_DATA_FIELD(workgroup_size); - ROCP_SDK_SAVE_DATA_FIELD(group_segment_size); ROCP_SDK_SAVE_DATA_FIELD(grid_size); } diff --git a/source/lib/CMakeLists.txt 
b/source/lib/CMakeLists.txt index 9f9992c419..2237677653 100644 --- a/source/lib/CMakeLists.txt +++ b/source/lib/CMakeLists.txt @@ -3,6 +3,7 @@ # set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "core") add_subdirectory(common) +add_subdirectory(output) add_subdirectory(rocprofiler-sdk) set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "tools") diff --git a/source/lib/common/synchronized.hpp b/source/lib/common/synchronized.hpp index 37ab31f304..67bd7855ee 100644 --- a/source/lib/common/synchronized.hpp +++ b/source/lib/common/synchronized.hpp @@ -73,6 +73,9 @@ public: // Do not allow this data structure to be copied, std::move only. Synchronized(const Synchronized&) = delete; + // return a copy of the data + value_type get() const; + template decltype(auto) rlock(FuncT&& lambda, Args&&... args) const; @@ -100,6 +103,14 @@ private: // // member definitions // +template +typename Synchronized::value_type +Synchronized::get() const +{ + auto lock = std::shared_lock{m_mutex}; + return m_data; +} + template template decltype(auto) diff --git a/source/lib/output/CMakeLists.txt b/source/lib/output/CMakeLists.txt new file mode 100644 index 0000000000..f2048abab1 --- /dev/null +++ b/source/lib/output/CMakeLists.txt @@ -0,0 +1,61 @@ +# +# Tool library used by rocprofiler +# + +rocprofiler_activate_clang_tidy() + +set(TOOL_OUTPUT_HEADERS + agent_info.hpp + buffered_output.hpp + counter_info.hpp + csv.hpp + csv_output_file.hpp + domain_type.hpp + format_path.hpp + generateCSV.hpp + generateJSON.hpp + generateOTF2.hpp + generatePerfetto.hpp + generateStats.hpp + generator.hpp + kernel_symbol_info.hpp + metadata.hpp + output_config.hpp + output_key.hpp + output_stream.hpp + statistics.hpp + timestamps.hpp + tmp_file_buffer.hpp + tmp_file.hpp) + +set(TOOL_OUTPUT_SOURCES + csv_output_file.cpp + domain_type.cpp + format_path.cpp + generateCSV.cpp + generateJSON.cpp + generateOTF2.cpp + generatePerfetto.cpp + generateStats.cpp + metadata.cpp + output_config.cpp + output_key.cpp + output_stream.cpp 
+ statistics.cpp + tmp_file_buffer.cpp + tmp_file.cpp) + +add_library(rocprofiler-sdk-output-library STATIC) +add_library(rocprofiler-sdk::rocprofiler-sdk-output-library ALIAS + rocprofiler-sdk-output-library) +target_sources(rocprofiler-sdk-output-library PRIVATE ${TOOL_OUTPUT_SOURCES} + ${TOOL_OUTPUT_HEADERS}) +target_link_libraries( + rocprofiler-sdk-output-library + PRIVATE rocprofiler-sdk::rocprofiler-sdk-headers + rocprofiler-sdk::rocprofiler-sdk-build-flags + rocprofiler-sdk::rocprofiler-sdk-memcheck + rocprofiler-sdk::rocprofiler-sdk-common-library + rocprofiler-sdk::rocprofiler-sdk-cereal + rocprofiler-sdk::rocprofiler-sdk-perfetto + rocprofiler-sdk::rocprofiler-sdk-otf2) diff --git a/source/lib/rocprofiler-sdk-tool/generateJSON.hpp b/source/lib/output/agent_info.hpp similarity index 50% rename from source/lib/rocprofiler-sdk-tool/generateJSON.hpp rename to source/lib/output/agent_info.hpp index 2f01317677..1b4ef2c4ba 100644 --- a/source/lib/rocprofiler-sdk-tool/generateJSON.hpp +++ b/source/lib/output/agent_info.hpp @@ -22,27 +22,50 @@ #pragma once -#include "helper.hpp" -#include "statistics.hpp" +#include +#include + +#include +#include +#include "rocprofiler-sdk/fwd.h" namespace rocprofiler { namespace tool { -void -write_json(tool_table* tool_functions, - uint64_t pid, - const domain_stats_vec_t& domain_stats, - std::vector agent_data, - std::vector counter_data, - std::deque* hip_api_deque, - std::deque* hsa_api_deque, - std::deque* kernel_dispatch_deque, - std::deque* memory_copy_deque, - std::deque* counter_collection_deque, - std::deque* marker_api_deque, - std::deque* scratch_memory_deque, - std::deque* rccl_api_deque); +struct agent_info : rocprofiler_agent_v0_t +{ + using base_type = rocprofiler_agent_v0_t; + agent_info(base_type _base) + : base_type{_base} + {} + + ~agent_info() = default; + agent_info(const agent_info&) = default; + agent_info(agent_info&&) noexcept = default; + agent_info& operator=(const agent_info&) = default; + 
agent_info& operator=(agent_info&&) noexcept = default; + + int64_t gpu_index = + (base_type::type == ROCPROFILER_AGENT_TYPE_GPU) ? base_type::logical_node_type_id : -1; +}; + +using agent_info_vec_t = std::vector; +using agent_info_map_t = std::unordered_map; } // namespace tool } // namespace rocprofiler + +namespace cereal +{ +#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD)) + +template +void +save(ArchiveT& ar, const ::rocprofiler::tool::agent_info& data) +{ + cereal::save(ar, static_cast(data)); +} + +#undef SAVE_DATA_FIELD +} // namespace cereal diff --git a/source/lib/rocprofiler-sdk-tool/buffered_output.hpp b/source/lib/output/buffered_output.hpp similarity index 55% rename from source/lib/rocprofiler-sdk-tool/buffered_output.hpp rename to source/lib/output/buffered_output.hpp index cb144ad4ac..7245e03dc5 100644 --- a/source/lib/rocprofiler-sdk-tool/buffered_output.hpp +++ b/source/lib/output/buffered_output.hpp @@ -22,7 +22,8 @@ #pragma once -#include "helper.hpp" +#include "counter_info.hpp" +#include "generator.hpp" #include "statistics.hpp" #include "tmp_file_buffer.hpp" @@ -31,6 +32,8 @@ #include +#include + namespace rocprofiler { namespace tool @@ -41,25 +44,27 @@ using stats_data_t = statistics; template struct buffered_output { - using ring_buffer_type = rocprofiler::common::container::ring_buffer; + using type = Tp; static constexpr auto buffer_type_v = DomainT; explicit buffered_output(bool _enabled); ~buffered_output() = default; buffered_output(const buffered_output&) = delete; buffered_output(buffered_output&&) noexcept = delete; - buffered_output& operator=(const buffered_output&) = default; - buffered_output& operator=(buffered_output&&) noexcept = default; + buffered_output& operator=(const buffered_output&) = delete; + buffered_output& operator=(buffered_output&&) noexcept = delete; + + operator bool() const { return enabled; } void flush(); void read(); void clear(); void destroy(); - operator bool() const { return enabled; 
} + generator get_generator() const { return generator{get_tmp_file_buffer(DomainT)}; } + std::deque load_all(); - std::deque element_data = {}; - stats_entry_t stats = {}; + stats_entry_t stats = {}; private: bool enabled = false; @@ -76,7 +81,7 @@ buffered_output::flush() { if(!enabled) return; - flush_tmp_buffer(buffer_type_v); + flush_tmp_buffer(buffer_type_v); } template @@ -87,7 +92,26 @@ buffered_output::read() flush(); - element_data = get_buffer_elements(read_tmp_file(buffer_type_v)); + read_tmp_file(buffer_type_v); +} + +template +std::deque +buffered_output::load_all() +{ + auto data = std::deque{}; + if(enabled) + { + auto gen = get_generator(); + for(auto ditr : gen) + { + for(auto itr : gen.get(ditr)) + { + data.emplace_back(itr); + } + } + } + return data; } template @@ -95,8 +119,6 @@ void buffered_output::clear() { if(!enabled) return; - - element_data.clear(); } template @@ -106,10 +128,30 @@ buffered_output::destroy() if(!enabled) return; clear(); - auto [_tmp_buf, _tmp_file] = get_tmp_file_buffer(buffer_type_v); - _tmp_buf->destroy(); - delete _tmp_buf; - delete _tmp_file; + auto*& filebuf = get_tmp_file_buffer(buffer_type_v); + file_buffer* tmp = nullptr; + std::swap(filebuf, tmp); + tmp->buffer.destroy(); + delete tmp; } + +using hip_buffered_output_t = + buffered_output; +using hsa_buffered_output_t = + buffered_output; +using kernel_dispatch_buffered_output_t = + buffered_output; +using memory_copy_buffered_output_t = + buffered_output; +using marker_buffered_output_t = + buffered_output; +using rccl_buffered_output_t = + buffered_output; +using counter_collection_buffered_output_t = + buffered_output; +using scratch_memory_buffered_output_t = + buffered_output; } // namespace tool } // namespace rocprofiler diff --git a/source/lib/output/counter_info.hpp b/source/lib/output/counter_info.hpp new file mode 100644 index 0000000000..556abc40b2 --- /dev/null +++ b/source/lib/output/counter_info.hpp @@ -0,0 +1,125 @@ +// MIT License +// +// 
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "lib/common/logging.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +constexpr uint32_t lds_block_size = 128 * 4; + +using counter_dimension_id_vec_t = std::vector; +using counter_dimension_info_vec_t = std::vector; + +struct tool_counter_info : rocprofiler_counter_info_v0_t +{ + using parent_type = rocprofiler_counter_info_v0_t; + + tool_counter_info(rocprofiler_agent_id_t _agent_id, + parent_type _info, + counter_dimension_id_vec_t&& _dim_ids, + counter_dimension_info_vec_t&& _dim_info) + : parent_type{_info} + , agent_id{_agent_id} + , dimension_ids{std::move(_dim_ids)} + , dimensions{std::move(_dim_info)} + {} + + ~tool_counter_info() = default; + tool_counter_info(const tool_counter_info&) = default; + tool_counter_info(tool_counter_info&&) noexcept = default; + tool_counter_info& operator=(const tool_counter_info&) = default; + tool_counter_info& operator=(tool_counter_info&&) noexcept = default; + + rocprofiler_agent_id_t agent_id = {}; + counter_dimension_id_vec_t dimension_ids = {}; + counter_dimension_info_vec_t dimensions = {}; +}; + +using counter_info_vec_t = std::vector; +using agent_counter_info_map_t = std::unordered_map; + +struct tool_counter_value_t +{ + rocprofiler_counter_id_t id = {}; + double value = 0; + + template + void save(ArchiveT& ar) const + { + ar(cereal::make_nvp("counter_id", id)); + ar(cereal::make_nvp("value", value)); + } +}; + +struct tool_counter_record_t +{ + static constexpr size_t max_capacity = 512; + + uint64_t thread_id = 0; + rocprofiler_dispatch_counting_service_data_t dispatch_data = {}; + std::array records = {}; + uint64_t counter_count = 0; + + template + void save(ArchiveT& ar) const + { + // should be removed when moving to buffered tracing + auto tmp = + std::vector{records.begin(), records.begin() + counter_count}; + + ar(cereal::make_nvp("thread_id", thread_id)); + 
ar(cereal::make_nvp("dispatch_data", dispatch_data)); + ar(cereal::make_nvp("records", tmp)); + } +}; +} // namespace tool +} // namespace rocprofiler + +namespace cereal +{ +#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD)) + +template +void +save(ArchiveT& ar, const ::rocprofiler::tool::tool_counter_info& data) +{ + SAVE_DATA_FIELD(agent_id); + cereal::save(ar, static_cast(data)); + SAVE_DATA_FIELD(dimension_ids); +} + +#undef SAVE_DATA_FIELD +} // namespace cereal diff --git a/source/lib/rocprofiler-sdk-tool/csv.hpp b/source/lib/output/csv.hpp similarity index 100% rename from source/lib/rocprofiler-sdk-tool/csv.hpp rename to source/lib/output/csv.hpp diff --git a/source/lib/rocprofiler-sdk-tool/tmp_file_buffer.cpp b/source/lib/output/csv_output_file.cpp similarity index 73% rename from source/lib/rocprofiler-sdk-tool/tmp_file_buffer.cpp rename to source/lib/output/csv_output_file.cpp index 1fa0a7c1e8..ff0cd74d38 100644 --- a/source/lib/rocprofiler-sdk-tool/tmp_file_buffer.cpp +++ b/source/lib/output/csv_output_file.cpp @@ -20,17 +20,26 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-#include "tmp_file_buffer.hpp" +#include "csv_output_file.hpp" +#include "lib/common/filesystem.hpp" +#include "lib/common/logging.hpp" + +#include #include -#include +#include +#include -std::string -compose_tmp_file_name(domain_type buffer_type) +namespace rocprofiler { - return rocprofiler::tool::format(fmt::format("{}/.rocprofv3/{}-{}.dat", - rocprofiler::tool::get_config().tmp_directory, - "%ppid%-%pid%", - get_domain_file_name(buffer_type))); +namespace tool +{ +csv_output_file::~csv_output_file() +{ + if(m_os.stream) ROCP_INFO << "Closing result file: " << m_name; + + m_os.close(); } +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/output_file.hpp b/source/lib/output/csv_output_file.hpp similarity index 55% rename from source/lib/rocprofiler-sdk-tool/output_file.hpp rename to source/lib/output/csv_output_file.hpp index c2f7ce273b..6a8d5372fd 100644 --- a/source/lib/rocprofiler-sdk-tool/output_file.hpp +++ b/source/lib/output/csv_output_file.hpp @@ -22,8 +22,9 @@ #pragma once -#include "config.hpp" #include "csv.hpp" +#include "domain_type.hpp" +#include "output_stream.hpp" #include "lib/common/filesystem.hpp" @@ -40,59 +41,24 @@ namespace rocprofiler { namespace tool { -using ostream_dtor_t = void (*)(std::ostream*&); - -using output_stream_pair_t = std::pair; - -struct output_stream_t -{ - output_stream_t() = default; - output_stream_t(std::ostream* _os, ostream_dtor_t _dtor) - : stream{_os} - , dtor{_dtor} - {} - - ~output_stream_t() { close(); } - output_stream_t(const output_stream_t&) = delete; - output_stream_t(output_stream_t&&) noexcept = default; - output_stream_t& operator=(const output_stream_t&) = delete; - output_stream_t& operator=(output_stream_t&&) noexcept = default; - - explicit operator bool() const { return stream != nullptr; } - - template - std::ostream& operator<<(Tp&& value) - { - return ((stream) ? 
*stream : std::cerr) << std::forward(value) << std::flush; - } - - void close() - { - if(stream) (*stream) << std::flush; - if(dtor) dtor(stream); - } - - bool writes_to_file() const { return (dynamic_cast(stream) != nullptr); } - - std::ostream* stream = nullptr; - ostream_dtor_t dtor = nullptr; -}; - -std::string -get_output_filename(std::string_view fname, std::string_view ext); - -output_stream_t -get_output_stream(std::string_view fname, std::string_view ext); - -struct output_file +struct csv_output_file { template - output_file(std::string name, csv::csv_encoder, std::array&& header); + csv_output_file(const output_config& cfg, + std::string_view name, + csv::csv_encoder, + std::array&& header); - ~output_file(); + template + csv_output_file(const output_config& cfg, + domain_type domain, + csv::csv_encoder, + std::array&& header); - output_file(const output_file&) = delete; - output_file& operator=(const output_file&) = delete; + ~csv_output_file(); + + csv_output_file(const csv_output_file&) = delete; + csv_output_file& operator=(const csv_output_file&) = delete; std::string name() const { return m_name; } @@ -108,15 +74,16 @@ struct output_file private: const std::string m_name = {}; std::mutex m_mutex = {}; - output_stream_t m_os = {}; + output_stream m_os = {}; }; template -output_file::output_file(std::string name, - csv::csv_encoder encoder, - std::array&& header) -: m_name{std::move(name)} -, m_os{get_output_stream(m_name, ".csv")} +csv_output_file::csv_output_file(const output_config& cfg, + std::string_view name, + csv::csv_encoder encoder, + std::array&& header) +: m_name{std::string{name}} +, m_os{get_output_stream(cfg, m_name, ".csv")} { for(auto& itr : header) { @@ -127,5 +94,13 @@ output_file::output_file(std::string name, // write the csv header if(m_os.stream) encoder.write_row(*m_os.stream, header); } + +template +csv_output_file::csv_output_file(const output_config& cfg, + domain_type domain, + csv::csv_encoder encoder, + std::array&& 
header) +: csv_output_file{cfg, get_domain_trace_file_name(domain), encoder, std::move(header)} +{} } // namespace tool } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/domain_type.cpp b/source/lib/output/domain_type.cpp similarity index 57% rename from source/lib/rocprofiler-sdk-tool/domain_type.cpp rename to source/lib/output/domain_type.cpp index 6686b6936a..d2fa6692d6 100644 --- a/source/lib/rocprofiler-sdk-tool/domain_type.cpp +++ b/source/lib/output/domain_type.cpp @@ -29,33 +29,51 @@ namespace template struct domain_type_name; -#define DEFINE_BUFFER_TYPE_NAME(ENUM_VALUE, COLUMN_NAME, FILENAME) \ +#define DEFINE_BUFFER_TYPE_NAME(ENUM_VALUE, COLUMN_NAME, TRACE_FILENAME, STATS_FILENAME) \ template <> \ struct domain_type_name \ { \ - static constexpr auto column_name = COLUMN_NAME; \ - static constexpr auto filename = FILENAME; \ + static constexpr auto column_name = COLUMN_NAME; \ + static constexpr auto trace_filename = TRACE_FILENAME; \ + static constexpr auto stats_filename = STATS_FILENAME; \ }; -DEFINE_BUFFER_TYPE_NAME(HSA, "HSA_API", "hsa_api") -DEFINE_BUFFER_TYPE_NAME(HIP, "HIP_API", "hip_api") -DEFINE_BUFFER_TYPE_NAME(MARKER, "MARKER_API", "marker_api") -DEFINE_BUFFER_TYPE_NAME(KERNEL_DISPATCH, "KERNEL_DISPATCH", "kernel_dispatch") -DEFINE_BUFFER_TYPE_NAME(MEMORY_COPY, "MEMORY_COPY", "memory_copy") -DEFINE_BUFFER_TYPE_NAME(SCRATCH_MEMORY, "SCRATCH_MEMORY", "scratch_memory") -DEFINE_BUFFER_TYPE_NAME(COUNTER_COLLECTION, "COUNTER_COLLECTION", "counter_collection") -DEFINE_BUFFER_TYPE_NAME(RCCL, "RCCL_API", "rccl_api") +DEFINE_BUFFER_TYPE_NAME(HSA, "HSA_API", "hsa_api_trace", "hsa_api_stats") +DEFINE_BUFFER_TYPE_NAME(HIP, "HIP_API", "hip_api_trace", "hip_api_stats") +DEFINE_BUFFER_TYPE_NAME(MARKER, "MARKER_API", "marker_api_trace", "marker_api_stats") +DEFINE_BUFFER_TYPE_NAME(KERNEL_DISPATCH, "KERNEL_DISPATCH", "kernel_trace", "kernel_stats") +DEFINE_BUFFER_TYPE_NAME(MEMORY_COPY, "MEMORY_COPY", "memory_copy_trace", 
"memory_copy_stats") +DEFINE_BUFFER_TYPE_NAME(SCRATCH_MEMORY, + "SCRATCH_MEMORY", + "scratch_memory_trace", + "scratch_memory_stats") +DEFINE_BUFFER_TYPE_NAME(COUNTER_COLLECTION, + "COUNTER_COLLECTION", + "counter_collection", + "counter_collection_stats") +DEFINE_BUFFER_TYPE_NAME(RCCL, "RCCL_API", "rccl_api_trace", "rccl_api_stats") #undef DEFINE_BUFFER_TYPE_NAME template std::string_view -get_domain_file_name(domain_type _buffer_type, std::index_sequence) +get_domain_trace_file_name(domain_type _buffer_type, std::index_sequence) { if(static_cast(_buffer_type) == Idx) - return domain_type_name(Idx)>::filename; + return domain_type_name(Idx)>::trace_filename; if constexpr(sizeof...(TailIdx) > 0) - return get_domain_file_name(_buffer_type, std::index_sequence{}); + return get_domain_trace_file_name(_buffer_type, std::index_sequence{}); + return std::string_view{}; +} + +template +std::string_view +get_domain_stats_file_name(domain_type _buffer_type, std::index_sequence) +{ + if(static_cast(_buffer_type) == Idx) + return domain_type_name(Idx)>::stats_filename; + if constexpr(sizeof...(TailIdx) > 0) + return get_domain_stats_file_name(_buffer_type, std::index_sequence{}); return std::string_view{}; } @@ -73,11 +91,19 @@ get_domain_column_name(domain_type buffer_type, std::index_sequence(domain_type::LAST); - return get_domain_file_name(_buffer_type, std::make_index_sequence{}); + return get_domain_trace_file_name(_buffer_type, std::make_index_sequence{}); +} + +std::string_view +get_domain_stats_file_name(domain_type _buffer_type) +{ + constexpr auto buffer_type_last_v = static_cast(domain_type::LAST); + + return get_domain_stats_file_name(_buffer_type, std::make_index_sequence{}); } std::string_view diff --git a/source/lib/rocprofiler-sdk-tool/domain_type.hpp b/source/lib/output/domain_type.hpp similarity index 93% rename from source/lib/rocprofiler-sdk-tool/domain_type.hpp rename to source/lib/output/domain_type.hpp index 15bf4c0dcb..25780b8bd3 100644 --- 
a/source/lib/rocprofiler-sdk-tool/domain_type.hpp +++ b/source/lib/output/domain_type.hpp @@ -38,7 +38,10 @@ enum class domain_type }; std::string_view -get_domain_file_name(domain_type val); +get_domain_trace_file_name(domain_type val); + +std::string_view +get_domain_stats_file_name(domain_type val); std::string_view get_domain_column_name(domain_type _buffer_type); diff --git a/source/lib/output/format_path.cpp b/source/lib/output/format_path.cpp new file mode 100644 index 0000000000..9132c83e64 --- /dev/null +++ b/source/lib/output/format_path.cpp @@ -0,0 +1,172 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// + +#include "format_path.hpp" + +#include "lib/common/defines.hpp" +#include "lib/common/demangle.hpp" +#include "lib/common/environment.hpp" +#include "lib/common/filesystem.hpp" +#include "lib/common/logging.hpp" +#include "lib/common/units.hpp" +#include "lib/common/utility.hpp" +#include "lib/output/output_key.hpp" + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +namespace +{ +const auto env_regexes = + new std::array{std::regex{"(.*)%(env|ENV)\\{([A-Z0-9_]+)\\}%(.*)"}, + std::regex{"(.*)\\$(env|ENV)\\{([A-Z0-9_]+)\\}(.*)"}, + std::regex{"(.*)%q\\{([A-Z0-9_]+)\\}(.*)"}}; +// env regex examples: +// - %env{USER}% Consistent with other output key formats (start+end with %) +// - $ENV{USER} Similar to CMake +// - %q{USER} Compatibility with NVIDIA +// + +std::string +format_path_impl(std::string _fpath, const std::vector& _keys) +{ + if(_fpath.find('%') == std::string::npos && _fpath.find('$') == std::string::npos) + return _fpath; + + auto _replace = [](auto& _v, const output_key& pitr) { + auto pos = std::string::npos; + while((pos = _v.find(pitr.key)) != std::string::npos) + _v.replace(pos, pitr.key.length(), pitr.value); + }; + + for(auto&& itr : _keys) + _replace(_fpath, itr); + + // environment and configuration variables + try + { + auto strip_leading_and_replace = + [](std::string_view inp_v, std::initializer_list keys, const char* val) { + auto inp = std::string{inp_v}; + for(auto key : keys) + { + auto pos = std::string::npos; + while((pos = inp.find(key)) == 0) + inp = inp.substr(pos + 1); + + while((pos = inp.find(key)) != std::string::npos) + inp = inp.replace(pos, 1, val); + } + return inp; + }; + + for(const auto& _re : *env_regexes) + { + while(std::regex_search(_fpath, _re)) + { + auto _var = std::regex_replace(_fpath, _re, "$3"); + std::string _val = common::get_env(_var, ""); + _val = 
strip_leading_and_replace(_val, {'\t', ' ', '/'}, "_"); + auto _beg = std::regex_replace(_fpath, _re, "$1"); + auto _end = std::regex_replace(_fpath, _re, "$4"); + _fpath = fmt::format("{}{}{}", _beg, _val, _end); + } + } + } catch(std::exception& _e) + { + ROCP_WARNING << "[rocprofiler] " << __FUNCTION__ << " threw an exception :: " << _e.what() + << "\n"; + } + + // remove %arg% where N >= argc + try + { + std::regex _re{"(.*)%(arg[0-9]+)%([-/_]*)(.*)"}; + while(std::regex_search(_fpath, _re)) + _fpath = std::regex_replace(_fpath, _re, "$1$4"); + } catch(std::exception& _e) + { + ROCP_WARNING << "[rocprofiler] " << __FUNCTION__ << " threw an exception :: " << _e.what() + << "\n"; + } + + return _fpath; +} + +std::string +format_path(std::string&& _fpath, const std::vector& _keys) +{ + if(_fpath.find('%') == std::string::npos && _fpath.find('$') == std::string::npos) + return _fpath; + + auto _ref = _fpath; + _fpath = format_path_impl(std::move(_fpath), _keys); + + return (_fpath == _ref) ? _fpath : format_path(std::move(_fpath), _keys); +} +} // namespace + +int +get_mpi_size() +{ + static int _v = common::get_env( + "OMPI_COMM_WORLD_SIZE", + common::get_env("MV2_COMM_WORLD_SIZE", common::get_env("MPI_SIZE", 0))); + return _v; +} + +int +get_mpi_rank() +{ + static int _v = common::get_env( + "OMPI_COMM_WORLD_RANK", + common::get_env("MV2_COMM_WORLD_RANK", common::get_env("MPI_RANK", -1))); + return _v; +} + +std::string +format_path(std::string _fpath, const std::string& _tag) +{ + return format_path(std::move(_fpath), output_keys(_tag)); +} +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/output/format_path.hpp b/source/lib/output/format_path.hpp new file mode 100644 index 0000000000..6e73ed6039 --- /dev/null +++ b/source/lib/output/format_path.hpp @@ -0,0 +1,53 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +#pragma once + +#include "lib/common/environment.hpp" +#include "lib/common/filesystem.hpp" +#include "lib/common/mpl.hpp" + +#include + +#include + +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +int +get_mpi_size(); + +int +get_mpi_rank(); + +std::string +format_path(std::string _fpath, const std::string& _tag = {}); +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/output/generateCSV.cpp b/source/lib/output/generateCSV.cpp new file mode 100644 index 0000000000..7f588c78f2 --- /dev/null +++ b/source/lib/output/generateCSV.cpp @@ -0,0 +1,714 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "generateCSV.hpp" +#include "csv.hpp" +#include "csv_output_file.hpp" +#include "domain_type.hpp" +#include "generateStats.hpp" +#include "output_config.hpp" +#include "output_stream.hpp" +#include "statistics.hpp" +#include "timestamps.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +namespace +{ +tool::csv_output_file +get_stats_output_file(const output_config& cfg, std::string_view name) +{ + return tool::csv_output_file{cfg, + name, + tool::csv::stats_csv_encoder{}, + { + "Name", + "Calls", + "TotalDurationNs", + "AverageNs", + "Percentage", + "MinNs", + "MaxNs", + "StdDev", + }}; +} + +tool::csv_output_file +get_stats_output_file(const output_config& cfg, domain_type domain) +{ + return get_stats_output_file(cfg, get_domain_stats_file_name(domain)); +} + +void +write_stats(tool::csv_output_file&& ofs, const stats_entry_vec_t& data_v) +{ + auto data = stats_entry_vec_t{}; + auto _duration = stats_data_t{}; + for(const auto& [id, value] : data_v) + { + data.emplace_back(id, value); + _duration += value; + } + + std::sort(data.begin(), data.end(), [](const auto& lhs, const auto& rhs) { + return (lhs.second.get_sum() > rhs.second.get_sum()); + }); + + constexpr float_type one_hundred = 100.0; + + const float_type _total_duration = _duration.get_sum(); + for(const auto& [name, value] : data) + { + auto duration_ns = value.get_sum(); + auto calls = value.get_count(); + float_type avg_ns = value.get_mean(); + float_type percent_v = (duration_ns / _total_duration) * one_hundred; + + auto _row = std::stringstream{}; + rocprofiler::tool::csv::stats_csv_encoder::write_row(_row, + name, + calls, + duration_ns, + avg_ns, + percentage{percent_v}, + value.get_min(), + value.get_max(), + value.get_stddev()); + ofs << _row.str() << std::flush; + } +} +} // namespace + +void +generate_csv(const output_config& cfg, + const metadata& /*tool_metadata*/, + std::vector& data) +{ + 
if(data.empty()) return; + + std::sort(data.begin(), data.end(), [](const agent_info& lhs, const agent_info& rhs) { + return lhs.node_id < rhs.node_id; + }); + + auto ofs = tool::csv_output_file{cfg, + "agent_info", + tool::csv::agent_info_csv_encoder{}, + {"Node_Id", + "Logical_Node_Id", + "Agent_Type", + "Cpu_Cores_Count", + "Simd_Count", + "Cpu_Core_Id_Base", + "Simd_Id_Base", + "Max_Waves_Per_Simd", + "Lds_Size_In_Kb", + "Gds_Size_In_Kb", + "Num_Gws", + "Wave_Front_Size", + "Num_Xcc", + "Cu_Count", + "Array_Count", + "Num_Shader_Banks", + "Simd_Arrays_Per_Engine", + "Cu_Per_Simd_Array", + "Simd_Per_Cu", + "Max_Slots_Scratch_Cu", + "Gfx_Target_Version", + "Vendor_Id", + "Device_Id", + "Location_Id", + "Domain", + "Drm_Render_Minor", + "Num_Sdma_Engines", + "Num_Sdma_Xgmi_Engines", + "Num_Sdma_Queues_Per_Engine", + "Num_Cp_Queues", + "Max_Engine_Clk_Ccompute", + "Max_Engine_Clk_Fcompute", + "Sdma_Fw_Version", + "Fw_Version", + "Capability", + "Cu_Per_Engine", + "Max_Waves_Per_Cu", + "Family_Id", + "Workgroup_Max_Size", + "Grid_Max_Size", + "Local_Mem_Size", + "Hive_Id", + "Gpu_Id", + "Workgroup_Max_Dim_X", + "Workgroup_Max_Dim_Y", + "Workgroup_Max_Dim_Z", + "Grid_Max_Dim_X", + "Grid_Max_Dim_Y", + "Grid_Max_Dim_Z", + "Name", + "Vendor_Name", + "Product_Name", + "Model_Name"}}; + + for(auto& itr : data) + { + auto _type = std::string_view{}; + if(itr.type == ROCPROFILER_AGENT_TYPE_CPU) + _type = "CPU"; + else if(itr.type == ROCPROFILER_AGENT_TYPE_GPU) + _type = "GPU"; + else + _type = "UNK"; + + auto row_ss = std::stringstream{}; + rocprofiler::tool::csv::agent_info_csv_encoder::write_row(row_ss, + itr.node_id, + itr.logical_node_id, + _type, + itr.cpu_cores_count, + itr.simd_count, + itr.cpu_core_id_base, + itr.simd_id_base, + itr.max_waves_per_simd, + itr.lds_size_in_kb, + itr.gds_size_in_kb, + itr.num_gws, + itr.wave_front_size, + itr.num_xcc, + itr.cu_count, + itr.array_count, + itr.num_shader_banks, + itr.simd_arrays_per_engine, + itr.cu_per_simd_array, + 
itr.simd_per_cu, + itr.max_slots_scratch_cu, + itr.gfx_target_version, + itr.vendor_id, + itr.device_id, + itr.location_id, + itr.domain, + itr.drm_render_minor, + itr.num_sdma_engines, + itr.num_sdma_xgmi_engines, + itr.num_sdma_queues_per_engine, + itr.num_cp_queues, + itr.max_engine_clk_ccompute, + itr.max_engine_clk_fcompute, + itr.sdma_fw_version.Value, + itr.fw_version.Value, + itr.capability.Value, + itr.cu_per_engine, + itr.max_waves_per_cu, + itr.family_id, + itr.workgroup_max_size, + itr.grid_max_size, + itr.local_mem_size, + itr.hive_id, + itr.gpu_id, + itr.workgroup_max_dim.x, + itr.workgroup_max_dim.y, + itr.workgroup_max_dim.z, + itr.grid_max_dim.x, + itr.grid_max_dim.y, + itr.grid_max_dim.z, + itr.name, + itr.vendor_name, + itr.product_name, + itr.model_name); + ofs << row_ss.str(); + } +} + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) +{ + if(data.empty()) return; + + if(cfg.stats && stats) + write_stats(get_stats_output_file(cfg, domain_type::KERNEL_DISPATCH), stats.entries); + + auto ofs = tool::csv_output_file{cfg, + domain_type::KERNEL_DISPATCH, + tool::csv::kernel_trace_csv_encoder{}, + {"Kind", + "Agent_Id", + "Queue_Id", + "Thread_Id", + "Dispatch_Id", + "Kernel_Id", + "Kernel_Name", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp", + "Private_Segment_Size", + "Group_Segment_Size", + "Workgroup_Size_X", + "Workgroup_Size_Y", + "Workgroup_Size_Z", + "Grid_Size_X", + "Grid_Size_Y", + "Grid_Size_Z"}}; + + for(auto ditr : data) + { + for(auto record : data.get(ditr)) + { + auto row_ss = std::stringstream{}; + auto kernel_name = tool_metadata.get_kernel_name(record.dispatch_info.kernel_id, + record.correlation_id.external.value); + rocprofiler::tool::csv::kernel_trace_csv_encoder::write_row( + row_ss, + tool_metadata.get_kind_name(record.kind), + tool_metadata.get_node_id(record.dispatch_info.agent_id), + record.dispatch_info.queue_id.handle, + 
record.thread_id, + record.dispatch_info.dispatch_id, + record.dispatch_info.kernel_id, + kernel_name, + record.correlation_id.internal, + record.start_timestamp, + record.end_timestamp, + record.dispatch_info.private_segment_size, + record.dispatch_info.group_segment_size, + record.dispatch_info.workgroup_size.x, + record.dispatch_info.workgroup_size.y, + record.dispatch_info.workgroup_size.z, + record.dispatch_info.grid_size.x, + record.dispatch_info.grid_size.y, + record.dispatch_info.grid_size.z); + + ofs << row_ss.str(); + } + } +} + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) +{ + if(data.empty()) return; + + if(cfg.stats && stats) write_stats(get_stats_output_file(cfg, domain_type::HIP), stats.entries); + + auto ofs = tool::csv_output_file{cfg, + domain_type::HIP, + tool::csv::api_csv_encoder{}, + {"Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + for(auto ditr : data) + { + for(auto record : data.get(ditr)) + { + auto row_ss = std::stringstream{}; + auto api_name = tool_metadata.get_operation_name(record.kind, record.operation); + rocprofiler::tool::csv::api_csv_encoder::write_row( + row_ss, + tool_metadata.get_kind_name(record.kind), + api_name, + tool_metadata.process_id, + record.thread_id, + record.correlation_id.internal, + record.start_timestamp, + record.end_timestamp); + + ofs << row_ss.str(); + } + } +} + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) +{ + if(data.empty()) return; + + if(cfg.stats && stats) write_stats(get_stats_output_file(cfg, domain_type::HSA), stats.entries); + + auto ofs = tool::csv_output_file{cfg, + domain_type::HSA, + tool::csv::api_csv_encoder{}, + {"Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + + for(auto ditr 
: data) + { + for(auto record : data.get(ditr)) + { + auto row_ss = std::stringstream{}; + auto api_name = tool_metadata.get_operation_name(record.kind, record.operation); + rocprofiler::tool::csv::api_csv_encoder::write_row( + row_ss, + tool_metadata.get_kind_name(record.kind), + api_name, + tool_metadata.process_id, + record.thread_id, + record.correlation_id.internal, + record.start_timestamp, + record.end_timestamp); + + ofs << row_ss.str(); + } + } +} + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) +{ + if(data.empty()) return; + + if(cfg.stats && stats) + write_stats(get_stats_output_file(cfg, domain_type::MEMORY_COPY), stats.entries); + + auto ofs = tool::csv_output_file{cfg, + domain_type::MEMORY_COPY, + tool::csv::memory_copy_csv_encoder{}, + {"Kind", + "Direction", + "Source_Agent_Id", + "Destination_Agent_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + for(auto ditr : data) + { + for(auto record : data.get(ditr)) + { + auto row_ss = std::stringstream{}; + auto api_name = tool_metadata.get_operation_name(record.kind, record.operation); + rocprofiler::tool::csv::memory_copy_csv_encoder::write_row( + row_ss, + tool_metadata.get_kind_name(record.kind), + api_name, + tool_metadata.get_node_id(record.src_agent_id), + tool_metadata.get_node_id(record.dst_agent_id), + record.correlation_id.internal, + record.start_timestamp, + record.end_timestamp); + + ofs << row_ss.str(); + } + } +} + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) +{ + if(data.empty()) return; + + if(cfg.stats && stats) + write_stats(get_stats_output_file(cfg, domain_type::MARKER), stats.entries); + + auto ofs = tool::csv_output_file{cfg, + domain_type::MARKER, + tool::csv::marker_csv_encoder{}, + {"Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + 
"End_Timestamp"}}; + for(auto ditr : data) + { + for(auto record : data.get(ditr)) + { + auto row_ss = std::stringstream{}; + auto _name = std::string_view{}; + + if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && + (record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA || + record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA || + record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA)) + { + _name = tool_metadata.get_marker_message(record.correlation_id.internal); + } + else + { + _name = tool_metadata.get_operation_name(record.kind, record.operation); + } + + tool::csv::marker_csv_encoder::write_row(row_ss, + tool_metadata.get_kind_name(record.kind), + _name, + tool_metadata.process_id, + record.thread_id, + record.correlation_id.internal, + record.start_timestamp, + record.end_timestamp); + + ofs << row_ss.str(); + } + } +} + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) +{ + if(data.empty()) return; + + if(cfg.stats && stats) + write_stats(get_stats_output_file(cfg, domain_type::COUNTER_COLLECTION), stats.entries); + + auto ofs = tool::csv_output_file{cfg, + domain_type::COUNTER_COLLECTION, + tool::csv::counter_collection_csv_encoder{}, + {"Correlation_Id", + "Dispatch_Id", + "Agent_Id", + "Queue_Id", + "Process_Id", + "Thread_Id", + "Grid_Size", + "Kernel_Id", + "Kernel_Name", + "Workgroup_Size", + "LDS_Block_Size", + "Scratch_Size", + "VGPR_Count", + "SGPR_Count", + "Counter_Name", + "Counter_Value", + "Start_Timestamp", + "End_Timestamp"}}; + for(auto ditr : data) + { + for(auto record : data.get(ditr)) + { + auto kernel_id = record.dispatch_data.dispatch_info.kernel_id; + auto counter_name_value = std::map{}; + for(uint64_t i = 0; i < record.counter_count; i++) + { + const auto& rec = record.records.at(i); + std::string_view counter_name = tool_metadata.get_counter_info(rec.id)->name; + auto search = 
counter_name_value.find(counter_name); + if(search == counter_name_value.end()) + counter_name_value.emplace( + std::pair{counter_name, rec.value}); + else + search->second += rec.value; + } + + const auto& correlation_id = record.dispatch_data.correlation_id; + const auto* kernel_info = tool_metadata.get_kernel_symbol(kernel_id); + auto lds_block_size_v = + (kernel_info->group_segment_size + (lds_block_size - 1)) & ~(lds_block_size - 1); + + auto magnitude = [](rocprofiler_dim3_t dims) { return (dims.x * dims.y * dims.z); }; + auto row_ss = std::stringstream{}; + for(auto& itr : counter_name_value) + { + tool::csv::counter_collection_csv_encoder::write_row( + row_ss, + correlation_id.internal, + record.dispatch_data.dispatch_info.dispatch_id, + tool_metadata.get_node_id(record.dispatch_data.dispatch_info.agent_id), + record.dispatch_data.dispatch_info.queue_id.handle, + tool_metadata.process_id, + record.thread_id, + magnitude(record.dispatch_data.dispatch_info.grid_size), + record.dispatch_data.dispatch_info.kernel_id, + tool_metadata.get_kernel_name(kernel_id, correlation_id.external.value), + magnitude(record.dispatch_data.dispatch_info.workgroup_size), + lds_block_size_v, + record.dispatch_data.dispatch_info.private_segment_size, + kernel_info->arch_vgpr_count, + kernel_info->sgpr_count, + itr.first, + itr.second, + record.dispatch_data.start_timestamp, + record.dispatch_data.end_timestamp); + } + ofs << row_ss.str(); + } + } +} + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) +{ + if(data.empty()) return; + + if(cfg.stats && stats) + write_stats(get_stats_output_file(cfg, domain_type::SCRATCH_MEMORY), stats.entries); + + auto ofs = tool::csv_output_file{cfg, + domain_type::SCRATCH_MEMORY, + tool::csv::scratch_memory_encoder{}, + { + "Kind", + "Operation", + "Agent_Id", + "Queue_Id", + "Thread_Id", + "Alloc_flags", + "Start_Timestamp", + "End_Timestamp", + }}; + + for(auto 
ditr : data) + { + for(auto record : data.get(ditr)) + { + auto row_ss = std::stringstream{}; + auto kind_name = tool_metadata.get_kind_name(record.kind); + auto op_name = tool_metadata.get_operation_name(record.kind, record.operation); + + tool::csv::scratch_memory_encoder::write_row(row_ss, + kind_name, + op_name, + tool_metadata.get_node_id(record.agent_id), + record.queue_id.handle, + record.thread_id, + record.flags, + record.start_timestamp, + record.end_timestamp); + + ofs << row_ss.str(); + } + } +} + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) +{ + if(data.empty()) return; + + if(cfg.stats && stats) + write_stats(get_stats_output_file(cfg, domain_type::RCCL), stats.entries); + + auto ofs = tool::csv_output_file{cfg, + domain_type::RCCL, + tool::csv::api_csv_encoder{}, + {"Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + for(auto ditr : data) + { + for(auto record : data.get(ditr)) + { + auto row_ss = std::stringstream{}; + auto api_name = tool_metadata.get_operation_name(record.kind, record.operation); + rocprofiler::tool::csv::api_csv_encoder::write_row( + row_ss, + tool_metadata.get_kind_name(record.kind), + api_name, + tool_metadata.process_id, + record.thread_id, + record.correlation_id.internal, + record.start_timestamp, + record.end_timestamp); + + ofs << row_ss.str(); + } + } +} + +void +generate_csv(const output_config& cfg, + const metadata& /*tool_metadata*/, + const domain_stats_vec_t& data_v) +{ + using csv_encoder_t = rocprofiler::tool::csv::stats_csv_encoder; + + if(!cfg.stats) return; + + auto _data = data_v; + auto _total_stats = stats_data_t{}; + for(const auto& itr : _data) + _total_stats += itr.second.total; + + if(_total_stats.get_count() == 0) return; + + std::sort(_data.begin(), _data.end(), [](const auto& lhs, const auto& rhs) { + return (lhs.second.total.get_sum() > 
rhs.second.total.get_sum()); + }); + + auto ofs = get_stats_output_file(cfg, "domain_stats"); + + const float_type _total_duration = _total_stats.get_sum(); + for(const auto& [type, value] : _data) + { + auto name = get_domain_column_name(type); + auto duration_ns = value.total.get_sum(); + auto calls = value.total.get_count(); + auto avg_ns = value.total.get_mean(); + auto percent_v = value.total.get_percent(_total_duration); + + auto _row = std::stringstream{}; + csv_encoder_t::write_row(_row, + name, + calls, + duration_ns, + avg_ns, + percentage{percent_v}, + value.total.get_min(), + value.total.get_max(), + value.total.get_stddev()); + ofs << _row.str() << std::flush; + } +} +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/output/generateCSV.hpp b/source/lib/output/generateCSV.hpp new file mode 100644 index 0000000000..20beb36a53 --- /dev/null +++ b/source/lib/output/generateCSV.hpp @@ -0,0 +1,95 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "generateStats.hpp" +#include "generator.hpp" +#include "output_config.hpp" +#include "statistics.hpp" + +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + std::vector& data); + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); + +void +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const domain_stats_vec_t& data); +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/output/generateJSON.cpp b/source/lib/output/generateJSON.cpp new file mode 100644 index 0000000000..98da33b4a0 --- /dev/null +++ 
b/source/lib/output/generateJSON.cpp @@ -0,0 +1,224 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "generateJSON.hpp" +#include "output_stream.hpp" +#include "statistics.hpp" +#include "timestamps.hpp" + +#include "lib/common/string_entry.hpp" +#include "lib/common/utility.hpp" + +#include +#include + +#include + +namespace rocprofiler +{ +namespace tool +{ +json_output::json_output(const output_config& cfg, + std::string_view filename, + JSONOutputArchive::Options _opts) +: stream{get_output_stream(cfg, filename, ".json")} +, archive{new JSONOutputArchive{*stream.stream, _opts}} +{ + archive->setNextName("rocprofiler-sdk-tool"); + archive->startNode(); + archive->makeArray(); +} + +json_output::~json_output() { close(); } + +void +json_output::close() +{ + if(archive && stream) + { + archive->finishNode(); + archive.reset(); + stream.close(); + } +} + +json_output +open_json(const output_config& cfg) +{ + constexpr auto json_prec = 16; + constexpr auto json_indent = JSONOutputArchive::Options::IndentChar::space; + auto json_opts = JSONOutputArchive::Options{json_prec, json_indent, 0}; + auto filename = std::string_view{"results"}; + + return json_output{cfg, filename, json_opts}; +} + +void +json_output::start_process() +{ + startNode(); +} + +void +json_output::finish_process() +{ + finishNode(); +} + +void +close_json(json_output& json_ar) +{ + json_ar.close(); +} + +void +write_json(json_output& json_ar, + const output_config& cfg, + const metadata& tool_metadata, + uint64_t pid) +{ + // metadata + { + auto timestamps = + timestamps_t{tool_metadata.process_start_ns, tool_metadata.process_end_ns}; + + json_ar.setNextName("metadata"); + json_ar.startNode(); + json_ar(cereal::make_nvp("pid", pid)); + json_ar(cereal::make_nvp("init_time", timestamps.app_start_time)); + json_ar(cereal::make_nvp("fini_time", timestamps.app_end_time)); + json_ar(cereal::make_nvp("config", cfg)); + json_ar(cereal::make_nvp("command", common::read_command_line(pid))); + json_ar.finishNode(); + } + + json_ar(cereal::make_nvp("agents", tool_metadata.agents)); + 
// Serialize one process' statistics summary and trace records into json_ar.
//
// Three named nodes are written, in this order:
//   "summary"          - array with one {"domain", "stats"} object per entry
//                        of domain_stats
//   "callback_records" - the counter collection generator
//   "buffer_records"   - the kernel dispatch, HIP, HSA, marker, RCCL, memory
//                        copy and scratch memory generators
//
// cfg and tool_metadata are accepted but not used by this overload.
//
// NOTE(review): hip_api_gen is taken by rvalue reference while every other
// generator is taken by value -- looks unintentional; confirm before relying
// on it.
void
write_json(json_output& json_ar,
           const output_config& /*cfg*/,
           const metadata& /*tool_metadata*/,
           const domain_stats_vec_t& domain_stats,
           generator&& hip_api_gen,
           generator hsa_api_gen,
           generator kernel_dispatch_gen,
           generator memory_copy_gen,
           generator counter_collection_gen,
           generator marker_api_gen,
           generator scratch_memory_gen,
           generator rccl_api_gen)

{
    // summary
    {
        json_ar.setNextName("summary");
        json_ar.startNode();
        json_ar.makeArray();

        for(const auto& itr : domain_stats)
        {
            // itr.first is the domain, itr.second its statistics
            auto _name = get_domain_column_name(itr.first);
            json_ar.startNode();

            json_ar(cereal::make_nvp("domain", std::string{_name}));
            json_ar(cereal::make_nvp("stats", itr.second));

            json_ar.finishNode();
        }

        json_ar.finishNode();
    }

    // records delivered through callbacks
    {
        json_ar.setNextName("callback_records");
        json_ar.startNode();
        json_ar(cereal::make_nvp("counter_collection", counter_collection_gen));
        json_ar.finishNode();
    }

    // records delivered through buffers
    {
        json_ar.setNextName("buffer_records");
        json_ar.startNode();
        json_ar(cereal::make_nvp("kernel_dispatch", kernel_dispatch_gen));
        json_ar(cereal::make_nvp("hip_api", hip_api_gen));
        json_ar(cereal::make_nvp("hsa_api", hsa_api_gen));
        json_ar(cereal::make_nvp("marker_api", marker_api_gen));
        json_ar(cereal::make_nvp("rccl_api", rccl_api_gen));
        json_ar(cereal::make_nvp("memory_copy", memory_copy_gen));
        json_ar(cereal::make_nvp("scratch_memory", scratch_memory_gen));
        json_ar.finishNode();
    }
}
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "agent_info.hpp" +#include "buffered_output.hpp" +#include "metadata.hpp" +#include "output_config.hpp" +#include "output_stream.hpp" +#include "statistics.hpp" + +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +using JSONOutputArchive = ::cereal::MinimalJSONOutputArchive; + +struct json_output +{ + json_output(const output_config& cfg, + std::string_view filename, + JSONOutputArchive::Options _opts); + ~json_output(); + + json_output(const json_output&) = delete; + json_output(json_output&&) noexcept = default; + json_output& operator=(const json_output&) = delete; + json_output& operator=(json_output&&) noexcept = default; + + template + decltype(auto) operator()(Args&&... 
args) + { + return (*archive)(std::forward(args)...); + } + + decltype(auto) startNode() { return archive->startNode(); } + decltype(auto) finishNode() { return archive->finishNode(); } + decltype(auto) makeArray() { return archive->makeArray(); } + decltype(auto) setNextName(const char* name) { archive->setNextName(name); } + + void start_process(); + void finish_process(); + + void close(); + +private: + output_stream stream = {}; + std::unique_ptr archive = {}; +}; + +json_output +open_json(const output_config& cfg); + +void +close_json(json_output& ar); + +void +write_json(json_output&, const output_config& cfg, const metadata& tool_metadata, uint64_t pid); + +void +write_json(json_output& json_ar, + const output_config& cfg, + const metadata& tool_metadata, + const domain_stats_vec_t& domain_stats, + generator&& hip_api_gen, + generator hsa_api_gen, + generator kernel_dispatch_gen, + generator memory_copy_gen, + generator counter_collection_gen, + generator marker_api_gen, + generator scratch_memory_gen, + generator rccl_api_gen); +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/generateOTF2.cpp b/source/lib/output/generateOTF2.cpp similarity index 96% rename from source/lib/rocprofiler-sdk-tool/generateOTF2.cpp rename to source/lib/output/generateOTF2.cpp index 2708af7f61..62af356bef 100644 --- a/source/lib/rocprofiler-sdk-tool/generateOTF2.cpp +++ b/source/lib/output/generateOTF2.cpp @@ -21,9 +21,8 @@ // SOFTWARE. 
#include "generateOTF2.hpp" -#include "config.hpp" -#include "helper.hpp" -#include "output_file.hpp" +#include "output_stream.hpp" +#include "timestamps.hpp" #include "lib/common/filesystem.hpp" #include "lib/common/mpl.hpp" @@ -273,11 +272,11 @@ add_event(std::string_view name, } void -setup() +setup(const output_config& cfg) { namespace fs = common::filesystem; - auto _filename = get_output_filename("results", std::string_view{}); + auto _filename = get_output_filename(cfg, "results", std::string_view{}); auto _filepath = fs::path{_filename}; auto _name = _filepath.filename().string(); auto _path = _filepath.parent_path().string(); @@ -344,9 +343,10 @@ create_attribute_list() } // namespace void -write_otf2(tool_table* tool_functions, +write_otf2(const output_config& cfg, + const metadata& tool_metadata, uint64_t pid, - const std::vector& agent_data, + const std::vector& agent_data, std::deque* hip_api_data, std::deque* hsa_api_data, std::deque* kernel_dispatch_data, @@ -357,16 +357,14 @@ write_otf2(tool_table* tool { namespace sdk = ::rocprofiler::sdk; - setup(); + setup(cfg); - auto _app_ts = *tool_functions->tool_get_app_timestamps_fn(); - auto agents_map = std::unordered_map{}; - for(auto itr : agent_data) - agents_map.emplace(itr.id, itr); + auto _app_ts = timestamps_t{tool_metadata.process_start_ns, tool_metadata.process_end_ns}; + auto agents_map = tool_metadata.agents_map; - const auto kernel_sym_data = get_kernel_symbol_data(); - const auto buffer_names = sdk::get_buffer_tracing_names(); - auto tids = std::set{}; + const auto kernel_sym_data = tool_metadata.get_kernel_symbols(); + const auto& buffer_names = tool_metadata.buffer_names; + auto tids = std::set{}; auto agent_thread_ids = std::map>{}; auto agent_queue_ids = std::map const kernel_symbol_data* { + const rocprofiler_kernel_dispatch_info_t& _info) -> const kernel_symbol_info* { for(const auto& kitr : kernel_sym_data) if(kitr.kernel_id == _info.kernel_id) return &kitr; return 
CHECK_NOTNULL(nullptr); @@ -514,7 +512,7 @@ write_otf2(tool_table* tool auto add_event_data = [&buffer_names, &_hash_data, &_data, - &tool_functions, + &tool_metadata, &thread_event_info, &get_attr](const auto* _inp, auto _attrib) { if(!_inp) return; @@ -533,7 +531,7 @@ write_otf2(tool_table* tool paradigm = OTF2_PARADIGM_USER; if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && itr.operation != ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId) - name = tool_functions->tool_get_roctx_msg_fn(itr.correlation_id.internal); + name = tool_metadata.get_marker_message(itr.correlation_id.internal); } _hash_data.emplace( @@ -591,8 +589,8 @@ write_otf2(tool_table* tool const auto* sym = _get_kernel_sym_data(info); CHECK(sym != nullptr); - auto name = tool_functions->tool_get_kernel_name_fn(info.kernel_id, - itr.correlation_id.external.value); + auto name = + tool_metadata.get_kernel_name(info.kernel_id, itr.correlation_id.external.value); _hash_data.emplace( get_hash_id(name), region_info{std::string{name}, OTF2_REGION_ROLE_FUNCTION, OTF2_PARADIGM_HIP}); diff --git a/source/lib/rocprofiler-sdk-tool/generateOTF2.hpp b/source/lib/output/generateOTF2.hpp similarity index 86% rename from source/lib/rocprofiler-sdk-tool/generateOTF2.hpp rename to source/lib/output/generateOTF2.hpp index 53c2aa1b87..a4d19b8486 100644 --- a/source/lib/rocprofiler-sdk-tool/generateOTF2.hpp +++ b/source/lib/output/generateOTF2.hpp @@ -22,8 +22,11 @@ #pragma once -#include "helper.hpp" +#include "agent_info.hpp" +#include "metadata.hpp" +#include "output_config.hpp" +#include #include namespace rocprofiler @@ -31,9 +34,10 @@ namespace rocprofiler namespace tool { void -write_otf2(tool_table* tool_functions, +write_otf2(const output_config& cfg, + const metadata& tool_metadata, uint64_t pid, - const std::vector& agent_data, + const std::vector& agent_data, std::deque* hip_api_data, std::deque* hsa_api_data, std::deque* kernel_dispatch_data, diff --git a/source/lib/output/generatePerfetto.cpp 
b/source/lib/output/generatePerfetto.cpp new file mode 100644 index 0000000000..0e184c91f6 --- /dev/null +++ b/source/lib/output/generatePerfetto.cpp @@ -0,0 +1,606 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "generatePerfetto.hpp" +#include "output_stream.hpp" +#include "timestamps.hpp" + +#include "lib/common/utility.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +namespace +{ +auto main_tid = common::get_tid(); + +template +size_t +get_hash_id(Tp&& _val) +{ + if constexpr(!std::is_pointer::value) + return std::hash{}(std::forward(_val)); + else if constexpr(std::is_same::value) + return get_hash_id(std::string_view{_val}); + else + return get_hash_id(*_val); +} +} // namespace + +void +write_perfetto( + const output_config& ocfg, + const metadata& tool_metadata, + std::vector agent_data, + const generator& hip_api_gen, + const generator& hsa_api_gen, + const generator& kernel_dispatch_gen, + const generator& memory_copy_gen, + const generator& marker_api_gen, + const generator& /*scratch_memory_gen*/, + const generator& rccl_api_gen) +{ + namespace sdk = ::rocprofiler::sdk; + + // auto root_process_track = ::perfetto::Track{}; + // uint64_t process_uuid = tool_metadata.process_start_ns ^ tool_metadata.process_id; + // auto process_track = ::perfetto::Track{process_uuid, root_process_track}; + // auto process_track = ::perfetto::ProcessTrack::Current(); + + auto agents_map = std::unordered_map{}; + for(auto itr : agent_data) + agents_map.emplace(itr.id, itr); + + auto args = ::perfetto::TracingInitArgs{}; + auto track_event_cfg = ::perfetto::protos::gen::TrackEventConfig{}; + auto cfg = ::perfetto::TraceConfig{}; + + // environment settings + auto shmem_size_hint = ocfg.perfetto_shmem_size_hint; + auto buffer_size_kb = ocfg.perfetto_buffer_size; + + auto* buffer_config = cfg.add_buffers(); + buffer_config->set_size_kb(buffer_size_kb); + + if(ocfg.perfetto_buffer_fill_policy == "discard" || ocfg.perfetto_buffer_fill_policy.empty()) + buffer_config->set_fill_policy( + ::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD); + 
else if(ocfg.perfetto_buffer_fill_policy == "ring_buffer") + buffer_config->set_fill_policy( + ::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_RING_BUFFER); + else + ROCP_FATAL << "Unsupport perfetto buffer fill policy: '" << ocfg.perfetto_buffer_fill_policy + << "'. Supported: discard, ring_buffer"; + + auto* ds_cfg = cfg.add_data_sources()->mutable_config(); + ds_cfg->set_name("track_event"); // this MUST be track_event + ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString()); + + args.shmem_size_hint_kb = shmem_size_hint; + + if(ocfg.perfetto_backend == "inprocess" || ocfg.perfetto_backend.empty()) + args.backends |= ::perfetto::kInProcessBackend; + else if(ocfg.perfetto_backend == "system") + args.backends |= ::perfetto::kSystemBackend; + else + ROCP_FATAL << "Unsupport perfetto backend: '" << ocfg.perfetto_backend + << "'. Supported: inprocess, system"; + + ::perfetto::Tracing::Initialize(args); + ::perfetto::TrackEvent::Register(); + + auto tracing_session = ::perfetto::Tracing::NewTrace(); + + tracing_session->Setup(cfg); + tracing_session->StartBlocking(); + + auto tids = std::set{}; + auto demangled = std::unordered_map{}; + auto agent_thread_ids = std::unordered_map>{}; + auto agent_queue_ids = + std::unordered_map>{}; + auto thread_indexes = std::unordered_map{}; + + auto thread_tracks = std::unordered_map{}; + auto agent_thread_tracks = + std::unordered_map>{}; + auto agent_queue_tracks = + std::unordered_map>{}; + + auto _get_agent = [&agent_data](rocprofiler_agent_id_t _id) -> const rocprofiler_agent_t* { + for(const auto& itr : agent_data) + { + if(_id == itr.id) return &itr; + } + return CHECK_NOTNULL(nullptr); + }; + + { + for(auto ditr : hsa_api_gen) + for(auto itr : hsa_api_gen.get(ditr)) + tids.emplace(itr.thread_id); + for(auto ditr : hip_api_gen) + for(auto itr : hip_api_gen.get(ditr)) + tids.emplace(itr.thread_id); + for(auto ditr : marker_api_gen) + for(auto itr : marker_api_gen.get(ditr)) + 
tids.emplace(itr.thread_id); + for(auto ditr : rccl_api_gen) + for(auto itr : rccl_api_gen.get(ditr)) + tids.emplace(itr.thread_id); + + for(auto ditr : memory_copy_gen) + for(auto itr : memory_copy_gen.get(ditr)) + { + tids.emplace(itr.thread_id); + agent_thread_ids[itr.dst_agent_id].emplace(itr.thread_id); + } + + for(auto ditr : kernel_dispatch_gen) + for(auto itr : kernel_dispatch_gen.get(ditr)) + { + tids.emplace(itr.thread_id); + agent_queue_ids[itr.dispatch_info.agent_id].emplace(itr.dispatch_info.queue_id); + } + } + + uint64_t nthrn = 0; + for(auto itr : tids) + { + if(itr == main_tid) + { + thread_indexes.emplace(main_tid, 0); + thread_tracks.emplace(main_tid, ::perfetto::ThreadTrack::Current()); + } + else + { + auto _idx = ++nthrn; + thread_indexes.emplace(itr, _idx); + auto _track = ::perfetto::Track{itr}; + auto _desc = _track.Serialize(); + auto _namess = std::stringstream{}; + _namess << "THREAD " << _idx << " (" << itr << ")"; + _desc.set_name(_namess.str()); + perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); + + thread_tracks.emplace(itr, _track); + } + } + + for(const auto& itr : agent_thread_ids) + { + const auto* _agent = _get_agent(itr.first); + + for(auto titr : itr.second) + { + auto _namess = std::stringstream{}; + _namess << "COPY to AGENT [" << _agent->logical_node_id << "] THREAD [" + << thread_indexes.at(titr) << "] "; + + if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU) + _namess << "(CPU)"; + else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU) + _namess << "(GPU)"; + else + _namess << "(UNK)"; + + auto _track = ::perfetto::Track{get_hash_id(_namess.str())}; + auto _desc = _track.Serialize(); + _desc.set_name(_namess.str()); + + perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); + + agent_thread_tracks[itr.first].emplace(titr, _track); + } + } + + for(const auto& aitr : agent_queue_ids) + { + uint32_t nqueue = 0; + for(auto qitr : aitr.second) + { + const auto* _agent = _get_agent(aitr.first); + + auto _namess = 
std::stringstream{}; + _namess << "COMPUTE AGENT [" << _agent->logical_node_id << "] QUEUE [" << nqueue++ + << "] "; + + if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU) + _namess << "(CPU)"; + else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU) + _namess << "(GPU)"; + else + _namess << "(UNK)"; + + auto _track = ::perfetto::Track{get_hash_id(_namess.str())}; + auto _desc = _track.Serialize(); + _desc.set_name(_namess.str()); + + perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); + + agent_queue_tracks[aitr.first].emplace(qitr, _track); + } + } + + // trace events + { + auto buffer_names = sdk::get_buffer_tracing_names(); + auto callbk_name_info = sdk::get_callback_tracing_names(); + + for(auto ditr : hsa_api_gen) + for(auto itr : hsa_api_gen.get(ditr)) + { + auto name = buffer_names.at(itr.kind, itr.operation); + auto& track = thread_tracks.at(itr.thread_id); + + TRACE_EVENT_BEGIN(sdk::perfetto_category::name, + ::perfetto::StaticString(name.data()), + track, + itr.start_timestamp, + ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), + "begin_ns", + itr.start_timestamp, + "end_ns", + itr.end_timestamp, + "delta_ns", + (itr.end_timestamp - itr.start_timestamp), + "tid", + itr.thread_id, + "kind", + itr.kind, + "operation", + itr.operation, + "corr_id", + itr.correlation_id.internal); + TRACE_EVENT_END( + sdk::perfetto_category::name, track, itr.end_timestamp); + tracing_session->FlushBlocking(); + } + + for(auto ditr : hip_api_gen) + for(auto itr : hip_api_gen.get(ditr)) + { + auto name = buffer_names.at(itr.kind, itr.operation); + auto& track = thread_tracks.at(itr.thread_id); + + TRACE_EVENT_BEGIN(sdk::perfetto_category::name, + ::perfetto::StaticString(name.data()), + track, + itr.start_timestamp, + ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), + "begin_ns", + itr.start_timestamp, + "end_ns", + itr.end_timestamp, + "delta_ns", + (itr.end_timestamp - itr.start_timestamp), + "tid", + itr.thread_id, + "kind", + itr.kind, + 
"operation", + itr.operation, + "corr_id", + itr.correlation_id.internal); + TRACE_EVENT_END( + sdk::perfetto_category::name, track, itr.end_timestamp); + tracing_session->FlushBlocking(); + } + + for(auto ditr : marker_api_gen) + for(auto itr : marker_api_gen.get(ditr)) + { + auto& track = thread_tracks.at(itr.thread_id); + auto name = (itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && + itr.operation != ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId) + ? tool_metadata.get_marker_message(itr.correlation_id.internal) + : buffer_names.at(itr.kind, itr.operation); + + TRACE_EVENT_BEGIN(sdk::perfetto_category::name, + ::perfetto::StaticString(name.data()), + track, + itr.start_timestamp, + ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), + "begin_ns", + itr.start_timestamp, + "end_ns", + itr.end_timestamp, + "delta_ns", + (itr.end_timestamp - itr.start_timestamp), + "tid", + itr.thread_id, + "kind", + itr.kind, + "operation", + itr.operation, + "corr_id", + itr.correlation_id.internal); + TRACE_EVENT_END(sdk::perfetto_category::name, + track, + itr.end_timestamp); + tracing_session->FlushBlocking(); + } + + for(auto ditr : rccl_api_gen) + for(auto itr : rccl_api_gen.get(ditr)) + { + auto name = buffer_names.at(itr.kind, itr.operation); + auto& track = thread_tracks.at(itr.thread_id); + + TRACE_EVENT_BEGIN(sdk::perfetto_category::name, + ::perfetto::StaticString(name.data()), + track, + itr.start_timestamp, + ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), + "begin_ns", + itr.start_timestamp, + "end_ns", + itr.end_timestamp, + "delta_ns", + (itr.end_timestamp - itr.start_timestamp), + "tid", + itr.thread_id, + "kind", + itr.kind, + "operation", + itr.operation, + "corr_id", + itr.correlation_id.internal); + TRACE_EVENT_END(sdk::perfetto_category::name, + track, + itr.end_timestamp); + tracing_session->FlushBlocking(); + } + + for(auto ditr : memory_copy_gen) + for(auto itr : memory_copy_gen.get(ditr)) + { + auto name = 
buffer_names.at(itr.kind, itr.operation); + auto& track = agent_thread_tracks.at(itr.dst_agent_id).at(itr.thread_id); + + TRACE_EVENT_BEGIN(sdk::perfetto_category::name, + ::perfetto::StaticString(name.data()), + track, + itr.start_timestamp, + ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), + "begin_ns", + itr.start_timestamp, + "end_ns", + itr.end_timestamp, + "delta_ns", + (itr.end_timestamp - itr.start_timestamp), + "kind", + itr.kind, + "operation", + itr.operation, + "src_agent", + agents_map.at(itr.src_agent_id).logical_node_id, + "dst_agent", + agents_map.at(itr.dst_agent_id).logical_node_id, + "copy_bytes", + itr.bytes, + "corr_id", + itr.correlation_id.internal, + "tid", + itr.thread_id); + TRACE_EVENT_END(sdk::perfetto_category::name, + track, + itr.end_timestamp); + tracing_session->FlushBlocking(); + } + + for(auto ditr : kernel_dispatch_gen) + for(auto itr : kernel_dispatch_gen.get(ditr)) + { + const auto& info = itr.dispatch_info; + const kernel_symbol_info* sym = tool_metadata.get_kernel_symbol(info.kernel_id); + + CHECK(sym != nullptr); + + auto name = std::string_view{sym->kernel_name}; + auto& track = agent_queue_tracks.at(info.agent_id).at(info.queue_id); + + if(demangled.find(name) == demangled.end()) + { + demangled.emplace(name, common::cxx_demangle(name)); + } + + TRACE_EVENT_BEGIN( + sdk::perfetto_category::name, + ::perfetto::StaticString(demangled.at(name).c_str()), + track, + itr.start_timestamp, + ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), + "begin_ns", + itr.start_timestamp, + "end_ns", + itr.end_timestamp, + "delta_ns", + (itr.end_timestamp - itr.start_timestamp), + "kind", + itr.kind, + "agent", + agents_map.at(info.agent_id).logical_node_id, + "corr_id", + itr.correlation_id.internal, + "queue", + info.queue_id.handle, + "tid", + itr.thread_id, + "kernel_id", + info.kernel_id, + "private_segment_size", + info.private_segment_size, + "group_segment_size", + info.group_segment_size, + "workgroup_size", 
+ info.workgroup_size.x * info.workgroup_size.y * info.workgroup_size.z, + "grid_size", + info.grid_size.x * info.grid_size.y * info.grid_size.z); + TRACE_EVENT_END(sdk::perfetto_category::name, + track, + itr.end_timestamp); + tracing_session->FlushBlocking(); + } + } + + // counter tracks + { + // memory copy counter track + auto mem_cpy_endpoints = std::map>{}; + auto mem_cpy_extremes = std::pair{}; + for(auto ditr : memory_copy_gen) + for(auto itr : memory_copy_gen.get(ditr)) + { + uint64_t _mean_timestamp = + itr.start_timestamp + (0.5 * (itr.end_timestamp - itr.start_timestamp)); + + mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp - 1000, 0); + mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp, 0); + mem_cpy_endpoints[itr.dst_agent_id].emplace(_mean_timestamp, 0); + mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp, 0); + mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp + 1000, 0); + + mem_cpy_extremes = + std::make_pair(std::min(mem_cpy_extremes.first, itr.start_timestamp), + std::max(mem_cpy_extremes.second, itr.end_timestamp)); + } + + for(auto ditr : memory_copy_gen) + for(auto itr : memory_copy_gen.get(ditr)) + { + auto mbeg = mem_cpy_endpoints.at(itr.dst_agent_id).lower_bound(itr.start_timestamp); + auto mend = mem_cpy_endpoints.at(itr.dst_agent_id).upper_bound(itr.end_timestamp); + + LOG_IF(FATAL, mbeg == mend) + << "Missing range for timestamp [" << itr.start_timestamp << ", " + << itr.end_timestamp << "]"; + + for(auto mitr = mbeg; mitr != mend; ++mitr) + mitr->second += itr.bytes; + } + + constexpr auto bytes_multiplier = 1024; + + auto mem_cpy_tracks = + std::unordered_map{}; + auto mem_cpy_cnt_names = std::vector{}; + mem_cpy_cnt_names.reserve(mem_cpy_endpoints.size()); + for(auto& mitr : mem_cpy_endpoints) + { + mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.first - 5000, 0); + mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.second + 5000, 0); + + auto _track_name = 
std::stringstream{}; + const auto* _agent = _get_agent(mitr.first); + + if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU) + _track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (CPU)"; + else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU) + _track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (GPU)"; + + constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES; + auto& _name = mem_cpy_cnt_names.emplace_back(_track_name.str()); + mem_cpy_tracks.emplace(mitr.first, + ::perfetto::CounterTrack{_name.c_str()} + .set_unit(_unit) + .set_unit_multiplier(bytes_multiplier) + .set_is_incremental(false)); + } + + for(auto& mitr : mem_cpy_endpoints) + { + for(auto itr : mitr.second) + { + TRACE_COUNTER(sdk::perfetto_category::name, + mem_cpy_tracks.at(mitr.first), + itr.first, + itr.second / bytes_multiplier); + tracing_session->FlushBlocking(); + } + } + } + + ::perfetto::TrackEvent::Flush(); + tracing_session->FlushBlocking(); + tracing_session->StopBlocking(); + + auto filename = std::string{"results"}; + auto ofs = get_output_stream(ocfg, filename, ".pftrace"); + + auto amount_read = std::atomic{0}; + auto is_done = std::promise{}; + auto _mtx = std::mutex{}; + auto _reader = [&ofs, &_mtx, &is_done, &amount_read]( + ::perfetto::TracingSession::ReadTraceCallbackArgs _args) { + auto _lk = std::unique_lock{_mtx}; + if(_args.data && _args.size > 0) + { + ROCP_TRACE << "Writing " << _args.size << " B to trace..."; + // Write the trace data into file + ofs.stream->write(_args.data, _args.size); + amount_read += _args.size; + } + ROCP_INFO_IF(!_args.has_more && amount_read > 0) + << "Wrote " << amount_read << " B to perfetto trace file"; + if(!_args.has_more) is_done.set_value(); + }; + + for(size_t i = 0; i < 2; ++i) + { + ROCP_TRACE << "Reading trace..."; + amount_read = 0; + is_done = std::promise{}; + tracing_session->ReadTrace(_reader); + is_done.get_future().wait(); + } + + ROCP_TRACE << "Destroying tracing session..."; + 
tracing_session.reset(); + + ROCP_TRACE << "Flushing trace output stream..."; + (*ofs.stream) << std::flush; + + ROCP_TRACE << "Destroying trace output stream..."; + ofs.close(); +} + +} // namespace tool +} // namespace rocprofiler + +PERFETTO_TRACK_EVENT_STATIC_STORAGE(); diff --git a/source/lib/output/generatePerfetto.hpp b/source/lib/output/generatePerfetto.hpp new file mode 100644 index 0000000000..0417416b79 --- /dev/null +++ b/source/lib/output/generatePerfetto.hpp @@ -0,0 +1,50 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "agent_info.hpp" +#include "generator.hpp" +#include "metadata.hpp" +#include "output_config.hpp" + +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +void +write_perfetto( + const output_config& cfg, + const metadata& tool_metadata, + std::vector agent_data, + const generator& hip_api_gen, + const generator& hsa_api_gen, + const generator& kernel_dispatch_gen, + const generator& memory_copy_gen, + const generator& marker_api_gen, + const generator& scratch_memory_gen, + const generator& rccl_api_gen); +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/generateStats.cpp b/source/lib/output/generateStats.cpp similarity index 60% rename from source/lib/rocprofiler-sdk-tool/generateStats.cpp rename to source/lib/output/generateStats.cpp index e03cef2bbb..7e55f3648d 100644 --- a/source/lib/rocprofiler-sdk-tool/generateStats.cpp +++ b/source/lib/output/generateStats.cpp @@ -21,11 +21,10 @@ // SOFTWARE. #include "generateStats.hpp" -#include "config.hpp" #include "domain_type.hpp" -#include "helper.hpp" -#include "output_file.hpp" +#include "output_stream.hpp" #include "statistics.hpp" +#include "timestamps.hpp" #include "lib/common/logging.hpp" @@ -63,120 +62,149 @@ get_stats(const stats_map_t& data_v) } // namespace stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data) +generate_stats(const output_config& /*cfg*/, + const metadata& tool_metadata, + const generator& data) { auto kernel_stats = stats_map_t{}; - for(const auto& record : data) + for(auto ditr : data) { - auto kernel_name = tool_functions->tool_get_kernel_name_fn( - record.dispatch_info.kernel_id, record.correlation_id.external.value); + for(auto record : data.get(ditr)) + { + auto kernel_name = tool_metadata.get_kernel_name(record.dispatch_info.kernel_id, + record.correlation_id.external.value); - kernel_stats[kernel_name] += (record.end_timestamp - record.start_timestamp); + 
kernel_stats[kernel_name] += (record.end_timestamp - record.start_timestamp); + } } return get_stats(kernel_stats); } stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data) +generate_stats(const output_config& /*cfg*/, + const metadata& tool_metadata, + const generator& data) { auto hip_stats = stats_map_t{}; - for(const auto& record : data) + for(auto ditr : data) { - auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - hip_stats[api_name] += (record.end_timestamp - record.start_timestamp); + for(auto record : data.get(ditr)) + { + auto api_name = tool_metadata.get_operation_name(record.kind, record.operation); + hip_stats[api_name] += (record.end_timestamp - record.start_timestamp); + } } return get_stats(hip_stats); } stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data) +generate_stats(const output_config& /*cfg*/, + const metadata& tool_metadata, + const generator& data) { auto hsa_stats = stats_map_t{}; - for(const auto& record : data) + for(auto ditr : data) { - auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - hsa_stats[api_name] += (record.end_timestamp - record.start_timestamp); + for(auto record : data.get(ditr)) + { + auto api_name = tool_metadata.get_operation_name(record.kind, record.operation); + hsa_stats[api_name] += (record.end_timestamp - record.start_timestamp); + } } return get_stats(hsa_stats); } stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data) +generate_stats(const output_config& /*cfg*/, + const metadata& tool_metadata, + const generator& data) { auto memory_copy_stats = stats_map_t{}; - for(const auto& record : data) + for(auto ditr : data) { - auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - memory_copy_stats[api_name] += (record.end_timestamp - record.start_timestamp); + for(auto record : data.get(ditr)) + { + auto 
api_name = tool_metadata.get_operation_name(record.kind, record.operation); + memory_copy_stats[api_name] += (record.end_timestamp - record.start_timestamp); + } } return get_stats(memory_copy_stats); } stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data) +generate_stats(const output_config& /*cfg*/, + const metadata& tool_metadata, + const generator& data) { auto marker_stats = stats_map_t{}; - for(const auto& record : data) + for(auto ditr : data) { - auto _name = std::string_view{}; - - if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && - (record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA || - record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA || - record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA)) + for(auto record : data.get(ditr)) { - _name = tool_functions->tool_get_roctx_msg_fn(record.correlation_id.internal); - } - else - { - _name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - } + auto _name = std::string_view{}; - marker_stats[_name] += (record.end_timestamp - record.start_timestamp); + if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && + (record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA || + record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA || + record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA)) + { + _name = tool_metadata.get_marker_message(record.correlation_id.internal); + } + else + { + _name = tool_metadata.get_operation_name(record.kind, record.operation); + } + + marker_stats[_name] += (record.end_timestamp - record.start_timestamp); + } } return get_stats(marker_stats); } stats_entry_t -generate_stats(tool_table* /*tool_functions*/, - const std::deque& /*data*/) +generate_stats(const output_config& /*cfg*/, + const metadata& /*tool_metadata*/, + const generator& /*data*/) { return stats_entry_t{}; } stats_entry_t -generate_stats(tool_table* tool_functions, - const 
std::deque& data) +generate_stats(const output_config& /*cfg*/, + const metadata& tool_metadata, + const generator& data) { auto scratch_memory_stats = stats_map_t{}; - for(const auto& record : data) + for(auto ditr : data) { - auto op_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - scratch_memory_stats[op_name] += (record.end_timestamp - record.start_timestamp); + for(auto record : data.get(ditr)) + { + auto op_name = tool_metadata.get_operation_name(record.kind, record.operation); + scratch_memory_stats[op_name] += (record.end_timestamp - record.start_timestamp); + } } return get_stats(scratch_memory_stats); } stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data) +generate_stats(const output_config& /*cfg*/, + const metadata& tool_metadata, + const generator& data) { auto rccl_stats = stats_map_t{}; - for(const auto& record : data) + for(auto ditr : data) { - auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - rccl_stats[api_name] += (record.end_timestamp - record.start_timestamp); + for(auto record : data.get(ditr)) + { + auto api_name = tool_metadata.get_operation_name(record.kind, record.operation); + rccl_stats[api_name] += (record.end_timestamp - record.start_timestamp); + } } return get_stats(rccl_stats); @@ -185,7 +213,8 @@ generate_stats(tool_table* t namespace { void -generate_stats(output_stream_t& os, +generate_stats(const output_config& cfg, + output_stream& os, std::string_view label, const domain_stats_vec_t& data_v, std::string_view indent_v) @@ -237,11 +266,11 @@ generate_stats(output_stream_t& os, "DOMAIN", domain_width, "CALLS", - fmt::format("DURATION ({})", tool::get_config().stats_summary_unit), - fmt::format("AVERAGE ({})", tool::get_config().stats_summary_unit), + fmt::format("DURATION ({})", cfg.stats_summary_unit), + fmt::format("AVERAGE ({})", cfg.stats_summary_unit), "PERCENT (INC)", - fmt::format("MIN ({})", 
tool::get_config().stats_summary_unit), - fmt::format("MAX ({})", tool::get_config().stats_summary_unit), + fmt::format("MIN ({})", cfg.stats_summary_unit), + fmt::format("MAX ({})", cfg.stats_summary_unit), "STDDEV"); (*os.stream) << indent_v << _header << "\n" << std::flush; @@ -265,9 +294,9 @@ generate_stats(output_stream_t& os, auto _row = std::string{}; - if(tool::get_config().stats_summary_unit_value > 1) + if(cfg.stats_summary_unit_value > 1) { - auto _unit_div = static_cast(tool::get_config().stats_summary_unit_value); + auto _unit_div = static_cast(cfg.stats_summary_unit_value); _row = fmt::format("{}| {:<{}} | {:<{}} | {:15} | {:15} | {:15.3e} | {:>13} | {:15} | " "{:15} | {:15.3e} |", indent_v, @@ -309,7 +338,9 @@ generate_stats(output_stream_t& os, } // namespace void -generate_stats(tool_table* /*tool_functions*/, const domain_stats_vec_t& inp_data) +generate_stats(const output_config& cfg, + const metadata& /*tool_metadata*/, + const domain_stats_vec_t& inp_data) { auto data_v = inp_data; @@ -317,10 +348,10 @@ generate_stats(tool_table* /*tool_functions*/, const domain_stats_vec_t& inp_dat return lhs.first < rhs.first; }); - output_stream_t _os = get_output_stream(tool::get_config().stats_summary_file, ".txt"); - auto _indent = (_os.writes_to_file()) ? std::string_view{} : std::string_view{" "}; + output_stream _os = get_output_stream(cfg, cfg.stats_summary_file, ".txt"); + auto _indent = (_os.writes_to_file()) ? 
std::string_view{} : std::string_view{" "}; - if(tool::get_config().stats_summary_per_domain) + if(cfg.stats_summary_per_domain) { for(const auto& itr : data_v) { @@ -329,14 +360,14 @@ generate_stats(tool_table* /*tool_functions*/, const domain_stats_vec_t& inp_dat auto _name = fmt::format("{} SUMMARY", get_domain_column_name(itr.first)); auto _tmp = domain_stats_vec_t{}; _tmp.emplace_back(itr.first, itr.second); - generate_stats(_os, _name, _tmp, _indent); + generate_stats(cfg, _os, _name, _tmp, _indent); } } - if(!tool::get_config().stats_summary_groups.empty()) + if(!cfg.stats_summary_groups.empty()) { auto domain_groups = std::vector{}; - for(const auto& itr : tool::get_config().stats_summary_groups) + for(const auto& itr : cfg.stats_summary_groups) { auto _names = std::vector{}; auto _tmp = domain_stats_vec_t{}; @@ -356,11 +387,11 @@ generate_stats(tool_table* /*tool_functions*/, const domain_stats_vec_t& inp_dat << "summary group regex '" << itr << "' matched with zero domain groups"; auto _name = fmt::format("{} SUMMARY", fmt::join(_names.begin(), _names.end(), " + ")); - generate_stats(_os, _name, _tmp, _indent); + generate_stats(cfg, _os, _name, _tmp, _indent); } } - if(tool::get_config().stats_summary) generate_stats(_os, "SUMMARY", data_v, _indent); + if(cfg.stats_summary) generate_stats(cfg, _os, "SUMMARY", data_v, _indent); } } // namespace tool } // namespace rocprofiler diff --git a/source/lib/output/generateStats.hpp b/source/lib/output/generateStats.hpp new file mode 100644 index 0000000000..bb46c31570 --- /dev/null +++ b/source/lib/output/generateStats.hpp @@ -0,0 +1,78 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "generator.hpp" +#include "metadata.hpp" +#include "statistics.hpp" + +namespace rocprofiler +{ +namespace tool +{ +stats_entry_t +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); + +stats_entry_t +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); + +stats_entry_t +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); + +stats_entry_t +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); + +stats_entry_t +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); + +stats_entry_t +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); + +stats_entry_t +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); + +stats_entry_t +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); + +void +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const domain_stats_vec_t& data); +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/output/generator.hpp b/source/lib/output/generator.hpp new file mode 100644 index 0000000000..7b8eb53f10 --- /dev/null +++ b/source/lib/output/generator.hpp @@ -0,0 +1,153 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "tmp_file_buffer.hpp" + +#include "lib/common/container/ring_buffer.hpp" +#include "lib/common/logging.hpp" + +#include + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +/// converts a container of ring buffers of element Tp into a single container of elements +template class ContainerT, typename... 
ParamsT> +ContainerT +get_buffer_elements(ContainerT, ParamsT...>&& data) +{ + auto ret = ContainerT{}; + for(auto& buf : data) + { + Tp* record = nullptr; + do + { + record = buf.retrieve(); + if(record) ret.emplace_back(*record); + } while(record != nullptr); + } + + return ret; +} + +template +std::vector +get_buffer_elements(common::container::ring_buffer&& buf) +{ + auto ret = std::vector{}; + Tp* record = nullptr; + ret.reserve(buf.count()); + do + { + record = buf.retrieve(); + if(record) ret.emplace_back(*record); + } while(record != nullptr); + + return ret; +} + +template +struct buffered_output; + +template +struct generator +{ + template + friend struct buffered_output; + + generator() = delete; + ~generator() = default; + + generator(const generator&) = delete; + generator(generator&&) = delete; + generator& operator=(const generator&) = delete; + generator& operator=(generator&&) = delete; + + auto begin() { return file_pos.begin(); } + auto begin() const { return file_pos.begin(); } + auto cbegin() const { return file_pos.cbegin(); } + + auto end() { return file_pos.end(); } + auto end() const { return file_pos.end(); } + auto cend() const { return file_pos.cend(); } + + auto size() const { return file_pos.size(); } + auto empty() const { return file_pos.empty(); } + + std::vector get(std::streampos itr) const; + +private: + generator(file_buffer* fbuf); + + file_buffer* filebuf = nullptr; + std::lock_guard lk_guard; + std::set file_pos = {}; +}; + +template +generator::generator(file_buffer* fbuf) +: filebuf{fbuf} +, lk_guard{filebuf->file.file_mutex} +, file_pos{filebuf->file.file_pos} +{} + +template +std::vector +generator::get(std::streampos itr) const +{ + auto _data = std::vector{}; + auto& _fs = filebuf->file.stream; + _fs.seekg(itr); // set to the absolute position + if(!_fs.eof()) + { + auto _buffer = ring_buffer_t{}; + _buffer.load(_fs); + _data = get_buffer_elements(std::move(_buffer)); + } + return _data; +} +} // namespace tool +} // 
namespace rocprofiler + +namespace cereal +{ +template +void +save(ArchiveT& ar, const rocprofiler::tool::generator& data) +{ + ar.makeArray(); + for(auto itr : data) + { + auto dat = data.get(itr); + for(auto ditr : dat) + ar(ditr); + } +} +} // namespace cereal diff --git a/source/lib/output/kernel_symbol_info.hpp b/source/lib/output/kernel_symbol_info.hpp new file mode 100644 index 0000000000..c573c8c78c --- /dev/null +++ b/source/lib/output/kernel_symbol_info.hpp @@ -0,0 +1,98 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "lib/common/demangle.hpp" +#include "lib/common/logging.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +using rocprofiler_code_object_info_t = rocprofiler_callback_tracing_code_object_load_data_t; +using code_object_info = rocprofiler_code_object_info_t; +using code_object_data_vec_t = std::vector; +using code_object_data_map_t = std::unordered_map; + +using rocprofiler_kernel_symbol_info_t = + rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t; + +struct kernel_symbol_info : rocprofiler_kernel_symbol_info_t +{ + using base_type = rocprofiler_kernel_symbol_info_t; + + template + kernel_symbol_info(const base_type& _base, FuncT&& _formatter) + : base_type{_base} + , formatted_kernel_name{_formatter(CHECK_NOTNULL(_base.kernel_name))} + , demangled_kernel_name{common::cxx_demangle(CHECK_NOTNULL(_base.kernel_name))} + , truncated_kernel_name{common::truncate_name(demangled_kernel_name)} + {} + + kernel_symbol_info(); + ~kernel_symbol_info() = default; + kernel_symbol_info(const kernel_symbol_info&) = default; + kernel_symbol_info(kernel_symbol_info&&) noexcept = default; + kernel_symbol_info& operator=(const kernel_symbol_info&) = default; + kernel_symbol_info& operator=(kernel_symbol_info&&) noexcept = default; + + std::string formatted_kernel_name = {}; + std::string demangled_kernel_name = {}; + std::string truncated_kernel_name = {}; +}; + +using kernel_symbol_data_vec_t = std::vector; +using kernel_symbol_data_map_t = std::unordered_map; +} // namespace tool +} // namespace rocprofiler + +namespace cereal +{ +#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD)) + +template +void +save(ArchiveT& ar, const ::rocprofiler::tool::kernel_symbol_info& data) +{ + cereal::save(ar, + static_cast(data)); + SAVE_DATA_FIELD(formatted_kernel_name); + SAVE_DATA_FIELD(demangled_kernel_name); + 
SAVE_DATA_FIELD(truncated_kernel_name); +} + +#undef SAVE_DATA_FIELD +} // namespace cereal diff --git a/source/lib/output/metadata.cpp b/source/lib/output/metadata.cpp new file mode 100644 index 0000000000..6d5995580b --- /dev/null +++ b/source/lib/output/metadata.cpp @@ -0,0 +1,412 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "metadata.hpp" + +#include "lib/common/string_entry.hpp" +#include "lib/output/agent_info.hpp" + +#include + +#include + +namespace rocprofiler +{ +namespace tool +{ +namespace +{ +rocprofiler_status_t +dimensions_info_callback(rocprofiler_counter_id_t /*id*/, + const rocprofiler_record_dimension_info_t* dim_info, + long unsigned int num_dims, + void* user_data) +{ + auto* dimensions_info = static_cast(user_data); + dimensions_info->reserve(num_dims); + for(size_t j = 0; j < num_dims; j++) + dimensions_info->emplace_back(dim_info[j]); + return ROCPROFILER_STATUS_SUCCESS; +} +} // namespace + +kernel_symbol_info::kernel_symbol_info() +: base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0} +{} + +metadata::metadata(inprocess) +: buffer_names{sdk::get_buffer_tracing_names()} +, callback_names{sdk::get_callback_tracing_names()} +{ + ROCPROFILER_CHECK(rocprofiler_query_available_agents( + ROCPROFILER_AGENT_INFO_VERSION_0, + [](rocprofiler_agent_version_t, const void** _agents, size_t _num_agents, void* _data) { + auto* _agents_v = static_cast(_data); + _agents_v->reserve(_num_agents); + for(size_t i = 0; i < _num_agents; ++i) + { + auto* agent = static_cast(_agents[i]); + _agents_v->emplace_back(*agent); + } + return ROCPROFILER_STATUS_SUCCESS; + }, + sizeof(rocprofiler_agent_v0_t), + &agents)); + + { + auto _gpu_agents = std::vector{}; + + _gpu_agents.reserve(agents.size()); + for(auto& itr : agents) + { + if(itr.type == ROCPROFILER_AGENT_TYPE_GPU) _gpu_agents.emplace_back(&itr); + } + + // make sure they are sorted by node id + std::sort(_gpu_agents.begin(), _gpu_agents.end(), [](const auto& lhs, const auto& rhs) { + return CHECK_NOTNULL(lhs)->node_id < CHECK_NOTNULL(rhs)->node_id; + }); + + int64_t _dev_id = 0; + for(auto& itr : _gpu_agents) + itr->gpu_index = _dev_id++; + } + + for(auto itr : agents) + agents_map.emplace(itr.id, itr); +} + +void metadata::init(inprocess) +{ + if(inprocess_init) return; + + inprocess_init = true; + for(auto itr : agents) + { + 
if(itr.type == ROCPROFILER_AGENT_TYPE_CPU) continue; + + ROCPROFILER_CHECK(rocprofiler_iterate_agent_supported_counters( + itr.id, + [](rocprofiler_agent_id_t id, + rocprofiler_counter_id_t* counters, + size_t num_counters, + void* user_data) { + auto* data_v = static_cast(user_data); + data_v->emplace(id, counter_info_vec_t{}); + for(size_t i = 0; i < num_counters; ++i) + { + auto _info = rocprofiler_counter_info_v0_t{}; + auto _dim_ids = std::vector{}; + auto _dim_info = std::vector{}; + + ROCPROFILER_CHECK(rocprofiler_query_counter_info( + counters[i], + ROCPROFILER_COUNTER_INFO_VERSION_0, + &static_cast(_info))); + + ROCPROFILER_CHECK(rocprofiler_iterate_counter_dimensions( + counters[i], dimensions_info_callback, &_dim_info)); + + _dim_ids.reserve(_dim_info.size()); + for(auto ditr : _dim_info) + _dim_ids.emplace_back(ditr.id); + + data_v->at(id).emplace_back( + id, _info, std::move(_dim_ids), std::move(_dim_info)); + } + return ROCPROFILER_STATUS_SUCCESS; + }, + &agent_counter_info)); + } +} + +const agent_info* +metadata::get_agent(rocprofiler_agent_id_t _val) const +{ + for(const auto& itr : agents) + { + if(itr.id == _val) return &itr; + } + return nullptr; +} + +const code_object_info* +metadata::get_code_object(uint64_t code_obj_id) const +{ + return code_objects.rlock([code_obj_id](const auto& _data) -> const code_object_info* { + return &_data.at(code_obj_id); + }); +} + +const kernel_symbol_info* +metadata::get_kernel_symbol(uint64_t kernel_id) const +{ + return kernel_symbols.rlock([kernel_id](const auto& _data) -> const kernel_symbol_info* { + return &_data.at(kernel_id); + }); +} + +const tool_counter_info* +metadata::get_counter_info(uint64_t instance_id) const +{ + auto _counter_id = rocprofiler_counter_id_t{.handle = 0}; + ROCPROFILER_CHECK(rocprofiler_query_record_counter_id(instance_id, &_counter_id)); + return get_counter_info(_counter_id); +} + +const tool_counter_info* +metadata::get_counter_info(rocprofiler_counter_id_t id) const +{ + 
for(const auto& itr : agent_counter_info) + { + for(const auto& aitr : itr.second) + { + if(aitr.id == id) return &aitr; + } + } + return nullptr; +} + +const counter_dimension_info_vec_t* +metadata::get_counter_dimension_info(uint64_t instance_id) const +{ + return &CHECK_NOTNULL(get_counter_info(instance_id))->dimensions; +} + +code_object_data_vec_t +metadata::get_code_objects() const +{ + auto _data = code_objects.rlock([](const auto& _data_v) { + auto _info = std::vector{}; + _info.reserve(_data_v.size()); + for(const auto& itr : _data_v) + _info.emplace_back(itr.second); + return _info; + }); + + uint64_t _sz = 0; + for(const auto& itr : _data) + _sz = std::max(_sz, itr.code_object_id); + + auto _code_obj_data = std::vector{}; + _code_obj_data.resize(_sz + 1, code_object_info{}); + // index by the code object id + for(auto& itr : _data) + _code_obj_data.at(itr.code_object_id) = itr; + + return _code_obj_data; +} + +kernel_symbol_data_vec_t +metadata::get_kernel_symbols() const +{ + auto _data = kernel_symbols.rlock([](const auto& _data_v) { + auto _info = std::vector{}; + _info.reserve(_data_v.size()); + for(const auto& itr : _data_v) + _info.emplace_back(itr.second); + return _info; + }); + + uint64_t kernel_data_size = 0; + for(const auto& itr : _data) + kernel_data_size = std::max(kernel_data_size, itr.kernel_id); + + auto _symbol_data = std::vector{}; + _symbol_data.resize(kernel_data_size + 1, kernel_symbol_info{}); + // index by the kernel id + for(auto& itr : _data) + _symbol_data.at(itr.kernel_id) = std::move(itr); + + return _symbol_data; +} + +metadata::agent_info_ptr_vec_t +metadata::get_gpu_agents() const +{ + auto _data = metadata::agent_info_ptr_vec_t{}; + for(const auto& itr : agents) + { + if(itr.type == ROCPROFILER_AGENT_TYPE_GPU) _data.emplace_back(&itr); + } + return _data; +} + +counter_info_vec_t +metadata::get_counter_info() const +{ + auto _ret = std::vector{}; + for(const auto& itr : agent_counter_info) + { + for(const auto& iitr : 
itr.second) + _ret.emplace_back(iitr); + } + return _ret; +} + +counter_dimension_vec_t +metadata::get_counter_dimension_info() const +{ + auto _ret = counter_dimension_vec_t{}; + for(const auto& itr : agent_counter_info) + { + for(const auto& iitr : itr.second) + for(const auto& ditr : iitr.dimensions) + _ret.emplace_back(ditr); + } + + auto _sorter = [](const rocprofiler_record_dimension_info_t& lhs, + const rocprofiler_record_dimension_info_t& rhs) { + return std::tie(lhs.id, lhs.instance_size) < std::tie(rhs.id, rhs.instance_size); + }; + auto _equiv = [](const rocprofiler_record_dimension_info_t& lhs, + const rocprofiler_record_dimension_info_t& rhs) { + return std::tie(lhs.id, lhs.instance_size) == std::tie(rhs.id, rhs.instance_size); + }; + + std::sort(_ret.begin(), _ret.end(), _sorter); + _ret.erase(std::unique(_ret.begin(), _ret.end(), _equiv), _ret.end()); + + return _ret; +} + +bool +metadata::add_marker_message(uint64_t corr_id, std::string&& msg) +{ + return marker_messages.wlock( + [](auto& _data, uint64_t _cid_v, std::string&& _msg) -> bool { + return _data.emplace(_cid_v, std::move(_msg)).second; + }, + corr_id, + std::move(msg)); +} + +bool +metadata::add_code_object(code_object_info obj) +{ + return code_objects.wlock( + [](code_object_data_map_t& _data_v, code_object_info _obj_v) -> bool { + return _data_v.emplace(_obj_v.code_object_id, _obj_v).second; + }, + obj); +} + +bool +metadata::add_kernel_symbol(kernel_symbol_info&& sym) +{ + return kernel_symbols.wlock( + [](kernel_symbol_data_map_t& _data_v, kernel_symbol_info&& _sym_v) -> bool { + return _data_v.emplace(_sym_v.kernel_id, std::move(_sym_v)).second; + }, + std::move(sym)); +} + +bool +metadata::add_string_entry(size_t key, std::string_view str) +{ + return string_entries.ulock( + [](const auto& _data, size_t _key, std::string_view) { return (_data.count(_key) > 0); }, + [](auto& _data, size_t _key, std::string_view _str) { + _data.emplace(_key, new std::string{_str}); + return true; + 
}, + key, + str); +} + +bool +metadata::add_external_correlation_id(uint64_t val) +{ + return external_corr_ids.wlock( + [](auto& _data, uint64_t _val) { return _data.emplace(_val).second; }, val); +} + +std::string_view +metadata::get_marker_message(uint64_t corr_id) const +{ + return marker_messages.rlock( + [](const auto& _data, uint64_t _corr_id_v) -> std::string_view { + return _data.at(_corr_id_v); + }, + corr_id); +} + +std::string_view +metadata::get_kernel_name(uint64_t kernel_id, uint64_t rename_id) const +{ + if(rename_id > 0) + { + if(const auto* _name = common::get_string_entry(rename_id)) return std::string_view{*_name}; + } + + const auto* _kernel_data = get_kernel_symbol(kernel_id); + return CHECK_NOTNULL(_kernel_data)->formatted_kernel_name; +} + +std::string_view +metadata::get_kind_name(rocprofiler_callback_tracing_kind_t kind) const +{ + return callback_names.at(kind); +} + +std::string_view +metadata::get_kind_name(rocprofiler_buffer_tracing_kind_t kind) const +{ + return buffer_names.at(kind); +} + +std::string_view +metadata::get_operation_name(rocprofiler_callback_tracing_kind_t kind, + rocprofiler_tracing_operation_t op) const +{ + return callback_names.at(kind, op); +} + +std::string_view +metadata::get_operation_name(rocprofiler_buffer_tracing_kind_t kind, + rocprofiler_tracing_operation_t op) const +{ + return buffer_names.at(kind, op); +} + +uint64_t +metadata::get_node_id(rocprofiler_agent_id_t _val) const +{ + return CHECK_NOTNULL(get_agent(_val))->logical_node_id; +} + +const std::string* +metadata::get_string_entry(size_t key) const +{ + const auto* ret = string_entries.rlock( + [](const auto& _data, size_t _key) -> const std::string* { + if(_data.count(_key) > 0) return _data.at(_key).get(); + return nullptr; + }, + key); + + if(!ret) ret = common::get_string_entry(key); + + return ret; +} +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/output/metadata.hpp b/source/lib/output/metadata.hpp new file mode 
100644 index 0000000000..e093a43449 --- /dev/null +++ b/source/lib/output/metadata.hpp @@ -0,0 +1,160 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "agent_info.hpp" +#include "counter_info.hpp" +#include "kernel_symbol_info.hpp" + +#include "lib/common/container/small_vector.hpp" +#include "lib/common/demangle.hpp" +#include "lib/common/logging.hpp" +#include "lib/common/synchronized.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define ROCPROFILER_CHECK_NESTED(VAR, RESULT) \ + { \ + rocprofiler_status_t ROCPROFILER_VARIABLE(CHECKSTATUS, VAR) = RESULT; \ + if(ROCPROFILER_VARIABLE(CHECKSTATUS, VAR) != ROCPROFILER_STATUS_SUCCESS) \ + { \ + std::string_view status_msg = \ + rocprofiler_get_status_string(ROCPROFILER_VARIABLE(CHECKSTATUS, VAR)); \ + ROCP_FATAL << "[" << __FUNCTION__ << "] " << #RESULT << " failed with error code " \ + << ROCPROFILER_VARIABLE(CHECKSTATUS, VAR) << " :: " << status_msg; \ + } \ + } + +#define ROCPROFILER_CHECK(RESULT) ROCPROFILER_CHECK_NESTED(__COUNTER__, RESULT) + +namespace rocprofiler +{ +namespace tool +{ +using marker_message_map_t = std::unordered_map; +using marker_message_ordered_map_t = std::map; +using string_entry_map_t = std::unordered_map>; +using counter_dimension_vec_t = std::vector; +using external_corr_id_set_t = std::unordered_set; + +template +using synced_map = common::Synchronized; + +struct metadata +{ + using agent_info_ptr_vec_t = common::container::small_vector; + + struct inprocess + {}; + + pid_t process_id = 0; + uint64_t process_start_ns = 0; + uint64_t process_end_ns = 0; + agent_info_vec_t agents = {}; + agent_info_map_t agents_map = {}; + agent_counter_info_map_t agent_counter_info = {}; + sdk::buffer_name_info buffer_names = {}; + sdk::callback_name_info callback_names = {}; + synced_map code_objects = {}; + synced_map kernel_symbols = {}; + synced_map marker_messages = {}; + synced_map string_entries = {}; + synced_map external_corr_ids = {}; + + metadata() = default; + metadata(inprocess); + + ~metadata() = 
default; + metadata(const metadata&) = delete; + metadata(metadata&&) noexcept = delete; + metadata& operator=(const metadata&) = delete; + metadata& operator=(metadata&&) noexcept = delete; + + void init(inprocess); + + const agent_info* get_agent(rocprofiler_agent_id_t _val) const; + const code_object_info* get_code_object(uint64_t code_obj_id) const; + const kernel_symbol_info* get_kernel_symbol(uint64_t kernel_id) const; + const tool_counter_info* get_counter_info(uint64_t instance_id) const; + const tool_counter_info* get_counter_info(rocprofiler_counter_id_t id) const; + const counter_dimension_info_vec_t* get_counter_dimension_info(uint64_t instance_id) const; + + code_object_data_vec_t get_code_objects() const; + kernel_symbol_data_vec_t get_kernel_symbols() const; + agent_info_ptr_vec_t get_gpu_agents() const; + counter_info_vec_t get_counter_info() const; + counter_dimension_vec_t get_counter_dimension_info() const; + + template + Tp get_marker_messages(Tp&&); + + bool add_marker_message(uint64_t corr_id, std::string&& msg); + bool add_code_object(code_object_info obj); + bool add_kernel_symbol(kernel_symbol_info&& sym); + bool add_string_entry(size_t key, std::string_view str); + bool add_external_correlation_id(uint64_t); + + std::string_view get_marker_message(uint64_t corr_id) const; + std::string_view get_kernel_name(uint64_t kernel_id, uint64_t rename_id) const; + std::string_view get_kind_name(rocprofiler_callback_tracing_kind_t kind) const; + std::string_view get_kind_name(rocprofiler_buffer_tracing_kind_t kind) const; + std::string_view get_operation_name(rocprofiler_callback_tracing_kind_t kind, + rocprofiler_tracing_operation_t op) const; + std::string_view get_operation_name(rocprofiler_buffer_tracing_kind_t kind, + rocprofiler_tracing_operation_t op) const; + uint64_t get_node_id(rocprofiler_agent_id_t _val) const; + const std::string* get_string_entry(size_t key) const; + +private: + bool inprocess_init = false; +}; + +template +Tp 
+metadata::get_marker_messages(Tp&& _inp) +{ + return marker_messages.rlock( + [](const auto& _data_v, auto&& _inp_v) { + for(const auto& itr : _data_v) + _inp_v.emplace(itr.first, itr.second); + return _inp_v; + }, + std::move(_inp)); +} +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/output/output_config.cpp b/source/lib/output/output_config.cpp new file mode 100644 index 0000000000..81041c57ea --- /dev/null +++ b/source/lib/output/output_config.cpp @@ -0,0 +1,124 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "output_config.hpp" + +namespace rocprofiler +{ +namespace tool +{ +output_config +output_config::load_from_env() +{ + auto cfg = output_config{}; + cfg.parse_env(); + return cfg; +} + +output_config +output_config::load_from_env(output_config&& cfg) +{ + cfg.parse_env(); + return cfg; +} + +void +output_config::parse_env() +{ + stats = common::get_env("ROCPROF_STATS", stats); + stats_summary = common::get_env("ROCPROF_STATS_SUMMARY", stats_summary); + stats_summary_per_domain = + common::get_env("ROCPROF_STATS_SUMMARY_PER_DOMAIN", stats_summary_per_domain); + stats_summary_unit = common::get_env("ROCPROF_STATS_SUMMARY_UNITS", stats_summary_unit); + stats_summary_file = common::get_env("ROCPROF_STATS_SUMMARY_OUTPUT", stats_summary_file); + + perfetto_backend = common::get_env("ROCPROF_PERFETTO_BACKEND", perfetto_backend); + perfetto_buffer_fill_policy = + common::get_env("ROCPROF_PERFETTO_BUFFER_FILL_POLICY", perfetto_buffer_fill_policy); + perfetto_shmem_size_hint = + common::get_env("ROCPROF_PERFETTO_SHMEM_SIZE_HINT_KB", perfetto_shmem_size_hint); + perfetto_buffer_size = common::get_env("ROCPROF_PERFETTO_BUFFER_SIZE_KB", perfetto_buffer_size); + + output_path = common::get_env("ROCPROF_OUTPUT_PATH", output_path); + output_file = common::get_env("ROCPROF_OUTPUT_FILE_NAME", output_file); + tmp_directory = common::get_env("ROCPROF_TMPDIR", tmp_directory); + kernel_rename = common::get_env("ROCPROF_KERNEL_RENAME", false); + + auto to_upper = [](std::string val) { + for(auto& vitr : val) + vitr = toupper(vitr); + return val; + }; + + output_format = common::get_env("ROCPROF_OUTPUT_FORMAT", output_format); + auto entries = std::set{}; + for(const auto& itr : sdk::parse::tokenize(output_format, " \t,;:")) + entries.emplace(to_upper(itr)); + + csv_output = entries.count("CSV") > 0 || entries.empty(); + json_output = entries.count("JSON") > 0; + pftrace_output = entries.count("PFTRACE") > 0; + otf2_output = entries.count("OTF2") > 0; + + const auto 
supported_formats = + std::set{"CSV", "JSON", "PFTRACE", "OTF2", "ROCPD"}; + for(const auto& itr : entries) + { + LOG_IF(FATAL, supported_formats.count(itr) == 0) + << "Unsupported output format type: " << itr; + } + + const auto supported_perfetto_backends = std::set{"inprocess", "system"}; + LOG_IF(FATAL, supported_perfetto_backends.count(perfetto_backend) == 0) + << "Unsupported perfetto backend type: " << perfetto_backend; + + if(stats_summary_unit == "sec") + stats_summary_unit_value = common::units::sec; + else if(stats_summary_unit == "msec") + stats_summary_unit_value = common::units::msec; + else if(stats_summary_unit == "usec") + stats_summary_unit_value = common::units::usec; + else if(stats_summary_unit == "nsec") + stats_summary_unit_value = common::units::nsec; + else + { + ROCP_FATAL << "Unsupported summary units value: " << stats_summary_unit; + } + + if(auto _summary_grps = common::get_env("ROCPROF_STATS_SUMMARY_GROUPS", ""); + !_summary_grps.empty()) + { + stats_summary_groups = + sdk::parse::tokenize(_summary_grps, std::vector{"##@@##"}); + + // remove any empty strings (just in case these slipped through) + stats_summary_groups.erase(std::remove_if(stats_summary_groups.begin(), + stats_summary_groups.end(), + [](const auto& itr) { return itr.empty(); }), + stats_summary_groups.end()); + } + + // enable summary output if any of these are enabled + summary_output = (stats_summary || stats_summary_per_domain || !stats_summary_groups.empty()); +} +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/output/output_config.hpp b/source/lib/output/output_config.hpp new file mode 100644 index 0000000000..197a996d3f --- /dev/null +++ b/source/lib/output/output_config.hpp @@ -0,0 +1,128 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#pragma once
+
+#include "format_path.hpp"
+
+#include "lib/common/environment.hpp"
+#include "lib/common/filesystem.hpp"
+#include "lib/common/mpl.hpp"
+#include "lib/common/units.hpp"
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace rocprofiler
+{
+namespace tool
+{
+// Built-in defaults for the perfetto size settings, expressed in KB.
+namespace defaults
+{
+constexpr auto perfetto_buffer_size_kb = (1 * common::units::GiB) / common::units::KiB;
+constexpr auto perfetto_shmem_size_hint_kb = 64;
+} // namespace defaults
+
+// Settings that control where and in which formats output is written.
+// Instances are normally obtained through load_from_env().
+struct output_config
+{
+    output_config() = default;
+    ~output_config() = default;
+    output_config(const output_config&) = default;
+    output_config(output_config&&) noexcept = default;
+    output_config& operator=(const output_config&) = default;
+    output_config& operator=(output_config&&) noexcept = default;
+
+    // statistics / summary switches
+    bool stats = false;
+    bool stats_summary = false;
+    bool stats_summary_per_domain = false;
+    // one flag per output format
+    bool csv_output = false;
+    bool json_output = false;
+    bool pftrace_output = false;
+    bool otf2_output = false;
+    bool summary_output = false;
+    bool kernel_rename = false;
+    // numeric counterpart of stats_summary_unit
+    uint64_t stats_summary_unit_value = 1;
+    size_t perfetto_shmem_size_hint = defaults::perfetto_shmem_size_hint_kb;
+    size_t perfetto_buffer_size = defaults::perfetto_buffer_size_kb;
+    std::string stats_summary_unit = "nsec";
+    // path settings may contain %key% placeholders; save() runs them
+    // through format_path
+    std::string output_path = "%cwd%";
+    std::string output_file = "%hostname%/%pid%";
+    // defaults to the initial value of output_path (declared above)
+    std::string tmp_directory = output_path;
+    std::string stats_summary_file = "stderr";
+    std::string perfetto_backend = "inprocess";
+    std::string perfetto_buffer_fill_policy = "discard";
+    std::vector stats_summary_groups = {};
+
+    // Serialize the configuration into the given archive.
+    template
+    void save(ArchiveT&) const;
+
+    // Loading from an archive is intentionally a no-op.
+    template
+    void load(ArchiveT&)
+    {}
+
+    // Build a configuration from defaults plus environment variables.
+    static output_config load_from_env();
+    // Same, but starting from the given configuration.
+    static output_config load_from_env(output_config&&);
+
+private:
+    void parse_env();
+
+    // raw, unparsed list of requested output formats
+    std::string output_format = "ROCPD";
+};
+
+// Writes the path settings twice (formatted through format_path and raw),
+// followed by the perfetto and summary settings, as named values.
+template
+void
+output_config::save(ArchiveT& ar) const
+{
+// helper macros, local to this function (undefined again below)
+#define CFG_SERIALIZE_MEMBER(VAR) ar(cereal::make_nvp(#VAR, VAR))
+#define CFG_SERIALIZE_NAMED_MEMBER(NAME, VAR) ar(cereal::make_nvp(NAME, VAR))
+
+    CFG_SERIALIZE_NAMED_MEMBER("output_path", format_path(output_path));
+    CFG_SERIALIZE_NAMED_MEMBER("output_file", format_path(output_file));
+    CFG_SERIALIZE_NAMED_MEMBER("tmp_directory", format_path(tmp_directory));
+    CFG_SERIALIZE_NAMED_MEMBER("raw_output_path", output_path);
+    CFG_SERIALIZE_NAMED_MEMBER("raw_output_file", output_file);
+    CFG_SERIALIZE_NAMED_MEMBER("raw_tmp_directory", tmp_directory);
+
+    CFG_SERIALIZE_MEMBER(perfetto_shmem_size_hint);
+    CFG_SERIALIZE_MEMBER(perfetto_buffer_size);
+    CFG_SERIALIZE_MEMBER(perfetto_buffer_fill_policy);
+    CFG_SERIALIZE_MEMBER(perfetto_backend);
+
+    CFG_SERIALIZE_NAMED_MEMBER("summary", stats_summary);
+    CFG_SERIALIZE_NAMED_MEMBER("summary_per_domain", stats_summary_per_domain);
+    CFG_SERIALIZE_NAMED_MEMBER("summary_groups", stats_summary_groups);
+    CFG_SERIALIZE_NAMED_MEMBER("summary_unit", stats_summary_unit);
+    CFG_SERIALIZE_NAMED_MEMBER("summary_file", stats_summary_file);
+
+#undef CFG_SERIALIZE_MEMBER
+#undef CFG_SERIALIZE_NAMED_MEMBER
+}
+} // namespace tool
+} // namespace rocprofiler
diff --git a/source/lib/output/output_key.cpp b/source/lib/output/output_key.cpp
new file mode 100644
index 0000000000..ce4e590c51
--- /dev/null
+++ b/source/lib/output/output_key.cpp
@@ -0,0 +1,280 @@
+// MIT License
+//
+// Copyright (c) 2023 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// + +#include "output_key.hpp" +#include "format_path.hpp" + +#include "lib/common/environment.hpp" +#include "lib/common/filesystem.hpp" +#include "lib/common/utility.hpp" + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +namespace +{ +namespace fs = common::filesystem; + +template +auto +as_pointer(Tp&& _val) +{ + return new Tp{_val}; +} + +std::string* +get_local_datetime(const std::string& dt_format, std::time_t*& dt_curr); + +std::time_t* launch_time = nullptr; +const auto* launch_clock = as_pointer(std::chrono::system_clock::now()); +const auto* launch_datetime = + get_local_datetime(common::get_env("ROCPROF_TIME_FORMAT", "%F_%H.%M"), launch_time); +const auto* launch_date = + get_local_datetime(common::get_env("ROCPROF_DATE_FORMAT", "%F"), launch_time); + +std::string* +get_local_datetime(const std::string& dt_format, std::time_t*& _dt_curr) +{ + constexpr auto strsize = 512; + + if(!_dt_curr) _dt_curr = new std::time_t{std::time_t{std::time(nullptr)}}; + + char mbstr[strsize] = {}; + memset(mbstr, '\0', sizeof(mbstr) * sizeof(char)); + + if(std::strftime(mbstr, sizeof(mbstr) - 1, dt_format.c_str(), std::localtime(_dt_curr)) != 0) + return new std::string{mbstr}; + + return nullptr; +} + +bool +not_is_space(int ch) +{ + return std::isspace(ch) == 0; +} + +std::string +ltrim(std::string s, bool (*f)(int) = not_is_space) +{ + s.erase(s.begin(), std::find_if(s.begin(), s.end(), f)); + return s; +} + +std::string +rtrim(std::string s, bool (*f)(int) = not_is_space) +{ + s.erase(std::find_if(s.rbegin(), s.rend(), f).base(), s.end()); + return s; +} + +std::string +trim(std::string s, bool (*f)(int) = not_is_space) +{ + ltrim(s, f); + rtrim(s, f); + return s; +} + +std::string +get_hostname() +{ + auto _hostname_buff = std::array{}; + _hostname_buff.fill('\0'); + if(gethostname(_hostname_buff.data(), _hostname_buff.size() - 1) != 0) + { + auto _err = errno; + ROCP_WARNING << "Hostname unknown. 
gethostname failed with error code " << _err << ": " + << strerror(_err); + return std::string{"UNKNOWN_HOSTNAME"}; + } + + return std::string{_hostname_buff.data()}; +} + +std::vector +get_siblings(pid_t _id = getppid()) +{ + auto _data = std::vector{}; + + auto _ifs = std::ifstream{"/proc/" + std::to_string(_id) + "/task/" + std::to_string(_id) + + "/children"}; + while(_ifs) + { + pid_t _n = 0; + _ifs >> _n; + if(!_ifs || _n <= 0) break; + _data.emplace_back(_n); + } + return _data; +} + +auto +get_num_siblings(pid_t _id = getppid()) +{ + return get_siblings(_id).size(); +} +} // namespace + +output_key::output_key(std::string _key, std::string _val, std::string _desc) +: key{std::move(_key)} +, value{std::move(_val)} +, description{std::move(_desc)} +{} + +std::vector +output_keys(std::string _tag) +{ + using strpair_t = std::pair; + + auto _cmdline = common::read_command_line(getpid()); + + if(_tag.empty() && !_cmdline.empty()) _tag = ::basename(_cmdline.front().c_str()); + + std::string _argv_string = {}; // entire argv cmd + std::string _args_string = {}; // cmdline args + std::string _argt_string = _tag; // prefix + cmdline args + const std::string& _tag0_string = _tag; // only the basic prefix + auto _options = std::vector{}; + + auto _replace = [](auto& _v, const strpair_t& pitr) { + auto pos = std::string::npos; + while((pos = _v.find(pitr.first)) != std::string::npos) + _v.replace(pos, pitr.first.length(), pitr.second); + }; + + if(_cmdline.size() > 1 && _cmdline.at(1) == "--") _cmdline.erase(_cmdline.begin() + 1); + + for(auto& itr : _cmdline) + { + itr = trim(itr); + _replace(itr, {"/", "_"}); + while(!itr.empty() && itr.at(0) == '.') + itr = itr.substr(1); + while(!itr.empty() && itr.at(0) == '_') + itr = itr.substr(1); + } + + if(!_cmdline.empty()) + { + for(size_t i = 0; i < _cmdline.size(); ++i) + { + const auto _l = std::string{(i == 0) ? "" : "_"}; + auto _v = _cmdline.at(i); + _argv_string += _l + _v; + if(i > 0) + { + _argt_string += (i > 1) ? 
(_l + _v) : _v; + _args_string += (i > 1) ? (_l + _v) : _v; + } + } + } + + auto _mpi_size = get_mpi_size(); + auto _mpi_rank = get_mpi_rank(); + + auto _dmp_size = fmt::format("{}", (_mpi_size) > 0 ? _mpi_size : 1); + auto _dmp_rank = fmt::format("{}", (_mpi_rank) > 0 ? _mpi_rank : 0); + auto _proc_id = fmt::format("{}", getpid()); + auto _parent_id = fmt::format("{}", getppid()); + auto _pgroup_id = fmt::format("{}", getpgid(getpid())); + auto _session_id = fmt::format("{}", getsid(getpid())); + auto _proc_size = fmt::format("{}", get_num_siblings()); + auto _pwd_string = common::get_env("PWD", "."); + auto _slurm_job_id = common::get_env("SLURM_JOB_ID", "0"); + auto _slurm_proc_id = common::get_env("SLURM_PROCID", _dmp_rank); + + auto _uniq_id = _proc_id; + if(common::get_env("SLURM_PROCID", -1) >= 0) + { + _uniq_id = _slurm_proc_id; + } + else if(_mpi_size > 0 || _mpi_rank >= 0) + { + _uniq_id = _dmp_rank; + } + + for(auto&& itr : std::initializer_list{ + {"%argv%", _argv_string, "Entire command-line condensed into a single string"}, + {"%argt%", + _argt_string, + "Similar to `%argv%` except basename of first command line argument"}, + {"%args%", _args_string, "All command line arguments condensed into a single string"}, + {"%tag%", _tag0_string, "Basename of first command line argument"}}) + { + _options.emplace_back(itr); + } + + if(!_cmdline.empty()) + { + for(size_t i = 0; i < _cmdline.size(); ++i) + { + auto _v = _cmdline.at(i); + _options.emplace_back(fmt::format("%arg{}%", i), _v, fmt::format("Argument #{}", i)); + } + } + + auto _launch_time = (launch_datetime) ? *launch_datetime : std::string{".UNKNOWN_LAUNCH_TIME."}; + auto _launch_date = (launch_date) ? 
*launch_date : std::string{".UNKNOWN_LAUNCH_DATE."}; + auto _hostname = get_hostname(); + + for(auto&& itr : std::initializer_list{ + {"%hostname%", _hostname, "Network hostname"}, + {"%pid%", _proc_id, "Process identifier"}, + {"%ppid%", _parent_id, "Parent process identifier"}, + {"%pgid%", _pgroup_id, "Process group identifier"}, + {"%psid%", _session_id, "Process session identifier"}, + {"%psize%", _proc_size, "Number of sibling process"}, + {"%job%", _slurm_job_id, "SLURM_JOB_ID env variable"}, + {"%rank%", _slurm_proc_id, "MPI/UPC++ rank"}, + {"%size%", _dmp_size, "MPI/UPC++ size"}, + {"%nid%", _uniq_id, "%rank% if possible, otherwise %pid%"}, + {"%cwd%", fs::current_path().string(), "Current working path"}, + {"%launch_date%", _launch_date, "Date according to date format ROCPROF_DATE_FORMAT"}, + {"%launch_time%", _launch_time, "Date and/or time according to ROCPROF_TIME_FORMAT"}, + }) + { + _options.emplace_back(itr); + } + + for(auto&& itr : std::initializer_list{ + {"%h", _hostname, "Shorthand for %hostname%"}, + {"%p", _proc_id, "Shorthand for %pid%"}, + {"%j", _slurm_job_id, "Shorthand for %job%"}, + {"%r", _slurm_proc_id, "Shorthand for %rank%"}, + {"%s", _dmp_size, "Shorthand for %size"}, + }) + { + _options.emplace_back(itr); + } + + return _options; +} +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/output/output_key.hpp b/source/lib/output/output_key.hpp new file mode 100644 index 0000000000..6dfca519c7 --- /dev/null +++ b/source/lib/output/output_key.hpp @@ -0,0 +1,64 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+//
+
+#pragma once
+
+#include "lib/common/mpl.hpp"
+
+#include
+
+#include
+#include
+#include
+
+namespace rocprofiler
+{
+namespace tool
+{
+// A placeholder key together with the value it expands to and a
+// human-readable description.
+struct output_key
+{
+    // Construct from an already-formatted string value.
+    output_key(std::string _key, std::string _val, std::string _desc = {});
+
+    // Construct from a non-string value; the value is converted to text with
+    // fmt::format("{}", ...).
+    template ::value, int> = 0>
+    output_key(std::string _key, Tp&& _val, std::string _desc = {});
+
+    // NOTE(review): conversion to a pair is declared here; its definition is
+    // not part of this header -- confirm it exists in a source file.
+    operator std::pair() const;
+
+    std::string key = {};
+    std::string value = {};
+    std::string description = {};
+};
+
+template ::value, int>>
+output_key::output_key(std::string _key, Tp&& _val, std::string _desc)
+: key{std::move(_key)}
+, value{fmt::format("{}", std::forward(_val))}
+, description{std::move(_desc)}
+{}
+
+// Return the list of supported output keys. _tag is optional and defaults
+// to an empty string.
+std::vector
+output_keys(std::string _tag = {});
+} // namespace tool
+} // namespace rocprofiler
diff --git a/source/lib/rocprofiler-sdk-tool/output_file.cpp b/source/lib/output/output_stream.cpp
similarity index 81%
rename from source/lib/rocprofiler-sdk-tool/output_file.cpp
rename to source/lib/output/output_stream.cpp
index 14a1227af0..1a7845cc59 100644
--- a/source/lib/rocprofiler-sdk-tool/output_file.cpp
+++ b/source/lib/output/output_stream.cpp
@@ -20,8 +20,7 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
-#include "output_file.hpp" -#include "config.hpp" +#include "output_stream.hpp" #include "lib/common/filesystem.hpp" #include "lib/common/logging.hpp" @@ -29,6 +28,9 @@ #include #include +#include +#include + namespace rocprofiler { namespace tool @@ -42,9 +44,9 @@ const auto stderr_names = std::unordered_set{"stderr", "STDERR } // namespace std::string -get_output_filename(std::string_view fname, std::string_view ext) +get_output_filename(const output_config& cfg, std::string_view fname, std::string_view ext) { - auto cfg_output_path = tool::format(tool::get_config().output_path); + auto cfg_output_path = tool::format_path(cfg.output_path); // add a period to provided file extension if necessary constexpr auto period = std::string_view{"."}; @@ -53,7 +55,7 @@ get_output_filename(std::string_view fname, std::string_view ext) fmt::format("{}{}", (!ext.empty() && ext.find('.') != 0) ? period : noperiod, ext); auto output_path = fs::path{cfg_output_path}; - auto output_prefix = tool::format(tool::get_config().output_file); + auto output_prefix = tool::format_path(cfg.output_file); if(fs::exists(output_path) && !fs::is_directory(fs::status(output_path))) { @@ -66,7 +68,8 @@ get_output_filename(std::string_view fname, std::string_view ext) fs::create_directories(output_path); } - auto _ofname = tool::format(output_path / fmt::format("{}_{}{}", output_prefix, fname, _ext)); + auto _ofname = + tool::format_path(output_path / fmt::format("{}_{}{}", output_prefix, fname, _ext)); // the prefix may contain a subdirectory if(auto _ofname_path = fs::path{_ofname}.parent_path(); !fs::exists(_ofname_path)) @@ -83,10 +86,10 @@ get_output_filename(std::string_view fname, std::string_view ext) return _ofname; } -output_stream_t -get_output_stream(std::string_view fname, std::string_view ext) +output_stream +get_output_stream(const output_config& cfg, std::string_view fname, std::string_view ext) { - auto cfg_output_path = tool::format(tool::get_config().output_path); + auto 
cfg_output_path = tool::format_path(cfg.output_path); if(stdout_names.count(cfg_output_path) > 0 || stdout_names.count(fname) > 0) return {&std::cout, [](auto*&) {}}; @@ -95,7 +98,7 @@ get_output_stream(std::string_view fname, std::string_view ext) else if(cfg_output_path.empty() || fname.empty()) return {&std::clog, [](auto*&) {}}; - auto output_file = get_output_filename(fname, ext); + auto output_file = get_output_filename(cfg, fname, ext); auto* _ofs = new std::ofstream{output_file}; LOG_IF(FATAL, !_ofs && !*_ofs) << fmt::format("Failed to open {} for output", output_file); @@ -107,15 +110,5 @@ get_output_stream(std::string_view fname, std::string_view ext) v = nullptr; }}; } - -output_file::~output_file() -{ - if(m_os.stream) - ROCP_INFO << "Closing result file: " << m_name; - else - ROCP_WARNING << "output_file::~output_file does not have a output stream instance!"; - - m_os.close(); -} } // namespace tool } // namespace rocprofiler diff --git a/source/lib/output/output_stream.hpp b/source/lib/output/output_stream.hpp new file mode 100644 index 0000000000..d666d6e80c --- /dev/null +++ b/source/lib/output/output_stream.hpp @@ -0,0 +1,85 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "lib/common/filesystem.hpp" +#include "output_config.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace tool +{ +using ostream_dtor_t = void (*)(std::ostream*&); + +using output_stream_pair_t = std::pair; + +struct output_stream +{ + output_stream() = default; + output_stream(std::ostream* _os, ostream_dtor_t _dtor) + : stream{_os} + , dtor{_dtor} + {} + + ~output_stream() { close(); } + output_stream(const output_stream&) = delete; + output_stream(output_stream&&) noexcept = default; + output_stream& operator=(const output_stream&) = delete; + output_stream& operator=(output_stream&&) noexcept = default; + + explicit operator bool() const { return stream != nullptr; } + + template + std::ostream& operator<<(Tp&& value) + { + return ((stream) ? 
*stream : std::cerr) << std::forward(value) << std::flush; + } + + void close() + { + if(stream) (*stream) << std::flush; + if(dtor) dtor(stream); + } + + bool writes_to_file() const { return (dynamic_cast(stream) != nullptr); } + + std::ostream* stream = nullptr; + ostream_dtor_t dtor = nullptr; +}; + +std::string +get_output_filename(const output_config& cfg, std::string_view fname, std::string_view ext); + +output_stream +get_output_stream(const output_config& cfg, std::string_view fname, std::string_view ext); +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/statistics.cpp b/source/lib/output/statistics.cpp similarity index 100% rename from source/lib/rocprofiler-sdk-tool/statistics.cpp rename to source/lib/output/statistics.cpp diff --git a/source/lib/rocprofiler-sdk-tool/statistics.hpp b/source/lib/output/statistics.hpp similarity index 100% rename from source/lib/rocprofiler-sdk-tool/statistics.hpp rename to source/lib/output/statistics.hpp diff --git a/source/lib/rocprofiler-sdk-tool/helper.cpp b/source/lib/output/timestamps.hpp similarity index 69% rename from source/lib/rocprofiler-sdk-tool/helper.cpp rename to source/lib/output/timestamps.hpp index 7e5b41593c..c987a8657b 100644 --- a/source/lib/rocprofiler-sdk-tool/helper.cpp +++ b/source/lib/output/timestamps.hpp @@ -20,28 +20,18 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-#include "helper.hpp" -#include "config.hpp" +#pragma once #include -#include -#include -#include -#include -#include -#include -#include -#include - -::rocprofiler::sdk::buffer_name_info_t -get_buffer_id_names() +namespace rocprofiler { - return ::rocprofiler::sdk::get_buffer_tracing_names(); -} - -::rocprofiler::sdk::callback_name_info_t -get_callback_id_names() +namespace tool { - return ::rocprofiler::sdk::get_callback_tracing_names(); -} +struct timestamps_t +{ + rocprofiler_timestamp_t app_start_time; + rocprofiler_timestamp_t app_end_time; +}; +} // namespace tool +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/tmp_file.cpp b/source/lib/output/tmp_file.cpp similarity index 86% rename from source/lib/rocprofiler-sdk-tool/tmp_file.cpp rename to source/lib/output/tmp_file.cpp index fffd32ba77..6333abb864 100644 --- a/source/lib/rocprofiler-sdk-tool/tmp_file.cpp +++ b/source/lib/output/tmp_file.cpp @@ -21,9 +21,9 @@ // SOFTWARE. #include "tmp_file.hpp" -#include "config.hpp" #include "lib/common/filesystem.hpp" +#include "lib/common/logging.hpp" namespace fs = ::rocprofiler::common::filesystem; @@ -38,6 +38,8 @@ tmp_file::fopen(const char* _mode) // if the filepath does not exist, open in out mode to create it std::ofstream _ofs{filename}; } + + ROCP_INFO << "opening (via fopen) temporary file: '" << filename << "'..."; file = std::fopen(filename.c_str(), _mode); if(file) fd = ::fileno(file); @@ -59,10 +61,12 @@ tmp_file::flush() { if(stream.is_open()) { + ROCP_INFO << "flushing temporary file: '" << filename << "'..."; stream.flush(); } else if(file != nullptr) { + ROCP_INFO << "flushing temporary file: '" << filename << "'..."; int _ret = fflush(file); int _cnt = 0; while(_ret == EAGAIN || _ret == EINTR) @@ -84,11 +88,13 @@ tmp_file::close() if(stream.is_open()) { + ROCP_INFO << "closing temporary file: '" << filename << "'..."; stream.close(); return !stream.is_open(); } else if(file != nullptr) { + ROCP_INFO << "closing temporary 
file: '" << filename << "'..."; auto _ret = fclose(file); if(_ret == 0) { @@ -114,6 +120,7 @@ tmp_file::open(std::ios::openmode _mode) _ofs.open(filename, std::ofstream::binary | std::ofstream::out); } + ROCP_INFO << "opening temporary file: '" << filename << "'..."; stream.open(filename, _mode); return (stream.is_open() && stream.good()); } @@ -124,6 +131,7 @@ tmp_file::remove() close(); if(fs::exists(filename)) { + ROCP_INFO << "removing temporary file: '" << filename << "'..."; auto _ret = ::remove(filename.c_str()); return (_ret == 0); } diff --git a/source/lib/rocprofiler-sdk-tool/tmp_file.hpp b/source/lib/output/tmp_file.hpp similarity index 100% rename from source/lib/rocprofiler-sdk-tool/tmp_file.hpp rename to source/lib/output/tmp_file.hpp diff --git a/source/lib/rocprofiler-sdk-tool/generatePerfetto.hpp b/source/lib/output/tmp_file_buffer.cpp similarity index 57% rename from source/lib/rocprofiler-sdk-tool/generatePerfetto.hpp rename to source/lib/output/tmp_file_buffer.cpp index c30ae9ce55..f90e187a8c 100644 --- a/source/lib/rocprofiler-sdk-tool/generatePerfetto.hpp +++ b/source/lib/output/tmp_file_buffer.cpp @@ -20,27 +20,36 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-#pragma once +#include "tmp_file_buffer.hpp" +#include "domain_type.hpp" -#include "helper.hpp" +#include -#include +#include namespace rocprofiler { namespace tool { -void -write_perfetto( - tool_table* tool_functions, - uint64_t pid, - std::vector agent_data, - std::deque* hip_api_data, - std::deque* hsa_api_data, - std::deque* kernel_dispatch_data, - std::deque* memory_copy_data, - std::deque* marker_api_data, - std::deque* scratch_memory_data, - std::deque* rccl_api_data); +std::string +compose_tmp_file_name(const output_config& cfg, domain_type buffer_type) +{ + return rocprofiler::tool::format_path(fmt::format("{}/.rocprofv3/{}-{}.dat", + cfg.tmp_directory, + "%ppid%-%pid%", + get_domain_trace_file_name(buffer_type))); +} + +tmp_file_name_callback_t& +get_tmp_file_name_callback() +{ + static tmp_file_name_callback_t val = [](domain_type type) -> std::string { + ROCP_CI_LOG(WARNING) << "rocprofv3 does not have a tmp file name callback defined for " + << get_domain_column_name(type) << "."; + auto _cfg = output_config::load_from_env(); + return compose_tmp_file_name(_cfg, type); + }; + return val; +} } // namespace tool } // namespace rocprofiler diff --git a/source/lib/output/tmp_file_buffer.hpp b/source/lib/output/tmp_file_buffer.hpp new file mode 100644 index 0000000000..bbe189a254 --- /dev/null +++ b/source/lib/output/tmp_file_buffer.hpp @@ -0,0 +1,217 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#pragma once
+
+#include "domain_type.hpp"
+#include "output_config.hpp"
+#include "tmp_file.hpp"
+
+#include "lib/common/container/ring_buffer.hpp"
+#include "lib/common/logging.hpp"
+#include "lib/common/units.hpp"
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace rocprofiler
+{
+namespace tool
+{
+template
+using ring_buffer_t = rocprofiler::common::container::ring_buffer;
+
+// Callback that produces the temporary file name for a domain.
+using tmp_file_name_callback_t = std::function;
+
+// Compose the temporary file name for buffer_type from the settings in cfg.
+std::string
+compose_tmp_file_name(const output_config& cfg, domain_type buffer_type);
+
+// Access the callback used to name temporary files. A reference is
+// returned, so callers can replace the callback.
+tmp_file_name_callback_t&
+get_tmp_file_name_callback();
+
+// In-memory ring buffer for one domain plus the temporary file it is
+// offloaded to.
+template
+struct file_buffer
+{
+    file_buffer() = delete;
+    // The buffer is sized to 16 pages; the file name comes from the tmp file
+    // name callback.
+    file_buffer(domain_type _domain)
+    : domain{_domain}
+    , buffer{16 * static_cast(::rocprofiler::common::units::get_page_size())}
+    , file{get_tmp_file_name_callback()(_domain)}
+    {}
+
+    ~file_buffer() = default;
+    file_buffer(const file_buffer&) = delete;
+    file_buffer(file_buffer&&) noexcept = default;
+    file_buffer& operator=(const file_buffer&) = delete;
+    file_buffer& operator=(file_buffer&&) noexcept = default;
+
+    domain_type domain = {};
+    ring_buffer_t buffer = {};
+    tmp_file file;
+};
+
+// Guard specialization: instantiating it is a compile-time error (see the
+// static_assert message).
+template
+struct file_buffer>
+{
+    static_assert(std::is_void::value && std::is_empty::value,
+                  "error! instantiated with ring_buffer_t instead of Tp");
+};
+
+// Return a reference to the function-local static file_buffer pointer. The
+// buffer is created on the first call, so `type` is only used by that call.
+template
+file_buffer*&
+get_tmp_file_buffer(domain_type type)
+{
+    static file_buffer* val = new file_buffer{type};
+    return val;
+}
+
+// Write the contents of the ring buffer to the temporary file and clear the
+// buffer. Warns and returns when the buffer pointer is null.
+template
+void
+offload_buffer(domain_type type)
+{
+    auto* filebuf = get_tmp_file_buffer(type);
+
+    if(!filebuf)
+    {
+        ROCP_CI_LOG(WARNING) << "rocprofv3 cannot offload buffer for "
+                             << get_domain_column_name(type) << ". Buffer has been destroyed.";
+        return;
+    }
+
+    auto _lk = std::lock_guard(filebuf->file.file_mutex);
+    // function-local static: the file is opened by the first call only
+    [[maybe_unused]] static auto _success = filebuf->file.open();
+    auto& _fs = filebuf->file.stream;
+    // record the stream position before this chunk is written
+    filebuf->file.file_pos.emplace(_fs.tellg());
+    filebuf->buffer.save(_fs);
+    filebuf->buffer.clear();
+    CHECK(filebuf->buffer.is_empty() == true);
+}
+
+// Store one record in the ring buffer, offloading the buffer to the
+// temporary file first when no slot is available. The record is dropped
+// with a warning when no slot can be obtained.
+template
+void
+write_ring_buffer(Tp _v, domain_type type)
+{
+    auto* filebuf = get_tmp_file_buffer(type);
+
+    if(!filebuf)
+    {
+        ROCP_CI_LOG(WARNING) << "rocprofv3 is dropping record from domain "
+                             << get_domain_column_name(type) << ". Buffer has been destroyed.";
+        return;
+    }
+    else if(filebuf->buffer.capacity() == 0)
+    {
+        ROCP_CI_LOG(WARNING) << "rocprofv3 is dropping record from domain "
+                             << get_domain_column_name(type) << ". Buffer has a capacity of zero.";
+        return;
+    }
+
+    auto* ptr = filebuf->buffer.request(false);
+    if(ptr == nullptr)
+    {
+        offload_buffer(type);
+        ptr = filebuf->buffer.request(false);
+
+        // if failed, try again
+        if(!ptr) ptr = filebuf->buffer.request(false);
+
+        // after second failure, emit warning message
+        ROCP_CI_LOG_IF(WARNING, !ptr)
+            << "rocprofv3 is dropping record from domain " << get_domain_column_name(type)
+            << ". No space in buffer: "
+            << fmt::format(
+                   "capacity={}, record_size={}, used_count={}, free_count={} | raw_info=[{}]",
+                   filebuf->buffer.capacity(),
+                   filebuf->buffer.data_size(),
+                   filebuf->buffer.count(),
+                   filebuf->buffer.free(),
+                   filebuf->buffer.as_string());
+    }
+
+    if(ptr)
+    {
+        // place the record into the slot using the first operation the
+        // type supports
+        if constexpr(std::is_move_constructible::value)
+        {
+            new(ptr) Tp{std::move(_v)};
+        }
+        else if constexpr(std::is_move_assignable::value)
+        {
+            *ptr = std::move(_v);
+        }
+        else if constexpr(std::is_copy_constructible::value)
+        {
+            new(ptr) Tp{_v};
+        }
+        else if constexpr(std::is_copy_assignable::value)
+        {
+            *ptr = _v;
+        }
+        else
+        {
+            static_assert(std::is_void::value,
+                          "data type is neither move/copy constructible nor move/copy assignable");
+        }
+    }
+}
+
+// Offload whatever is still held in the ring buffer, if anything.
+template
+void
+flush_tmp_buffer(domain_type type)
+{
+    auto* filebuf = get_tmp_file_buffer(type);
+    if(filebuf && !filebuf->buffer.is_empty()) offload_buffer(type);
+}
+
+// Reopen the temporary file for binary reading. Loading of the stored
+// buffers is currently commented out, so nothing is read back here.
+template
+void
+read_tmp_file(domain_type type)
+{
+    auto* filebuf = get_tmp_file_buffer(type);
+
+    if(!filebuf)
+    {
+        ROCP_CI_LOG(WARNING) << "rocprofv3 cannot read tmp file for "
+                             << get_domain_column_name(type) << ". Buffer has been destroyed.";
+        return;
+    }
+
+    auto _lk = std::lock_guard{filebuf->file.file_mutex};
+    auto& _fs = filebuf->file.stream;
+    if(_fs.is_open()) _fs.close();
+    filebuf->file.open(std::ios::binary | std::ios::in);
+    // for(auto itr : filebuf->file.file_pos)
+    // {
+    //     _fs.seekg(itr); // set to the absolute position
+    //     if(_fs.eof()) break;
+    //     auto _buffer = ring_buffer_t{};
+    //     _buffer.load(_fs);
+    //     _data.emplace_back(std::move(_buffer));
+    // }
+}
+} // namespace tool
+} // namespace rocprofiler
diff --git a/source/lib/rocprofiler-sdk-tool/CMakeLists.txt b/source/lib/rocprofiler-sdk-tool/CMakeLists.txt
index bb2514cc62..229bd76e91 100644
--- a/source/lib/rocprofiler-sdk-tool/CMakeLists.txt
+++ b/source/lib/rocprofiler-sdk-tool/CMakeLists.txt
@@ -4,37 +4,9 @@
 rocprofiler_activate_clang_tidy()
-set(TOOL_HEADERS
-    buffered_output.hpp
-    config.hpp
-    csv.hpp
-    domain_type.hpp
-    generateCSV.hpp
-    generateJSON.hpp
-    generateOTF2.hpp
-    generatePerfetto.hpp
-    generateStats.hpp
-    helper.hpp
-    output_file.hpp
-    statistics.hpp
-    tmp_file_buffer.hpp
-    tmp_file.hpp)
+set(TOOL_HEADERS config.hpp helper.hpp)
-set(TOOL_SOURCES
-    config.cpp
-    domain_type.cpp
-    generateCSV.cpp
-    generateJSON.cpp
-    generateOTF2.cpp
-    generatePerfetto.cpp
-    generateStats.cpp
-    helper.cpp
-    main.c
-    output_file.cpp
-    statistics.cpp
-    tmp_file_buffer.cpp
-    tmp_file.cpp
-    tool.cpp)
+set(TOOL_SOURCES config.cpp main.c tool.cpp)
 add_library(rocprofiler-sdk-tool SHARED)
 target_sources(rocprofiler-sdk-tool PRIVATE ${TOOL_SOURCES} ${TOOL_HEADERS})
@@ -46,6 +18,7 @@ target_link_libraries(
     rocprofiler-sdk::rocprofiler-sdk-build-flags
     rocprofiler-sdk::rocprofiler-sdk-memcheck
     rocprofiler-sdk::rocprofiler-sdk-common-library
+    rocprofiler-sdk::rocprofiler-sdk-output-library
     rocprofiler-sdk::rocprofiler-sdk-cereal
     rocprofiler-sdk::rocprofiler-sdk-perfetto
     rocprofiler-sdk::rocprofiler-sdk-otf2)
diff --git a/source/lib/rocprofiler-sdk-tool/config.cpp b/source/lib/rocprofiler-sdk-tool/config.cpp
index 4450d0860b..d3740b1a4a 100644 --- a/source/lib/rocprofiler-sdk-tool/config.cpp +++ b/source/lib/rocprofiler-sdk-tool/config.cpp @@ -30,6 +30,7 @@ #include "lib/common/logging.hpp" #include "lib/common/units.hpp" #include "lib/common/utility.hpp" +#include "lib/output/output_key.hpp" #include @@ -55,20 +56,6 @@ namespace tool { namespace { -template -auto -as_pointer(Tp&& _val) -{ - return new Tp{_val}; -} - -std::string* -get_local_datetime(const std::string& dt_format, std::time_t*& dt_curr); - -std::time_t* launch_time = nullptr; -const auto* launch_clock = as_pointer(std::chrono::system_clock::now()); -const auto* launch_datetime = - get_local_datetime(get_env("ROCP_TIME_FORMAT", "%F_%H.%M"), launch_time); const auto env_regexes = new std::array{std::regex{"(.*)%(env|ENV)\\{([A-Z0-9_]+)\\}%(.*)"}, std::regex{"(.*)\\$(env|ENV)\\{([A-Z0-9_]+)\\}(.*)"}, @@ -79,38 +66,6 @@ const auto env_regexes = // - %q{USER} Compatibility with NVIDIA // -std::string* -get_local_datetime(const std::string& dt_format, std::time_t*& _dt_curr) -{ - constexpr auto strsize = 512; - - if(!_dt_curr) _dt_curr = new std::time_t{std::time_t{std::time(nullptr)}}; - - char mbstr[strsize] = {}; - memset(mbstr, '\0', sizeof(mbstr) * sizeof(char)); - - if(std::strftime(mbstr, sizeof(mbstr) - 1, dt_format.c_str(), std::localtime(_dt_curr)) != 0) - return new std::string{mbstr}; - - return nullptr; -} - -std::string -get_hostname() -{ - auto _hostname_buff = std::array{}; - _hostname_buff.fill('\0'); - if(gethostname(_hostname_buff.data(), _hostname_buff.size() - 1) != 0) - { - auto _err = errno; - ROCP_WARNING << "Hostname unknown. 
gethostname failed with error code " << _err << ": " - << strerror(_err); - return std::string{"UNKNOWN_HOSTNAME"}; - } - - return std::string{_hostname_buff.data()}; -} - inline bool not_is_space(int ch) { @@ -139,29 +94,6 @@ trim(std::string s, bool (*f)(int) = not_is_space) return s; } -inline std::vector -get_siblings(pid_t _id = getppid()) -{ - auto _data = std::vector{}; - - std::ifstream _ifs{"/proc/" + std::to_string(_id) + "/task/" + std::to_string(_id) + - "/children"}; - while(_ifs) - { - pid_t _n = 0; - _ifs >> _n; - if(!_ifs || _n <= 0) break; - _data.emplace_back(_n); - } - return _data; -} - -inline auto -get_num_siblings(pid_t _id = getppid()) -{ - return get_siblings(_id).size(); -} - // replace unsuported specail chars with space void handle_special_chars(std::string& str) @@ -256,295 +188,13 @@ parse_counters(std::string line) } } // namespace -int -get_mpi_size() -{ - static int _v = get_env("OMPI_COMM_WORLD_SIZE", - get_env("MV2_COMM_WORLD_SIZE", get_env("MPI_SIZE", 0))); - return _v; -} - -int -get_mpi_rank() -{ - static int _v = get_env("OMPI_COMM_WORLD_RANK", - get_env("MV2_COMM_WORLD_RANK", get_env("MPI_RANK", -1))); - return _v; -} - config::config() -: kernel_filter_range{get_kernel_filter_range( +: base_type{base_type::load_from_env()} +, kernel_filter_range{get_kernel_filter_range( get_env("ROCPROF_KERNEL_FILTER_RANGE", std::string{}))} , counters{parse_counters(get_env("ROCPROF_COUNTERS", std::string{}))} { - auto to_upper = [](std::string val) { - for(auto& vitr : val) - vitr = toupper(vitr); - return val; - }; - - auto output_format = get_env("ROCPROF_OUTPUT_FORMAT", "CSV"); - auto entries = std::set{}; - for(const auto& itr : sdk::parse::tokenize(output_format, " \t,;:")) - entries.emplace(to_upper(itr)); - - csv_output = entries.count("CSV") > 0 || entries.empty(); - json_output = entries.count("JSON") > 0; - pftrace_output = entries.count("PFTRACE") > 0; - otf2_output = entries.count("OTF2") > 0; - - const auto supported_formats = 
std::set{"CSV", "JSON", "PFTRACE", "OTF2"}; - for(const auto& itr : entries) - { - LOG_IF(FATAL, supported_formats.count(itr) == 0) - << "Unsupported output format type: " << itr; - } - if(kernel_filter_include.empty()) kernel_filter_include = std::string(".*"); - - const auto supported_perfetto_backends = std::set{"inprocess", "system"}; - LOG_IF(FATAL, supported_perfetto_backends.count(perfetto_backend) == 0) - << "Unsupported perfetto backend type: " << perfetto_backend; - - if(stats_summary_unit == "sec") - stats_summary_unit_value = common::units::sec; - else if(stats_summary_unit == "msec") - stats_summary_unit_value = common::units::msec; - else if(stats_summary_unit == "usec") - stats_summary_unit_value = common::units::usec; - else if(stats_summary_unit == "nsec") - stats_summary_unit_value = common::units::nsec; - else - { - ROCP_FATAL << "Unsupported summary units value: " << stats_summary_unit; - } - - if(auto _summary_grps = get_env("ROCPROF_STATS_SUMMARY_GROUPS", ""); !_summary_grps.empty()) - { - stats_summary_groups = - sdk::parse::tokenize(_summary_grps, std::vector{"##@@##"}); - - // remove any empty strings (just in case these slipped through) - stats_summary_groups.erase(std::remove_if(stats_summary_groups.begin(), - stats_summary_groups.end(), - [](const auto& itr) { return itr.empty(); }), - stats_summary_groups.end()); - } - - // enable summary output if any of these are enabled - summary_output = (stats_summary || stats_summary_per_domain || !stats_summary_groups.empty()); -} - -std::vector -output_keys(std::string _tag) -{ - using strpair_t = std::pair; - - auto _cmdline = common::read_command_line(getpid()); - - if(_tag.empty() && !_cmdline.empty()) _tag = ::basename(_cmdline.front().c_str()); - - std::string _argv_string = {}; // entire argv cmd - std::string _args_string = {}; // cmdline args - std::string _argt_string = _tag; // prefix + cmdline args - const std::string& _tag0_string = _tag; // only the basic prefix - auto _options = 
std::vector{}; - - auto _replace = [](auto& _v, const strpair_t& pitr) { - auto pos = std::string::npos; - while((pos = _v.find(pitr.first)) != std::string::npos) - _v.replace(pos, pitr.first.length(), pitr.second); - }; - - if(_cmdline.size() > 1 && _cmdline.at(1) == "--") _cmdline.erase(_cmdline.begin() + 1); - - for(auto& itr : _cmdline) - { - itr = trim(itr); - _replace(itr, {"/", "_"}); - while(!itr.empty() && itr.at(0) == '.') - itr = itr.substr(1); - while(!itr.empty() && itr.at(0) == '_') - itr = itr.substr(1); - } - - if(!_cmdline.empty()) - { - for(size_t i = 0; i < _cmdline.size(); ++i) - { - const auto _l = std::string{(i == 0) ? "" : "_"}; - auto _v = _cmdline.at(i); - _argv_string += _l + _v; - if(i > 0) - { - _argt_string += (i > 1) ? (_l + _v) : _v; - _args_string += (i > 1) ? (_l + _v) : _v; - } - } - } - - auto _mpi_size = get_mpi_size(); - auto _mpi_rank = get_mpi_rank(); - - auto _dmp_size = fmt::format("{}", (_mpi_size) > 0 ? _mpi_size : 1); - auto _dmp_rank = fmt::format("{}", (_mpi_rank) > 0 ? 
_mpi_rank : 0); - auto _proc_id = fmt::format("{}", getpid()); - auto _parent_id = fmt::format("{}", getppid()); - auto _pgroup_id = fmt::format("{}", getpgid(getpid())); - auto _session_id = fmt::format("{}", getsid(getpid())); - auto _proc_size = fmt::format("{}", get_num_siblings()); - auto _pwd_string = get_env("PWD", "."); - auto _slurm_job_id = get_env("SLURM_JOB_ID", "0"); - auto _slurm_proc_id = get_env("SLURM_PROCID", _dmp_rank); - - auto _uniq_id = _proc_id; - if(get_env("SLURM_PROCID", -1) >= 0) - { - _uniq_id = _slurm_proc_id; - } - else if(_mpi_size > 0 || _mpi_rank >= 0) - { - _uniq_id = _dmp_rank; - } - - for(auto&& itr : std::initializer_list{ - {"%argv%", _argv_string, "Entire command-line condensed into a single string"}, - {"%argt%", - _argt_string, - "Similar to `%argv%` except basename of first command line argument"}, - {"%args%", _args_string, "All command line arguments condensed into a single string"}, - {"%tag%", _tag0_string, "Basename of first command line argument"}}) - { - _options.emplace_back(itr); - } - - if(!_cmdline.empty()) - { - for(size_t i = 0; i < _cmdline.size(); ++i) - { - auto _v = _cmdline.at(i); - _options.emplace_back(fmt::format("%arg{}%", i), _v, fmt::format("Argument #{}", i)); - } - } - - auto _launch_time = (launch_datetime) ? 
*launch_datetime : std::string{".UNKNOWN_LAUNCH_TIME."}; - auto _hostname = get_hostname(); - - for(auto&& itr : std::initializer_list{ - {"%hostname%", _hostname, "Network hostname"}, - {"%pid%", _proc_id, "Process identifier"}, - {"%ppid%", _parent_id, "Parent process identifier"}, - {"%pgid%", _pgroup_id, "Process group identifier"}, - {"%psid%", _session_id, "Process session identifier"}, - {"%psize%", _proc_size, "Number of sibling process"}, - {"%job%", _slurm_job_id, "SLURM_JOB_ID env variable"}, - {"%rank%", _slurm_proc_id, "MPI/UPC++ rank"}, - {"%size%", _dmp_size, "MPI/UPC++ size"}, - {"%nid%", _uniq_id, "%rank% if possible, otherwise %pid%"}, - {"%launch_time%", _launch_time, "Data and/or time of run according to time format"}, - }) - { - _options.emplace_back(itr); - } - - for(auto&& itr : std::initializer_list{ - {"%h", _hostname, "Shorthand for %hostname%"}, - {"%p", _proc_id, "Shorthand for %pid%"}, - {"%j", _slurm_job_id, "Shorthand for %job%"}, - {"%r", _slurm_proc_id, "Shorthand for %rank%"}, - {"%s", _dmp_size, "Shorthand for %size"}, - }) - { - _options.emplace_back(itr); - } - - return _options; -} - -namespace -{ -std::string -format_impl(std::string _fpath, const std::vector& _keys) -{ - if(_fpath.find('%') == std::string::npos && _fpath.find('$') == std::string::npos) - return _fpath; - - auto _replace = [](auto& _v, const output_key& pitr) { - auto pos = std::string::npos; - while((pos = _v.find(pitr.key)) != std::string::npos) - _v.replace(pos, pitr.key.length(), pitr.value); - }; - - for(auto&& itr : _keys) - _replace(_fpath, itr); - - // environment and configuration variables - try - { - auto strip_leading_and_replace = - [](std::string_view inp_v, std::initializer_list keys, const char* val) { - auto inp = std::string{inp_v}; - for(auto key : keys) - { - auto pos = std::string::npos; - while((pos = inp.find(key)) == 0) - inp = inp.substr(pos + 1); - - while((pos = inp.find(key)) != std::string::npos) - inp = inp.replace(pos, 1, val); - 
} - return inp; - }; - - for(const auto& _re : *env_regexes) - { - while(std::regex_search(_fpath, _re)) - { - auto _var = std::regex_replace(_fpath, _re, "$3"); - std::string _val = get_env(_var, ""); - _val = strip_leading_and_replace(_val, {'\t', ' ', '/'}, "_"); - auto _beg = std::regex_replace(_fpath, _re, "$1"); - auto _end = std::regex_replace(_fpath, _re, "$4"); - _fpath = fmt::format("{}{}{}", _beg, _val, _end); - } - } - } catch(std::exception& _e) - { - ROCP_WARNING << "[rocprofiler] " << __FUNCTION__ << " threw an exception :: " << _e.what() - << "\n"; - } - - // remove %arg% where N >= argc - try - { - std::regex _re{"(.*)%(arg[0-9]+)%([-/_]*)(.*)"}; - while(std::regex_search(_fpath, _re)) - _fpath = std::regex_replace(_fpath, _re, "$1$4"); - } catch(std::exception& _e) - { - ROCP_WARNING << "[rocprofiler] " << __FUNCTION__ << " threw an exception :: " << _e.what() - << "\n"; - } - - return _fpath; -} - -std::string -format(std::string _fpath, const std::vector& _keys) -{ - if(_fpath.find('%') == std::string::npos && _fpath.find('$') == std::string::npos) - return _fpath; - - auto _ref = _fpath; - _fpath = format_impl(std::move(_fpath), _keys); - - return (_fpath == _ref) ? 
_fpath : format(std::move(_fpath), _keys); -} -} // namespace - -std::string -format(std::string _fpath, const std::string& _tag) -{ - return format(std::move(_fpath), output_keys(_tag)); + if(kernel_filter_include.empty()) kernel_filter_include = std::string{".*"}; } std::string @@ -566,11 +216,5 @@ initialize() { (void) get_config(); } - -output_key::output_key(std::string _key, std::string _val, std::string _desc) -: key{std::move(_key)} -, value{std::move(_val)} -, description{std::move(_desc)} -{} } // namespace tool } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/config.hpp b/source/lib/rocprofiler-sdk-tool/config.hpp index d4557d3239..2785f2154d 100644 --- a/source/lib/rocprofiler-sdk-tool/config.hpp +++ b/source/lib/rocprofiler-sdk-tool/config.hpp @@ -26,6 +26,9 @@ #include "lib/common/environment.hpp" #include "lib/common/filesystem.hpp" #include "lib/common/mpl.hpp" +#include "lib/common/units.hpp" +#include "lib/output/format_path.hpp" +#include "lib/output/output_config.hpp" #include @@ -41,7 +44,6 @@ namespace rocprofiler { namespace tool { -namespace fs = common::filesystem; using common::get_env; struct config; @@ -49,11 +51,6 @@ struct config; enum class config_context { global = 0, - att_plugin, - cli_plugin, - ctf_plugin, - file_plugin, - perfetto_plugin, }; void @@ -66,66 +63,43 @@ get_config(); std::string format_name(std::string_view _name, const config& = get_config<>()); -std::string -format(std::string _fpath, const std::string& _tag = {}); - -int -get_mpi_size(); - -int -get_mpi_rank(); - -struct config +struct config : output_config { + using base_type = output_config; + config(); - bool demangle = get_env("ROCPROF_DEMANGLE_KERNELS", true); - bool truncate = get_env("ROCPROF_TRUNCATE_KERNELS", false); - bool kernel_trace = get_env("ROCPROF_KERNEL_TRACE", false); - bool hsa_core_api_trace = get_env("ROCPROF_HSA_CORE_API_TRACE", false); - bool hsa_amd_ext_api_trace = get_env("ROCPROF_HSA_AMD_EXT_API_TRACE", false); - 
bool hsa_image_ext_api_trace = get_env("ROCPROF_HSA_IMAGE_EXT_API_TRACE", false); - bool hsa_finalizer_ext_api_trace = get_env("ROCPROF_HSA_FINALIZER_EXT_API_TRACE", false); - bool marker_api_trace = get_env("ROCPROF_MARKER_API_TRACE", false); - bool memory_copy_trace = get_env("ROCPROF_MEMORY_COPY_TRACE", false); - bool scratch_memory_trace = get_env("ROCPROF_SCRATCH_MEMORY_TRACE", false); - bool counter_collection = get_env("ROCPROF_COUNTER_COLLECTION", false); - bool hip_runtime_api_trace = get_env("ROCPROF_HIP_RUNTIME_API_TRACE", false); - bool hip_compiler_api_trace = get_env("ROCPROF_HIP_COMPILER_API_TRACE", false); - bool rccl_api_trace = get_env("ROCPROF_RCCL_API_TRACE", false); - bool list_metrics = get_env("ROCPROF_LIST_METRICS", false); - bool list_metrics_output_file = get_env("ROCPROF_OUTPUT_LIST_METRICS_FILE", false); - bool stats = get_env("ROCPROF_STATS", false); - bool stats_summary = get_env("ROCPROF_STATS_SUMMARY", false); - bool stats_summary_per_domain = get_env("ROCPROF_STATS_SUMMARY_PER_DOMAIN", false); - bool csv_output = false; - bool json_output = false; - bool pftrace_output = false; - bool otf2_output = false; - bool summary_output = false; - bool kernel_rename = get_env("ROCPROF_KERNEL_RENAME", false); - int mpi_size = get_mpi_size(); - int mpi_rank = get_mpi_rank(); - size_t perfetto_shmem_size_hint = get_env("ROCPROF_PERFETTO_SHMEM_SIZE_HINT_KB", 64); - size_t perfetto_buffer_size = get_env("ROCPROF_PERFETTO_BUFFER_SIZE_KB", 1024000); - uint64_t stats_summary_unit_value = 1; - std::string stats_summary_unit = get_env("ROCPROF_STATS_SUMMARY_UNITS", "nsec"); - std::string output_path = get_env("ROCPROF_OUTPUT_PATH", fs::current_path().string()); - std::string output_file = - get_env("ROCPROF_OUTPUT_FILE_NAME", fmt::format("%hostname%/{}", getpid())); - std::string tmp_directory = get_env("ROCPROF_TMPDIR", output_path); - std::string stats_summary_file = get_env("ROCPROF_STATS_SUMMARY_OUTPUT", "stderr"); + ~config() = default; + 
config(const config&) = default; + config(config&&) noexcept = default; + config& operator=(const config&) = default; + config& operator=(config&&) noexcept = default; - std::string kernel_filter_include = - get_env("ROCPROF_KERNEL_FILTER_INCLUDE_REGEX", std::string{".*"}); - std::string kernel_filter_exclude = - get_env("ROCPROF_KERNEL_FILTER_EXCLUDE_REGEX", std::string{}); - std::string perfetto_buffer_fill_policy = - get_env("ROCPROF_PERFETTO_BUFFER_FILL_POLICY", std::string{"discard"}); - std::string perfetto_backend = get_env("ROCPROF_PERFETTO_BACKEND", std::string{"inprocess"}); - std::unordered_set kernel_filter_range = {}; - std::set counters = {}; - std::vector stats_summary_groups = {}; + bool demangle = get_env("ROCPROF_DEMANGLE_KERNELS", true); + bool truncate = get_env("ROCPROF_TRUNCATE_KERNELS", false); + bool kernel_trace = get_env("ROCPROF_KERNEL_TRACE", false); + bool hsa_core_api_trace = get_env("ROCPROF_HSA_CORE_API_TRACE", false); + bool hsa_amd_ext_api_trace = get_env("ROCPROF_HSA_AMD_EXT_API_TRACE", false); + bool hsa_image_ext_api_trace = get_env("ROCPROF_HSA_IMAGE_EXT_API_TRACE", false); + bool hsa_finalizer_ext_api_trace = get_env("ROCPROF_HSA_FINALIZER_EXT_API_TRACE", false); + bool marker_api_trace = get_env("ROCPROF_MARKER_API_TRACE", false); + bool memory_copy_trace = get_env("ROCPROF_MEMORY_COPY_TRACE", false); + bool scratch_memory_trace = get_env("ROCPROF_SCRATCH_MEMORY_TRACE", false); + bool counter_collection = get_env("ROCPROF_COUNTER_COLLECTION", false); + bool hip_runtime_api_trace = get_env("ROCPROF_HIP_RUNTIME_API_TRACE", false); + bool hip_compiler_api_trace = get_env("ROCPROF_HIP_COMPILER_API_TRACE", false); + bool rccl_api_trace = get_env("ROCPROF_RCCL_API_TRACE", false); + bool list_metrics = get_env("ROCPROF_LIST_METRICS", false); + bool list_metrics_output_file = get_env("ROCPROF_OUTPUT_LIST_METRICS_FILE", false); + + int mpi_size = get_mpi_size(); + int mpi_rank = get_mpi_rank(); + + std::string kernel_filter_include = 
get_env("ROCPROF_KERNEL_FILTER_INCLUDE_REGEX", ".*"); + std::string kernel_filter_exclude = get_env("ROCPROF_KERNEL_FILTER_EXCLUDE_REGEX", ""); + + std::unordered_set kernel_filter_range = {}; + std::set counters = {}; template void save(ArchiveT&) const; @@ -142,8 +116,6 @@ config::save(ArchiveT& ar) const #define CFG_SERIALIZE_MEMBER(VAR) ar(cereal::make_nvp(#VAR, VAR)) #define CFG_SERIALIZE_NAMED_MEMBER(NAME, VAR) ar(cereal::make_nvp(NAME, VAR)) - CFG_SERIALIZE_MEMBER(demangle); - CFG_SERIALIZE_MEMBER(truncate); CFG_SERIALIZE_MEMBER(kernel_trace); CFG_SERIALIZE_MEMBER(hsa_core_api_trace); CFG_SERIALIZE_MEMBER(hsa_amd_ext_api_trace); @@ -156,29 +128,14 @@ config::save(ArchiveT& ar) const CFG_SERIALIZE_MEMBER(hip_runtime_api_trace); CFG_SERIALIZE_MEMBER(hip_compiler_api_trace); CFG_SERIALIZE_MEMBER(kernel_rename); - - CFG_SERIALIZE_NAMED_MEMBER("summary", stats_summary); - CFG_SERIALIZE_NAMED_MEMBER("summary_per_domain", stats_summary_per_domain); - CFG_SERIALIZE_NAMED_MEMBER("summary_groups", stats_summary_groups); - CFG_SERIALIZE_NAMED_MEMBER("summary_unit", stats_summary_unit); - CFG_SERIALIZE_NAMED_MEMBER("summary_file", stats_summary_file); - - CFG_SERIALIZE_MEMBER(perfetto_shmem_size_hint); - CFG_SERIALIZE_MEMBER(perfetto_buffer_size); - CFG_SERIALIZE_MEMBER(perfetto_buffer_fill_policy); - CFG_SERIALIZE_MEMBER(perfetto_backend); - - CFG_SERIALIZE_NAMED_MEMBER("raw_tmp_directory", tmp_directory); - CFG_SERIALIZE_NAMED_MEMBER("raw_output_path", output_path); - CFG_SERIALIZE_NAMED_MEMBER("raw_output_file", output_file); - CFG_SERIALIZE_NAMED_MEMBER("tmp_directory", format(tmp_directory)); - CFG_SERIALIZE_NAMED_MEMBER("output_path", format(output_path)); - CFG_SERIALIZE_NAMED_MEMBER("output_file", format(output_file)); - CFG_SERIALIZE_MEMBER(counters); CFG_SERIALIZE_MEMBER(kernel_filter_include); CFG_SERIALIZE_MEMBER(kernel_filter_exclude); CFG_SERIALIZE_MEMBER(kernel_filter_range); + CFG_SERIALIZE_MEMBER(demangle); + CFG_SERIALIZE_MEMBER(truncate); + + 
static_cast(*this).save(ar); #undef CFG_SERIALIZE_MEMBER #undef CFG_SERIALIZE_NAMED_MEMBER @@ -200,31 +157,5 @@ get_config() return *_v; } } - -struct output_key -{ - output_key(std::string _key, std::string _val, std::string _desc = {}); - - template ::value, int> = 0> - output_key(std::string _key, Tp&& _val, std::string _desc = {}); - - operator std::pair() const; - - std::string key = {}; - std::string value = {}; - std::string description = {}; -}; - -template ::value, int>> -output_key::output_key(std::string _key, Tp&& _val, std::string _desc) -: key{std::move(_key)} -, value{fmt::format("{}", std::forward(_val))} -, description{std::move(_desc)} -{} - -std::vector -output_keys(std::string _tag = {}); } // namespace tool } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/generateCSV.cpp b/source/lib/rocprofiler-sdk-tool/generateCSV.cpp deleted file mode 100644 index 96ded3704c..0000000000 --- a/source/lib/rocprofiler-sdk-tool/generateCSV.cpp +++ /dev/null @@ -1,662 +0,0 @@ -// MIT License -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include "generateCSV.hpp" -#include "config.hpp" -#include "csv.hpp" -#include "generateStats.hpp" -#include "helper.hpp" -#include "statistics.hpp" - -#include -#include -#include - -#include -#include -#include -#include - -namespace rocprofiler -{ -namespace tool -{ -namespace -{ -tool::output_file -get_stats_output_file(std::string name) -{ - return tool::output_file{std::move(name), - tool::csv::stats_csv_encoder{}, - { - "Name", - "Calls", - "TotalDurationNs", - "AverageNs", - "Percentage", - "MinNs", - "MaxNs", - "StdDev", - }}; -} - -void -write_stats(output_file&& ofs, const stats_entry_vec_t& data_v) -{ - auto data = stats_entry_vec_t{}; - auto _duration = stats_data_t{}; - for(const auto& [id, value] : data_v) - { - data.emplace_back(id, value); - _duration += value; - } - - std::sort(data.begin(), data.end(), [](const auto& lhs, const auto& rhs) { - return (lhs.second.get_sum() > rhs.second.get_sum()); - }); - - constexpr float_type one_hundred = 100.0; - - const float_type _total_duration = _duration.get_sum(); - for(const auto& [name, value] : data) - { - auto duration_ns = value.get_sum(); - auto calls = value.get_count(); - float_type avg_ns = value.get_mean(); - float_type percent_v = (duration_ns / _total_duration) * one_hundred; - - auto _row = std::stringstream{}; - rocprofiler::tool::csv::stats_csv_encoder::write_row(_row, - name, - calls, - duration_ns, - avg_ns, - percentage{percent_v}, - value.get_min(), - value.get_max(), - value.get_stddev()); - ofs << _row.str() << std::flush; - } -} -} // namespace - -void -generate_csv(tool_table* /*tool_functions*/, std::vector& data) -{ - if(data.empty()) return; - - std::sort(data.begin(), data.end(), [](rocprofiler_agent_v0_t 
lhs, rocprofiler_agent_v0_t rhs) { - return lhs.node_id < rhs.node_id; - }); - - auto ofs = tool::output_file{"agent_info", - tool::csv::agent_info_csv_encoder{}, - {"Node_Id", - "Logical_Node_Id", - "Agent_Type", - "Cpu_Cores_Count", - "Simd_Count", - "Cpu_Core_Id_Base", - "Simd_Id_Base", - "Max_Waves_Per_Simd", - "Lds_Size_In_Kb", - "Gds_Size_In_Kb", - "Num_Gws", - "Wave_Front_Size", - "Num_Xcc", - "Cu_Count", - "Array_Count", - "Num_Shader_Banks", - "Simd_Arrays_Per_Engine", - "Cu_Per_Simd_Array", - "Simd_Per_Cu", - "Max_Slots_Scratch_Cu", - "Gfx_Target_Version", - "Vendor_Id", - "Device_Id", - "Location_Id", - "Domain", - "Drm_Render_Minor", - "Num_Sdma_Engines", - "Num_Sdma_Xgmi_Engines", - "Num_Sdma_Queues_Per_Engine", - "Num_Cp_Queues", - "Max_Engine_Clk_Ccompute", - "Max_Engine_Clk_Fcompute", - "Sdma_Fw_Version", - "Fw_Version", - "Capability", - "Cu_Per_Engine", - "Max_Waves_Per_Cu", - "Family_Id", - "Workgroup_Max_Size", - "Grid_Max_Size", - "Local_Mem_Size", - "Hive_Id", - "Gpu_Id", - "Workgroup_Max_Dim_X", - "Workgroup_Max_Dim_Y", - "Workgroup_Max_Dim_Z", - "Grid_Max_Dim_X", - "Grid_Max_Dim_Y", - "Grid_Max_Dim_Z", - "Name", - "Vendor_Name", - "Product_Name", - "Model_Name"}}; - - for(auto& itr : data) - { - auto _type = std::string_view{}; - if(itr.type == ROCPROFILER_AGENT_TYPE_CPU) - _type = "CPU"; - else if(itr.type == ROCPROFILER_AGENT_TYPE_GPU) - _type = "GPU"; - else - _type = "UNK"; - - auto row_ss = std::stringstream{}; - rocprofiler::tool::csv::agent_info_csv_encoder::write_row(row_ss, - itr.node_id, - itr.logical_node_id, - _type, - itr.cpu_cores_count, - itr.simd_count, - itr.cpu_core_id_base, - itr.simd_id_base, - itr.max_waves_per_simd, - itr.lds_size_in_kb, - itr.gds_size_in_kb, - itr.num_gws, - itr.wave_front_size, - itr.num_xcc, - itr.cu_count, - itr.array_count, - itr.num_shader_banks, - itr.simd_arrays_per_engine, - itr.cu_per_simd_array, - itr.simd_per_cu, - itr.max_slots_scratch_cu, - itr.gfx_target_version, - itr.vendor_id, - 
itr.device_id, - itr.location_id, - itr.domain, - itr.drm_render_minor, - itr.num_sdma_engines, - itr.num_sdma_xgmi_engines, - itr.num_sdma_queues_per_engine, - itr.num_cp_queues, - itr.max_engine_clk_ccompute, - itr.max_engine_clk_fcompute, - itr.sdma_fw_version.Value, - itr.fw_version.Value, - itr.capability.Value, - itr.cu_per_engine, - itr.max_waves_per_cu, - itr.family_id, - itr.workgroup_max_size, - itr.grid_max_size, - itr.local_mem_size, - itr.hive_id, - itr.gpu_id, - itr.workgroup_max_dim.x, - itr.workgroup_max_dim.y, - itr.workgroup_max_dim.z, - itr.grid_max_dim.x, - itr.grid_max_dim.y, - itr.grid_max_dim.z, - itr.name, - itr.vendor_name, - itr.product_name, - itr.model_name); - ofs << row_ss.str(); - } -} - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats) -{ - if(data.empty()) return; - - if(tool::get_config().stats && stats) - write_stats(get_stats_output_file("kernel_stats"), stats.entries); - - auto ofs = tool::output_file{"kernel_trace", - tool::csv::kernel_trace_csv_encoder{}, - {"Kind", - "Agent_Id", - "Queue_Id", - "Thread_Id", - "Dispatch_Id", - "Kernel_Id", - "Kernel_Name", - "Correlation_Id", - "Start_Timestamp", - "End_Timestamp", - "Private_Segment_Size", - "Group_Segment_Size", - "Workgroup_Size_X", - "Workgroup_Size_Y", - "Workgroup_Size_Z", - "Grid_Size_X", - "Grid_Size_Y", - "Grid_Size_Z"}}; - - for(const auto& record : data) - { - auto row_ss = std::stringstream{}; - auto kernel_name = tool_functions->tool_get_kernel_name_fn( - record.dispatch_info.kernel_id, record.correlation_id.external.value); - rocprofiler::tool::csv::kernel_trace_csv_encoder::write_row( - row_ss, - tool_functions->tool_get_domain_name_fn(record.kind), - tool_functions->tool_get_agent_node_id_fn(record.dispatch_info.agent_id), - record.dispatch_info.queue_id.handle, - record.thread_id, - record.dispatch_info.dispatch_id, - record.dispatch_info.kernel_id, - kernel_name, - record.correlation_id.internal, - 
record.start_timestamp, - record.end_timestamp, - record.dispatch_info.private_segment_size, - record.dispatch_info.group_segment_size, - record.dispatch_info.workgroup_size.x, - record.dispatch_info.workgroup_size.y, - record.dispatch_info.workgroup_size.z, - record.dispatch_info.grid_size.x, - record.dispatch_info.grid_size.y, - record.dispatch_info.grid_size.z); - - ofs << row_ss.str(); - } -} - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats) -{ - if(data.empty()) return; - - if(tool::get_config().stats && stats) - write_stats(get_stats_output_file("hip_api_stats"), stats.entries); - - auto ofs = tool::output_file{"hip_api_trace", - tool::csv::api_csv_encoder{}, - {"Domain", - "Function", - "Process_Id", - "Thread_Id", - "Correlation_Id", - "Start_Timestamp", - "End_Timestamp"}}; - for(const auto& record : data) - { - auto row_ss = std::stringstream{}; - auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - rocprofiler::tool::csv::api_csv_encoder::write_row( - row_ss, - tool_functions->tool_get_domain_name_fn(record.kind), - api_name, - getpid(), - record.thread_id, - record.correlation_id.internal, - record.start_timestamp, - record.end_timestamp); - - ofs << row_ss.str(); - } -} - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats) -{ - if(data.empty()) return; - - if(tool::get_config().stats && stats) - write_stats(get_stats_output_file("hsa_api_stats"), stats.entries); - - auto ofs = tool::output_file{"hsa_api_trace", - tool::csv::api_csv_encoder{}, - {"Domain", - "Function", - "Process_Id", - "Thread_Id", - "Correlation_Id", - "Start_Timestamp", - "End_Timestamp"}}; - - for(const auto& record : data) - { - auto row_ss = std::stringstream{}; - auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - rocprofiler::tool::csv::api_csv_encoder::write_row( - row_ss, - 
tool_functions->tool_get_domain_name_fn(record.kind), - api_name, - getpid(), - record.thread_id, - record.correlation_id.internal, - record.start_timestamp, - record.end_timestamp); - - ofs << row_ss.str(); - } -} - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats) -{ - if(data.empty()) return; - - if(tool::get_config().stats && stats) - write_stats(get_stats_output_file("memory_copy_stats"), stats.entries); - - auto ofs = tool::output_file{"memory_copy_trace", - tool::csv::memory_copy_csv_encoder{}, - {"Kind", - "Direction", - "Source_Agent_Id", - "Destination_Agent_Id", - "Correlation_Id", - "Start_Timestamp", - "End_Timestamp"}}; - for(const auto& record : data) - { - auto row_ss = std::stringstream{}; - auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - rocprofiler::tool::csv::memory_copy_csv_encoder::write_row( - row_ss, - tool_functions->tool_get_domain_name_fn(record.kind), - api_name, - tool_functions->tool_get_agent_node_id_fn(record.src_agent_id), - tool_functions->tool_get_agent_node_id_fn(record.dst_agent_id), - record.correlation_id.internal, - record.start_timestamp, - record.end_timestamp); - - ofs << row_ss.str(); - } -} - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats) -{ - if(data.empty()) return; - - if(tool::get_config().stats && stats) - write_stats(get_stats_output_file("marker_api_stats"), stats.entries); - - auto ofs = tool::output_file{"marker_api_trace", - tool::csv::marker_csv_encoder{}, - {"Domain", - "Function", - "Process_Id", - "Thread_Id", - "Correlation_Id", - "Start_Timestamp", - "End_Timestamp"}}; - for(const auto& record : data) - { - auto row_ss = std::stringstream{}; - auto _name = std::string_view{}; - - if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && - (record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA || - record.operation == 
ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA || - record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA)) - { - _name = tool_functions->tool_get_roctx_msg_fn(record.correlation_id.internal); - } - else - { - _name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - } - - tool::csv::marker_csv_encoder::write_row( - row_ss, - tool_functions->tool_get_domain_name_fn(record.kind), - _name, - getpid(), - record.thread_id, - record.correlation_id.internal, - record.start_timestamp, - record.end_timestamp); - - ofs << row_ss.str(); - } -} - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats) -{ - if(data.empty()) return; - - if(tool::get_config().stats && stats) - write_stats(get_stats_output_file("counter_collection_stats"), stats.entries); - - auto ofs = tool::output_file{"counter_collection", - tool::csv::counter_collection_csv_encoder{}, - {"Correlation_Id", - "Dispatch_Id", - "Agent_Id", - "Queue_Id", - "Process_Id", - "Thread_Id", - "Grid_Size", - "Kernel_Id", - "Kernel_Name", - "Workgroup_Size", - "LDS_Block_Size", - "Scratch_Size", - "VGPR_Count", - "SGPR_Count", - "Counter_Name", - "Counter_Value", - "Start_Timestamp", - "End_Timestamp"}}; - for(const auto& record : data) - { - auto kernel_id = record.dispatch_data.dispatch_info.kernel_id; - auto counter_name_value = std::map{}; - for(uint64_t i = 0; i < record.counter_count; i++) - { - const auto& count = record.records.at(i); - auto rec = count.record_counter; - std::string counter_name = tool_functions->tool_get_counter_info_name_fn(rec.id); - auto search = counter_name_value.find(counter_name); - if(search == counter_name_value.end()) - counter_name_value.emplace( - std::pair{counter_name, rec.counter_value}); - else - search->second = search->second + rec.counter_value; - } - - const auto& correlation_id = record.dispatch_data.correlation_id; - - auto magnitude = [](rocprofiler_dim3_t dims) { return (dims.x * 
dims.y * dims.z); }; - auto row_ss = std::stringstream{}; - for(auto& itr : counter_name_value) - { - tool::csv::counter_collection_csv_encoder::write_row( - row_ss, - correlation_id.internal, - record.dispatch_data.dispatch_info.dispatch_id, - tool_functions->tool_get_agent_node_id_fn( - record.dispatch_data.dispatch_info.agent_id), - record.dispatch_data.dispatch_info.queue_id.handle, - getpid(), - record.thread_id, - magnitude(record.dispatch_data.dispatch_info.grid_size), - record.dispatch_data.dispatch_info.kernel_id, - tool_functions->tool_get_kernel_name_fn(kernel_id, correlation_id.external.value), - magnitude(record.dispatch_data.dispatch_info.workgroup_size), - record.lds_block_size_v, - record.dispatch_data.dispatch_info.private_segment_size, - record.arch_vgpr_count, - record.sgpr_count, - itr.first, - itr.second, - record.dispatch_data.start_timestamp, - record.dispatch_data.end_timestamp); - } - ofs << row_ss.str(); - } -} - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats) -{ - if(data.empty()) return; - - if(tool::get_config().stats && stats) - write_stats(get_stats_output_file("scratch_memory_stats"), stats.entries); - - auto ofs = tool::output_file{"scratch_memory_trace", - tool::csv::scratch_memory_encoder{}, - { - "Kind", - "Operation", - "Agent_Id", - "Queue_Id", - "Thread_Id", - "Alloc_flags", - "Start_Timestamp", - "End_Timestamp", - }}; - - for(const auto& record : data) - { - auto row_ss = std::stringstream{}; - auto kind_name = tool_functions->tool_get_domain_name_fn(record.kind); - auto op_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - - tool::csv::scratch_memory_encoder::write_row( - row_ss, - kind_name, - op_name, - tool_functions->tool_get_agent_node_id_fn(record.agent_id), - record.queue_id.handle, - record.thread_id, - record.flags, - record.start_timestamp, - record.end_timestamp); - - ofs << row_ss.str(); - } -} - -void 
-generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats) -{ - if(data.empty()) return; - - if(tool::get_config().stats && stats) - write_stats(get_stats_output_file("rccl_api_stats"), stats.entries); - - auto ofs = tool::output_file{"rccl_api_trace", - tool::csv::api_csv_encoder{}, - {"Domain", - "Function", - "Process_Id", - "Thread_Id", - "Correlation_Id", - "Start_Timestamp", - "End_Timestamp"}}; - for(const auto& record : data) - { - auto row_ss = std::stringstream{}; - auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation); - rocprofiler::tool::csv::api_csv_encoder::write_row( - row_ss, - tool_functions->tool_get_domain_name_fn(record.kind), - api_name, - getpid(), - record.thread_id, - record.correlation_id.internal, - record.start_timestamp, - record.end_timestamp); - - ofs << row_ss.str(); - } -} - -void -generate_csv(tool_table* /*tool_functions*/, const domain_stats_vec_t& data_v) -{ - using csv_encoder_t = rocprofiler::tool::csv::stats_csv_encoder; - - if(!tool::get_config().stats) return; - - auto _data = data_v; - auto _total_stats = stats_data_t{}; - for(const auto& itr : _data) - _total_stats += itr.second.total; - - if(_total_stats.get_count() == 0) return; - - std::sort(_data.begin(), _data.end(), [](const auto& lhs, const auto& rhs) { - return (lhs.second.total.get_sum() > rhs.second.total.get_sum()); - }); - - auto ofs = get_stats_output_file("domain_stats"); - - const float_type _total_duration = _total_stats.get_sum(); - for(const auto& [type, value] : _data) - { - auto name = get_domain_column_name(type); - auto duration_ns = value.total.get_sum(); - auto calls = value.total.get_count(); - auto avg_ns = value.total.get_mean(); - auto percent_v = value.total.get_percent(_total_duration); - - auto _row = std::stringstream{}; - csv_encoder_t::write_row(_row, - name, - calls, - duration_ns, - avg_ns, - percentage{percent_v}, - value.total.get_min(), - 
value.total.get_max(), - value.total.get_stddev()); - ofs << _row.str() << std::flush; - } -} -} // namespace tool -} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/generateCSV.hpp b/source/lib/rocprofiler-sdk-tool/generateCSV.hpp deleted file mode 100644 index 743ed0446b..0000000000 --- a/source/lib/rocprofiler-sdk-tool/generateCSV.hpp +++ /dev/null @@ -1,82 +0,0 @@ -// MIT License -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#pragma once - -#include "generateStats.hpp" -#include "helper.hpp" -#include "rocprofiler-sdk/buffer_tracing.h" -#include "statistics.hpp" - -#include - -namespace rocprofiler -{ -namespace tool -{ -void -generate_csv(tool_table* tool_functions, std::vector& data); - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats); - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats); - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats); - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats); - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats); - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats); - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats); - -void -generate_csv(tool_table* tool_functions, - const std::deque& data, - const stats_entry_t& stats); - -void -generate_csv(tool_table* tool_functions, const domain_stats_vec_t& data); -} // namespace tool -} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/generateJSON.cpp b/source/lib/rocprofiler-sdk-tool/generateJSON.cpp deleted file mode 100644 index b431744741..0000000000 --- a/source/lib/rocprofiler-sdk-tool/generateJSON.cpp +++ /dev/null @@ -1,189 +0,0 @@ -// MIT License -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#include "generateJSON.hpp" -#include "config.hpp" -#include "helper.hpp" -#include "output_file.hpp" -#include "statistics.hpp" - -#include "lib/common/string_entry.hpp" -#include "lib/common/utility.hpp" - -#include -#include - -#include - -namespace rocprofiler -{ -namespace tool -{ -void -write_json(tool_table* tool_functions, - uint64_t pid, - const domain_stats_vec_t& domain_stats, - std::vector agent_data, - std::vector counter_data, - std::deque* hip_api_deque, - std::deque* hsa_api_deque, - std::deque* kernel_dispatch_deque, - std::deque* memory_copy_deque, - std::deque* counter_collection_deque, - std::deque* marker_api_deque, - std::deque* scratch_memory_deque, - std::deque* rccl_api_deque) - -{ - using JSONOutputArchive = cereal::MinimalJSONOutputArchive; - - constexpr auto json_prec = 32; - constexpr auto json_indent = JSONOutputArchive::Options::IndentChar::space; - auto json_opts = JSONOutputArchive::Options{json_prec, json_indent, 1}; - auto filename = std::string_view{"results"}; - auto ofs = get_output_stream(filename, ".json"); - - { - auto json_ar = JSONOutputArchive{*ofs.stream, json_opts}; - json_ar.setNextName("rocprofiler-sdk-tool"); - json_ar.startNode(); - - json_ar.makeArray(); - json_ar.startNode(); - - // metadata - { - json_ar.setNextName("metadata"); - json_ar.startNode(); - auto* timestamps = tool_functions->tool_get_app_timestamps_fn(); - json_ar(cereal::make_nvp("pid", pid)); - json_ar(cereal::make_nvp("init_time", timestamps->app_start_time)); - json_ar(cereal::make_nvp("fini_time", timestamps->app_end_time)); - json_ar(cereal::make_nvp("config", get_config())); - json_ar(cereal::make_nvp("command", common::read_command_line(getpid()))); - json_ar.finishNode(); - } - - // summary - { - json_ar.setNextName("summary"); - json_ar.startNode(); - json_ar.makeArray(); - - for(const auto& itr : domain_stats) - { - auto _name = get_domain_column_name(itr.first); - json_ar.startNode(); - - json_ar(cereal::make_nvp("domain", 
std::string{_name})); - json_ar(cereal::make_nvp("stats", itr.second)); - // itr.second.serialize(json_ar, 0); - - json_ar.finishNode(); - } - - json_ar.finishNode(); - } - - json_ar(cereal::make_nvp("agents", agent_data)); - json_ar(cereal::make_nvp("counters", counter_data)); - - { - auto callback_name_info = get_callback_id_names(); - auto buffer_name_info = get_buffer_id_names(); - auto counter_dims = get_tool_counter_dimension_info(); - auto marker_msg_data = get_callback_roctx_msg(); - - json_ar.setNextName("strings"); - json_ar.startNode(); - json_ar(cereal::make_nvp("callback_records", callback_name_info)); - json_ar(cereal::make_nvp("buffer_records", buffer_name_info)); - json_ar(cereal::make_nvp("marker_api", marker_msg_data)); - - { - auto _extern_corr_id_strings = std::map{}; - if(tool::get_config().kernel_rename) - { - for(auto itr : *kernel_dispatch_deque) - { - auto _value = itr.correlation_id.external.value; - if(_value > 0) - { - const auto* _str = common::get_string_entry(_value); - if(_str) _extern_corr_id_strings.emplace(_value, *_str); - } - } - } - - json_ar.setNextName("correlation_id"); - json_ar.startNode(); - json_ar(cereal::make_nvp("external", _extern_corr_id_strings)); - json_ar.finishNode(); - } - - { - json_ar.setNextName("counters"); - json_ar.startNode(); - json_ar(cereal::make_nvp("dimension_ids", counter_dims)); - json_ar.finishNode(); - } - - json_ar.finishNode(); - } - - { - auto kern_sym_data = get_kernel_symbol_data(); - auto code_obj_data = get_code_object_data(); - - json_ar(cereal::make_nvp("code_objects", code_obj_data)); - json_ar(cereal::make_nvp("kernel_symbols", kern_sym_data)); - } - - { - json_ar.setNextName("callback_records"); - json_ar.startNode(); - json_ar(cereal::make_nvp("counter_collection", *counter_collection_deque)); - json_ar.finishNode(); - } - - { - json_ar.setNextName("buffer_records"); - json_ar.startNode(); - json_ar(cereal::make_nvp("kernel_dispatch", *kernel_dispatch_deque)); - 
json_ar(cereal::make_nvp("hip_api", *hip_api_deque)); - json_ar(cereal::make_nvp("hsa_api", *hsa_api_deque)); - json_ar(cereal::make_nvp("marker_api", *marker_api_deque)); - json_ar(cereal::make_nvp("rccl_api", *rccl_api_deque)); - json_ar(cereal::make_nvp("memory_copy", *memory_copy_deque)); - json_ar(cereal::make_nvp("scratch_memory", *scratch_memory_deque)); - json_ar.finishNode(); - } - - json_ar.finishNode(); // end array - json_ar.finishNode(); - } - - ofs.close(); -} - -} // namespace tool -} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/generatePerfetto.cpp b/source/lib/rocprofiler-sdk-tool/generatePerfetto.cpp deleted file mode 100644 index adec68f39b..0000000000 --- a/source/lib/rocprofiler-sdk-tool/generatePerfetto.cpp +++ /dev/null @@ -1,593 +0,0 @@ -// MIT License -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#include "generatePerfetto.hpp" -#include "config.hpp" -#include "helper.hpp" -#include "output_file.hpp" - -#include "lib/common/utility.hpp" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace rocprofiler -{ -namespace tool -{ -namespace -{ -auto main_tid = common::get_tid(); - -template -size_t -get_hash_id(Tp&& _val) -{ - if constexpr(!std::is_pointer::value) - return std::hash{}(std::forward(_val)); - else if constexpr(std::is_same::value) - return get_hash_id(std::string_view{_val}); - else - return get_hash_id(*_val); -} -} // namespace - -void -write_perfetto( - tool_table* tool_functions, - uint64_t /*pid*/, - std::vector agent_data, - std::deque* hip_api_data, - std::deque* hsa_api_data, - std::deque* kernel_dispatch_data, - std::deque* memory_copy_data, - std::deque* marker_api_data, - std::deque* /*scratch_memory_data*/, - std::deque* rccl_api_data) -{ - namespace sdk = ::rocprofiler::sdk; - - auto agents_map = std::unordered_map{}; - for(auto itr : agent_data) - agents_map.emplace(itr.id, itr); - - auto args = ::perfetto::TracingInitArgs{}; - auto track_event_cfg = ::perfetto::protos::gen::TrackEventConfig{}; - auto cfg = ::perfetto::TraceConfig{}; - - // environment settings - auto shmem_size_hint = get_config().perfetto_shmem_size_hint; - auto buffer_size_kb = get_config().perfetto_buffer_size; - - auto* buffer_config = cfg.add_buffers(); - buffer_config->set_size_kb(buffer_size_kb); - - if(get_config().perfetto_buffer_fill_policy == "discard" || - get_config().perfetto_buffer_fill_policy.empty()) - buffer_config->set_fill_policy( - ::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD); - else if(get_config().perfetto_buffer_fill_policy == "ring_buffer") - buffer_config->set_fill_policy( - ::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_RING_BUFFER); - else - ROCP_FATAL << "Unsupport perfetto buffer fill policy: '" - << 
get_config().perfetto_buffer_fill_policy - << "'. Supported: discard, ring_buffer"; - - auto* ds_cfg = cfg.add_data_sources()->mutable_config(); - ds_cfg->set_name("track_event"); // this MUST be track_event - ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString()); - - args.shmem_size_hint_kb = shmem_size_hint; - - if(get_config().perfetto_backend == "inprocess" || get_config().perfetto_backend.empty()) - args.backends |= ::perfetto::kInProcessBackend; - else if(get_config().perfetto_backend == "system") - args.backends |= ::perfetto::kSystemBackend; - else - ROCP_FATAL << "Unsupport perfetto backend: '" << get_config().perfetto_backend - << "'. Supported: inprocess, system"; - - ::perfetto::Tracing::Initialize(args); - ::perfetto::TrackEvent::Register(); - - auto tracing_session = ::perfetto::Tracing::NewTrace(); - - tracing_session->Setup(cfg); - tracing_session->StartBlocking(); - - auto tids = std::set{}; - auto demangled = std::unordered_map{}; - auto agent_thread_ids = std::unordered_map>{}; - auto agent_queue_ids = - std::unordered_map>{}; - auto thread_indexes = std::unordered_map{}; - auto kernel_sym_data = get_kernel_symbol_data(); - - auto thread_tracks = std::unordered_map{}; - auto agent_thread_tracks = - std::unordered_map>{}; - auto agent_queue_tracks = - std::unordered_map>{}; - - auto _get_agent = [&agent_data](rocprofiler_agent_id_t _id) -> const rocprofiler_agent_t* { - for(const auto& itr : agent_data) - { - if(_id == itr.id) return &itr; - } - return CHECK_NOTNULL(nullptr); - }; - - { - for(auto itr : *hsa_api_data) - tids.emplace(itr.thread_id); - for(auto itr : *hip_api_data) - tids.emplace(itr.thread_id); - for(auto itr : *marker_api_data) - tids.emplace(itr.thread_id); - for(auto itr : *rccl_api_data) - tids.emplace(itr.thread_id); - - for(auto itr : *memory_copy_data) - { - tids.emplace(itr.thread_id); - agent_thread_ids[itr.dst_agent_id].emplace(itr.thread_id); - } - - for(auto itr : *kernel_dispatch_data) - { - 
tids.emplace(itr.thread_id); - agent_queue_ids[itr.dispatch_info.agent_id].emplace(itr.dispatch_info.queue_id); - } - } - - uint64_t nthrn = 0; - for(auto itr : tids) - { - if(itr == main_tid) - { - thread_indexes.emplace(main_tid, 0); - thread_tracks.emplace(main_tid, ::perfetto::ThreadTrack::Current()); - } - else - { - auto _idx = ++nthrn; - thread_indexes.emplace(itr, _idx); - auto _track = ::perfetto::Track{itr}; - auto _desc = _track.Serialize(); - auto _namess = std::stringstream{}; - _namess << "THREAD " << _idx << " (" << itr << ")"; - _desc.set_name(_namess.str()); - perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); - - thread_tracks.emplace(itr, _track); - } - } - - for(const auto& itr : agent_thread_ids) - { - const auto* _agent = _get_agent(itr.first); - - for(auto titr : itr.second) - { - auto _namess = std::stringstream{}; - _namess << "COPY to AGENT [" << _agent->logical_node_id << "] THREAD [" - << thread_indexes.at(titr) << "] "; - - if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU) - _namess << "(CPU)"; - else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU) - _namess << "(GPU)"; - else - _namess << "(UNK)"; - - auto _track = ::perfetto::Track{get_hash_id(_namess.str())}; - auto _desc = _track.Serialize(); - _desc.set_name(_namess.str()); - - perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); - - agent_thread_tracks[itr.first].emplace(titr, _track); - } - } - - for(const auto& aitr : agent_queue_ids) - { - uint32_t nqueue = 0; - for(auto qitr : aitr.second) - { - const auto* _agent = _get_agent(aitr.first); - - auto _namess = std::stringstream{}; - _namess << "COMPUTE AGENT [" << _agent->logical_node_id << "] QUEUE [" << nqueue++ - << "] "; - - if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU) - _namess << "(CPU)"; - else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU) - _namess << "(GPU)"; - else - _namess << "(UNK)"; - - auto _track = ::perfetto::Track{get_hash_id(_namess.str())}; - auto _desc = _track.Serialize(); - 
_desc.set_name(_namess.str()); - - perfetto::TrackEvent::SetTrackDescriptor(_track, _desc); - - agent_queue_tracks[aitr.first].emplace(qitr, _track); - } - } - - // trace events - { - auto buffer_names = sdk::get_buffer_tracing_names(); - auto callbk_name_info = sdk::get_callback_tracing_names(); - - for(auto itr : *hsa_api_data) - { - auto name = buffer_names.at(itr.kind, itr.operation); - auto& track = thread_tracks.at(itr.thread_id); - - TRACE_EVENT_BEGIN(sdk::perfetto_category::name, - ::perfetto::StaticString(name.data()), - track, - itr.start_timestamp, - ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), - "begin_ns", - itr.start_timestamp, - "end_ns", - itr.end_timestamp, - "delta_ns", - (itr.end_timestamp - itr.start_timestamp), - "tid", - itr.thread_id, - "kind", - itr.kind, - "operation", - itr.operation, - "corr_id", - itr.correlation_id.internal); - TRACE_EVENT_END( - sdk::perfetto_category::name, track, itr.end_timestamp); - tracing_session->FlushBlocking(); - } - - for(auto itr : *hip_api_data) - { - auto name = buffer_names.at(itr.kind, itr.operation); - auto& track = thread_tracks.at(itr.thread_id); - - TRACE_EVENT_BEGIN(sdk::perfetto_category::name, - ::perfetto::StaticString(name.data()), - track, - itr.start_timestamp, - ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), - "begin_ns", - itr.start_timestamp, - "end_ns", - itr.end_timestamp, - "delta_ns", - (itr.end_timestamp - itr.start_timestamp), - "tid", - itr.thread_id, - "kind", - itr.kind, - "operation", - itr.operation, - "corr_id", - itr.correlation_id.internal); - TRACE_EVENT_END( - sdk::perfetto_category::name, track, itr.end_timestamp); - tracing_session->FlushBlocking(); - } - - for(auto itr : *marker_api_data) - { - auto& track = thread_tracks.at(itr.thread_id); - auto name = (itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && - itr.operation != ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId) - ? 
tool_functions->tool_get_roctx_msg_fn(itr.correlation_id.internal) - : buffer_names.at(itr.kind, itr.operation); - - TRACE_EVENT_BEGIN(sdk::perfetto_category::name, - ::perfetto::StaticString(name.data()), - track, - itr.start_timestamp, - ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), - "begin_ns", - itr.start_timestamp, - "end_ns", - itr.end_timestamp, - "delta_ns", - (itr.end_timestamp - itr.start_timestamp), - "tid", - itr.thread_id, - "kind", - itr.kind, - "operation", - itr.operation, - "corr_id", - itr.correlation_id.internal); - TRACE_EVENT_END( - sdk::perfetto_category::name, track, itr.end_timestamp); - tracing_session->FlushBlocking(); - } - - for(auto itr : *rccl_api_data) - { - auto name = buffer_names.at(itr.kind, itr.operation); - auto& track = thread_tracks.at(itr.thread_id); - - TRACE_EVENT_BEGIN(sdk::perfetto_category::name, - ::perfetto::StaticString(name.data()), - track, - itr.start_timestamp, - ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), - "begin_ns", - itr.start_timestamp, - "end_ns", - itr.end_timestamp, - "delta_ns", - (itr.end_timestamp - itr.start_timestamp), - "tid", - itr.thread_id, - "kind", - itr.kind, - "operation", - itr.operation, - "corr_id", - itr.correlation_id.internal); - TRACE_EVENT_END( - sdk::perfetto_category::name, track, itr.end_timestamp); - tracing_session->FlushBlocking(); - } - - for(auto itr : *memory_copy_data) - { - auto name = buffer_names.at(itr.kind, itr.operation); - auto& track = agent_thread_tracks.at(itr.dst_agent_id).at(itr.thread_id); - - TRACE_EVENT_BEGIN(sdk::perfetto_category::name, - ::perfetto::StaticString(name.data()), - track, - itr.start_timestamp, - ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), - "begin_ns", - itr.start_timestamp, - "end_ns", - itr.end_timestamp, - "delta_ns", - (itr.end_timestamp - itr.start_timestamp), - "kind", - itr.kind, - "operation", - itr.operation, - "src_agent", - agents_map.at(itr.src_agent_id).logical_node_id, - 
"dst_agent", - agents_map.at(itr.dst_agent_id).logical_node_id, - "copy_bytes", - itr.bytes, - "corr_id", - itr.correlation_id.internal, - "tid", - itr.thread_id); - TRACE_EVENT_END( - sdk::perfetto_category::name, track, itr.end_timestamp); - tracing_session->FlushBlocking(); - } - - for(auto itr : *kernel_dispatch_data) - { - const auto& info = itr.dispatch_info; - const kernel_symbol_data* sym = nullptr; - for(const auto& kitr : kernel_sym_data) - { - if(kitr.kernel_id == info.kernel_id) - { - sym = &kitr; - break; - } - } - - CHECK(sym != nullptr); - - auto name = std::string_view{sym->kernel_name}; - auto& track = agent_queue_tracks.at(info.agent_id).at(info.queue_id); - - if(demangled.find(name) == demangled.end()) - { - demangled.emplace(name, common::cxx_demangle(name)); - } - - TRACE_EVENT_BEGIN(sdk::perfetto_category::name, - ::perfetto::StaticString(demangled.at(name).c_str()), - track, - itr.start_timestamp, - ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), - "begin_ns", - itr.start_timestamp, - "end_ns", - itr.end_timestamp, - "delta_ns", - (itr.end_timestamp - itr.start_timestamp), - "kind", - itr.kind, - "agent", - agents_map.at(info.agent_id).logical_node_id, - "corr_id", - itr.correlation_id.internal, - "queue", - info.queue_id.handle, - "tid", - itr.thread_id, - "kernel_id", - info.kernel_id, - "private_segment_size", - info.private_segment_size, - "group_segment_size", - info.group_segment_size, - "workgroup_size", - info.workgroup_size.x * info.workgroup_size.y * info.workgroup_size.z, - "grid_size", - info.grid_size.x * info.grid_size.y * info.grid_size.z); - TRACE_EVENT_END(sdk::perfetto_category::name, - track, - itr.end_timestamp); - tracing_session->FlushBlocking(); - } - } - - // counter tracks - { - // memory copy counter track - auto mem_cpy_endpoints = std::map>{}; - auto mem_cpy_extremes = std::pair{}; - for(auto itr : *memory_copy_data) - { - uint64_t _mean_timestamp = - itr.start_timestamp + (0.5 * (itr.end_timestamp - 
itr.start_timestamp)); - - mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp - 1000, 0); - mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp, 0); - mem_cpy_endpoints[itr.dst_agent_id].emplace(_mean_timestamp, 0); - mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp, 0); - mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp + 1000, 0); - - mem_cpy_extremes = std::make_pair(std::min(mem_cpy_extremes.first, itr.start_timestamp), - std::max(mem_cpy_extremes.second, itr.end_timestamp)); - } - - for(auto itr : *memory_copy_data) - { - auto mbeg = mem_cpy_endpoints.at(itr.dst_agent_id).lower_bound(itr.start_timestamp); - auto mend = mem_cpy_endpoints.at(itr.dst_agent_id).upper_bound(itr.end_timestamp); - - LOG_IF(FATAL, mbeg == mend) << "Missing range for timestamp [" << itr.start_timestamp - << ", " << itr.end_timestamp << "]"; - - for(auto mitr = mbeg; mitr != mend; ++mitr) - mitr->second += itr.bytes; - } - - constexpr auto bytes_multiplier = 1024; - - auto mem_cpy_tracks = - std::unordered_map{}; - auto mem_cpy_cnt_names = std::vector{}; - mem_cpy_cnt_names.reserve(mem_cpy_endpoints.size()); - for(auto& mitr : mem_cpy_endpoints) - { - mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.first - 5000, 0); - mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.second + 5000, 0); - - auto _track_name = std::stringstream{}; - const auto* _agent = _get_agent(mitr.first); - - if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU) - _track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (CPU)"; - else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU) - _track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (GPU)"; - - constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES; - auto& _name = mem_cpy_cnt_names.emplace_back(_track_name.str()); - mem_cpy_tracks.emplace(mitr.first, - ::perfetto::CounterTrack{_name.c_str()} - .set_unit(_unit) - .set_unit_multiplier(bytes_multiplier) - 
.set_is_incremental(false)); - } - - for(auto& mitr : mem_cpy_endpoints) - { - for(auto itr : mitr.second) - { - TRACE_COUNTER(sdk::perfetto_category::name, - mem_cpy_tracks.at(mitr.first), - itr.first, - itr.second / bytes_multiplier); - tracing_session->FlushBlocking(); - } - } - } - - ::perfetto::TrackEvent::Flush(); - tracing_session->FlushBlocking(); - tracing_session->StopBlocking(); - - auto filename = std::string{"results"}; - auto ofs = get_output_stream(filename, ".pftrace"); - - auto amount_read = std::atomic{0}; - auto is_done = std::promise{}; - auto _mtx = std::mutex{}; - auto _reader = [&ofs, &_mtx, &is_done, &amount_read]( - ::perfetto::TracingSession::ReadTraceCallbackArgs _args) { - auto _lk = std::unique_lock{_mtx}; - if(_args.data && _args.size > 0) - { - ROCP_TRACE << "Writing " << _args.size << " B to trace..."; - // Write the trace data into file - ofs.stream->write(_args.data, _args.size); - amount_read += _args.size; - } - ROCP_INFO_IF(!_args.has_more && amount_read > 0) - << "Wrote " << amount_read << " B to perfetto trace file"; - if(!_args.has_more) is_done.set_value(); - }; - - for(size_t i = 0; i < 2; ++i) - { - ROCP_TRACE << "Reading trace..."; - amount_read = 0; - is_done = std::promise{}; - tracing_session->ReadTrace(_reader); - is_done.get_future().wait(); - } - - ROCP_TRACE << "Destroying tracing session..."; - tracing_session.reset(); - - ROCP_TRACE << "Flushing trace output stream..."; - (*ofs.stream) << std::flush; - - ROCP_TRACE << "Destroying trace output stream..."; - ofs.close(); -} - -} // namespace tool -} // namespace rocprofiler - -PERFETTO_TRACK_EVENT_STATIC_STORAGE(); diff --git a/source/lib/rocprofiler-sdk-tool/generateStats.hpp b/source/lib/rocprofiler-sdk-tool/generateStats.hpp deleted file mode 100644 index 98dd8c8154..0000000000 --- a/source/lib/rocprofiler-sdk-tool/generateStats.hpp +++ /dev/null @@ -1,67 +0,0 @@ -// MIT License -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#pragma once - -#include "helper.hpp" -#include "statistics.hpp" - -namespace rocprofiler -{ -namespace tool -{ -stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data); - -stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data); - -stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data); - -stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data); - -stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data); - -stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data); - -stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data); - -stats_entry_t -generate_stats(tool_table* tool_functions, - const std::deque& data); - -void -generate_stats(tool_table* tool_functions, const domain_stats_vec_t& data); -} // namespace tool -} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk-tool/helper.hpp b/source/lib/rocprofiler-sdk-tool/helper.hpp index c1be874873..08eed5f505 100644 --- a/source/lib/rocprofiler-sdk-tool/helper.hpp +++ b/source/lib/rocprofiler-sdk-tool/helper.hpp @@ -22,20 +22,22 @@ #pragma once -#include "domain_type.hpp" +#include "config.hpp" + #include "lib/common/container/ring_buffer.hpp" #include "lib/common/container/small_vector.hpp" #include "lib/common/defines.hpp" #include "lib/common/demangle.hpp" #include "lib/common/filesystem.hpp" -#include "output_file.hpp" +#include "lib/output/domain_type.hpp" +#include "lib/output/metadata.hpp" +#include "lib/output/output_stream.hpp" #include #include #include #include #include -#include #include #include @@ -53,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -80,96 +83,10 @@ constexpr size_t BUFFER_SIZE_BYTES = 4096; constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 2); -using rocprofiler_tool_buffer_kind_names_t = - std::unordered_map; -using 
rocprofiler_tool_buffer_kind_operation_names_t = - std::unordered_map>; - -using marker_message_map_t = std::unordered_map; -using rocprofiler_kernel_symbol_data_t = - rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t; - -namespace common = ::rocprofiler::common; -namespace tool = ::rocprofiler::tool; - -struct kernel_symbol_data : rocprofiler_kernel_symbol_data_t -{ - using base_type = rocprofiler_kernel_symbol_data_t; - - kernel_symbol_data(const base_type& _base) - : base_type{_base} - , formatted_kernel_name{tool::format_name(CHECK_NOTNULL(_base.kernel_name))} - , demangled_kernel_name{common::cxx_demangle(CHECK_NOTNULL(_base.kernel_name))} - , truncated_kernel_name{common::truncate_name(demangled_kernel_name)} - {} - - kernel_symbol_data(); - ~kernel_symbol_data() = default; - kernel_symbol_data(const kernel_symbol_data&) = default; - kernel_symbol_data(kernel_symbol_data&&) noexcept = default; - kernel_symbol_data& operator=(const kernel_symbol_data&) = default; - kernel_symbol_data& operator=(kernel_symbol_data&&) noexcept = default; - - std::string formatted_kernel_name = {}; - std::string demangled_kernel_name = {}; - std::string truncated_kernel_name = {}; -}; - -inline kernel_symbol_data::kernel_symbol_data() -: base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0} -{} - -using kernel_symbol_data_map_t = std::unordered_map; - -struct rocprofiler_tool_counter_info_t : rocprofiler_counter_info_v0_t -{ - using parent_type = rocprofiler_counter_info_v0_t; - using dimension_id_vec_t = std::vector; - using dimension_info_vec_t = std::vector; - - rocprofiler_tool_counter_info_t(rocprofiler_agent_id_t _agent_id, - parent_type _info, - dimension_id_vec_t&& _dim_ids, - dimension_info_vec_t&& _dim_info) - : parent_type{_info} - , agent_id{_agent_id} - , dimension_ids{std::move(_dim_ids)} - , dimension_info{std::move(_dim_info)} - {} - - ~rocprofiler_tool_counter_info_t() = default; - rocprofiler_tool_counter_info_t(const 
rocprofiler_tool_counter_info_t&) = default; - rocprofiler_tool_counter_info_t(rocprofiler_tool_counter_info_t&&) noexcept = default; - rocprofiler_tool_counter_info_t& operator=(const rocprofiler_tool_counter_info_t&) = default; - rocprofiler_tool_counter_info_t& operator=(rocprofiler_tool_counter_info_t&&) noexcept = - default; - - rocprofiler_agent_id_t agent_id = {}; - std::vector dimension_ids = {}; - std::vector dimension_info = {}; -}; - -rocprofiler::sdk::buffer_name_info_t -get_buffer_id_names(); - -::rocprofiler::sdk::callback_name_info_t -get_callback_id_names(); - -std::map -get_callback_roctx_msg(); - -std::vector -get_kernel_symbol_data(); - -std::vector -get_code_object_data(); - -std::vector -get_tool_counter_info(); - -std::vector -get_tool_counter_dimension_info(); +using marker_message_map_t = std::unordered_map; +using tool_counter_info = ::rocprofiler::tool::tool_counter_info; +using kernel_symbol_info = ::rocprofiler::tool::kernel_symbol_info; +using rocprofiler_kernel_symbol_info_t = ::rocprofiler::tool::rocprofiler_kernel_symbol_info_t; enum tracing_marker_kind { @@ -237,167 +154,3 @@ convert_marker_tracing_kind(TracingKindT val) { return convert_marker_tracing_kind(val, std::make_index_sequence{}); } - -struct rocprofiler_tool_dimension_pos_t -{ - uint64_t dimension_id; - size_t instance; - - template - void save(ArchiveT& ar) const - { - ar(cereal::make_nvp("dimension_id", dimension_id)); - ar(cereal::make_nvp("instance", instance)); - } -}; - -struct rocprofiler_tool_record_counter_t -{ - rocprofiler_counter_id_t counter_id = {}; - rocprofiler_record_counter_t record_counter = {}; - - template - void save(ArchiveT& ar) const - { - ar(cereal::make_nvp("counter_id", counter_id)); - ar(cereal::make_nvp("value", record_counter.counter_value)); - } -}; - -struct rocprofiler_tool_counter_collection_record_t -{ - rocprofiler_dispatch_counting_service_data_t dispatch_data = {}; - std::array records = {}; - uint64_t thread_id = 0; - uint64_t 
arch_vgpr_count = 0; - uint64_t sgpr_count = 0; - uint64_t lds_block_size_v = 0; - uint64_t counter_count = 0; - - template - void save(ArchiveT& ar) const - { - ar(cereal::make_nvp("dispatch_data", dispatch_data)); - // should be removed when moving to buffered tracing - std::vector tmp{records.begin(), - records.begin() + counter_count}; - ar(cereal::make_nvp("records", tmp)); - ar(cereal::make_nvp("thread_id", thread_id)); - ar(cereal::make_nvp("arch_vgpr_count", arch_vgpr_count)); - ar(cereal::make_nvp("sgpr_count", sgpr_count)); - ar(cereal::make_nvp("lds_block_size_v", lds_block_size_v)); - } -}; - -struct timestamps_t -{ - rocprofiler_timestamp_t app_start_time; - rocprofiler_timestamp_t app_end_time; -}; - -namespace rocprofiler -{ -namespace tool -{ -template -struct buffered_output; -} -} // namespace rocprofiler - -using hip_buffered_output_t = - ::rocprofiler::tool::buffered_output; -using hsa_buffered_output_t = - ::rocprofiler::tool::buffered_output; -using kernel_dispatch_buffered_output_t = - ::rocprofiler::tool::buffered_output; -using memory_copy_buffered_output_t = - ::rocprofiler::tool::buffered_output; -using marker_buffered_output_t = - ::rocprofiler::tool::buffered_output; -using rccl_buffered_output_t = - ::rocprofiler::tool::buffered_output; -using counter_collection_buffered_output_t = - ::rocprofiler::tool::buffered_output; -using scratch_memory_buffered_output_t = - ::rocprofiler::tool::buffered_output; - -using tool_get_agent_node_id_fn_t = uint64_t (*)(rocprofiler_agent_id_t); -using tool_get_app_timestamps_fn_t = timestamps_t* (*) (); -using tool_get_kernel_name_fn_t = std::string_view (*)(uint64_t, uint64_t); -using tool_get_domain_name_fn_t = std::string_view (*)(rocprofiler_buffer_tracing_kind_t); -using tool_get_operation_name_fn_t = std::string_view (*)(rocprofiler_buffer_tracing_kind_t, - rocprofiler_tracing_operation_t); -using tool_get_callback_kind_name_fn_t = std::string_view (*)(rocprofiler_callback_tracing_kind_t); -using 
tool_get_callback_op_name_fn_t = std::string_view (*)(rocprofiler_callback_tracing_kind_t, - uint32_t); -using tool_get_roctx_msg_fn_t = std::string_view (*)(uint64_t); -using tool_get_counter_info_name_fn_t = std::string (*)(uint64_t); - -struct tool_table -{ - // node id - tool_get_agent_node_id_fn_t tool_get_agent_node_id_fn = nullptr; - // timestamps - tool_get_app_timestamps_fn_t tool_get_app_timestamps_fn = nullptr; - // names and messages - tool_get_kernel_name_fn_t tool_get_kernel_name_fn = nullptr; - tool_get_domain_name_fn_t tool_get_domain_name_fn = nullptr; - tool_get_operation_name_fn_t tool_get_operation_name_fn = nullptr; - tool_get_counter_info_name_fn_t tool_get_counter_info_name_fn = nullptr; - tool_get_callback_kind_name_fn_t tool_get_callback_kind_fn = nullptr; - tool_get_callback_op_name_fn_t tool_get_callback_op_name_fn = nullptr; - tool_get_roctx_msg_fn_t tool_get_roctx_msg_fn = nullptr; -}; - -/// converts a container of ring buffers of element Tp into a single container of elements -template class ContainerT, typename... 
ParamsT> -ContainerT -get_buffer_elements(ContainerT, ParamsT...>&& data) -{ - auto ret = ContainerT{}; - for(auto& buf : data) - { - Tp* record = nullptr; - do - { - record = buf.retrieve(); - if(record) ret.emplace_back(*record); - } while(record != nullptr); - } - - return ret; -} - -namespace cereal -{ -#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD)) - -template -void -save(ArchiveT& ar, const kernel_symbol_data& data) -{ - cereal::save(ar, static_cast(data)); - SAVE_DATA_FIELD(formatted_kernel_name); - SAVE_DATA_FIELD(demangled_kernel_name); - SAVE_DATA_FIELD(truncated_kernel_name); -} - -template -void -save(ArchiveT& ar, const rocprofiler_tool_counter_info_t& data) -{ - SAVE_DATA_FIELD(agent_id); - cereal::save(ar, static_cast(data)); - SAVE_DATA_FIELD(dimension_ids); -} - -#undef SAVE_DATA_FIELD -} // namespace cereal diff --git a/source/lib/rocprofiler-sdk-tool/rocprofv3-multi-node.md b/source/lib/rocprofiler-sdk-tool/rocprofv3-multi-node.md new file mode 100644 index 0000000000..fed6ed368f --- /dev/null +++ b/source/lib/rocprofiler-sdk-tool/rocprofv3-multi-node.md @@ -0,0 +1,335 @@ +# rocprofv3 Multi-Node Profiling Data + +## Overview + +- rocprofv3 adds support for a `--output-format rocpd` option which enables writing a SQLite database file (one per process) with the collected data + - Use SQL schema from `rocpd` initially to support the rocpd post-processing analysis +- In order to visualize the data, users will convert the database(s) to their desired visualization formats + - SQL has a relatively easy way to treat multiple separate databases as one database via views +- rocprofv3 provides some command-line tools built on top of a python package designed for post-processing our databases + +### Skills Required for Tasks + +1. 
Rework rocprofv3 tool library output functions + - __C++__: output functions written in C++ (`^/source/lib/rocprofiler-sdk-tool/generate*`) + - __CMake__: move the output functions into stand-alone library +2. Create Python package skeleton in `^/source/lib/python` + - __Python__: organizing a Python package to be importable (`import rocpd`) and executable (i.e. `python -m rocpd --help`) +3. Adding rocprofv3 SQLite support + - __C++__: just a general skill requirement for working with rocprofiler-sdk + - __CMake__: for integrating SQLite and python bindings into rocprofiler-sdk build + - __SQL__: understanding of SQL statement meanings, knowledge of `rocpd` SQL schema +4. Python bindings for output functions + - __C++__: just a general skill requirement for working with rocprofiler-sdk + - __PyBind11__: for writing Python bindings + +#### Task #1: Rework `rocprofv3` Tool Library Output Functions + +The problems with most of the output functions are: + +- Problem: Access global memory via `tool_table` functions + - Global memory access won't work well for invocation of these functions via Python bindings + - Ideally, these functions should be written in the (pseudo-) functional programming style, i.e., function only accesses memory of arguments, communicates via return value, and avoids concepts like shared states but without restrictions such as immutable data arguments +- Problem: Require all the profiling data to be loaded into memory + - During runtime, rocprofv3 writes data to buffer and when buffer is full, writes the binary blob to a temporary intermediate binary file + - During finalization, rocprofv3 reads _all_ of this data back into memory from the intermediate binary file and then writes to various output forms + - This approach will not work when amount of collected data exceeds amount of available RAM, especially on systems with swap disabled; e.g., 1 TB of profiling data on system with 128 GB of RAM + - We need to be able to stream data in chunks to 
these output functions + - Proposed approach: function which creates a file handle, function which writes chunk of data to file (invoked multiple times), function which closes file handle + +> Assigned: Markus, Olha, Jin, Araceli (i.e. onboarding group task) + Jonathan (CMake part) + +##### Tasks + +1. Move the `source/lib/rocprofiler-sdk-tool/generate*.{hpp,cpp}` functions into standalone (static) library: `source/lib/tool-data` + - May require `source/lib/tool-common` (static) library if something is needed by both `tool-data` and `rocprofiler-sdk-tool` libraries + - Please consult if you have any questions about where to put things and/or naming conventions + - Pay attention to existing CMake and use similar style + - We will link this library into `rocprofiler-sdk-tool` and link it into Python bindings library +2. Solve global memory access problem + - Probably need some additional data structures which represent the data currently stored/accessed from global memory which will be passed into function. + +### Python Package for Converting Databases to Other Output Formats + +> __Note__: We could potentially reuse `rocpd` for the python package name since "ROCm Profiling Data" is a pretty appropriate name. + +rocprofv3 will need to rework the output functions within the `librocprofiler-sdk-tool.so` library (underlying library used by `rocprofv3`) in order to support Python bindings. +For example, `generateJSON(...)` currently fetches info from global memory stored during the run; we need these functions to be pure: the only memory operated on is from the function arguments. +Furthermore, these output functions need to support partial writes: invocations with only a subset of the data so that all the data need not be loaded into memory at one time. + +> __Example__ (workflow): get handle to output format, e.g. a Perfetto session, invoke `generatePerfetto(...)` with some of the data, repeat until all data has been passed, close handle to the output format. 
+ +These reworked functions should be moved to another library, e.g. `librocprofiler-sdk-tool-io.(a|so)`. +Once the output functions are isolated and functional, we need to generate python bindings (via PyBind11) so that a python package can be built on top of them. +Various command-line tools can be provided using `__main__.py` file(s) within our python package. +Users can use the python package to write their own scripts. + +> __Example__ (two databases, one Perfetto trace): `rocprofv3-merge --output-format pftrace --out mybenchmark.pftrace --in results-1000.db results-1001.db` + +### Treating multiple SQL databases as one database + +```python +conn = sqlite3.connect('db1.db') +conn.execute("ATTACH DATABASE 'db2.db' AS db2;") +conn.execute("ATTACH DATABASE 'db3.db' AS db3;") + +# Create a view that unifies the 'users' table from all three databases +conn.execute(""" +CREATE VIEW all_users AS +SELECT * FROM users +UNION ALL +SELECT * FROM db2.users +UNION ALL +SELECT * FROM db3.users; +""") + +# Now you can query the view as if it were a single table +cursor = conn.execute("SELECT * FROM all_users;") +for row in cursor: + print(row) + +# Close the connection +conn.close() +``` + +## Proposed SQL Schema + +A more comprehensive SQL Schema is proposed below. This schema is intended to be more comprehensive with respect to the +various types of data that profilers can collect (such as Omnitrace/RSP) + +The schema consists of multiple interrelated tables to capture different categories of profiling data. +Below is a high-level schema with the primary tables and relationships. + +__*Please note, this is a very preliminary sketch of the schema*__. +If you want to weigh in, please restrict comments to the high-level organization, comments that it doesn't contain +fields for correlation IDs or something like that are not particularly helpful at the moment. 
+ +```sql +CREATE TABLE strings ( + id SERIAL PRIMARY KEY, + value VARCHAR(1024) UNIQUE +); + +CREATE TABLE process ( + id INT PRIMARY KEY, + pid INT, + process_name_id INT, + executable_path_id INT, + start_time BIGINT, + end_time BIGINT, + FOREIGN KEY (process_name_id) REFERENCES strings(id) + FOREIGN KEY (executable_path_id) REFERENCES strings(id) +); + +CREATE TABLE thread ( + id INT PRIMARY KEY, + tid INT, + process_id INT, + thread_name_id INT, + start_time BIGINT, + end_time BIGINT, + FOREIGN KEY (process_id) REFERENCES process(process_id), + FOREIGN KEY (thread_name_id) REFERENCES strings(id) +); + +CREATE TABLE cpu_info ( + id SERIAL PRIMARY KEY, + core_id INT, + socket_id INT, + frequency_hz INT, + model_id INT, + cache_size_kb INT, + FOREIGN KEY (model_id) REFERENCES strings(id) +); + +CREATE TABLE gpu_info ( + id SERIAL PRIMARY KEY, + device_name_id INT, + compute_capability_id INT, + memory_size_mb INT, + multiprocessor_count INT, + clock_rate_hz INT, + FOREIGN KEY (device_name_id) REFERENCES strings(id) + FOREIGN KEY (compute_capability_id) REFERENCES strings(id) +); + +CREATE TABLE instrumentation_regions ( + id SERIAL PRIMARY KEY, + process_id INT, + thread_id INT, + region_name_id INT, + start_time BIGINT, + end_time BIGINT, + parent_region_id INT, + duration_ns BIGINT GENERATED ALWAYS AS (end_time - start_time) STORED, + file_id INT, + line_number INT, + additional_info JSONB, + FOREIGN KEY (process_id) REFERENCES process(process_id), + FOREIGN KEY (thread_id) REFERENCES thread(thread_id), + FOREIGN KEY (region_name_id) REFERENCES strings(id), + FOREIGN KEY (file_id) REFERENCES strings(id) +); + +CREATE TABLE call_stacks ( + id SERIAL PRIMARY KEY, + process_id INT, + thread_id INT, + timestamp BIGINT, + stack_depth INT, + function_id INT, + file_id INT, + line_number INT, + parent_sample_id INT, + call_site VARCHAR(1024), + additional_info JSONB, + FOREIGN KEY (process_id) REFERENCES process(process_id), + FOREIGN KEY (thread_id) REFERENCES 
thread(thread_id), + FOREIGN KEY (function_id) REFERENCES strings(id), + FOREIGN KEY (file_id) REFERENCES strings(id) +); + +CREATE TABLE hardware_counters ( + id SERIAL PRIMARY KEY, + process_id INT, + thread_id INT, + timestamp BIGINT, + event_id INT, + value BIGINT, + FOREIGN KEY (process_id) REFERENCES process(process_id), + FOREIGN KEY (thread_id) REFERENCES thread(thread_id), + FOREIGN KEY (event_id) REFERENCES strings(id) +); + +CREATE TABLE memory_operations ( + id SERIAL PRIMARY KEY, + process_id INT, + thread_id INT, + timestamp BIGINT, + operation_type VARCHAR(50) CHECK (operation_type IN ('ALLOC', 'FREE', 'COPY')), + source_address BIGINT, + destination_address BIGINT, + size_bytes BIGINT, + duration_us BIGINT, + additional_info JSONB, + FOREIGN KEY (process_id) REFERENCES process(process_id), + FOREIGN KEY (thread_id) REFERENCES thread(thread_id) +); + +CREATE TABLE gpu_kernel_launches ( + id SERIAL PRIMARY KEY, + process_id INT, + thread_id INT, + gpu_id INT, + kernel_id INT, + dispatch_id INT, + launch_time BIGINT, + start_time BIGINT, + end_time BIGINT, + grid_size_x INT, + grid_size_y INT, + grid_size_z INT, + block_size_x INT, + block_size_y INT, + block_size_z INT, + shared_mem_bytes INT, + duration_ns BIGINT GENERATED ALWAYS AS (end_time - start_time) STORED, + FOREIGN KEY (process_id) REFERENCES process(process_id), + FOREIGN KEY (thread_id) REFERENCES thread(thread_id), + FOREIGN KEY (gpu_id) REFERENCES gpu_info(gpu_id), + FOREIGN KEY (kernel_id) REFERENCES strings(id) +); + +CREATE TABLE binary_analysis_info ( + id SERIAL PRIMARY KEY, + process_id INT, + binary_name VARCHAR(1024), + function_id INT, + start_address BIGINT, + end_address BIGINT, + instruction_count INT, + file_id INT, + line_number INT, + call_sites JSONB, + additional_info JSONB, + FOREIGN KEY (process_id) REFERENCES process(process_id), + FOREIGN KEY (function_id) REFERENCES strings(id), + FOREIGN KEY (file_id) REFERENCES strings(id) +); +``` + +Explanation of the design 
considerations: + +1. __Separate String Tables__: Created unique string tables for function names, file names, kernel names, and event names to avoid storing redundant copies. + - `function_names`, `file_names`, `kernel_names`, and `event_names` tables are created to hold unique strings. Each table has a surrogate primary key (`function_id`, `file_id`, `kernel_id`, `event_id`) that is referenced by the main tables. + - This avoids storing redundant copies of long or frequently repeating strings in different tables, reducing the storage footprint and improving consistency. +2. __Foreign Key References__: Main tables reference unique strings using foreign keys for consistency and space efficiency. + - Main tables such as `instrumentation_regions`, `call_stacks`, `gpu_kernel_launches`, etc., reference these unique string tables using foreign keys. + - This makes querying for specific function names or kernel names more efficient, as the strings are indexed separately. +3. __Computed Columns__: Used computed columns for duration fields to facilitate quick analysis. + - The `duration_ns` columns are computed from the start and end timestamps (`end_time - start_time`), providing useful metrics for quick analysis. +4. __Extensibility__: Designed to be easily extensible with additional string categories if needed. + - New string types or categories can be added by creating new tables, and the main tables can reference them with minor schema adjustments. +5. __JSONB for Additional Metadata__: + - JSONB columns (`additional_info`) are used to handle complex or variable metadata that doesn’t fit neatly into the structured schema (e.g., custom annotations, extra debug info). 
+ +### Example Data Insertion and Lookup + +#### Adding a new function + +```sql +INSERT INTO function_names (function_name) VALUES ('my_function') ON CONFLICT (function_name) DO NOTHING; +``` + +#### Linking a function in a call stack + +```sql +INSERT INTO call_stacks (process_id, thread_id, timestamp, stack_depth, function_id, file_id) +VALUES (123, 456, '2024-09-27 10:00:00', 1, (SELECT function_id FROM function_names WHERE function_name = 'my_function'), + (SELECT file_id FROM file_names WHERE file_name = 'my_file.c')); +``` + +## Q & A + +### All global variables are protected with locks in common synchronized library. How are we sending the data from these variables to the pure functions? + +There is a new `rocprofiler::tool::metadata` struct in `lib/output/metadata.hpp` which will be populated with data from SQL. +This struct is passed to the output functions. + +### If we provide the functionality to flush the trace at regular intervals, do we delete the data in global memory after each flush? If not, how do we keep track of data already read at any given point in time during runtime? + +We will probably not delete the metadata (agent info, code objects, kernel symbols, etc.) after a flush. +When we flush, we will swap out the temporary binary file with a new temporary binary file and write/append the database with +the contents of the old temporary binary file. + +### Can a user collect trace at regular flush interval and ask for counter collection at the end of application? + +I am not sure what you mean here. We can write counter collection data when we flush. If the user is asking for periodic +flushing, we will restrict the output format to the database. In other words, I suspect that `--flush-rate X` will only +be compatible with `--output-format rocpd` -- any additional or alternative data formats and we will throw an error in the +rocprofv3 script. This is for simplicity's sake; supporting periodically flushing to CSV, etc. 
is unnecessary in my opinion. + +### I think hardware_counters table in database schema should have a dispatch_id field to represent the kernel it belongs to + +Please note, the proposed schema states clearly: + +> __*Please note, this is a very preliminary sketch of the schema*__. +> If you want to weigh in, please restrict comments to the high-level organization, comments that it doesn't contain +> fields for correlation IDs or something like that are not particularly helpful at the moment. + +However, I will note that the hardware counters table is probably going to be generic, i.e. supporting CPU HW counters, which +do not have dispatch IDs. Lastly, I will also note, device counter collection is not associated with a dispatch so even in +the case of GPU HW counters, including this field is questionable. + +### What is binary analysis info table? + +More advanced tools such as Omnitrace/Rocprofiler-System do address-to-line translations. This could also potentially +include the sort of data related to PC sampling. + +### What is the Key of gpu info table? Node_id/zero based numbering scheme? + +That isn't defined. Very preliminary sketch. + +### When is user allowed to access the database in case of flushing the trace at regular intervals? Is user allowed to read the database only after tool finalization? Or we create a database file for each interval? + +TBD on the exact details but the user will certainly be able to read the database before tool finalization when it is flushed. diff --git a/source/lib/rocprofiler-sdk-tool/tmp_file_buffer.hpp b/source/lib/rocprofiler-sdk-tool/tmp_file_buffer.hpp deleted file mode 100644 index ca80e569d6..0000000000 --- a/source/lib/rocprofiler-sdk-tool/tmp_file_buffer.hpp +++ /dev/null @@ -1,161 +0,0 @@ -// MIT License -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -#pragma once - -#include "domain_type.hpp" -#include "helper.hpp" -#include "tmp_file.hpp" - -#include "lib/common/container/ring_buffer.hpp" -#include "lib/common/logging.hpp" -#include "lib/common/units.hpp" - -#include - -#include -#include -#include -#include -#include -#include - -template -using ring_buffer_t = rocprofiler::common::container::ring_buffer; - -std::string -compose_tmp_file_name(domain_type buffer_type); - -template -std::tuple -get_tmp_file_buffer(domain_type type) -{ - static Tp* _buffer = new Tp(rocprofiler::common::units::get_page_size()); - static tmp_file* _tmp_file = new tmp_file(compose_tmp_file_name(type)); - return std::tuple(_buffer, _tmp_file); -} - -template -void -offload_buffer(domain_type type) -{ - auto [_tmp_buf, _tmp_file] = get_tmp_file_buffer(type); - auto _lk = std::lock_guard(_tmp_file->file_mutex); - [[maybe_unused]] static auto _success = _tmp_file->open(); - auto& _fs = _tmp_file->stream; - _tmp_file->file_pos.emplace(_fs.tellg()); - _tmp_buf->save(_fs); - _tmp_buf->clear(); - CHECK(_tmp_buf->is_empty() == true); -} - -template -void -write_ring_buffer(Tp _v, domain_type type) -{ - auto [_tmp_buf, _tmp_file] = get_tmp_file_buffer>(type); - - if(_tmp_buf->capacity() == 0) - { - ROCP_INFO << "rocprofv3 is dropping record from domain " << get_domain_column_name(type) - << ". Buffer has a capacity of zero."; - return; - } - - auto* ptr = _tmp_buf->request(false); - if(ptr == nullptr) - { - offload_buffer>(type); - ptr = _tmp_buf->request(false); - - // if failed, try again - if(!ptr) ptr = _tmp_buf->request(false); - - // after second failure, emit warning message - ROCP_CI_LOG_IF(WARNING, !ptr) - << "rocprofv3 is dropping record from domain " << get_domain_column_name(type) - << ". 
No space in buffer: " - << fmt::format( - "capacity={}, record_size={}, used_count={}, free_count={} | raw_info=[{}]", - _tmp_buf->capacity(), - _tmp_buf->data_size(), - _tmp_buf->count(), - _tmp_buf->free(), - _tmp_buf->as_string()); - } - - if(ptr) - { - if constexpr(std::is_move_constructible::value) - { - new(ptr) Tp{std::move(_v)}; - } - else if constexpr(std::is_move_assignable::value) - { - *ptr = std::move(_v); - } - else if constexpr(std::is_copy_constructible::value) - { - new(ptr) Tp{_v}; - } - else if constexpr(std::is_copy_assignable::value) - { - *ptr = _v; - } - else - { - static_assert(std::is_void::value, - "data type is neither move/copy constructible nor move/copy assignable"); - } - } -} - -template -void -flush_tmp_buffer(domain_type type) -{ - auto [_tmp_buf, _tmp_file] = get_tmp_file_buffer(type); - if(!_tmp_buf->is_empty()) offload_buffer(type); -} - -template -std::deque -read_tmp_file(domain_type type) -{ - auto _data = std::deque{}; - - auto [_tmp_buf, _tmp_file] = get_tmp_file_buffer(type); - auto _lk = std::lock_guard{_tmp_file->file_mutex}; - auto& _fs = _tmp_file->stream; - if(_fs.is_open()) _fs.close(); - _tmp_file->open(std::ios::binary | std::ios::in); - for(auto itr : _tmp_file->file_pos) - { - _fs.seekg(itr); // set to the absolute position - if(_fs.eof()) break; - Tp _buffer; - _buffer.load(_fs); - _data.emplace_back(std::move(_buffer)); - } - - return _data; -} diff --git a/source/lib/rocprofiler-sdk-tool/tool.cpp b/source/lib/rocprofiler-sdk-tool/tool.cpp index 480f95db12..ca1f797114 100644 --- a/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -20,19 +20,8 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-#include "buffered_output.hpp" #include "config.hpp" -#include "csv.hpp" -#include "domain_type.hpp" -#include "generateCSV.hpp" -#include "generateJSON.hpp" -#include "generateOTF2.hpp" -#include "generatePerfetto.hpp" -#include "generateStats.hpp" #include "helper.hpp" -#include "output_file.hpp" -#include "statistics.hpp" -#include "tmp_file.hpp" #include "lib/common/environment.hpp" #include "lib/common/filesystem.hpp" @@ -42,6 +31,20 @@ #include "lib/common/synchronized.hpp" #include "lib/common/units.hpp" #include "lib/common/utility.hpp" +#include "lib/output/buffered_output.hpp" +#include "lib/output/counter_info.hpp" +#include "lib/output/csv.hpp" +#include "lib/output/csv_output_file.hpp" +#include "lib/output/domain_type.hpp" +#include "lib/output/generateCSV.hpp" +#include "lib/output/generateJSON.hpp" +#include "lib/output/generateOTF2.hpp" +#include "lib/output/generatePerfetto.hpp" +#include "lib/output/generateStats.hpp" +#include "lib/output/output_stream.hpp" +#include "lib/output/statistics.hpp" +#include "lib/output/tmp_file.hpp" +#include "lib/output/tmp_file_buffer.hpp" #include #include @@ -83,8 +86,6 @@ namespace tool = ::rocprofiler::tool; namespace { -constexpr uint32_t lds_block_size = 128 * 4; - auto destructors = new std::vector>{}; template @@ -119,24 +120,26 @@ add_destructor(Tp*& ptr) std::call_once(_once, []() { add_destructor(PTR); }); \ } -tool::output_file*& +tool::csv_output_file*& get_list_basic_metrics_file() { static auto* _v = - new tool::output_file{"basic_metrics", - tool::csv::list_basic_metrics_csv_encoder{}, - {"Agent_Id", "Name", "Description", "Block", "Dimensions"}}; + new tool::csv_output_file{tool::get_config(), + "basic_metrics", + tool::csv::list_basic_metrics_csv_encoder{}, + {"Agent_Id", "Name", "Description", "Block", "Dimensions"}}; ADD_DESTRUCTOR(_v); return _v; } -tool::output_file*& +tool::csv_output_file*& get_list_derived_metrics_file() { static auto* _v = - new tool::output_file{"derived_metrics", - 
tool::csv::list_derived_metrics_csv_encoder{}, - {"Agent_Id", "Name", "Description", "Expression", "Dimensions"}}; + new tool::csv_output_file{tool::get_config(), + "derived_metrics", + tool::csv::list_derived_metrics_csv_encoder{}, + {"Agent_Id", "Name", "Description", "Expression", "Dimensions"}}; ADD_DESTRUCTOR(_v); return _v; } @@ -172,8 +175,6 @@ get_buffers() return _v; } -using rocprofiler_code_object_data_t = rocprofiler_callback_tracing_code_object_load_data_t; - template Tp* as_pointer(Tp&& _val) @@ -181,6 +182,13 @@ as_pointer(Tp&& _val) return new Tp{std::forward(_val)}; } +template +Tp* +as_pointer(Args&&... _args) +{ + return new Tp{std::forward(_args)...}; +} + template Tp* as_pointer() @@ -188,7 +196,6 @@ as_pointer() return new Tp{}; } -using code_object_data_map_t = std::unordered_map; using targeted_kernels_map_t = std::unordered_map>; using counter_dimension_info_map_t = @@ -198,17 +205,9 @@ using kernel_iteration_t = std::unordered_map; using kernel_rename_stack_t = std::stack; -auto code_obj_data = as_pointer>(); -auto* kernel_data = as_pointer>(); -auto* marker_msg_data = as_pointer>(); -auto counter_dimension_data = common::Synchronized{}; -auto target_kernels = common::Synchronized{}; -auto* buffered_name_info = as_pointer(get_buffer_id_names()); -auto* callback_name_info = as_pointer(get_callback_id_names()); -auto* agent_info = as_pointer(agent_info_map_t{}); -auto* tool_functions = as_pointer(tool_table{}); -auto* stats_timestamp = as_pointer(timestamps_t{}); -auto kernel_iteration = common::Synchronized{}; +auto* tool_metadata = as_pointer(tool::metadata::inprocess{}); +auto target_kernels = common::Synchronized{}; +auto kernel_iteration = common::Synchronized{}; thread_local auto thread_dispatch_rename = as_pointer(); thread_local auto thread_dispatch_rename_dtor = common::scope_destructor{[]() { @@ -285,27 +284,6 @@ flush() ROCP_INFO << "Buffers flushed"; } -std::string_view -get_callback_kind(rocprofiler_callback_tracing_kind_t kind) 
-{ - return CHECK_NOTNULL(callback_name_info)->at(kind); -} - -std::string_view -get_callback_op_name(rocprofiler_callback_tracing_kind_t kind, uint32_t op) -{ - return CHECK_NOTNULL(callback_name_info)->at(kind, op); -} - -std::string_view -get_roctx_msg(uint64_t cid) -{ - return CHECK_NOTNULL(marker_msg_data) - ->rlock( - [](const auto& _data, uint64_t _cid_v) -> std::string_view { return _data.at(_cid_v); }, - cid); -} - int set_kernel_rename_correlation_id(rocprofiler_thread_id_t thr_id, rocprofiler_context_id_t ctx_id, @@ -319,7 +297,11 @@ set_kernel_rename_correlation_id(rocprofiler_thread_id_t << "unexpected kind: " << kind; if(thread_dispatch_rename != nullptr && !thread_dispatch_rename->empty()) - external_corr_id->value = thread_dispatch_rename->top(); + { + auto val = thread_dispatch_rename->top(); + if(tool_metadata) tool_metadata->add_external_correlation_id(val); + external_corr_id->value = val; + } common::consume_args(thr_id, ctx_id, kind, op, internal_corr_id, user_data); @@ -363,7 +345,7 @@ cntrl_tracing_callback(rocprofiler_callback_tracing_record_t record, marker_record.correlation_id = record.correlation_id; marker_record.start_timestamp = user_data->value; marker_record.end_timestamp = ts; - write_ring_buffer(marker_record, domain_type::MARKER); + tool::write_ring_buffer(marker_record, domain_type::MARKER); } } } @@ -373,7 +355,7 @@ kernel_rename_callback(rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data, void* data) { - if(!rocprofiler::tool::get_config().kernel_rename || thread_dispatch_rename == nullptr) return; + if(!tool::get_config().kernel_rename || thread_dispatch_rename == nullptr) return; if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API) { @@ -429,13 +411,9 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, { if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) { - CHECK_NOTNULL(marker_msg_data) - ->wlock( - [](auto& _data, uint64_t _cid_v, std::string&& _msg) { - 
_data.emplace(_cid_v, std::move(_msg)); - }, - record.correlation_id.internal, - std::string{marker_data->args.roctxMarkA.message}); + CHECK_NOTNULL(tool_metadata) + ->add_marker_message(record.correlation_id.internal, + std::string{marker_data->args.roctxMarkA.message}); auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{}; marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t); @@ -445,7 +423,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, marker_record.correlation_id = record.correlation_id; marker_record.start_timestamp = ts; marker_record.end_timestamp = ts; - write_ring_buffer(marker_record, domain_type::MARKER); + tool::write_ring_buffer(marker_record, domain_type::MARKER); } } else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA) @@ -454,11 +432,8 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, { if(marker_data->args.roctxRangePushA.message) { - CHECK_NOTNULL(marker_msg_data) - ->wlock( - [](auto& _data, uint64_t _cid_v, std::string&& _msg) { - _data.emplace(_cid_v, std::move(_msg)); - }, + CHECK_NOTNULL(tool_metadata) + ->add_marker_message( record.correlation_id.internal, std::string{marker_data->args.roctxRangePushA.message}); @@ -487,7 +462,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, stacked_range.pop_back(); val.end_timestamp = ts; - write_ring_buffer(val, domain_type::MARKER); + tool::write_ring_buffer(val, domain_type::MARKER); } } else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA) @@ -495,13 +470,9 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && marker_data->args.roctxRangeStartA.message) { - CHECK_NOTNULL(marker_msg_data) - ->wlock( - [](auto& _data, uint64_t _cid_v, std::string&& _msg) { - _data.emplace(_cid_v, std::move(_msg)); - }, - record.correlation_id.internal, - 
std::string{marker_data->args.roctxRangeStartA.message}); + CHECK_NOTNULL(tool_metadata) + ->add_marker_message(record.correlation_id.internal, + std::string{marker_data->args.roctxRangeStartA.message}); auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{}; marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t); @@ -530,7 +501,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, [](const auto& map, auto _key) { return map.at(_key); }, _id); _entry.end_timestamp = ts; - write_ring_buffer(_entry, domain_type::MARKER); + tool::write_ring_buffer(_entry, domain_type::MARKER); global_range.wlock([](auto& map, auto _key) { return map.erase(_key); }, _id); } } @@ -550,7 +521,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, marker_record.correlation_id = record.correlation_id; marker_record.start_timestamp = user_data->value; marker_record.end_timestamp = ts; - write_ring_buffer(marker_record, domain_type::MARKER); + tool::write_ring_buffer(marker_record, domain_type::MARKER); } } } @@ -570,13 +541,9 @@ code_object_tracing_callback(rocprofiler_callback_tracing_record_t record, { if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD) { - auto* obj_data = static_cast(record.payload); + auto* obj_data = static_cast(record.payload); - code_obj_data->wlock( - [](code_object_data_map_t& cdata, rocprofiler_code_object_data_t* obj_data_v) { - cdata.emplace(obj_data_v->code_object_id, *obj_data_v); - }, - CHECK_NOTNULL(obj_data)); + CHECK_NOTNULL(tool_metadata)->add_code_object(*obj_data); } else if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD) { @@ -587,33 +554,34 @@ code_object_tracing_callback(rocprofiler_callback_tracing_record_t record, if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER) { - auto* sym_data = static_cast(record.payload); + auto* sym_data = static_cast(record.payload); if(record.phase == 
ROCPROFILER_CALLBACK_PHASE_LOAD) { - auto itr = kernel_data->wlock([sym_data](auto& _data) { - return _data.emplace(sym_data->kernel_id, - kernel_symbol_data{get_dereference(sym_data)}); - }); + auto success = CHECK_NOTNULL(tool_metadata) + ->add_kernel_symbol(kernel_symbol_info{ + get_dereference(sym_data), + [](const char* val) { return tool::format_name(val); }}); - ROCP_WARNING_IF(!itr.second) + ROCP_WARNING_IF(!success) << "duplicate kernel symbol data for kernel_id=" << sym_data->kernel_id; // add the kernel to the kernel_targets if - if(itr.second) + if(success) { // if kernel name is provided by user then by default all kernels in the application // are targeted - const auto& kernel_info = itr.first->second; - auto kernel_filter_include = tool::get_config().kernel_filter_include; - auto kernel_filter_exclude = tool::get_config().kernel_filter_exclude; - auto kernel_filter_range = tool::get_config().kernel_filter_range; + const auto* kernel_info = + CHECK_NOTNULL(tool_metadata)->get_kernel_symbol(sym_data->kernel_id); + auto kernel_filter_include = tool::get_config().kernel_filter_include; + auto kernel_filter_exclude = tool::get_config().kernel_filter_exclude; + auto kernel_filter_range = tool::get_config().kernel_filter_range; std::regex include_regex(kernel_filter_include); std::regex exclude_regex(kernel_filter_exclude); - if(std::regex_search(kernel_info.formatted_kernel_name, include_regex)) + if(std::regex_search(kernel_info->formatted_kernel_name, include_regex)) { if(kernel_filter_exclude.empty() || - !std::regex_search(kernel_info.formatted_kernel_name, exclude_regex)) + !std::regex_search(kernel_info->formatted_kernel_name, exclude_regex)) add_kernel_target(sym_data->kernel_id, kernel_filter_range); } } @@ -624,37 +592,6 @@ code_object_tracing_callback(rocprofiler_callback_tracing_record_t record, (void) data; } -std::string_view -get_kernel_name(uint64_t kernel_id, uint64_t rename_id) -{ - if(rename_id > 0) - { - if(const auto* _name = 
common::get_string_entry(rename_id)) return std::string_view{*_name}; - } - - return CHECK_NOTNULL(kernel_data)->rlock([kernel_id](const auto& _data) -> std::string_view { - return _data.at(kernel_id).formatted_kernel_name; - }); -} - -std::string_view -get_domain_name(rocprofiler_buffer_tracing_kind_t record_kind) -{ - return CHECK_NOTNULL(buffered_name_info)->at(record_kind); -} - -uint64_t -get_agent_node_id(rocprofiler_agent_id_t agent_id) -{ - return agent_info->at(agent_id).logical_node_id; -} - -std::string_view -get_operation_name(rocprofiler_buffer_tracing_kind_t kind, rocprofiler_tracing_operation_t op) -{ - return CHECK_NOTNULL(buffered_name_info)->at(kind, op); -} - void buffered_tracing_callback(rocprofiler_context_id_t /*context*/, rocprofiler_buffer_id_t /*buffer_id*/, @@ -678,7 +615,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, auto* record = static_cast( header->payload); - write_ring_buffer(*record, domain_type::KERNEL_DISPATCH); + tool::write_ring_buffer(*record, domain_type::KERNEL_DISPATCH); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API || @@ -689,21 +626,21 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, auto* record = static_cast(header->payload); - write_ring_buffer(*record, domain_type::HSA); + tool::write_ring_buffer(*record, domain_type::HSA); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) { auto* record = static_cast(header->payload); - write_ring_buffer(*record, domain_type::MEMORY_COPY); + tool::write_ring_buffer(*record, domain_type::MEMORY_COPY); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY) { auto* record = static_cast( header->payload); - write_ring_buffer(*record, domain_type::SCRATCH_MEMORY); + tool::write_ring_buffer(*record, domain_type::SCRATCH_MEMORY); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API || header->kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API) @@ -711,14 +648,14 @@ 
buffered_tracing_callback(rocprofiler_context_id_t /*context*/, auto* record = static_cast(header->payload); - write_ring_buffer(*record, domain_type::HIP); + tool::write_ring_buffer(*record, domain_type::HIP); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_RCCL_API) { auto* record = static_cast(header->payload); - write_ring_buffer(*record, domain_type::RCCL); + tool::write_ring_buffer(*record, domain_type::RCCL); } else { @@ -734,157 +671,34 @@ using agent_counter_map_t = std::unordered_map>; rocprofiler_status_t -dimensions_info_callback(rocprofiler_counter_id_t id, +dimensions_info_callback(rocprofiler_counter_id_t /*id*/, const rocprofiler_record_dimension_info_t* dim_info, long unsigned int num_dims, void* user_data) { - if(user_data != nullptr) + ROCP_FATAL_IF(user_data == nullptr) << "dimensions_info_callback invoked without user data"; + + if(user_data) { - auto* dimensions_info = - static_cast*>(user_data); - dimensions_info->reserve(num_dims); + auto* _data = static_cast*>(user_data); + _data->reserve(num_dims); for(size_t j = 0; j < num_dims; j++) - dimensions_info->emplace_back(dim_info[j]); - } - else - { - counter_dimension_data.wlock( - [&id, &dim_info, &num_dims](counter_dimension_info_map_t& counter_dimension_data_v) { - if(counter_dimension_data_v.find(id.handle) == counter_dimension_data_v.end()) - { - auto dimensions = std::vector{}; - dimensions.reserve(num_dims); - for(size_t dim = 0; dim < num_dims; ++dim) - dimensions.emplace_back(dim_info[dim]); - counter_dimension_data_v.emplace(id.handle, std::move(dimensions)); - } - }); + _data->emplace_back(dim_info[j]); } + return ROCPROFILER_STATUS_SUCCESS; } -struct tool_agent -{ - int64_t device_id = 0; - const rocprofiler_agent_v0_t* agent = nullptr; -}; - -using tool_agent_vec_t = std::vector; - auto get_gpu_agents() { - auto _gpu_agents = tool_agent_vec_t{}; - - ROCPROFILER_CALL( - rocprofiler_query_available_agents( - ROCPROFILER_AGENT_INFO_VERSION_0, - [](rocprofiler_agent_version_t, 
const void** agents, size_t num_agents, void* _data) { - auto* _gpu_agents_v = static_cast(_data); - for(size_t i = 0; i < num_agents; ++i) - { - auto* agent = static_cast(agents[i]); - if(agent->type == ROCPROFILER_AGENT_TYPE_GPU) - _gpu_agents_v->emplace_back(tool_agent{0, agent}); - } - return ROCPROFILER_STATUS_SUCCESS; - }, - sizeof(rocprofiler_agent_t), - &_gpu_agents), - "Iterate rocporfiler agents") - - // make sure they are sorted by node id - std::sort(_gpu_agents.begin(), _gpu_agents.end(), [](const auto& lhs, const auto& rhs) { - return CHECK_NOTNULL(lhs.agent)->node_id < CHECK_NOTNULL(rhs.agent)->node_id; - }); - - int64_t _dev_id = 0; - for(auto& itr : _gpu_agents) - itr.device_id = _dev_id++; - - return _gpu_agents; + return CHECK_NOTNULL(tool_metadata)->get_gpu_agents(); } auto -get_agent_counter_info(const tool_agent_vec_t& _agents) +get_agent_counter_info() { - using value_type = - std::unordered_map>; - - auto _data = value_type{}; - - for(auto itr : _agents) - { - ROCPROFILER_CALL( - rocprofiler_iterate_agent_supported_counters( - itr.agent->id, - [](rocprofiler_agent_id_t id, - rocprofiler_counter_id_t* counters, - size_t num_counters, - void* user_data) { - auto* data_v = static_cast(user_data); - for(size_t i = 0; i < num_counters; ++i) - { - // populate global map - ROCPROFILER_CALL(rocprofiler_iterate_counter_dimensions( - counters[i], dimensions_info_callback, nullptr), - "iterate_dimension_info"); - - auto _info = rocprofiler_counter_info_v0_t{}; - auto _dim_ids = std::vector{}; - auto _dim_info = std::vector{}; - - ROCPROFILER_CALL( - rocprofiler_query_counter_info( - counters[i], ROCPROFILER_COUNTER_INFO_VERSION_0, &_info), - "Could not query counter_id"); - - // populate local vector - ROCPROFILER_CALL(rocprofiler_iterate_counter_dimensions( - counters[i], dimensions_info_callback, &_dim_info), - "iterate_dimension_info"); - - _dim_ids.reserve(_dim_info.size()); - for(auto ditr : _dim_info) - _dim_ids.emplace_back(ditr.id); - - 
(*data_v)[id].emplace_back( - id, _info, std::move(_dim_ids), std::move(_dim_info)); - } - return ROCPROFILER_STATUS_SUCCESS; - }, - &_data), - "iterate agent supported counters"); - - // Skip unsupported agents - if(_data.find(itr.agent->id) == _data.end()) continue; - - std::sort(_data.at(itr.agent->id).begin(), - _data.at(itr.agent->id).end(), - [](const auto& lhs, const auto& rhs) { return (lhs.id.handle < rhs.id.handle); }); - - for(auto& citr : _data.at(itr.agent->id)) - { - std::sort(citr.dimension_ids.begin(), citr.dimension_ids.end()); - std::sort(citr.dimension_info.begin(), - citr.dimension_info.end(), - [](const auto& lhs, const auto& rhs) { return (lhs.id < rhs.id); }); - } - } - - return _data; -} - -const tool_agent* -get_tool_agent(rocprofiler_agent_id_t id, const tool_agent_vec_t& data) -{ - for(const auto& itr : data) - { - if(id == itr.agent->id) return &itr; - } - - return nullptr; + return CHECK_NOTNULL(tool_metadata)->agent_counter_info; } // this function creates a rocprofiler profile config on the first entry @@ -893,7 +707,7 @@ get_device_counting_service(rocprofiler_agent_id_t agent_id) { static auto data = common::Synchronized{}; static const auto gpu_agents = get_gpu_agents(); - static const auto gpu_agents_counter_info = get_agent_counter_info(gpu_agents); + static const auto gpu_agents_counter_info = get_agent_counter_info(); auto profile = std::optional{}; data.ulock( @@ -907,10 +721,10 @@ get_device_counting_service(rocprofiler_agent_id_t agent_id) return false; }, [agent_id, &profile](agent_counter_map_t& data_v) { - auto counters_v = counter_vec_t{}; - auto found_v = std::vector{}; - const auto* tool_agent_v = get_tool_agent(agent_id, gpu_agents); - auto expected_v = tool::get_config().counters.size(); + auto counters_v = counter_vec_t{}; + auto found_v = std::vector{}; + const auto* agent_v = tool_metadata->get_agent(agent_id); + auto expected_v = tool::get_config().counters.size(); constexpr auto device_qualifier = 
std::string_view{":device="}; for(const auto& itr : tool::get_config().counters) @@ -930,7 +744,7 @@ get_device_counting_service(rocprofiler_agent_id_t agent_id) auto dev_id_v = std::stol(dev_id_s); // skip this counter if the counter is for a specific device id (which doesn't // this agent's device id) - if(dev_id_v != tool_agent_v->device_id) + if(dev_id_v != agent_v->gpu_index) { --expected_v; // is not expected continue; @@ -956,10 +770,9 @@ get_device_counting_service(rocprofiler_agent_id_t agent_id) ", ")); auto found_counters = fmt::format("{}", fmt::join(found_v.begin(), found_v.end(), ", ")); - LOG(FATAL) << "Unable to find all counters for agent " - << tool_agent_v->agent->node_id << " (gpu-" << tool_agent_v->device_id - << ", " << tool_agent_v->agent->name << ") in [" << requested_counters - << "]. Found: [" << found_counters << "]"; + LOG(FATAL) << "Unable to find all counters for agent " << agent_v->node_id + << " (gpu-" << agent_v->gpu_index << ", " << agent_v->name << ") in [" + << requested_counters << "]. 
Found: [" << found_counters << "]"; } if(!counters_v.empty()) @@ -1010,22 +823,6 @@ dispatch_callback(rocprofiler_dispatch_counting_service_data_t dispatch_data, } } -std::string -get_counter_info_name(uint64_t record_id) -{ - auto info = rocprofiler_counter_info_v0_t{}; - auto counter_id = rocprofiler_counter_id_t{}; - ROCPROFILER_CALL(rocprofiler_query_record_counter_id(record_id, &counter_id), - "query record counter id"); - if(rocprofiler_query_counter_info(rocprofiler_counter_id_t{counter_id}, - ROCPROFILER_COUNTER_INFO_VERSION_0, - static_cast(&info)) != ROCPROFILER_STATUS_SUCCESS) - { - ROCP_FATAL << "Could not find name for record id: " << record_id; - } - return {info.name}; -} - void counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_data, rocprofiler_record_counter_t* record_data, @@ -1034,26 +831,16 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da void* /*callback_data_args*/) { static const auto gpu_agents = get_gpu_agents(); - static const auto gpu_agents_counter_info = get_agent_counter_info(gpu_agents); + static const auto gpu_agents_counter_info = get_agent_counter_info(); - auto counter_record = rocprofiler_tool_counter_collection_record_t{}; + auto counter_record = tool::tool_counter_record_t{}; auto kernel_id = dispatch_data.dispatch_info.kernel_id; counter_record.dispatch_data = dispatch_data; counter_record.thread_id = user_data.value; - const kernel_symbol_data* kernel_info = - kernel_data->rlock([kernel_id](const auto& _data) { return &_data.at(kernel_id); }); - - auto lds_block_size_v = - (kernel_info->group_segment_size + (lds_block_size - 1)) & ~(lds_block_size - 1); - - counter_record.arch_vgpr_count = kernel_info->arch_vgpr_count; - counter_record.sgpr_count = kernel_info->sgpr_count; - counter_record.lds_block_size_v = lds_block_size_v; - - ROCP_FATAL_IF(!kernel_info) << "missing kernel information for kernel_id=" << kernel_id; - + const kernel_symbol_info* kernel_info = + 
CHECK_NOTNULL(tool_metadata)->get_kernel_symbol(kernel_id); ROCP_ERROR_IF(record_count == 0) << "zero record count for kernel_id=" << kernel_id << " (name=" << kernel_info->kernel_name << ")"; @@ -1070,11 +857,11 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da ROCPROFILER_CALL(rocprofiler_query_record_counter_id(record_data[count].id, &_counter_id), "query record counter id"); counter_record.records[count] = - rocprofiler_tool_record_counter_t{_counter_id, record_data[count]}; + tool::tool_counter_value_t{_counter_id, record_data[count].counter_value}; counter_record.counter_count++; } - write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION); + tool::write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION); } rocprofiler_status_t @@ -1101,12 +888,12 @@ list_metrics_iterate_agents(rocprofiler_agent_version_t, auto* agent_node_id = static_cast(user_data); for(size_t i = 0; i < num_counters; i++) { - rocprofiler_counter_info_v0_t counter_info; - auto dimensions = std::vector{}; - ROCPROFILER_CALL( - rocprofiler_iterate_counter_dimensions( - counters[i], dimensions_info_callback, static_cast(&dimensions)), - "iterate_dimension_info"); + auto counter_info = rocprofiler_counter_info_v0_t{}; + auto dimensions = std::vector{}; + + ROCPROFILER_CALL(rocprofiler_iterate_counter_dimensions( + counters[i], dimensions_info_callback, &dimensions), + "iterate_dimension_info"); ROCPROFILER_CALL( rocprofiler_query_counter_info(counters[i], @@ -1232,35 +1019,6 @@ finalize_rocprofv3(std::string_view context) } } -timestamps_t* -get_app_timestamps() -{ - return stats_timestamp; -} - -void -init_tool_table() -{ - // agent and timestamp functions - tool_functions->tool_get_agent_node_id_fn = get_agent_node_id; - tool_functions->tool_get_app_timestamps_fn = get_app_timestamps; - - // name functions - tool_functions->tool_get_domain_name_fn = get_domain_name; - tool_functions->tool_get_kernel_name_fn = get_kernel_name; - 
tool_functions->tool_get_operation_name_fn = get_operation_name; - tool_functions->tool_get_counter_info_name_fn = get_counter_info_name; - tool_functions->tool_get_callback_kind_fn = get_callback_kind; - tool_functions->tool_get_callback_op_name_fn = get_callback_op_name; - tool_functions->tool_get_roctx_msg_fn = get_roctx_msg; -} - -void -fini_tool_table() -{ - *tool_functions = tool_table{}; -} - int tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) { @@ -1269,9 +1027,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) constexpr uint64_t buffer_size = 32 * common::units::KiB; constexpr uint64_t buffer_watermark = 31 * common::units::KiB; - rocprofiler_get_timestamp(&(stats_timestamp->app_start_time)); - - init_tool_table(); + tool_metadata->init(tool::metadata::inprocess{}); ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "create context failed"); @@ -1521,6 +1277,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) ROCPROFILER_CALL(rocprofiler_start_context(get_client_ctx()), "start context failed"); + tool_metadata->process_id = getpid(); + rocprofiler_get_timestamp(&(tool_metadata->process_start_ns)); + return 0; } @@ -1539,14 +1298,13 @@ api_registration_callback(rocprofiler_intercept_table_t, "Iterate rocporfiler agents") } -using stats_data_t = ::rocprofiler::tool::stats_data_t; -using stats_entry_t = ::rocprofiler::tool::stats_entry_t; -using domain_stats_vec_t = ::rocprofiler::tool::domain_stats_vec_t; +using stats_data_t = tool::stats_data_t; +using stats_entry_t = tool::stats_entry_t; +using domain_stats_vec_t = tool::domain_stats_vec_t; template void -generate_output(rocprofiler::tool::buffered_output& output_v, - domain_stats_vec_t& contributions_v) +generate_output(tool::buffered_output& output_v, domain_stats_vec_t& contributions_v) { if(!output_v) return; @@ -1554,7 +1312,8 @@ generate_output(rocprofiler::tool::buffered_output& output_v, if(tool::get_config().stats || 
tool::get_config().summary_output) { - output_v.stats = rocprofiler::tool::generate_stats(tool_functions, output_v.element_data); + output_v.stats = + tool::generate_stats(tool::get_config(), *tool_metadata, output_v.get_generator()); } if(output_v.stats) @@ -1564,7 +1323,8 @@ generate_output(rocprofiler::tool::buffered_output& output_v, if(tool::get_config().csv_output) { - rocprofiler::tool::generate_csv(tool_functions, output_v.element_data, output_v.stats); + tool::generate_csv( + tool::get_config(), *tool_metadata, output_v.get_generator(), output_v.stats); } } @@ -1574,40 +1334,38 @@ tool_fini(void* /*tool_data*/) client_identifier = nullptr; client_finalizer = nullptr; - rocprofiler_get_timestamp(&(stats_timestamp->app_end_time)); + tool_metadata->process_id = getpid(); + rocprofiler_get_timestamp(&(tool_metadata->process_end_ns)); flush(); rocprofiler_stop_context(get_client_ctx()); flush(); auto kernel_dispatch_output = - kernel_dispatch_buffered_output_t{tool::get_config().kernel_trace}; - auto hsa_output = hsa_buffered_output_t{tool::get_config().hsa_core_api_trace || - tool::get_config().hsa_amd_ext_api_trace || - tool::get_config().hsa_image_ext_api_trace || - tool::get_config().hsa_finalizer_ext_api_trace}; - auto hip_output = hip_buffered_output_t{tool::get_config().hip_runtime_api_trace || - tool::get_config().hip_compiler_api_trace}; - auto memory_copy_output = memory_copy_buffered_output_t{tool::get_config().memory_copy_trace}; - auto marker_output = marker_buffered_output_t{tool::get_config().marker_api_trace}; + tool::kernel_dispatch_buffered_output_t{tool::get_config().kernel_trace}; + auto hsa_output = tool::hsa_buffered_output_t{tool::get_config().hsa_core_api_trace || + tool::get_config().hsa_amd_ext_api_trace || + tool::get_config().hsa_image_ext_api_trace || + tool::get_config().hsa_finalizer_ext_api_trace}; + auto hip_output = tool::hip_buffered_output_t{tool::get_config().hip_runtime_api_trace || + 
tool::get_config().hip_compiler_api_trace}; + auto memory_copy_output = + tool::memory_copy_buffered_output_t{tool::get_config().memory_copy_trace}; + auto marker_output = tool::marker_buffered_output_t{tool::get_config().marker_api_trace}; auto counters_output = - counter_collection_buffered_output_t{tool::get_config().counter_collection}; + tool::counter_collection_buffered_output_t{tool::get_config().counter_collection}; auto scratch_memory_output = - scratch_memory_buffered_output_t{tool::get_config().scratch_memory_trace}; - auto rccl_output = rccl_buffered_output_t{tool::get_config().rccl_api_trace}; + tool::scratch_memory_buffered_output_t{tool::get_config().scratch_memory_trace}; + auto rccl_output = tool::rccl_buffered_output_t{tool::get_config().rccl_api_trace}; auto node_id_sort = [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; }; - auto _agents = std::vector{}; - _agents.reserve(agent_info->size()); - for(auto& itr : *agent_info) - _agents.emplace_back(itr.second); - + auto _agents = CHECK_NOTNULL(tool_metadata)->agents; std::sort(_agents.begin(), _agents.end(), node_id_sort); if(tool::get_config().csv_output) { - rocprofiler::tool::generate_csv(tool_functions, _agents); + tool::generate_csv(tool::get_config(), *tool_metadata, _agents); } auto contributions = domain_stats_vec_t{}; @@ -1623,58 +1381,72 @@ tool_fini(void* /*tool_data*/) if(tool::get_config().stats && tool::get_config().csv_output) { - rocprofiler::tool::generate_csv(tool_functions, contributions); + tool::generate_csv(tool::get_config(), *tool_metadata, contributions); } if(tool::get_config().json_output) { - auto _counters = get_tool_counter_info(); - rocprofiler::tool::write_json(tool_functions, - getpid(), - contributions, - _agents, - _counters, - &hip_output.element_data, - &hsa_output.element_data, - &kernel_dispatch_output.element_data, - &memory_copy_output.element_data, - &counters_output.element_data, - &marker_output.element_data, - 
&scratch_memory_output.element_data, - &rccl_output.element_data); + auto json_ar = tool::open_json(tool::get_config()); + + json_ar.start_process(); + tool::write_json(json_ar, tool::get_config(), *tool_metadata, getpid()); + tool::write_json(json_ar, + tool::get_config(), + *tool_metadata, + contributions, + hip_output.get_generator(), + hsa_output.get_generator(), + kernel_dispatch_output.get_generator(), + memory_copy_output.get_generator(), + counters_output.get_generator(), + marker_output.get_generator(), + scratch_memory_output.get_generator(), + rccl_output.get_generator()); + json_ar.finish_process(); + + tool::close_json(json_ar); } if(tool::get_config().pftrace_output) { - rocprofiler::tool::write_perfetto(tool_functions, - getpid(), - _agents, - &hip_output.element_data, - &hsa_output.element_data, - &kernel_dispatch_output.element_data, - &memory_copy_output.element_data, - &marker_output.element_data, - &scratch_memory_output.element_data, - &rccl_output.element_data); + tool::write_perfetto(tool::get_config(), + *tool_metadata, + _agents, + hip_output.get_generator(), + hsa_output.get_generator(), + kernel_dispatch_output.get_generator(), + memory_copy_output.get_generator(), + marker_output.get_generator(), + scratch_memory_output.get_generator(), + rccl_output.get_generator()); } if(tool::get_config().otf2_output) { - rocprofiler::tool::write_otf2(tool_functions, - getpid(), - _agents, - &hip_output.element_data, - &hsa_output.element_data, - &kernel_dispatch_output.element_data, - &memory_copy_output.element_data, - &marker_output.element_data, - &scratch_memory_output.element_data, - &rccl_output.element_data); + auto hip_elem_data = hip_output.load_all(); + auto hsa_elem_data = hsa_output.load_all(); + auto kernel_dispatch_elem_data = kernel_dispatch_output.load_all(); + auto memory_copy_elem_data = memory_copy_output.load_all(); + auto marker_elem_data = marker_output.load_all(); + auto scratch_memory_elem_data = 
scratch_memory_output.load_all(); + auto rccl_elem_data = rccl_output.load_all(); + + tool::write_otf2(tool::get_config(), + *tool_metadata, + getpid(), + _agents, + &hip_elem_data, + &hsa_elem_data, + &kernel_dispatch_elem_data, + &memory_copy_elem_data, + &marker_elem_data, + &scratch_memory_elem_data, + &rccl_elem_data); } if(tool::get_config().summary_output) { - rocprofiler::tool::generate_stats(tool_functions, contributions); + tool::generate_stats(tool::get_config(), *tool_metadata, contributions); } auto destroy_output = [](auto& _buffered_output_v) { _buffered_output_v.destroy(); }; @@ -1688,7 +1460,6 @@ tool_fini(void* /*tool_data*/) destroy_output(scratch_memory_output); destroy_output(rccl_output); - fini_tool_table(); if(destructors) { for(const auto& itr : *destructors) @@ -1703,86 +1474,15 @@ tool_fini(void* /*tool_data*/) } } // namespace -std::map -get_callback_roctx_msg() -{ - auto _data = marker_msg_data->rlock([](const auto& _data_v) { return _data_v; }); - auto _ret = std::map{}; - for(const auto& itr : _data) - _ret.emplace(itr.first, itr.second); - return _ret; -} - -std::vector -get_kernel_symbol_data() -{ - auto _data = kernel_data->rlock([](const auto& _data_v) { - auto _info = std::vector{}; - _info.reserve(_data_v.size()); - for(const auto& itr : _data_v) - _info.emplace_back(itr.second); - return _info; - }); - - uint64_t kernel_data_size = 0; - for(const auto& itr : _data) - kernel_data_size = std::max(kernel_data_size, itr.kernel_id); - - auto _symbol_data = std::vector{}; - _symbol_data.resize(kernel_data_size + 1, kernel_symbol_data{}); - // index by the kernel id - for(auto& itr : _data) - _symbol_data.at(itr.kernel_id) = std::move(itr); - - return _symbol_data; -} - -std::vector -get_code_object_data() -{ - auto _data = code_obj_data->rlock([](const auto& _data_v) { - auto _info = std::vector{}; - _info.reserve(_data_v.size()); - for(const auto& itr : _data_v) - _info.emplace_back(itr.second); - return _info; - }); - - uint64_t 
_sz = 0; - for(const auto& itr : _data) - _sz = std::max(_sz, itr.code_object_id); - - auto _code_obj_data = std::vector{}; - _code_obj_data.resize(_sz + 1, rocprofiler_code_object_data_t{}); - // index by the code object id - for(auto& itr : _data) - _code_obj_data.at(itr.code_object_id) = itr; - - return _code_obj_data; -} - -std::vector -get_tool_counter_info() -{ - auto _data = get_agent_counter_info(get_gpu_agents()); - auto _ret = std::vector{}; - for(const auto& itr : _data) - { - for(const auto& iitr : itr.second) - _ret.emplace_back(iitr); - } - return _ret; -} - std::vector get_tool_counter_dimension_info() { - auto _data = get_agent_counter_info(get_gpu_agents()); + auto _data = get_agent_counter_info(); auto _ret = std::vector{}; for(const auto& itr : _data) { for(const auto& iitr : itr.second) - for(const auto& ditr : iitr.dimension_info) + for(const auto& ditr : iitr.dimensions) _ret.emplace_back(ditr); } @@ -1812,8 +1512,7 @@ get_main_function() return user_main; } -bool signal_handler_exit = - rocprofiler::tool::get_env("ROCPROF_INTERNAL_TEST_SIGNAL_HANDLER_VIA_EXIT", false); +bool signal_handler_exit = tool::get_env("ROCPROF_INTERNAL_TEST_SIGNAL_HANDLER_VIA_EXIT", false); } // namespace #define ROCPROFV3_INTERNAL_API __attribute__((visibility("internal"))); @@ -1860,41 +1559,24 @@ rocprofiler_configure(uint32_t version, uint32_t patch = version % 100; // ensure these pointers are not leaked - add_destructor(buffered_name_info); - add_destructor(callback_name_info); - add_destructor(marker_msg_data); - add_destructor(code_obj_data); - add_destructor(kernel_data); - add_destructor(tool_functions); - add_destructor(agent_info); - add_destructor(stats_timestamp); + add_destructor(tool_metadata); // in case main wrapper is not used ::atexit([]() { finalize_rocprofv3("atexit"); }); + tool::get_tmp_file_name_callback() = [](domain_type type) -> std::string { + return compose_tmp_file_name(tool::get_config(), type); + }; + 
if(tool::get_config().list_metrics) { + tool_metadata->init(tool::metadata::inprocess{}); ROCPROFILER_CALL(rocprofiler_at_intercept_table_registration( api_registration_callback, ROCPROFILER_HSA_TABLE, nullptr), "api registration"); return nullptr; } - ROCPROFILER_CALL( - rocprofiler_query_available_agents( - ROCPROFILER_AGENT_INFO_VERSION_0, - [](rocprofiler_agent_version_t, const void** agents, size_t num_agents, void*) { - for(size_t i = 0; i < num_agents; ++i) - { - auto* agent = static_cast(agents[i]); - agent_info->emplace(agent->id, *agent); - } - return ROCPROFILER_STATUS_SUCCESS; - }, - sizeof(rocprofiler_agent_t), - nullptr), - "Iterate rocporfiler agents") - ROCP_INFO << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch << " (" << runtime_version << ")"; diff --git a/source/lib/rocprofiler-sdk/hsa/async_copy.cpp b/source/lib/rocprofiler-sdk/hsa/async_copy.cpp index b820b10287..a8e4c4da80 100644 --- a/source/lib/rocprofiler-sdk/hsa/async_copy.cpp +++ b/source/lib/rocprofiler-sdk/hsa/async_copy.cpp @@ -257,8 +257,13 @@ active_signals::sync() { if(m_signal.handle == 0) return; +#if defined(ROCPROFILER_CI_STRICT_TIMESTAMPS) && ROCPROFILER_CI_STRICT_TIMESTAMPS > 0 + constexpr auto timeout_sec = std::chrono::seconds{5}; +#else // wait a maximum of thirty seconds constexpr auto timeout_sec = std::chrono::seconds{30}; +#endif + constexpr auto timeout = std::chrono::duration_cast(timeout_sec).count(); diff --git a/source/lib/rocprofiler-sdk/tracing/profiling_time.hpp b/source/lib/rocprofiler-sdk/tracing/profiling_time.hpp index 29e3ac2886..059c2712d5 100644 --- a/source/lib/rocprofiler-sdk/tracing/profiling_time.hpp +++ b/source/lib/rocprofiler-sdk/tracing/profiling_time.hpp @@ -105,7 +105,6 @@ adjust_profiling_time(std::string_view _label, _label, _label, _value.start, - _label, _bounds.start, (_bounds.start - _value.start)); @@ -116,7 +115,6 @@ adjust_profiling_time(std::string_view _label, _label, _label, _value.end, - _label, 
#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
This script converts one or more JSON output files from rocprofv3 into a
single SQLite database conforming to the rocpd SQL Schema.
"""

import sys
import json
import time
import sqlite3
import argparse

__author__ = "AMD"
__copyright__ = "Copyright 2023, Advanced Micro Devices, Inc."
__license__ = "MIT"
__maintainer__ = "AMD"
__status__ = "Development"

# this is the list of APIs whose records are inserted into API table which
# needs to be updated whenever tracing support for a new API is added
rocprofv3_apis = ("hip_api", "hsa_api", "marker_api", "rccl_api")

# scalar sub-query resolving a string to its id in the rocpd_string table;
# the string itself is always supplied as a bound parameter
_STRING_ID_SQL = "(SELECT id FROM rocpd_string WHERE string = ?)"


def _to_dotdict(value):
    """Recursively convert dicts (including dicts nested in lists/tuples) to dotdict"""

    if isinstance(value, dict):
        return dotdict(value)
    if isinstance(value, (list, tuple)):
        return [_to_dotdict(item) for item in value]
    return value


class dotdict(dict):
    """dot.notation access to dictionary attributes

    Missing attributes evaluate to None (``__getattr__`` is ``dict.get``).
    Nested dicts become dotdict instances and lists/tuples become lists whose
    elements are converted recursively.
    """

    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __init__(self, d):
        super(dotdict, self).__init__(d)
        for k, v in list(self.items()):
            if isinstance(v, (dict, list, tuple)):
                self.__setitem__(k, _to_dotdict(v))


def _buffer_records(itr, category):
    """Return the buffer records of the given category or an empty list when
    the category is not present in the JSON data"""

    return itr.buffer_records.get(category) or []


def dump_table(table, cursor=None):
    """Print every row of the given table to the console.

    `table` is interpolated into the query (identifiers cannot be bound as
    parameters), so it must be a trusted table name. When `cursor` is omitted,
    a module-level ``cursor`` variable is used if one exists.
    """

    if cursor is None:
        cursor = globals().get("cursor")
    if cursor is None:
        raise ValueError("dump_table requires a database cursor")

    cursor.execute(f"SELECT * FROM {table};")
    results = cursor.fetchall()
    print(f"\n\n##### {table} #####\n")
    for itr in results:
        print("    | {}".format(" | ".join([f"{val}" for val in list(itr)])))
    print("")


def execute_raw_sql_statements(cursor, statements):
    """Helper function for executing a sequence of raw SQL statements

    `statements` is a string of ';'-separated statements. Lines which are
    entirely an SQL comment ("-- ...") are dropped before splitting so that a
    ';' inside a commented-out statement never produces a bogus statement.
    Errors are reported on stderr along with the failing statement and then
    re-raised.
    """

    sql_lines = [
        line for line in statements.splitlines() if not line.lstrip().startswith("--")
    ]

    for stmt in "\n".join(sql_lines).split(";"):
        stmt = stmt.strip()
        if not stmt:
            continue
        stmt = f"{stmt};"
        try:
            cursor.execute(stmt)
        except sqlite3.Error as err:
            sys.stderr.write(f"SQLite3 error: {err}\nStatement:\n\t{stmt}\n")
            sys.stderr.flush()
            raise


def create_schema(cursor):
    """Create the rocpd tables (if they do not exist) and record the schema version"""

    # NOTE: string literals use single quotes. Double-quoted string literals
    # are only accepted by SQLite as a legacy quirk which can be disabled.
    table_schema = """
    CREATE TABLE IF NOT EXISTS "rocpd_metadata" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "tag" varchar(4096) NOT NULL, "value" varchar(4096) NOT NULL);
    CREATE TABLE IF NOT EXISTS "rocpd_string" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "string" varchar(4096) NOT NULL UNIQUE ON CONFLICT IGNORE);
    CREATE TABLE IF NOT EXISTS "rocpd_op" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "gpuId" integer NOT NULL, "queueId" integer NOT NULL, "sequenceId" integer NOT NULL, "completionSignal" varchar(18) NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "description_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED, "opType_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED);
    CREATE TABLE IF NOT EXISTS "rocpd_api" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "pid" integer NOT NULL, "tid" integer NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "apiName_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED, "args_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED);
    CREATE TABLE IF NOT EXISTS "rocpd_api_ops" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "api_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED);
    -- optional
    CREATE TABLE IF NOT EXISTS "rocpd_kernelcodeobject" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "vgpr" integer NOT NULL, "sgpr" integer NOT NULL, "fbar" integer NOT NULL, "kernel_id" integer NOT NULL);
    CREATE TABLE IF NOT EXISTS "rocpd_kernelapi" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "api_ptr_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "stream" varchar(18) NOT NULL, "gridX" integer NOT NULL, "gridY" integer NOT NULL, "gridZ" integer NOT NULL, "workgroupX" integer NOT NULL, "workgroupY" integer NOT NULL, "workgroupZ" integer NOT NULL, "groupSegmentSize" integer NOT NULL, "privateSegmentSize" integer NOT NULL, "kernelArgAddress" varchar(18) NOT NULL, "aquireFence" varchar(8) NOT NULL, "releaseFence" varchar(8) NOT NULL, "codeObject_id" integer NOT NULL REFERENCES "rocpd_kernelcodeobject" ("id") DEFERRABLE INITIALLY DEFERRED, "kernelName_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED);
    CREATE TABLE IF NOT EXISTS "rocpd_copyapi" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "api_ptr_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "stream" varchar(18) NOT NULL, "size" integer NOT NULL, "width" integer NOT NULL, "height" integer NOT NULL, "kind" integer NOT NULL, "dst" varchar(18) NOT NULL, "src" varchar(18) NOT NULL, "dstDevice" integer NOT NULL, "srcDevice" integer NOT NULL, "sync" bool NOT NULL, "pinned" bool NOT NULL);

    INSERT INTO "rocpd_metadata"(tag, value) VALUES ('schema_version', '2');

    --CREATE TABLE IF NOT EXISTS "rocpd_monitor" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "deviceType" varchar(16) NOT NULL, "deviceId" integer NOT NULL, "monitorType" varchar(16) NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "value" varchar(255) NOT NULL);
    --CREATE TABLE IF NOT EXISTS "rocpd_barrierop" ("op_ptr_id" integer NOT NULL PRIMARY KEY REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED, "signalCount" integer NOT NULL, "aquireFence" varchar(8) NOT NULL, "releaseFence" varchar(8) NOT NULL);
    --CREATE TABLE IF NOT EXISTS "rocpd_op_inputSignals" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "from_op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED, "to_op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED);
    """

    execute_raw_sql_statements(cursor, table_schema)


def finalize_schema(cursor):
    """Create the convenience views over the rocpd tables.

    The views are created with IF NOT EXISTS so that, like the tables, the
    script can be re-run against an existing database.
    """

    table_schema = """
    CREATE VIEW IF NOT EXISTS api AS SELECT rocpd_api.id,pid,tid,start,end,A.string AS apiName, B.string AS args FROM rocpd_api
        INNER JOIN rocpd_string A ON A.id = rocpd_api.apiName_id
        INNER JOIN rocpd_string B ON B.id = rocpd_api.args_id;
    CREATE VIEW IF NOT EXISTS op AS SELECT rocpd_op.id,gpuId,queueId,sequenceId,start,end,A.string AS description, B.string AS opType FROM rocpd_op
        INNER JOIN rocpd_string A ON A.id = rocpd_op.description_id
        INNER JOIN rocpd_string B ON B.id = rocpd_op.opType_id;
    CREATE VIEW IF NOT EXISTS busy AS SELECT A.gpuId, GpuTime, WallTime, GpuTime*1.0/WallTime AS Busy FROM (SELECT gpuId, sum(end-start) AS GpuTime FROM rocpd_op GROUP BY gpuId) A
        INNER JOIN (SELECT max(end) - min(start) AS WallTime FROM rocpd_op);
    CREATE VIEW IF NOT EXISTS top AS SELECT C.string AS Name, count(C.string) AS TotalCalls, sum(A.end-A.start) / 1000 AS TotalDuration, (sum(A.end-A.start)/count(C.string))/ 1000 AS Ave, sum(A.end-A.start) * 100.0 / (SELECT sum(A.end-A.start) FROM rocpd_op A) AS Percentage FROM (SELECT opType_id AS name_id, start, end FROM rocpd_op WHERE description_id in (SELECT id FROM rocpd_string WHERE string='')
        UNION SELECT description_id, start, end FROM rocpd_op WHERE description_id not in (SELECT id FROM rocpd_string WHERE string='')) A
        JOIN rocpd_string C on C.id = A.name_id GROUP BY Name ORDER BY TotalDuration desc;
    CREATE VIEW IF NOT EXISTS ktop AS SELECT C.string AS Name, count(C.string) AS TotalCalls, sum(A.end-A.start) / 1000 AS TotalDuration, (sum(A.end-A.start)/count(C.string))/ 1000 AS Ave, sum(A.end-A.start) * 100.0 / (SELECT sum(A.end-A.start) FROM rocpd_api A
        JOIN rocpd_kernelapi B on B.api_ptr_id = A.id) AS Percentage FROM rocpd_api A
        JOIN rocpd_kernelapi B on B.api_ptr_id = A.id
        JOIN rocpd_string C on C.id = B.kernelname_id GROUP BY Name ORDER BY TotalDuration desc;
    CREATE VIEW IF NOT EXISTS kernel AS SELECT B.id, gpuId, queueId, sequenceId, start, end, (end-start) AS duration, stream, gridX, gridY, gridz, workgroupX, workgroupY, workgroupZ, groupSegmentSize, privateSegmentSize, D.string AS kernelName FROM rocpd_api_ops A
        JOIN rocpd_op B on B.id = A.op_id
        JOIN rocpd_kernelapi C ON C.api_ptr_id = A.api_id
        JOIN rocpd_string D on D.id = kernelName_id;
    CREATE VIEW IF NOT EXISTS copy AS SELECT B.id, pid, tid, start, end, C.string AS apiName, stream, size, width, height, kind, dst, src, dstDevice, srcDevice, sync, pinned FROM rocpd_copyApi A
        JOIN rocpd_api B ON B.id = A.api_ptr_id
        JOIN rocpd_string C on C.id = B.apiname_id;
    CREATE VIEW IF NOT EXISTS copyop AS SELECT B.id, gpuId, queueId, sequenceId, B.start, B.end, (B.end-B.start) AS duration, stream, size, width, height, kind, dst, src, dstDevice, srcDevice, sync, pinned, E.string AS apiName FROM rocpd_api_ops A
        JOIN rocpd_op B ON B.id = A.op_id
        JOIN rocpd_copyapi C ON C.api_ptr_id = A.api_id
        JOIN rocpd_api D on D.id = A.api_id
        JOIN rocpd_string E ON E.id = D.apiName_id;
    """

    execute_raw_sql_statements(cursor, table_schema)


def normalize_timestamps(itr):
    """Make all timestamps relative to the time of rocprofv3 initialization within the application

    The records are modified in place and `itr` is returned for convenience.
    """

    init_time = itr.metadata.init_time

    min_val = None
    for aitr in rocprofv3_apis + ("kernel_dispatch", "memory_copy"):
        for ritr in _buffer_records(itr, aitr):
            ritr.start_timestamp = ritr.start_timestamp - init_time
            ritr.end_timestamp = ritr.end_timestamp - init_time
            if min_val is None or ritr.start_timestamp < min_val:
                min_val = ritr.start_timestamp

    print(f"    - starting timestamp normalized down to a minimum of {min_val} nsec")
    sys.stdout.flush()

    return itr


def insert_strings(cursor, itr):
    """Populate the strings table with all the strings which will be referenced by various records"""

    strings = []

    def append_strings(*args):
        for aitr in args:
            if isinstance(aitr, list):
                strings.extend(aitr)
            else:
                strings.append(aitr)

    append_strings("UserMarker")

    for aitr in itr.agents:
        append_strings(aitr.name, aitr.vendor_name, aitr.product_name, aitr.model_name)
    for ritr in itr.strings.callback_records:
        append_strings(ritr.kind, ritr.operations)
    for ritr in itr.strings.buffer_records:
        append_strings(ritr.kind, ritr.operations)
    for ritr in itr.strings.marker_api:
        append_strings(ritr.value)
    for ritr in itr.strings.counters.dimension_ids:
        append_strings(ritr.name)
    for ritr in itr.strings.correlation_id.external:
        append_strings(ritr.value)
    for ritr in itr.kernel_symbols:
        append_strings(ritr.kernel_name)
        append_strings(ritr.formatted_kernel_name)
        append_strings(ritr.demangled_kernel_name)
        append_strings(ritr.truncated_kernel_name)
    for ritr in itr.code_objects:
        append_strings(ritr.uri)

    # fields which are absent from the JSON are read as None: skip those since
    # they can neither be sorted with str nor stored in the NOT NULL column
    unique_strings = sorted({sitr for sitr in strings if sitr is not None})

    # bind the values as parameters: marker messages and demangled kernel
    # names may legitimately contain quote characters
    cursor.executemany(
        "INSERT INTO rocpd_string (string) VALUES (?)",
        [(sitr,) for sitr in unique_strings],
    )


def insert_api_data(cursor, itr, corr_id_offset, **kwargs):
    """Add all the HIP, HSA, marker, and RCCL API records to the database.
    Eventually we might want to abstract a way to iterate over the APIs covered
    here instead of maintaining an explicit list.

    `corr_id_offset` is added to every internal correlation id (in place) so
    that ids from different processes/files do not collide. The optional
    `marker_mode` keyword is one of "message" (default), "generic", or "api".

    Returns the largest correlation id inserted, which is never smaller than
    `corr_id_offset`, for use as the offset of the next data set.
    """

    mode = kwargs.get("marker_mode", "message")
    assert mode in ("message", "generic", "api")

    marker_message_strings = {
        eitr.key: eitr.value for eitr in itr.strings.marker_api
    }

    def get_api_name(kind, op):
        return itr.strings.buffer_records[kind].operations[op]

    def get_marker_message(name, corr_id):
        return marker_message_strings.get(corr_id, name)

    # start from the incoming offset so that a data set without any API
    # records does not reset the offset used for subsequent data sets
    max_corr_id = corr_id_offset
    for aitr in rocprofv3_apis:
        for hitr in _buffer_records(itr, aitr):
            corr_id = hitr.correlation_id
            # the marker messages are keyed by the original (un-offset) id
            orig_corr_id = corr_id.internal
            corr_id.internal += corr_id_offset

            name = get_api_name(hitr.kind, hitr.operation)
            # by default args_id refers to the string with id 1
            args_sql, args_value = "?", 1

            if aitr == "marker_api":
                message = get_marker_message(name, orig_corr_id)
                if mode == "message":
                    name = message
                elif mode == "api":
                    args_sql, args_value = _STRING_ID_SQL, message
                elif mode == "generic":
                    name = "UserMarker"
                    args_sql, args_value = _STRING_ID_SQL, message

            cursor.execute(
                f"""INSERT INTO rocpd_api(id, pid, tid, start, end, apiName_id, args_id)
                    VALUES (?, ?, ?, ?, ?, {_STRING_ID_SQL}, {args_sql});
                """,
                (
                    corr_id.internal,
                    itr.metadata.pid,
                    hitr.thread_id,
                    hitr.start_timestamp,
                    hitr.end_timestamp,
                    name,
                    args_value,
                ),
            )
            max_corr_id = max(max_corr_id, corr_id.internal)

    return max_corr_id


def insert_async_data(cursor, itr, corr_id_offset, op_id_offset):
    """Add all the kernel and memory copy records to the database.
    Eventually we might want to handle page-migration, scratch-memory, etc. but,
    at present, rocpd_schema does not support it.

    `corr_id_offset` is added to every internal correlation id (in place) and
    op ids are assigned sequentially starting at `op_id_offset`.

    Returns the next unused op id.
    """

    external_corr_id_strings = {
        eitr.key: eitr.value for eitr in itr.strings.correlation_id.external
    }

    def get_api_name(kind, op=None):
        return (
            itr.strings.buffer_records[kind].operations[op]
            if op is not None
            else itr.strings.buffer_records[kind].kind
        )

    def get_kernel_symbol(kernid):
        return itr.kernel_symbols[kernid]

    def get_kernel_name(kernid, externid):
        symbol_name = get_kernel_symbol(kernid).formatted_kernel_name
        if externid > 0:
            # an external correlation id without a registered string falls
            # back to the name from the kernel symbol
            return external_corr_id_strings.get(externid, symbol_name)
        return symbol_name

    def get_agent_id(agent_id):
        for aitr in itr.agents:
            if aitr.id.handle == agent_id.handle:
                return aitr.node_id
        return None

    for kitr in itr.kernel_symbols:
        sgpr = kitr.get("sgpr_count", 0)
        vgpr = kitr.get("arch_vgpr_count", 0) + kitr.get("accum_vgpr_count", 0)

        cursor.execute(
            """INSERT INTO rocpd_kernelcodeobject(vgpr, sgpr, fbar, kernel_id)
                VALUES (?, ?, 0, ?);
            """,
            (vgpr, sgpr, kitr.kernel_id),
        )

    op_id = op_id_offset
    for kitr in _buffer_records(itr, "kernel_dispatch"):
        kind_name = get_api_name(kitr.kind)
        info = kitr.dispatch_info
        kernel_id = info.kernel_id
        queue_id = info.queue_id.handle
        corr_id = kitr.correlation_id
        grid = info.grid_size
        workgroup = info.workgroup_size
        kern_name = get_kernel_name(kernel_id, corr_id.external)
        gpu_id = get_agent_id(info.agent_id)
        ksym = get_kernel_symbol(kernel_id)
        kernel_arg_addr = "{:#x}".format(ksym.kernel_object)
        corr_id.internal += corr_id_offset

        cursor.execute(
            f"""INSERT INTO rocpd_kernelapi(api_ptr_id,
                    stream,
                    gridX, gridY, gridZ,
                    workgroupX, workgroupY, workgroupZ,
                    groupSegmentSize, privateSegmentSize,
                    kernelArgAddress, aquireFence, releaseFence,
                    codeObject_id, kernelName_id)
                VALUES (?,
                    ?,
                    ?, ?, ?,
                    ?, ?, ?,
                    ?, ?,
                    ?, '', '',
                    (SELECT id FROM rocpd_kernelcodeobject WHERE kernel_id = ?),
                    {_STRING_ID_SQL});
            """,
            (
                corr_id.internal,
                queue_id,
                grid.x,
                grid.y,
                grid.z,
                workgroup.x,
                workgroup.y,
                workgroup.z,
                info.group_segment_size,
                info.private_segment_size,
                kernel_arg_addr,
                ksym.kernel_id,
                kern_name,
            ),
        )
        cursor.execute(
            f"""INSERT INTO rocpd_op(id, gpuId, queueId, sequenceId, completionSignal, start, end, description_id, opType_id)
                VALUES (?, ?, ?, ?, '', ?, ?, {_STRING_ID_SQL}, {_STRING_ID_SQL});
            """,
            (
                op_id,
                gpu_id,
                queue_id,
                corr_id.internal,
                kitr.start_timestamp,
                kitr.end_timestamp,
                kern_name,
                kind_name,
            ),
        )
        cursor.execute(
            "INSERT INTO rocpd_api_ops(api_id, op_id) VALUES (?, ?);",
            (corr_id.internal, op_id),
        )
        op_id += 1

    for mitr in _buffer_records(itr, "memory_copy"):
        kind_name = get_api_name(mitr.kind)
        op_name = get_api_name(mitr.kind, mitr.operation)
        dst_id = get_agent_id(mitr.dst_agent_id)
        src_id = get_agent_id(mitr.src_agent_id)
        corr_id = mitr.correlation_id
        synced = False
        pinned = False
        corr_id.internal += corr_id_offset

        cursor.execute(
            f"""INSERT INTO rocpd_copyapi(api_ptr_id, stream, size, width, height, kind, src, dst, srcDevice, dstDevice, sync, pinned)
                VALUES (?, '', ?, ?, 1, {_STRING_ID_SQL}, '', '', ?, ?, ?, ?);
            """,
            (
                corr_id.internal,
                mitr.bytes,
                mitr.bytes,
                op_name,
                src_id,
                dst_id,
                synced,
                pinned,
            ),
        )
        cursor.execute(
            f"""INSERT INTO rocpd_op(id, gpuId, queueId, sequenceId, completionSignal, start, end, description_id, opType_id)
                VALUES (?, ?, 0, ?, '', ?, ?, {_STRING_ID_SQL}, {_STRING_ID_SQL});
            """,
            (
                op_id,
                dst_id,
                corr_id.internal,
                mitr.start_timestamp,
                mitr.end_timestamp,
                op_name,
                kind_name,
            ),
        )
        cursor.execute(
            "INSERT INTO rocpd_api_ops(api_id, op_id) VALUES (?, ?);",
            (corr_id.internal, op_id),
        )
        op_id += 1

    return op_id


def main(argv=None):
    """Command-line entry point: convert the input JSON files into one rocpd database.

    `argv` is the list of command-line arguments excluding the program name;
    when None, the arguments are taken from sys.argv.
    """

    rocpd_tables = [
        "metadata",
        "string",
        "api",
        "op",
        "api_ops",
        "copyapi",
        "kernelapi",
        "kernelcodeobject",
    ]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        "--input",
        help="Input rocprofv3 JSON files",
        type=str,
        nargs="+",
        required=True,
    )
    parser.add_argument(
        "-o", "--output", help="Output database name", type=str, default="example.db"
    )
    parser.add_argument(
        "-n",
        "--normalize-timestamps",
        help="Normalize timestamps relative to the app start time",
        action="store_true",
    )
    parser.add_argument(
        "-m",
        "--marker-mode",
        help="'generic' is classical rocpd behavior: all marker regions have 'UserMarker' name with message in args; 'message' uses the message as the region name; 'api' uses the name of the marker function with the message in args",
        choices=("generic", "message", "api"),
        type=str,
        default="message",
    )
    parser.add_argument(
        "-d",
        "--dump-tables",
        help="Dump generate rocpd tables to console (for debugging)",
        type=str,
        default=None,
        nargs="*",
        choices=set(rocpd_tables),
    )

    args = parser.parse_args(argv)

    start = time.monotonic_ns()
    print(f"Opening '{args.output}'...")

    # Connect to an SQLite database (or create it if it doesn't exist)
    conn = sqlite3.connect(args.output)

    try:
        # Create a cursor object using the cursor() method
        cursor = conn.cursor()

        create_schema(cursor)

        corr_id_offset = 0
        op_id_offset = 0
        for itr in args.input:
            print(f"Reading '{itr}'...")
            with open(itr, "rb") as f:
                data = dotdict(json.load(f))["rocprofiler-sdk-tool"]

            for ditr in data:
                # normalize the timestamps if requested
                ditr = normalize_timestamps(ditr) if args.normalize_timestamps else ditr

                # create the strings table
                insert_strings(cursor, ditr)

                # insert the api data
                next_corr_id_offset = insert_api_data(
                    cursor, ditr, corr_id_offset, marker_mode=args.marker_mode
                )

                # insert the kernel and memory copy data: this uses the same
                # correlation id offset as the api data of this process
                next_op_id_offset = insert_async_data(
                    cursor, ditr, corr_id_offset, op_id_offset
                )

                # Save (commit) the changes
                conn.commit()

                # update the offsets before handling the next process
                corr_id_offset = next_corr_id_offset
                op_id_offset = next_op_id_offset

        if args.dump_tables is not None and len(args.dump_tables) == 0:
            args.dump_tables = rocpd_tables

        if args.dump_tables is not None:
            for itr in args.dump_tables:
                dump_table(f"rocpd_{itr}", cursor)

        finalize_schema(cursor)
        conn.commit()

        print(f"Closing '{args.output}'...")
    finally:
        # Close the connection (also when an error aborts the conversion)
        conn.close()

    end = time.monotonic_ns()
    elapsed_nsec = end - start
    elapsed_sec = elapsed_nsec / 1.0e9
    print(f"Runtime time (nsec): {elapsed_nsec}")
    print(f"Runtime time (sec) : {elapsed_sec}")


if __name__ == "__main__":
    main()