rocprofv3: refactor and reorganize rocprofiler-sdk-tool library (#1138)
* Add rocprofv3-multi-node.md to source/lib/rocprofiler-sdk-tool
* Initial source re-organization
- create "output" static library
* Update include/rocprofiler-sdk/cxx/serialization.hpp
- add GPR count fields to kernel symbol serialization
* Add source/scripts/generate-rocpd.py
- reads one or more JSON output files from rocprofv3 and writes rocpd SQLite3 database
- Note: preliminary implementation
* More reorganization between lib/rocprofiler-sdk-tool and lib/output
* Updates to generate-rocpd.py
- add SQL views
- option: --absolute-timestamps -> --normalize-timestamps
- option: --generic-markers
- misc fixes to get the views working
- support marker names
* Update generate-rocpd.py
- Add --marker-mode option
* Update generate-rocpd.py
- Improve debugging of bad bulk SQLite statements
* Update rocprofv3-multi-node.md
- cleanup of proposed SQL schema
* lib/output/format_path.{hpp,cpp}
- rename format to format_path (in config.hpp and config.cpp)
- move format_path functionality to format_path.{hpp,cpp}
* Rework lib/output/tmp_file_buffer.{hpp,cpp}
* Update output_key.cpp
- support %cwd%, %launch_date%
* Rework lib/output/buffered_output.hpp
* Support csv_output_file constructed via domain_type
* Update lib/output/domain_type.{hpp,cpp}
- get_domain_trace_file_name
- get_domain_stats_file_name
* Update lib/rocprofiler-sdk-tool/tool.cpp
- tweak headers
* Update lib/output/generate*.cpp
- remove include of helpers.hpp
- CSV uses domain_type for filenames
* Update samples/counter_collection/per_dev_serialization.cpp
- make wait_on volatile
* Remove tool_table from lib/output and lib/rocprofiler-sdk-tool
- Also split various structs into their own files
- lib/output/agent_info
- lib/output/metadata
- lib/output/kernel_symbol_info
- lib/output/counter_info
- Implemented rocprofiler::tool::metadata
* Optimize rocprofiler_tool_counter_collection_record_t
- reduce the size of the struct from 24784 bytes to 8376 bytes
* Introduced output_config
- split subset of config (from tools library) into output_config to be able to configure the output generating functions separately from the tool library
- this is a significant step towards the output generating functions not relying on static global memory
* Stream chunks of data into output instead of loading all data into memory
* Remove duplicate group_segment_size in rocprofiler_kernel_dispatch_info_t serialization
* Adding Q&A to rocprofv3-multi-node.md
* Remove all remaining includes of lib/rocprofiler-sdk-tool from lib/output
- migrated a fair amount of code from lib/rocprofiler-sdk-tool/helper.hpp to lib/output
* Update Q&A of rocprofv3-multi-node.md
* Fix minor compilation errors + minor cleanup
* Update hsa/async_copy.cpp
- when ROCPROFILER_CI_STRICT_TIMESTAMPS > 0, reduce the active_signal sync wait time
* Update profiling_time.hpp
- fix log messages for when start/end time is less/greater than enqueue/current CPU time
* Fix generate_stats for tool_counter_record_t
* Dictionary optimization for generate-rocpd.py
---------
Co-authored-by: SrirakshaNag <104580803+SrirakshaNag@users.noreply.github.com>
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
98858b60ec
Коммит
5eb8c2658c
@@ -49,7 +49,7 @@ target_link_libraries(
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk amd_comgr
|
||||
rocprofiler-sdk::samples-common-library rocprofiler-sdk::samples-build-flags)
|
||||
|
||||
rocprofiler_samples_get_preload_env(PRELOAD_ENV advanced-thread-trace)
|
||||
rocprofiler_samples_get_preload_env(PRELOAD_ENV)
|
||||
|
||||
add_test(NAME advanced-thread-trace COMMAND $<TARGET_FILE:advanced-thread-trace>)
|
||||
|
||||
|
||||
@@ -106,7 +106,7 @@ target_link_libraries(
|
||||
PRIVATE rocprofiler-sdk::samples-common-library rocprofiler-sdk::rocprofiler-sdk
|
||||
amd_comgr rocprofiler-sdk::samples-build-flags)
|
||||
|
||||
rocprofiler_samples_get_preload_env(PRELOAD_ENV code-object-isa-decode)
|
||||
rocprofiler_samples_get_preload_env(PRELOAD_ENV)
|
||||
|
||||
add_test(NAME code-object-isa-decode COMMAND $<TARGET_FILE:code-object-isa-decode>)
|
||||
|
||||
|
||||
@@ -36,13 +36,16 @@
|
||||
} while(0)
|
||||
|
||||
__global__ void
|
||||
kernelA(int* wait_on, int value, int* no_opt)
|
||||
kernelA(int devid, volatile int* wait_on, int value, int* no_opt)
|
||||
{
|
||||
printf("[device=%i][begin] Wait on %i: %i (%i)\n", devid, value, *wait_on, *no_opt);
|
||||
while(*wait_on != value)
|
||||
{
|
||||
(*no_opt)++;
|
||||
};
|
||||
printf("[device=%i][break] Wait on %i: %i (%i)\n", devid, value, *wait_on, *no_opt);
|
||||
(*wait_on)--;
|
||||
printf("[device=%i][return] Wait on %i: %i (%i)\n", devid, value, *wait_on, *no_opt);
|
||||
}
|
||||
|
||||
int
|
||||
@@ -53,17 +56,23 @@ main(int, char**)
|
||||
if(ntotdevice < 2) return 0;
|
||||
|
||||
start();
|
||||
int* check_value = nullptr;
|
||||
int* no_opt = nullptr;
|
||||
volatile int* check_value = nullptr;
|
||||
int* no_opt_0 = nullptr;
|
||||
int* no_opt_1 = nullptr;
|
||||
HIP_CALL(hipMallocManaged(&check_value, sizeof(*check_value)));
|
||||
HIP_CALL(hipMallocManaged(&no_opt, sizeof(*no_opt)));
|
||||
*no_opt = 0;
|
||||
HIP_CALL(hipMallocManaged(&no_opt_0, sizeof(*no_opt_0)));
|
||||
HIP_CALL(hipMallocManaged(&no_opt_1, sizeof(*no_opt_1)));
|
||||
*no_opt_0 = 0;
|
||||
*no_opt_1 = 0;
|
||||
*check_value = 1;
|
||||
|
||||
// Will hang if per-device serialization is not functional
|
||||
HIP_CALL(hipSetDevice(0));
|
||||
hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0, check_value, 0, no_opt);
|
||||
hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0, 0, check_value, 0, no_opt_0);
|
||||
|
||||
HIP_CALL(hipSetDevice(1));
|
||||
hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0, check_value, 1, no_opt);
|
||||
hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0, 1, check_value, 1, no_opt_1);
|
||||
|
||||
HIP_CALL(hipSetDevice(0));
|
||||
HIP_CALL(hipDeviceSynchronize());
|
||||
|
||||
|
||||
@@ -172,6 +172,9 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_kernel_symbol_regist
|
||||
ROCP_SDK_SAVE_DATA_FIELD(kernarg_segment_alignment);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(group_segment_size);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(private_segment_size);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(sgpr_count);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(arch_vgpr_count);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(accum_vgpr_count);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
@@ -301,7 +304,6 @@ save(ArchiveT& ar, rocprofiler_kernel_dispatch_info_t data)
|
||||
ROCP_SDK_SAVE_DATA_FIELD(private_segment_size);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(group_segment_size);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(workgroup_size);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(group_segment_size);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(grid_size);
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#
|
||||
set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "core")
|
||||
add_subdirectory(common)
|
||||
add_subdirectory(output)
|
||||
add_subdirectory(rocprofiler-sdk)
|
||||
|
||||
set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "tools")
|
||||
|
||||
@@ -73,6 +73,9 @@ public:
|
||||
// Do not allow this data structure to be copied, std::move only.
|
||||
Synchronized(const Synchronized&) = delete;
|
||||
|
||||
// return a copy of the data
|
||||
value_type get() const;
|
||||
|
||||
template <typename FuncT, typename... Args>
|
||||
decltype(auto) rlock(FuncT&& lambda, Args&&... args) const;
|
||||
|
||||
@@ -100,6 +103,14 @@ private:
|
||||
//
|
||||
// member definitions
|
||||
//
|
||||
template <typename LockedType, bool IsMappedTypeV>
|
||||
typename Synchronized<LockedType, IsMappedTypeV>::value_type
|
||||
Synchronized<LockedType, IsMappedTypeV>::get() const
|
||||
{
|
||||
auto lock = std::shared_lock{m_mutex};
|
||||
return m_data;
|
||||
}
|
||||
|
||||
template <typename LockedType, bool IsMappedTypeV>
|
||||
template <typename FuncT, typename... Args>
|
||||
decltype(auto)
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
#
|
||||
# Tool library used by rocprofiler
|
||||
#
|
||||
|
||||
rocprofiler_activate_clang_tidy()
|
||||
|
||||
set(TOOL_OUTPUT_HEADERS
|
||||
agent_info.hpp
|
||||
buffered_output.hpp
|
||||
counter_info.hpp
|
||||
csv.hpp
|
||||
csv_output_file.hpp
|
||||
domain_type.hpp
|
||||
format_path.hpp
|
||||
generateCSV.hpp
|
||||
generateJSON.hpp
|
||||
generateOTF2.hpp
|
||||
generatePerfetto.hpp
|
||||
generateStats.hpp
|
||||
generator.hpp
|
||||
kernel_symbol_info.hpp
|
||||
metadata.hpp
|
||||
output_config.hpp
|
||||
output_key.hpp
|
||||
output_stream.hpp
|
||||
statistics.hpp
|
||||
timestamps.hpp
|
||||
tmp_file_buffer.hpp
|
||||
tmp_file.hpp)
|
||||
|
||||
set(TOOL_OUTPUT_SOURCES
|
||||
csv_output_file.cpp
|
||||
domain_type.cpp
|
||||
format_path.cpp
|
||||
generateCSV.cpp
|
||||
generateJSON.cpp
|
||||
generateOTF2.cpp
|
||||
generatePerfetto.cpp
|
||||
generateStats.cpp
|
||||
metadata.cpp
|
||||
output_config.cpp
|
||||
output_key.cpp
|
||||
output_stream.cpp
|
||||
statistics.cpp
|
||||
tmp_file_buffer.cpp
|
||||
tmp_file.cpp)
|
||||
|
||||
add_library(rocprofiler-sdk-output-library STATIC)
|
||||
add_library(rocprofiler-sdk::rocprofiler-sdk-output-library ALIAS
|
||||
rocprofiler-sdk-output-library)
|
||||
target_sources(rocprofiler-sdk-output-library PRIVATE ${TOOL_OUTPUT_SOURCES}
|
||||
${TOOL_OUTPUT_HEADERS})
|
||||
target_link_libraries(
|
||||
rocprofiler-sdk-output-library
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk-headers
|
||||
rocprofiler-sdk::rocprofiler-sdk-build-flags
|
||||
rocprofiler-sdk::rocprofiler-sdk-memcheck
|
||||
rocprofiler-sdk::rocprofiler-sdk-common-library
|
||||
rocprofiler-sdk::rocprofiler-sdk-cereal
|
||||
rocprofiler-sdk::rocprofiler-sdk-perfetto
|
||||
rocprofiler-sdk::rocprofiler-sdk-otf2)
|
||||
+39
-16
@@ -22,27 +22,50 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "helper.hpp"
|
||||
#include "statistics.hpp"
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
#include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "rocprofiler-sdk/fwd.h"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
void
|
||||
write_json(tool_table* tool_functions,
|
||||
uint64_t pid,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
std::vector<rocprofiler_agent_v0_t> agent_data,
|
||||
std::vector<rocprofiler_tool_counter_info_t> counter_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>* kernel_dispatch_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>* memory_copy_deque,
|
||||
std::deque<rocprofiler_tool_counter_collection_record_t>* counter_collection_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* scratch_memory_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_deque);
|
||||
struct agent_info : rocprofiler_agent_v0_t
|
||||
{
|
||||
using base_type = rocprofiler_agent_v0_t;
|
||||
|
||||
agent_info(base_type _base)
|
||||
: base_type{_base}
|
||||
{}
|
||||
|
||||
~agent_info() = default;
|
||||
agent_info(const agent_info&) = default;
|
||||
agent_info(agent_info&&) noexcept = default;
|
||||
agent_info& operator=(const agent_info&) = default;
|
||||
agent_info& operator=(agent_info&&) noexcept = default;
|
||||
|
||||
int64_t gpu_index =
|
||||
(base_type::type == ROCPROFILER_AGENT_TYPE_GPU) ? base_type::logical_node_type_id : -1;
|
||||
};
|
||||
|
||||
using agent_info_vec_t = std::vector<agent_info>;
|
||||
using agent_info_map_t = std::unordered_map<rocprofiler_agent_id_t, agent_info>;
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
namespace cereal
|
||||
{
|
||||
#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD))
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, const ::rocprofiler::tool::agent_info& data)
|
||||
{
|
||||
cereal::save(ar, static_cast<const rocprofiler_agent_v0_t&>(data));
|
||||
}
|
||||
|
||||
#undef SAVE_DATA_FIELD
|
||||
} // namespace cereal
|
||||
+57
-15
@@ -22,7 +22,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "helper.hpp"
|
||||
#include "counter_info.hpp"
|
||||
#include "generator.hpp"
|
||||
#include "statistics.hpp"
|
||||
#include "tmp_file_buffer.hpp"
|
||||
|
||||
@@ -31,6 +32,8 @@
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <deque>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
@@ -41,25 +44,27 @@ using stats_data_t = statistics<uint64_t, float_type>;
|
||||
template <typename Tp, domain_type DomainT>
|
||||
struct buffered_output
|
||||
{
|
||||
using ring_buffer_type = rocprofiler::common::container::ring_buffer<Tp>;
|
||||
using type = Tp;
|
||||
static constexpr auto buffer_type_v = DomainT;
|
||||
|
||||
explicit buffered_output(bool _enabled);
|
||||
~buffered_output() = default;
|
||||
buffered_output(const buffered_output&) = delete;
|
||||
buffered_output(buffered_output&&) noexcept = delete;
|
||||
buffered_output& operator=(const buffered_output&) = default;
|
||||
buffered_output& operator=(buffered_output&&) noexcept = default;
|
||||
buffered_output& operator=(const buffered_output&) = delete;
|
||||
buffered_output& operator=(buffered_output&&) noexcept = delete;
|
||||
|
||||
operator bool() const { return enabled; }
|
||||
|
||||
void flush();
|
||||
void read();
|
||||
void clear();
|
||||
void destroy();
|
||||
|
||||
operator bool() const { return enabled; }
|
||||
generator<Tp> get_generator() const { return generator<Tp>{get_tmp_file_buffer<Tp>(DomainT)}; }
|
||||
std::deque<Tp> load_all();
|
||||
|
||||
std::deque<Tp> element_data = {};
|
||||
stats_entry_t stats = {};
|
||||
stats_entry_t stats = {};
|
||||
|
||||
private:
|
||||
bool enabled = false;
|
||||
@@ -76,7 +81,7 @@ buffered_output<Tp, DomainT>::flush()
|
||||
{
|
||||
if(!enabled) return;
|
||||
|
||||
flush_tmp_buffer<ring_buffer_type>(buffer_type_v);
|
||||
flush_tmp_buffer<type>(buffer_type_v);
|
||||
}
|
||||
|
||||
template <typename Tp, domain_type DomainT>
|
||||
@@ -87,7 +92,26 @@ buffered_output<Tp, DomainT>::read()
|
||||
|
||||
flush();
|
||||
|
||||
element_data = get_buffer_elements(read_tmp_file<ring_buffer_type>(buffer_type_v));
|
||||
read_tmp_file<type>(buffer_type_v);
|
||||
}
|
||||
|
||||
template <typename Tp, domain_type DomainT>
|
||||
std::deque<Tp>
|
||||
buffered_output<Tp, DomainT>::load_all()
|
||||
{
|
||||
auto data = std::deque<Tp>{};
|
||||
if(enabled)
|
||||
{
|
||||
auto gen = get_generator();
|
||||
for(auto ditr : gen)
|
||||
{
|
||||
for(auto itr : gen.get(ditr))
|
||||
{
|
||||
data.emplace_back(itr);
|
||||
}
|
||||
}
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename Tp, domain_type DomainT>
|
||||
@@ -95,8 +119,6 @@ void
|
||||
buffered_output<Tp, DomainT>::clear()
|
||||
{
|
||||
if(!enabled) return;
|
||||
|
||||
element_data.clear();
|
||||
}
|
||||
|
||||
template <typename Tp, domain_type DomainT>
|
||||
@@ -106,10 +128,30 @@ buffered_output<Tp, DomainT>::destroy()
|
||||
if(!enabled) return;
|
||||
|
||||
clear();
|
||||
auto [_tmp_buf, _tmp_file] = get_tmp_file_buffer<ring_buffer_type>(buffer_type_v);
|
||||
_tmp_buf->destroy();
|
||||
delete _tmp_buf;
|
||||
delete _tmp_file;
|
||||
auto*& filebuf = get_tmp_file_buffer<type>(buffer_type_v);
|
||||
file_buffer<type>* tmp = nullptr;
|
||||
std::swap(filebuf, tmp);
|
||||
tmp->buffer.destroy();
|
||||
delete tmp;
|
||||
}
|
||||
|
||||
using hip_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_hip_api_record_t, domain_type::HIP>;
|
||||
using hsa_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_hsa_api_record_t, domain_type::HSA>;
|
||||
using kernel_dispatch_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_kernel_dispatch_record_t,
|
||||
domain_type::KERNEL_DISPATCH>;
|
||||
using memory_copy_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_memory_copy_record_t, domain_type::MEMORY_COPY>;
|
||||
using marker_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_marker_api_record_t, domain_type::MARKER>;
|
||||
using rccl_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_rccl_api_record_t, domain_type::RCCL>;
|
||||
using counter_collection_buffered_output_t =
|
||||
buffered_output<tool_counter_record_t, domain_type::COUNTER_COLLECTION>;
|
||||
using scratch_memory_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_scratch_memory_record_t,
|
||||
domain_type::SCRATCH_MEMORY>;
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,125 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/logging.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/cxx/hash.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
#include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
constexpr uint32_t lds_block_size = 128 * 4;
|
||||
|
||||
using counter_dimension_id_vec_t = std::vector<rocprofiler_counter_dimension_id_t>;
|
||||
using counter_dimension_info_vec_t = std::vector<rocprofiler_record_dimension_info_t>;
|
||||
|
||||
struct tool_counter_info : rocprofiler_counter_info_v0_t
|
||||
{
|
||||
using parent_type = rocprofiler_counter_info_v0_t;
|
||||
|
||||
tool_counter_info(rocprofiler_agent_id_t _agent_id,
|
||||
parent_type _info,
|
||||
counter_dimension_id_vec_t&& _dim_ids,
|
||||
counter_dimension_info_vec_t&& _dim_info)
|
||||
: parent_type{_info}
|
||||
, agent_id{_agent_id}
|
||||
, dimension_ids{std::move(_dim_ids)}
|
||||
, dimensions{std::move(_dim_info)}
|
||||
{}
|
||||
|
||||
~tool_counter_info() = default;
|
||||
tool_counter_info(const tool_counter_info&) = default;
|
||||
tool_counter_info(tool_counter_info&&) noexcept = default;
|
||||
tool_counter_info& operator=(const tool_counter_info&) = default;
|
||||
tool_counter_info& operator=(tool_counter_info&&) noexcept = default;
|
||||
|
||||
rocprofiler_agent_id_t agent_id = {};
|
||||
counter_dimension_id_vec_t dimension_ids = {};
|
||||
counter_dimension_info_vec_t dimensions = {};
|
||||
};
|
||||
|
||||
using counter_info_vec_t = std::vector<tool_counter_info>;
|
||||
using agent_counter_info_map_t = std::unordered_map<rocprofiler_agent_id_t, counter_info_vec_t>;
|
||||
|
||||
struct tool_counter_value_t
|
||||
{
|
||||
rocprofiler_counter_id_t id = {};
|
||||
double value = 0;
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
ar(cereal::make_nvp("counter_id", id));
|
||||
ar(cereal::make_nvp("value", value));
|
||||
}
|
||||
};
|
||||
|
||||
struct tool_counter_record_t
|
||||
{
|
||||
static constexpr size_t max_capacity = 512;
|
||||
|
||||
uint64_t thread_id = 0;
|
||||
rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
|
||||
std::array<tool_counter_value_t, max_capacity> records = {};
|
||||
uint64_t counter_count = 0;
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
// should be removed when moving to buffered tracing
|
||||
auto tmp =
|
||||
std::vector<tool_counter_value_t>{records.begin(), records.begin() + counter_count};
|
||||
|
||||
ar(cereal::make_nvp("thread_id", thread_id));
|
||||
ar(cereal::make_nvp("dispatch_data", dispatch_data));
|
||||
ar(cereal::make_nvp("records", tmp));
|
||||
}
|
||||
};
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
namespace cereal
|
||||
{
|
||||
#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD))
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, const ::rocprofiler::tool::tool_counter_info& data)
|
||||
{
|
||||
SAVE_DATA_FIELD(agent_id);
|
||||
cereal::save(ar, static_cast<const rocprofiler_counter_info_v0_t&>(data));
|
||||
SAVE_DATA_FIELD(dimension_ids);
|
||||
}
|
||||
|
||||
#undef SAVE_DATA_FIELD
|
||||
} // namespace cereal
|
||||
+17
-8
@@ -20,17 +20,26 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "tmp_file_buffer.hpp"
|
||||
#include "csv_output_file.hpp"
|
||||
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
|
||||
#include <fmt/core.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <utility>
|
||||
#include <string_view>
|
||||
#include <unordered_set>
|
||||
|
||||
std::string
|
||||
compose_tmp_file_name(domain_type buffer_type)
|
||||
namespace rocprofiler
|
||||
{
|
||||
return rocprofiler::tool::format(fmt::format("{}/.rocprofv3/{}-{}.dat",
|
||||
rocprofiler::tool::get_config().tmp_directory,
|
||||
"%ppid%-%pid%",
|
||||
get_domain_file_name(buffer_type)));
|
||||
namespace tool
|
||||
{
|
||||
csv_output_file::~csv_output_file()
|
||||
{
|
||||
if(m_os.stream) ROCP_INFO << "Closing result file: " << m_name;
|
||||
|
||||
m_os.close();
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
+31
-56
@@ -22,8 +22,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "config.hpp"
|
||||
#include "csv.hpp"
|
||||
#include "domain_type.hpp"
|
||||
#include "output_stream.hpp"
|
||||
|
||||
#include "lib/common/filesystem.hpp"
|
||||
|
||||
@@ -40,59 +41,24 @@ namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
using ostream_dtor_t = void (*)(std::ostream*&);
|
||||
|
||||
using output_stream_pair_t = std::pair<std::ostream*, ostream_dtor_t>;
|
||||
|
||||
struct output_stream_t
|
||||
{
|
||||
output_stream_t() = default;
|
||||
output_stream_t(std::ostream* _os, ostream_dtor_t _dtor)
|
||||
: stream{_os}
|
||||
, dtor{_dtor}
|
||||
{}
|
||||
|
||||
~output_stream_t() { close(); }
|
||||
output_stream_t(const output_stream_t&) = delete;
|
||||
output_stream_t(output_stream_t&&) noexcept = default;
|
||||
output_stream_t& operator=(const output_stream_t&) = delete;
|
||||
output_stream_t& operator=(output_stream_t&&) noexcept = default;
|
||||
|
||||
explicit operator bool() const { return stream != nullptr; }
|
||||
|
||||
template <typename Tp>
|
||||
std::ostream& operator<<(Tp&& value)
|
||||
{
|
||||
return ((stream) ? *stream : std::cerr) << std::forward<Tp>(value) << std::flush;
|
||||
}
|
||||
|
||||
void close()
|
||||
{
|
||||
if(stream) (*stream) << std::flush;
|
||||
if(dtor) dtor(stream);
|
||||
}
|
||||
|
||||
bool writes_to_file() const { return (dynamic_cast<std::ofstream*>(stream) != nullptr); }
|
||||
|
||||
std::ostream* stream = nullptr;
|
||||
ostream_dtor_t dtor = nullptr;
|
||||
};
|
||||
|
||||
std::string
|
||||
get_output_filename(std::string_view fname, std::string_view ext);
|
||||
|
||||
output_stream_t
|
||||
get_output_stream(std::string_view fname, std::string_view ext);
|
||||
|
||||
struct output_file
|
||||
struct csv_output_file
|
||||
{
|
||||
template <size_t N>
|
||||
output_file(std::string name, csv::csv_encoder<N>, std::array<std::string_view, N>&& header);
|
||||
csv_output_file(const output_config& cfg,
|
||||
std::string_view name,
|
||||
csv::csv_encoder<N>,
|
||||
std::array<std::string_view, N>&& header);
|
||||
|
||||
~output_file();
|
||||
template <size_t N>
|
||||
csv_output_file(const output_config& cfg,
|
||||
domain_type domain,
|
||||
csv::csv_encoder<N>,
|
||||
std::array<std::string_view, N>&& header);
|
||||
|
||||
output_file(const output_file&) = delete;
|
||||
output_file& operator=(const output_file&) = delete;
|
||||
~csv_output_file();
|
||||
|
||||
csv_output_file(const csv_output_file&) = delete;
|
||||
csv_output_file& operator=(const csv_output_file&) = delete;
|
||||
|
||||
std::string name() const { return m_name; }
|
||||
|
||||
@@ -108,15 +74,16 @@ struct output_file
|
||||
private:
|
||||
const std::string m_name = {};
|
||||
std::mutex m_mutex = {};
|
||||
output_stream_t m_os = {};
|
||||
output_stream m_os = {};
|
||||
};
|
||||
|
||||
template <size_t N>
|
||||
output_file::output_file(std::string name,
|
||||
csv::csv_encoder<N> encoder,
|
||||
std::array<std::string_view, N>&& header)
|
||||
: m_name{std::move(name)}
|
||||
, m_os{get_output_stream(m_name, ".csv")}
|
||||
csv_output_file::csv_output_file(const output_config& cfg,
|
||||
std::string_view name,
|
||||
csv::csv_encoder<N> encoder,
|
||||
std::array<std::string_view, N>&& header)
|
||||
: m_name{std::string{name}}
|
||||
, m_os{get_output_stream(cfg, m_name, ".csv")}
|
||||
{
|
||||
for(auto& itr : header)
|
||||
{
|
||||
@@ -127,5 +94,13 @@ output_file::output_file(std::string name,
|
||||
// write the csv header
|
||||
if(m_os.stream) encoder.write_row(*m_os.stream, header);
|
||||
}
|
||||
|
||||
template <size_t N>
|
||||
csv_output_file::csv_output_file(const output_config& cfg,
|
||||
domain_type domain,
|
||||
csv::csv_encoder<N> encoder,
|
||||
std::array<std::string_view, N>&& header)
|
||||
: csv_output_file{cfg, get_domain_trace_file_name(domain), encoder, std::move(header)}
|
||||
{}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
+42
-16
@@ -29,33 +29,51 @@ namespace
|
||||
template <domain_type DomainT>
|
||||
struct domain_type_name;
|
||||
|
||||
#define DEFINE_BUFFER_TYPE_NAME(ENUM_VALUE, COLUMN_NAME, FILENAME) \
|
||||
#define DEFINE_BUFFER_TYPE_NAME(ENUM_VALUE, COLUMN_NAME, TRACE_FILENAME, STATS_FILENAME) \
|
||||
template <> \
|
||||
struct domain_type_name<domain_type::ENUM_VALUE> \
|
||||
{ \
|
||||
static constexpr auto column_name = COLUMN_NAME; \
|
||||
static constexpr auto filename = FILENAME; \
|
||||
static constexpr auto column_name = COLUMN_NAME; \
|
||||
static constexpr auto trace_filename = TRACE_FILENAME; \
|
||||
static constexpr auto stats_filename = STATS_FILENAME; \
|
||||
};
|
||||
|
||||
DEFINE_BUFFER_TYPE_NAME(HSA, "HSA_API", "hsa_api")
|
||||
DEFINE_BUFFER_TYPE_NAME(HIP, "HIP_API", "hip_api")
|
||||
DEFINE_BUFFER_TYPE_NAME(MARKER, "MARKER_API", "marker_api")
|
||||
DEFINE_BUFFER_TYPE_NAME(KERNEL_DISPATCH, "KERNEL_DISPATCH", "kernel_dispatch")
|
||||
DEFINE_BUFFER_TYPE_NAME(MEMORY_COPY, "MEMORY_COPY", "memory_copy")
|
||||
DEFINE_BUFFER_TYPE_NAME(SCRATCH_MEMORY, "SCRATCH_MEMORY", "scratch_memory")
|
||||
DEFINE_BUFFER_TYPE_NAME(COUNTER_COLLECTION, "COUNTER_COLLECTION", "counter_collection")
|
||||
DEFINE_BUFFER_TYPE_NAME(RCCL, "RCCL_API", "rccl_api")
|
||||
DEFINE_BUFFER_TYPE_NAME(HSA, "HSA_API", "hsa_api_trace", "hsa_api_stats")
|
||||
DEFINE_BUFFER_TYPE_NAME(HIP, "HIP_API", "hip_api_trace", "hip_api_stats")
|
||||
DEFINE_BUFFER_TYPE_NAME(MARKER, "MARKER_API", "marker_api_trace", "marker_api_stats")
|
||||
DEFINE_BUFFER_TYPE_NAME(KERNEL_DISPATCH, "KERNEL_DISPATCH", "kernel_trace", "kernel_stats")
|
||||
DEFINE_BUFFER_TYPE_NAME(MEMORY_COPY, "MEMORY_COPY", "memory_copy_trace", "memory_copy_stats")
|
||||
DEFINE_BUFFER_TYPE_NAME(SCRATCH_MEMORY,
|
||||
"SCRATCH_MEMORY",
|
||||
"scratch_memory_trace",
|
||||
"scratch_memory_stats")
|
||||
DEFINE_BUFFER_TYPE_NAME(COUNTER_COLLECTION,
|
||||
"COUNTER_COLLECTION",
|
||||
"counter_collection",
|
||||
"counter_collection_stats")
|
||||
DEFINE_BUFFER_TYPE_NAME(RCCL, "RCCL_API", "rccl_api_trace", "rccl_api_stats")
|
||||
|
||||
#undef DEFINE_BUFFER_TYPE_NAME
|
||||
|
||||
template <size_t Idx, size_t... TailIdx>
|
||||
std::string_view
|
||||
get_domain_file_name(domain_type _buffer_type, std::index_sequence<Idx, TailIdx...>)
|
||||
get_domain_trace_file_name(domain_type _buffer_type, std::index_sequence<Idx, TailIdx...>)
|
||||
{
|
||||
if(static_cast<size_t>(_buffer_type) == Idx)
|
||||
return domain_type_name<static_cast<domain_type>(Idx)>::filename;
|
||||
return domain_type_name<static_cast<domain_type>(Idx)>::trace_filename;
|
||||
if constexpr(sizeof...(TailIdx) > 0)
|
||||
return get_domain_file_name(_buffer_type, std::index_sequence<TailIdx...>{});
|
||||
return get_domain_trace_file_name(_buffer_type, std::index_sequence<TailIdx...>{});
|
||||
return std::string_view{};
|
||||
}
|
||||
|
||||
template <size_t Idx, size_t... TailIdx>
|
||||
std::string_view
|
||||
get_domain_stats_file_name(domain_type _buffer_type, std::index_sequence<Idx, TailIdx...>)
|
||||
{
|
||||
if(static_cast<size_t>(_buffer_type) == Idx)
|
||||
return domain_type_name<static_cast<domain_type>(Idx)>::stats_filename;
|
||||
if constexpr(sizeof...(TailIdx) > 0)
|
||||
return get_domain_stats_file_name(_buffer_type, std::index_sequence<TailIdx...>{});
|
||||
return std::string_view{};
|
||||
}
|
||||
|
||||
@@ -73,11 +91,19 @@ get_domain_column_name(domain_type buffer_type, std::index_sequence<Idx, IdxTail
|
||||
} // namespace
|
||||
|
||||
std::string_view
|
||||
get_domain_file_name(domain_type _buffer_type)
|
||||
get_domain_trace_file_name(domain_type _buffer_type)
|
||||
{
|
||||
constexpr auto buffer_type_last_v = static_cast<size_t>(domain_type::LAST);
|
||||
|
||||
return get_domain_file_name(_buffer_type, std::make_index_sequence<buffer_type_last_v>{});
|
||||
return get_domain_trace_file_name(_buffer_type, std::make_index_sequence<buffer_type_last_v>{});
|
||||
}
|
||||
|
||||
std::string_view
|
||||
get_domain_stats_file_name(domain_type _buffer_type)
|
||||
{
|
||||
constexpr auto buffer_type_last_v = static_cast<size_t>(domain_type::LAST);
|
||||
|
||||
return get_domain_stats_file_name(_buffer_type, std::make_index_sequence<buffer_type_last_v>{});
|
||||
}
|
||||
|
||||
std::string_view
|
||||
+4
-1
@@ -38,7 +38,10 @@ enum class domain_type
|
||||
};
|
||||
|
||||
std::string_view
|
||||
get_domain_file_name(domain_type val);
|
||||
get_domain_trace_file_name(domain_type val);
|
||||
|
||||
std::string_view
|
||||
get_domain_stats_file_name(domain_type val);
|
||||
|
||||
std::string_view
|
||||
get_domain_column_name(domain_type _buffer_type);
|
||||
@@ -0,0 +1,172 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
#include "format_path.hpp"
|
||||
|
||||
#include "lib/common/defines.hpp"
|
||||
#include "lib/common/demangle.hpp"
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/output/output_key.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/cxx/details/tokenize.hpp>
|
||||
|
||||
#include <fmt/core.h>
|
||||
|
||||
#include <linux/limits.h>
|
||||
#include <unistd.h>
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <fstream>
|
||||
#include <regex>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Patterns recognised as environment-variable references inside an output path.
//
// All three patterns expose the same capture layout, because the code that
// consumes them addresses the groups by fixed number:
//   $1 = text before the reference
//   $2 = keyword (env / ENV / q)
//   $3 = variable name
//   $4 = text after the reference
// The "%q{...}" pattern therefore captures the literal 'q' as its own group;
// without it the variable name would land in $2 and the trailing text in $3.
//
// The array is heap-allocated and never deleted.
// NOTE(review): presumably intentional so it stays usable during process
// teardown -- confirm.
const auto env_regexes =
    new std::array<std::regex, 3>{std::regex{"(.*)%(env|ENV)\\{([A-Z0-9_]+)\\}%(.*)"},
                                  std::regex{"(.*)\\$(env|ENV)\\{([A-Z0-9_]+)\\}(.*)"},
                                  std::regex{"(.*)%(q)\\{([A-Z0-9_]+)\\}(.*)"}};
|
||||
// env regex examples:
|
||||
// - %env{USER}% Consistent with other output key formats (start+end with %)
|
||||
// - $ENV{USER} Similar to CMake
|
||||
// - %q{USER} Compatibility with NVIDIA
|
||||
//
|
||||
|
||||
std::string
|
||||
format_path_impl(std::string _fpath, const std::vector<output_key>& _keys)
|
||||
{
|
||||
if(_fpath.find('%') == std::string::npos && _fpath.find('$') == std::string::npos)
|
||||
return _fpath;
|
||||
|
||||
auto _replace = [](auto& _v, const output_key& pitr) {
|
||||
auto pos = std::string::npos;
|
||||
while((pos = _v.find(pitr.key)) != std::string::npos)
|
||||
_v.replace(pos, pitr.key.length(), pitr.value);
|
||||
};
|
||||
|
||||
for(auto&& itr : _keys)
|
||||
_replace(_fpath, itr);
|
||||
|
||||
// environment and configuration variables
|
||||
try
|
||||
{
|
||||
auto strip_leading_and_replace =
|
||||
[](std::string_view inp_v, std::initializer_list<char> keys, const char* val) {
|
||||
auto inp = std::string{inp_v};
|
||||
for(auto key : keys)
|
||||
{
|
||||
auto pos = std::string::npos;
|
||||
while((pos = inp.find(key)) == 0)
|
||||
inp = inp.substr(pos + 1);
|
||||
|
||||
while((pos = inp.find(key)) != std::string::npos)
|
||||
inp = inp.replace(pos, 1, val);
|
||||
}
|
||||
return inp;
|
||||
};
|
||||
|
||||
for(const auto& _re : *env_regexes)
|
||||
{
|
||||
while(std::regex_search(_fpath, _re))
|
||||
{
|
||||
auto _var = std::regex_replace(_fpath, _re, "$3");
|
||||
std::string _val = common::get_env<std::string>(_var, "");
|
||||
_val = strip_leading_and_replace(_val, {'\t', ' ', '/'}, "_");
|
||||
auto _beg = std::regex_replace(_fpath, _re, "$1");
|
||||
auto _end = std::regex_replace(_fpath, _re, "$4");
|
||||
_fpath = fmt::format("{}{}{}", _beg, _val, _end);
|
||||
}
|
||||
}
|
||||
} catch(std::exception& _e)
|
||||
{
|
||||
ROCP_WARNING << "[rocprofiler] " << __FUNCTION__ << " threw an exception :: " << _e.what()
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
// remove %arg<N>% where N >= argc
|
||||
try
|
||||
{
|
||||
std::regex _re{"(.*)%(arg[0-9]+)%([-/_]*)(.*)"};
|
||||
while(std::regex_search(_fpath, _re))
|
||||
_fpath = std::regex_replace(_fpath, _re, "$1$4");
|
||||
} catch(std::exception& _e)
|
||||
{
|
||||
ROCP_WARNING << "[rocprofiler] " << __FUNCTION__ << " threw an exception :: " << _e.what()
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
return _fpath;
|
||||
}
|
||||
|
||||
std::string
|
||||
format_path(std::string&& _fpath, const std::vector<output_key>& _keys)
|
||||
{
|
||||
if(_fpath.find('%') == std::string::npos && _fpath.find('$') == std::string::npos)
|
||||
return _fpath;
|
||||
|
||||
auto _ref = _fpath;
|
||||
_fpath = format_path_impl(std::move(_fpath), _keys);
|
||||
|
||||
return (_fpath == _ref) ? _fpath : format_path(std::move(_fpath), _keys);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
int
|
||||
get_mpi_size()
|
||||
{
|
||||
static int _v = common::get_env<int>(
|
||||
"OMPI_COMM_WORLD_SIZE",
|
||||
common::get_env<int>("MV2_COMM_WORLD_SIZE", common::get_env<int>("MPI_SIZE", 0)));
|
||||
return _v;
|
||||
}
|
||||
|
||||
int
|
||||
get_mpi_rank()
|
||||
{
|
||||
static int _v = common::get_env<int>(
|
||||
"OMPI_COMM_WORLD_RANK",
|
||||
common::get_env<int>("MV2_COMM_WORLD_RANK", common::get_env<int>("MPI_RANK", -1)));
|
||||
return _v;
|
||||
}
|
||||
|
||||
std::string
|
||||
format_path(std::string _fpath, const std::string& _tag)
|
||||
{
|
||||
return format_path(std::move(_fpath), output_keys(_tag));
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,53 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/mpl.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
// MPI world size taken from the environment (OMPI_COMM_WORLD_SIZE,
// MV2_COMM_WORLD_SIZE or MPI_SIZE, in that order of precedence); 0 when none
// of them provides a value.
int
get_mpi_size();

// MPI rank taken from the environment (OMPI_COMM_WORLD_RANK,
// MV2_COMM_WORLD_RANK or MPI_RANK, in that order of precedence); -1 when none
// of them provides a value.
int
get_mpi_rank();

// Expands the placeholders in `_fpath` (output keys generated for `_tag`,
// environment references such as %env{VAR}%, $ENV{VAR} and %q{VAR}, and
// leftover %arg<N>% entries) and returns the resulting path.
std::string
format_path(std::string _fpath, const std::string& _tag = {});
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,714 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "generateCSV.hpp"
|
||||
#include "csv.hpp"
|
||||
#include "csv_output_file.hpp"
|
||||
#include "domain_type.hpp"
|
||||
#include "generateStats.hpp"
|
||||
#include "output_config.hpp"
|
||||
#include "output_stream.hpp"
|
||||
#include "statistics.hpp"
|
||||
#include "timestamps.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/marker/api_id.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <cstdint>
|
||||
#include <iomanip>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Creates the CSV output file `name` for statistics rows, using the stats
// encoder and the eight statistics column headers.
tool::csv_output_file
get_stats_output_file(const output_config& cfg, std::string_view name)
{
    return csv_output_file{
        cfg,
        name,
        csv::stats_csv_encoder{},
        {"Name", "Calls", "TotalDurationNs", "AverageNs", "Percentage", "MinNs", "MaxNs", "StdDev"}};
}
|
||||
|
||||
// Creates the statistics CSV output file for a tracing domain; the file name
// is the domain's statistics file name.
tool::csv_output_file
get_stats_output_file(const output_config& cfg, domain_type domain)
{
    const auto stats_file_name = get_domain_stats_file_name(domain);

    return get_stats_output_file(cfg, stats_file_name);
}
|
||||
|
||||
void
|
||||
write_stats(tool::csv_output_file&& ofs, const stats_entry_vec_t& data_v)
|
||||
{
|
||||
auto data = stats_entry_vec_t{};
|
||||
auto _duration = stats_data_t{};
|
||||
for(const auto& [id, value] : data_v)
|
||||
{
|
||||
data.emplace_back(id, value);
|
||||
_duration += value;
|
||||
}
|
||||
|
||||
std::sort(data.begin(), data.end(), [](const auto& lhs, const auto& rhs) {
|
||||
return (lhs.second.get_sum() > rhs.second.get_sum());
|
||||
});
|
||||
|
||||
constexpr float_type one_hundred = 100.0;
|
||||
|
||||
const float_type _total_duration = _duration.get_sum();
|
||||
for(const auto& [name, value] : data)
|
||||
{
|
||||
auto duration_ns = value.get_sum();
|
||||
auto calls = value.get_count();
|
||||
float_type avg_ns = value.get_mean();
|
||||
float_type percent_v = (duration_ns / _total_duration) * one_hundred;
|
||||
|
||||
auto _row = std::stringstream{};
|
||||
rocprofiler::tool::csv::stats_csv_encoder::write_row<stats_formatter>(_row,
|
||||
name,
|
||||
calls,
|
||||
duration_ns,
|
||||
avg_ns,
|
||||
percentage{percent_v},
|
||||
value.get_min(),
|
||||
value.get_max(),
|
||||
value.get_stddev());
|
||||
ofs << _row.str() << std::flush;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& /*tool_metadata*/,
|
||||
std::vector<agent_info>& data)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
std::sort(data.begin(), data.end(), [](const agent_info& lhs, const agent_info& rhs) {
|
||||
return lhs.node_id < rhs.node_id;
|
||||
});
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
"agent_info",
|
||||
tool::csv::agent_info_csv_encoder{},
|
||||
{"Node_Id",
|
||||
"Logical_Node_Id",
|
||||
"Agent_Type",
|
||||
"Cpu_Cores_Count",
|
||||
"Simd_Count",
|
||||
"Cpu_Core_Id_Base",
|
||||
"Simd_Id_Base",
|
||||
"Max_Waves_Per_Simd",
|
||||
"Lds_Size_In_Kb",
|
||||
"Gds_Size_In_Kb",
|
||||
"Num_Gws",
|
||||
"Wave_Front_Size",
|
||||
"Num_Xcc",
|
||||
"Cu_Count",
|
||||
"Array_Count",
|
||||
"Num_Shader_Banks",
|
||||
"Simd_Arrays_Per_Engine",
|
||||
"Cu_Per_Simd_Array",
|
||||
"Simd_Per_Cu",
|
||||
"Max_Slots_Scratch_Cu",
|
||||
"Gfx_Target_Version",
|
||||
"Vendor_Id",
|
||||
"Device_Id",
|
||||
"Location_Id",
|
||||
"Domain",
|
||||
"Drm_Render_Minor",
|
||||
"Num_Sdma_Engines",
|
||||
"Num_Sdma_Xgmi_Engines",
|
||||
"Num_Sdma_Queues_Per_Engine",
|
||||
"Num_Cp_Queues",
|
||||
"Max_Engine_Clk_Ccompute",
|
||||
"Max_Engine_Clk_Fcompute",
|
||||
"Sdma_Fw_Version",
|
||||
"Fw_Version",
|
||||
"Capability",
|
||||
"Cu_Per_Engine",
|
||||
"Max_Waves_Per_Cu",
|
||||
"Family_Id",
|
||||
"Workgroup_Max_Size",
|
||||
"Grid_Max_Size",
|
||||
"Local_Mem_Size",
|
||||
"Hive_Id",
|
||||
"Gpu_Id",
|
||||
"Workgroup_Max_Dim_X",
|
||||
"Workgroup_Max_Dim_Y",
|
||||
"Workgroup_Max_Dim_Z",
|
||||
"Grid_Max_Dim_X",
|
||||
"Grid_Max_Dim_Y",
|
||||
"Grid_Max_Dim_Z",
|
||||
"Name",
|
||||
"Vendor_Name",
|
||||
"Product_Name",
|
||||
"Model_Name"}};
|
||||
|
||||
for(auto& itr : data)
|
||||
{
|
||||
auto _type = std::string_view{};
|
||||
if(itr.type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_type = "CPU";
|
||||
else if(itr.type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_type = "GPU";
|
||||
else
|
||||
_type = "UNK";
|
||||
|
||||
auto row_ss = std::stringstream{};
|
||||
rocprofiler::tool::csv::agent_info_csv_encoder::write_row(row_ss,
|
||||
itr.node_id,
|
||||
itr.logical_node_id,
|
||||
_type,
|
||||
itr.cpu_cores_count,
|
||||
itr.simd_count,
|
||||
itr.cpu_core_id_base,
|
||||
itr.simd_id_base,
|
||||
itr.max_waves_per_simd,
|
||||
itr.lds_size_in_kb,
|
||||
itr.gds_size_in_kb,
|
||||
itr.num_gws,
|
||||
itr.wave_front_size,
|
||||
itr.num_xcc,
|
||||
itr.cu_count,
|
||||
itr.array_count,
|
||||
itr.num_shader_banks,
|
||||
itr.simd_arrays_per_engine,
|
||||
itr.cu_per_simd_array,
|
||||
itr.simd_per_cu,
|
||||
itr.max_slots_scratch_cu,
|
||||
itr.gfx_target_version,
|
||||
itr.vendor_id,
|
||||
itr.device_id,
|
||||
itr.location_id,
|
||||
itr.domain,
|
||||
itr.drm_render_minor,
|
||||
itr.num_sdma_engines,
|
||||
itr.num_sdma_xgmi_engines,
|
||||
itr.num_sdma_queues_per_engine,
|
||||
itr.num_cp_queues,
|
||||
itr.max_engine_clk_ccompute,
|
||||
itr.max_engine_clk_fcompute,
|
||||
itr.sdma_fw_version.Value,
|
||||
itr.fw_version.Value,
|
||||
itr.capability.Value,
|
||||
itr.cu_per_engine,
|
||||
itr.max_waves_per_cu,
|
||||
itr.family_id,
|
||||
itr.workgroup_max_size,
|
||||
itr.grid_max_size,
|
||||
itr.local_mem_size,
|
||||
itr.hive_id,
|
||||
itr.gpu_id,
|
||||
itr.workgroup_max_dim.x,
|
||||
itr.workgroup_max_dim.y,
|
||||
itr.workgroup_max_dim.z,
|
||||
itr.grid_max_dim.x,
|
||||
itr.grid_max_dim.y,
|
||||
itr.grid_max_dim.z,
|
||||
itr.name,
|
||||
itr.vendor_name,
|
||||
itr.product_name,
|
||||
itr.model_name);
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(cfg.stats && stats)
|
||||
write_stats(get_stats_output_file(cfg, domain_type::KERNEL_DISPATCH), stats.entries);
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::KERNEL_DISPATCH,
|
||||
tool::csv::kernel_trace_csv_encoder{},
|
||||
{"Kind",
|
||||
"Agent_Id",
|
||||
"Queue_Id",
|
||||
"Thread_Id",
|
||||
"Dispatch_Id",
|
||||
"Kernel_Id",
|
||||
"Kernel_Name",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp",
|
||||
"Private_Segment_Size",
|
||||
"Group_Segment_Size",
|
||||
"Workgroup_Size_X",
|
||||
"Workgroup_Size_Y",
|
||||
"Workgroup_Size_Z",
|
||||
"Grid_Size_X",
|
||||
"Grid_Size_Y",
|
||||
"Grid_Size_Z"}};
|
||||
|
||||
for(auto ditr : data)
|
||||
{
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto kernel_name = tool_metadata.get_kernel_name(record.dispatch_info.kernel_id,
|
||||
record.correlation_id.external.value);
|
||||
rocprofiler::tool::csv::kernel_trace_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_metadata.get_kind_name(record.kind),
|
||||
tool_metadata.get_node_id(record.dispatch_info.agent_id),
|
||||
record.dispatch_info.queue_id.handle,
|
||||
record.thread_id,
|
||||
record.dispatch_info.dispatch_id,
|
||||
record.dispatch_info.kernel_id,
|
||||
kernel_name,
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp,
|
||||
record.dispatch_info.private_segment_size,
|
||||
record.dispatch_info.group_segment_size,
|
||||
record.dispatch_info.workgroup_size.x,
|
||||
record.dispatch_info.workgroup_size.y,
|
||||
record.dispatch_info.workgroup_size.z,
|
||||
record.dispatch_info.grid_size.x,
|
||||
record.dispatch_info.grid_size.y,
|
||||
record.dispatch_info.grid_size.z);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(cfg.stats && stats) write_stats(get_stats_output_file(cfg, domain_type::HIP), stats.entries);
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::HIP,
|
||||
tool::csv::api_csv_encoder{},
|
||||
{"Domain",
|
||||
"Function",
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
for(auto ditr : data)
|
||||
{
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
rocprofiler::tool::csv::api_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_metadata.get_kind_name(record.kind),
|
||||
api_name,
|
||||
tool_metadata.process_id,
|
||||
record.thread_id,
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(cfg.stats && stats) write_stats(get_stats_output_file(cfg, domain_type::HSA), stats.entries);
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::HSA,
|
||||
tool::csv::api_csv_encoder{},
|
||||
{"Domain",
|
||||
"Function",
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
|
||||
for(auto ditr : data)
|
||||
{
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
rocprofiler::tool::csv::api_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_metadata.get_kind_name(record.kind),
|
||||
api_name,
|
||||
tool_metadata.process_id,
|
||||
record.thread_id,
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(cfg.stats && stats)
|
||||
write_stats(get_stats_output_file(cfg, domain_type::MEMORY_COPY), stats.entries);
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::MEMORY_COPY,
|
||||
tool::csv::memory_copy_csv_encoder{},
|
||||
{"Kind",
|
||||
"Direction",
|
||||
"Source_Agent_Id",
|
||||
"Destination_Agent_Id",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
for(auto ditr : data)
|
||||
{
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
rocprofiler::tool::csv::memory_copy_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_metadata.get_kind_name(record.kind),
|
||||
api_name,
|
||||
tool_metadata.get_node_id(record.src_agent_id),
|
||||
tool_metadata.get_node_id(record.dst_agent_id),
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(cfg.stats && stats)
|
||||
write_stats(get_stats_output_file(cfg, domain_type::MARKER), stats.entries);
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::MARKER,
|
||||
tool::csv::marker_csv_encoder{},
|
||||
{"Domain",
|
||||
"Function",
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
for(auto ditr : data)
|
||||
{
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto _name = std::string_view{};
|
||||
|
||||
if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
|
||||
(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA ||
|
||||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA ||
|
||||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA))
|
||||
{
|
||||
_name = tool_metadata.get_marker_message(record.correlation_id.internal);
|
||||
}
|
||||
else
|
||||
{
|
||||
_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
}
|
||||
|
||||
tool::csv::marker_csv_encoder::write_row(row_ss,
|
||||
tool_metadata.get_kind_name(record.kind),
|
||||
_name,
|
||||
tool_metadata.process_id,
|
||||
record.thread_id,
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_counter_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(cfg.stats && stats)
|
||||
write_stats(get_stats_output_file(cfg, domain_type::COUNTER_COLLECTION), stats.entries);
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::COUNTER_COLLECTION,
|
||||
tool::csv::counter_collection_csv_encoder{},
|
||||
{"Correlation_Id",
|
||||
"Dispatch_Id",
|
||||
"Agent_Id",
|
||||
"Queue_Id",
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Grid_Size",
|
||||
"Kernel_Id",
|
||||
"Kernel_Name",
|
||||
"Workgroup_Size",
|
||||
"LDS_Block_Size",
|
||||
"Scratch_Size",
|
||||
"VGPR_Count",
|
||||
"SGPR_Count",
|
||||
"Counter_Name",
|
||||
"Counter_Value",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
for(auto ditr : data)
|
||||
{
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto kernel_id = record.dispatch_data.dispatch_info.kernel_id;
|
||||
auto counter_name_value = std::map<std::string_view, double>{};
|
||||
for(uint64_t i = 0; i < record.counter_count; i++)
|
||||
{
|
||||
const auto& rec = record.records.at(i);
|
||||
std::string_view counter_name = tool_metadata.get_counter_info(rec.id)->name;
|
||||
auto search = counter_name_value.find(counter_name);
|
||||
if(search == counter_name_value.end())
|
||||
counter_name_value.emplace(
|
||||
std::pair<std::string_view, double>{counter_name, rec.value});
|
||||
else
|
||||
search->second += rec.value;
|
||||
}
|
||||
|
||||
const auto& correlation_id = record.dispatch_data.correlation_id;
|
||||
const auto* kernel_info = tool_metadata.get_kernel_symbol(kernel_id);
|
||||
auto lds_block_size_v =
|
||||
(kernel_info->group_segment_size + (lds_block_size - 1)) & ~(lds_block_size - 1);
|
||||
|
||||
auto magnitude = [](rocprofiler_dim3_t dims) { return (dims.x * dims.y * dims.z); };
|
||||
auto row_ss = std::stringstream{};
|
||||
for(auto& itr : counter_name_value)
|
||||
{
|
||||
tool::csv::counter_collection_csv_encoder::write_row(
|
||||
row_ss,
|
||||
correlation_id.internal,
|
||||
record.dispatch_data.dispatch_info.dispatch_id,
|
||||
tool_metadata.get_node_id(record.dispatch_data.dispatch_info.agent_id),
|
||||
record.dispatch_data.dispatch_info.queue_id.handle,
|
||||
tool_metadata.process_id,
|
||||
record.thread_id,
|
||||
magnitude(record.dispatch_data.dispatch_info.grid_size),
|
||||
record.dispatch_data.dispatch_info.kernel_id,
|
||||
tool_metadata.get_kernel_name(kernel_id, correlation_id.external.value),
|
||||
magnitude(record.dispatch_data.dispatch_info.workgroup_size),
|
||||
lds_block_size_v,
|
||||
record.dispatch_data.dispatch_info.private_segment_size,
|
||||
kernel_info->arch_vgpr_count,
|
||||
kernel_info->sgpr_count,
|
||||
itr.first,
|
||||
itr.second,
|
||||
record.dispatch_data.start_timestamp,
|
||||
record.dispatch_data.end_timestamp);
|
||||
}
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(cfg.stats && stats)
|
||||
write_stats(get_stats_output_file(cfg, domain_type::SCRATCH_MEMORY), stats.entries);
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::SCRATCH_MEMORY,
|
||||
tool::csv::scratch_memory_encoder{},
|
||||
{
|
||||
"Kind",
|
||||
"Operation",
|
||||
"Agent_Id",
|
||||
"Queue_Id",
|
||||
"Thread_Id",
|
||||
"Alloc_flags",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp",
|
||||
}};
|
||||
|
||||
for(auto ditr : data)
|
||||
{
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto kind_name = tool_metadata.get_kind_name(record.kind);
|
||||
auto op_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
|
||||
tool::csv::scratch_memory_encoder::write_row(row_ss,
|
||||
kind_name,
|
||||
op_name,
|
||||
tool_metadata.get_node_id(record.agent_id),
|
||||
record.queue_id.handle,
|
||||
record.thread_id,
|
||||
record.flags,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(cfg.stats && stats)
|
||||
write_stats(get_stats_output_file(cfg, domain_type::RCCL), stats.entries);
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::RCCL,
|
||||
tool::csv::api_csv_encoder{},
|
||||
{"Domain",
|
||||
"Function",
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
for(auto ditr : data)
|
||||
{
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
rocprofiler::tool::csv::api_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_metadata.get_kind_name(record.kind),
|
||||
api_name,
|
||||
tool_metadata.process_id,
|
||||
record.thread_id,
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& /*tool_metadata*/,
|
||||
const domain_stats_vec_t& data_v)
|
||||
{
|
||||
using csv_encoder_t = rocprofiler::tool::csv::stats_csv_encoder;
|
||||
|
||||
if(!cfg.stats) return;
|
||||
|
||||
auto _data = data_v;
|
||||
auto _total_stats = stats_data_t{};
|
||||
for(const auto& itr : _data)
|
||||
_total_stats += itr.second.total;
|
||||
|
||||
if(_total_stats.get_count() == 0) return;
|
||||
|
||||
std::sort(_data.begin(), _data.end(), [](const auto& lhs, const auto& rhs) {
|
||||
return (lhs.second.total.get_sum() > rhs.second.total.get_sum());
|
||||
});
|
||||
|
||||
auto ofs = get_stats_output_file(cfg, "domain_stats");
|
||||
|
||||
const float_type _total_duration = _total_stats.get_sum();
|
||||
for(const auto& [type, value] : _data)
|
||||
{
|
||||
auto name = get_domain_column_name(type);
|
||||
auto duration_ns = value.total.get_sum();
|
||||
auto calls = value.total.get_count();
|
||||
auto avg_ns = value.total.get_mean();
|
||||
auto percent_v = value.total.get_percent(_total_duration);
|
||||
|
||||
auto _row = std::stringstream{};
|
||||
csv_encoder_t::write_row<stats_formatter>(_row,
|
||||
name,
|
||||
calls,
|
||||
duration_ns,
|
||||
avg_ns,
|
||||
percentage{percent_v},
|
||||
value.total.get_min(),
|
||||
value.total.get_max(),
|
||||
value.total.get_stddev());
|
||||
ofs << _row.str() << std::flush;
|
||||
}
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,95 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "generateStats.hpp"
|
||||
#include "generator.hpp"
|
||||
#include "output_config.hpp"
|
||||
#include "statistics.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
#include <rocprofiler-sdk/buffer_tracing.h>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
std::vector<agent_info>& data);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_counter_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const domain_stats_vec_t& data);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,224 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "generateJSON.hpp"
|
||||
#include "output_stream.hpp"
|
||||
#include "statistics.hpp"
|
||||
#include "timestamps.hpp"
|
||||
|
||||
#include "lib/common/string_entry.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/marker/api_id.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
json_output::json_output(const output_config& cfg,
|
||||
std::string_view filename,
|
||||
JSONOutputArchive::Options _opts)
|
||||
: stream{get_output_stream(cfg, filename, ".json")}
|
||||
, archive{new JSONOutputArchive{*stream.stream, _opts}}
|
||||
{
|
||||
archive->setNextName("rocprofiler-sdk-tool");
|
||||
archive->startNode();
|
||||
archive->makeArray();
|
||||
}
|
||||
|
||||
// Destruction finalizes the output via close(); close() does nothing when the
// archive has already been released.
json_output::~json_output()
{
    close();
}
|
||||
|
||||
void
|
||||
json_output::close()
|
||||
{
|
||||
if(archive && stream)
|
||||
{
|
||||
archive->finishNode();
|
||||
archive.reset();
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
|
||||
// Creates the JSON output named "results" using archive options of precision 16,
// space as the indent character, and an indent length of 0.
json_output
open_json(const output_config& cfg)
{
    using options_t = JSONOutputArchive::Options;

    return json_output{
        cfg, std::string_view{"results"}, options_t{16, options_t::IndentChar::space, 0}};
}
|
||||
|
||||
void
|
||||
json_output::start_process()
|
||||
{
|
||||
startNode();
|
||||
}
|
||||
|
||||
void
|
||||
json_output::finish_process()
|
||||
{
|
||||
finishNode();
|
||||
}
|
||||
|
||||
void
|
||||
close_json(json_output& json_ar)
|
||||
{
|
||||
json_ar.close();
|
||||
}
|
||||
|
||||
void
|
||||
write_json(json_output& json_ar,
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
uint64_t pid)
|
||||
{
|
||||
// metadata
|
||||
{
|
||||
auto timestamps =
|
||||
timestamps_t{tool_metadata.process_start_ns, tool_metadata.process_end_ns};
|
||||
|
||||
json_ar.setNextName("metadata");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("pid", pid));
|
||||
json_ar(cereal::make_nvp("init_time", timestamps.app_start_time));
|
||||
json_ar(cereal::make_nvp("fini_time", timestamps.app_end_time));
|
||||
json_ar(cereal::make_nvp("config", cfg));
|
||||
json_ar(cereal::make_nvp("command", common::read_command_line(pid)));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
json_ar(cereal::make_nvp("agents", tool_metadata.agents));
|
||||
json_ar(cereal::make_nvp("counters", tool_metadata.get_counter_info()));
|
||||
|
||||
{
|
||||
auto callback_name_info = tool_metadata.callback_names;
|
||||
auto buffer_name_info = tool_metadata.buffer_names;
|
||||
auto counter_dims = tool_metadata.get_counter_dimension_info();
|
||||
auto marker_msg_data = tool_metadata.marker_messages.get();
|
||||
|
||||
json_ar.setNextName("strings");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("callback_records", callback_name_info));
|
||||
json_ar(cereal::make_nvp("buffer_records", buffer_name_info));
|
||||
json_ar(cereal::make_nvp("marker_api", marker_msg_data));
|
||||
|
||||
{
|
||||
auto _extern_corr_id_strings = std::map<size_t, std::string>{};
|
||||
if(cfg.kernel_rename)
|
||||
{
|
||||
for(auto itr : tool_metadata.external_corr_ids.get())
|
||||
{
|
||||
if(itr > 0)
|
||||
{
|
||||
const auto* _str = tool_metadata.get_string_entry(itr);
|
||||
if(_str) _extern_corr_id_strings.emplace(itr, *_str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json_ar.setNextName("correlation_id");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("external", _extern_corr_id_strings));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
{
|
||||
json_ar.setNextName("counters");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("dimension_ids", counter_dims));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
{
|
||||
auto kern_sym_data = tool_metadata.get_kernel_symbols();
|
||||
auto code_obj_data = tool_metadata.get_code_objects();
|
||||
|
||||
json_ar(cereal::make_nvp("code_objects", code_obj_data));
|
||||
json_ar(cereal::make_nvp("kernel_symbols", kern_sym_data));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
write_json(json_output& json_ar,
|
||||
const output_config& /*cfg*/,
|
||||
const metadata& /*tool_metadata*/,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t> kernel_dispatch_gen,
|
||||
generator<rocprofiler_buffer_tracing_memory_copy_record_t> memory_copy_gen,
|
||||
generator<tool_counter_record_t> counter_collection_gen,
|
||||
generator<rocprofiler_buffer_tracing_marker_api_record_t> marker_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
|
||||
generator<rocprofiler_buffer_tracing_rccl_api_record_t> rccl_api_gen)
|
||||
|
||||
{
|
||||
// summary
|
||||
{
|
||||
json_ar.setNextName("summary");
|
||||
json_ar.startNode();
|
||||
json_ar.makeArray();
|
||||
|
||||
for(const auto& itr : domain_stats)
|
||||
{
|
||||
auto _name = get_domain_column_name(itr.first);
|
||||
json_ar.startNode();
|
||||
|
||||
json_ar(cereal::make_nvp("domain", std::string{_name}));
|
||||
json_ar(cereal::make_nvp("stats", itr.second));
|
||||
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
{
|
||||
json_ar.setNextName("callback_records");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("counter_collection", counter_collection_gen));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
{
|
||||
json_ar.setNextName("buffer_records");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("kernel_dispatch", kernel_dispatch_gen));
|
||||
json_ar(cereal::make_nvp("hip_api", hip_api_gen));
|
||||
json_ar(cereal::make_nvp("hsa_api", hsa_api_gen));
|
||||
json_ar(cereal::make_nvp("marker_api", marker_api_gen));
|
||||
json_ar(cereal::make_nvp("rccl_api", rccl_api_gen));
|
||||
json_ar(cereal::make_nvp("memory_copy", memory_copy_gen));
|
||||
json_ar(cereal::make_nvp("scratch_memory", scratch_memory_gen));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,97 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "agent_info.hpp"
|
||||
#include "buffered_output.hpp"
|
||||
#include "metadata.hpp"
|
||||
#include "output_config.hpp"
|
||||
#include "output_stream.hpp"
|
||||
#include "statistics.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
using JSONOutputArchive = ::cereal::MinimalJSONOutputArchive;
|
||||
|
||||
struct json_output
|
||||
{
|
||||
json_output(const output_config& cfg,
|
||||
std::string_view filename,
|
||||
JSONOutputArchive::Options _opts);
|
||||
~json_output();
|
||||
|
||||
json_output(const json_output&) = delete;
|
||||
json_output(json_output&&) noexcept = default;
|
||||
json_output& operator=(const json_output&) = delete;
|
||||
json_output& operator=(json_output&&) noexcept = default;
|
||||
|
||||
template <typename... Args>
|
||||
decltype(auto) operator()(Args&&... args)
|
||||
{
|
||||
return (*archive)(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
decltype(auto) startNode() { return archive->startNode(); }
|
||||
decltype(auto) finishNode() { return archive->finishNode(); }
|
||||
decltype(auto) makeArray() { return archive->makeArray(); }
|
||||
decltype(auto) setNextName(const char* name) { archive->setNextName(name); }
|
||||
|
||||
void start_process();
|
||||
void finish_process();
|
||||
|
||||
void close();
|
||||
|
||||
private:
|
||||
output_stream stream = {};
|
||||
std::unique_ptr<JSONOutputArchive> archive = {};
|
||||
};
|
||||
|
||||
json_output
|
||||
open_json(const output_config& cfg);
|
||||
|
||||
void
|
||||
close_json(json_output& ar);
|
||||
|
||||
void
|
||||
write_json(json_output&, const output_config& cfg, const metadata& tool_metadata, uint64_t pid);
|
||||
|
||||
void
|
||||
write_json(json_output& json_ar,
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t> kernel_dispatch_gen,
|
||||
generator<rocprofiler_buffer_tracing_memory_copy_record_t> memory_copy_gen,
|
||||
generator<tool_counter_record_t> counter_collection_gen,
|
||||
generator<rocprofiler_buffer_tracing_marker_api_record_t> marker_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
|
||||
generator<rocprofiler_buffer_tracing_rccl_api_record_t> rccl_api_gen);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
+18
-20
@@ -21,9 +21,8 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "generateOTF2.hpp"
|
||||
#include "config.hpp"
|
||||
#include "helper.hpp"
|
||||
#include "output_file.hpp"
|
||||
#include "output_stream.hpp"
|
||||
#include "timestamps.hpp"
|
||||
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/mpl.hpp"
|
||||
@@ -273,11 +272,11 @@ add_event(std::string_view name,
|
||||
}
|
||||
|
||||
void
|
||||
setup()
|
||||
setup(const output_config& cfg)
|
||||
{
|
||||
namespace fs = common::filesystem;
|
||||
|
||||
auto _filename = get_output_filename("results", std::string_view{});
|
||||
auto _filename = get_output_filename(cfg, "results", std::string_view{});
|
||||
auto _filepath = fs::path{_filename};
|
||||
auto _name = _filepath.filename().string();
|
||||
auto _path = _filepath.parent_path().string();
|
||||
@@ -344,9 +343,10 @@ create_attribute_list()
|
||||
} // namespace
|
||||
|
||||
void
|
||||
write_otf2(tool_table* tool_functions,
|
||||
write_otf2(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
uint64_t pid,
|
||||
const std::vector<rocprofiler_agent_v0_t>& agent_data,
|
||||
const std::vector<agent_info>& agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>* kernel_dispatch_data,
|
||||
@@ -357,16 +357,14 @@ write_otf2(tool_table* tool
|
||||
{
|
||||
namespace sdk = ::rocprofiler::sdk;
|
||||
|
||||
setup();
|
||||
setup(cfg);
|
||||
|
||||
auto _app_ts = *tool_functions->tool_get_app_timestamps_fn();
|
||||
auto agents_map = std::unordered_map<rocprofiler_agent_id_t, rocprofiler_agent_t>{};
|
||||
for(auto itr : agent_data)
|
||||
agents_map.emplace(itr.id, itr);
|
||||
auto _app_ts = timestamps_t{tool_metadata.process_start_ns, tool_metadata.process_end_ns};
|
||||
auto agents_map = tool_metadata.agents_map;
|
||||
|
||||
const auto kernel_sym_data = get_kernel_symbol_data();
|
||||
const auto buffer_names = sdk::get_buffer_tracing_names();
|
||||
auto tids = std::set<rocprofiler_thread_id_t>{};
|
||||
const auto kernel_sym_data = tool_metadata.get_kernel_symbols();
|
||||
const auto& buffer_names = tool_metadata.buffer_names;
|
||||
auto tids = std::set<rocprofiler_thread_id_t>{};
|
||||
auto agent_thread_ids = std::map<rocprofiler_thread_id_t, std::set<rocprofiler_agent_id_t>>{};
|
||||
auto agent_queue_ids =
|
||||
std::map<rocprofiler_thread_id_t,
|
||||
@@ -387,7 +385,7 @@ write_otf2(tool_table* tool
|
||||
|
||||
auto _get_kernel_sym_data =
|
||||
[&kernel_sym_data](
|
||||
const rocprofiler_kernel_dispatch_info_t& _info) -> const kernel_symbol_data* {
|
||||
const rocprofiler_kernel_dispatch_info_t& _info) -> const kernel_symbol_info* {
|
||||
for(const auto& kitr : kernel_sym_data)
|
||||
if(kitr.kernel_id == _info.kernel_id) return &kitr;
|
||||
return CHECK_NOTNULL(nullptr);
|
||||
@@ -514,7 +512,7 @@ write_otf2(tool_table* tool
|
||||
auto add_event_data = [&buffer_names,
|
||||
&_hash_data,
|
||||
&_data,
|
||||
&tool_functions,
|
||||
&tool_metadata,
|
||||
&thread_event_info,
|
||||
&get_attr](const auto* _inp, auto _attrib) {
|
||||
if(!_inp) return;
|
||||
@@ -533,7 +531,7 @@ write_otf2(tool_table* tool
|
||||
paradigm = OTF2_PARADIGM_USER;
|
||||
if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
|
||||
itr.operation != ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId)
|
||||
name = tool_functions->tool_get_roctx_msg_fn(itr.correlation_id.internal);
|
||||
name = tool_metadata.get_marker_message(itr.correlation_id.internal);
|
||||
}
|
||||
|
||||
_hash_data.emplace(
|
||||
@@ -591,8 +589,8 @@ write_otf2(tool_table* tool
|
||||
const auto* sym = _get_kernel_sym_data(info);
|
||||
CHECK(sym != nullptr);
|
||||
|
||||
auto name = tool_functions->tool_get_kernel_name_fn(info.kernel_id,
|
||||
itr.correlation_id.external.value);
|
||||
auto name =
|
||||
tool_metadata.get_kernel_name(info.kernel_id, itr.correlation_id.external.value);
|
||||
_hash_data.emplace(
|
||||
get_hash_id(name),
|
||||
region_info{std::string{name}, OTF2_REGION_ROLE_FUNCTION, OTF2_PARADIGM_HIP});
|
||||
+7
-3
@@ -22,8 +22,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "helper.hpp"
|
||||
#include "agent_info.hpp"
|
||||
#include "metadata.hpp"
|
||||
#include "output_config.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
|
||||
namespace rocprofiler
|
||||
@@ -31,9 +34,10 @@ namespace rocprofiler
|
||||
namespace tool
|
||||
{
|
||||
void
|
||||
write_otf2(tool_table* tool_functions,
|
||||
write_otf2(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
uint64_t pid,
|
||||
const std::vector<rocprofiler_agent_v0_t>& agent_data,
|
||||
const std::vector<agent_info>& agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>* kernel_dispatch_data,
|
||||
@@ -0,0 +1,606 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "generatePerfetto.hpp"
|
||||
#include "output_stream.hpp"
|
||||
#include "timestamps.hpp"
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/marker/api_id.h>
|
||||
#include <rocprofiler-sdk/cxx/hash.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
#include <rocprofiler-sdk/cxx/perfetto.hpp>
|
||||
|
||||
#include <atomic>
|
||||
#include <future>
|
||||
#include <map>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
namespace
|
||||
{
|
||||
auto main_tid = common::get_tid();
|
||||
|
||||
// Returns a hash for `_val`:
//   - non-pointer values are hashed with std::hash of their decayed type
//   - `const char*` (including string literals) is hashed as a std::string_view,
//     i.e. by the characters it points to
//   - any other pointer is dereferenced and the pointee is hashed
//
// `Tp` is a forwarding reference, so for an lvalue argument it deduces to a
// reference type (e.g. `std::string&`). The traits and std::hash must therefore
// be applied to the decayed type; `std::hash<T&>` is not a usable specialization
// and `std::is_pointer<T*&>` is false.
template <typename Tp>
size_t
get_hash_id(Tp&& _val)
{
    using value_type = std::decay_t<Tp>;

    if constexpr(!std::is_pointer<value_type>::value)
        return std::hash<value_type>{}(std::forward<Tp>(_val));
    else if constexpr(std::is_same<value_type, const char*>::value)
        return get_hash_id(std::string_view{_val});
    else
        return get_hash_id(*_val);  // *_val is an lvalue; handled via the decay above
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
write_perfetto(
|
||||
const output_config& ocfg,
|
||||
const metadata& tool_metadata,
|
||||
std::vector<agent_info> agent_data,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& kernel_dispatch_gen,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& memory_copy_gen,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& /*scratch_memory_gen*/,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen)
|
||||
{
|
||||
namespace sdk = ::rocprofiler::sdk;
|
||||
|
||||
// auto root_process_track = ::perfetto::Track{};
|
||||
// uint64_t process_uuid = tool_metadata.process_start_ns ^ tool_metadata.process_id;
|
||||
// auto process_track = ::perfetto::Track{process_uuid, root_process_track};
|
||||
// auto process_track = ::perfetto::ProcessTrack::Current();
|
||||
|
||||
auto agents_map = std::unordered_map<rocprofiler_agent_id_t, rocprofiler_agent_t>{};
|
||||
for(auto itr : agent_data)
|
||||
agents_map.emplace(itr.id, itr);
|
||||
|
||||
auto args = ::perfetto::TracingInitArgs{};
|
||||
auto track_event_cfg = ::perfetto::protos::gen::TrackEventConfig{};
|
||||
auto cfg = ::perfetto::TraceConfig{};
|
||||
|
||||
// environment settings
|
||||
auto shmem_size_hint = ocfg.perfetto_shmem_size_hint;
|
||||
auto buffer_size_kb = ocfg.perfetto_buffer_size;
|
||||
|
||||
auto* buffer_config = cfg.add_buffers();
|
||||
buffer_config->set_size_kb(buffer_size_kb);
|
||||
|
||||
if(ocfg.perfetto_buffer_fill_policy == "discard" || ocfg.perfetto_buffer_fill_policy.empty())
|
||||
buffer_config->set_fill_policy(
|
||||
::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD);
|
||||
else if(ocfg.perfetto_buffer_fill_policy == "ring_buffer")
|
||||
buffer_config->set_fill_policy(
|
||||
::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_RING_BUFFER);
|
||||
else
|
||||
ROCP_FATAL << "Unsupport perfetto buffer fill policy: '" << ocfg.perfetto_buffer_fill_policy
|
||||
<< "'. Supported: discard, ring_buffer";
|
||||
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("track_event"); // this MUST be track_event
|
||||
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
|
||||
args.shmem_size_hint_kb = shmem_size_hint;
|
||||
|
||||
if(ocfg.perfetto_backend == "inprocess" || ocfg.perfetto_backend.empty())
|
||||
args.backends |= ::perfetto::kInProcessBackend;
|
||||
else if(ocfg.perfetto_backend == "system")
|
||||
args.backends |= ::perfetto::kSystemBackend;
|
||||
else
|
||||
ROCP_FATAL << "Unsupport perfetto backend: '" << ocfg.perfetto_backend
|
||||
<< "'. Supported: inprocess, system";
|
||||
|
||||
::perfetto::Tracing::Initialize(args);
|
||||
::perfetto::TrackEvent::Register();
|
||||
|
||||
auto tracing_session = ::perfetto::Tracing::NewTrace();
|
||||
|
||||
tracing_session->Setup(cfg);
|
||||
tracing_session->StartBlocking();
|
||||
|
||||
auto tids = std::set<rocprofiler_thread_id_t>{};
|
||||
auto demangled = std::unordered_map<std::string_view, std::string>{};
|
||||
auto agent_thread_ids = std::unordered_map<rocprofiler_agent_id_t, std::set<uint64_t>>{};
|
||||
auto agent_queue_ids =
|
||||
std::unordered_map<rocprofiler_agent_id_t, std::unordered_set<rocprofiler_queue_id_t>>{};
|
||||
auto thread_indexes = std::unordered_map<rocprofiler_thread_id_t, uint64_t>{};
|
||||
|
||||
auto thread_tracks = std::unordered_map<rocprofiler_thread_id_t, ::perfetto::Track>{};
|
||||
auto agent_thread_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::unordered_map<uint64_t, ::perfetto::Track>>{};
|
||||
auto agent_queue_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::unordered_map<rocprofiler_queue_id_t, ::perfetto::Track>>{};
|
||||
|
||||
auto _get_agent = [&agent_data](rocprofiler_agent_id_t _id) -> const rocprofiler_agent_t* {
|
||||
for(const auto& itr : agent_data)
|
||||
{
|
||||
if(_id == itr.id) return &itr;
|
||||
}
|
||||
return CHECK_NOTNULL(nullptr);
|
||||
};
|
||||
|
||||
{
|
||||
for(auto ditr : hsa_api_gen)
|
||||
for(auto itr : hsa_api_gen.get(ditr))
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto ditr : hip_api_gen)
|
||||
for(auto itr : hip_api_gen.get(ditr))
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto ditr : marker_api_gen)
|
||||
for(auto itr : marker_api_gen.get(ditr))
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto ditr : rccl_api_gen)
|
||||
for(auto itr : rccl_api_gen.get(ditr))
|
||||
tids.emplace(itr.thread_id);
|
||||
|
||||
for(auto ditr : memory_copy_gen)
|
||||
for(auto itr : memory_copy_gen.get(ditr))
|
||||
{
|
||||
tids.emplace(itr.thread_id);
|
||||
agent_thread_ids[itr.dst_agent_id].emplace(itr.thread_id);
|
||||
}
|
||||
|
||||
for(auto ditr : kernel_dispatch_gen)
|
||||
for(auto itr : kernel_dispatch_gen.get(ditr))
|
||||
{
|
||||
tids.emplace(itr.thread_id);
|
||||
agent_queue_ids[itr.dispatch_info.agent_id].emplace(itr.dispatch_info.queue_id);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t nthrn = 0;
|
||||
for(auto itr : tids)
|
||||
{
|
||||
if(itr == main_tid)
|
||||
{
|
||||
thread_indexes.emplace(main_tid, 0);
|
||||
thread_tracks.emplace(main_tid, ::perfetto::ThreadTrack::Current());
|
||||
}
|
||||
else
|
||||
{
|
||||
auto _idx = ++nthrn;
|
||||
thread_indexes.emplace(itr, _idx);
|
||||
auto _track = ::perfetto::Track{itr};
|
||||
auto _desc = _track.Serialize();
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "THREAD " << _idx << " (" << itr << ")";
|
||||
_desc.set_name(_namess.str());
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
thread_tracks.emplace(itr, _track);
|
||||
}
|
||||
}
|
||||
|
||||
for(const auto& itr : agent_thread_ids)
|
||||
{
|
||||
const auto* _agent = _get_agent(itr.first);
|
||||
|
||||
for(auto titr : itr.second)
|
||||
{
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "COPY to AGENT [" << _agent->logical_node_id << "] THREAD ["
|
||||
<< thread_indexes.at(titr) << "] ";
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << "(CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << "(GPU)";
|
||||
else
|
||||
_namess << "(UNK)";
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
_desc.set_name(_namess.str());
|
||||
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
agent_thread_tracks[itr.first].emplace(titr, _track);
|
||||
}
|
||||
}
|
||||
|
||||
for(const auto& aitr : agent_queue_ids)
|
||||
{
|
||||
uint32_t nqueue = 0;
|
||||
for(auto qitr : aitr.second)
|
||||
{
|
||||
const auto* _agent = _get_agent(aitr.first);
|
||||
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "COMPUTE AGENT [" << _agent->logical_node_id << "] QUEUE [" << nqueue++
|
||||
<< "] ";
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << "(CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << "(GPU)";
|
||||
else
|
||||
_namess << "(UNK)";
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
_desc.set_name(_namess.str());
|
||||
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
agent_queue_tracks[aitr.first].emplace(qitr, _track);
|
||||
}
|
||||
}
|
||||
|
||||
// trace events
|
||||
{
|
||||
auto buffer_names = sdk::get_buffer_tracing_names();
|
||||
auto callbk_name_info = sdk::get_callback_tracing_names();
|
||||
|
||||
for(auto ditr : hsa_api_gen)
|
||||
for(auto itr : hsa_api_gen.get(ditr))
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::hsa_api>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal);
|
||||
TRACE_EVENT_END(
|
||||
sdk::perfetto_category<sdk::category::hsa_api>::name, track, itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto ditr : hip_api_gen)
|
||||
for(auto itr : hip_api_gen.get(ditr))
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::hip_api>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal);
|
||||
TRACE_EVENT_END(
|
||||
sdk::perfetto_category<sdk::category::hip_api>::name, track, itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto ditr : marker_api_gen)
|
||||
for(auto itr : marker_api_gen.get(ditr))
|
||||
{
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
auto name = (itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
|
||||
itr.operation != ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId)
|
||||
? tool_metadata.get_marker_message(itr.correlation_id.internal)
|
||||
: buffer_names.at(itr.kind, itr.operation);
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::marker_api>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal);
|
||||
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::marker_api>::name,
|
||||
track,
|
||||
itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto ditr : rccl_api_gen)
|
||||
for(auto itr : rccl_api_gen.get(ditr))
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::rccl_api>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal);
|
||||
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::rccl_api>::name,
|
||||
track,
|
||||
itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto ditr : memory_copy_gen)
|
||||
for(auto itr : memory_copy_gen.get(ditr))
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = agent_thread_tracks.at(itr.dst_agent_id).at(itr.thread_id);
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::memory_copy>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"src_agent",
|
||||
agents_map.at(itr.src_agent_id).logical_node_id,
|
||||
"dst_agent",
|
||||
agents_map.at(itr.dst_agent_id).logical_node_id,
|
||||
"copy_bytes",
|
||||
itr.bytes,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal,
|
||||
"tid",
|
||||
itr.thread_id);
|
||||
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::memory_copy>::name,
|
||||
track,
|
||||
itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto ditr : kernel_dispatch_gen)
|
||||
for(auto itr : kernel_dispatch_gen.get(ditr))
|
||||
{
|
||||
const auto& info = itr.dispatch_info;
|
||||
const kernel_symbol_info* sym = tool_metadata.get_kernel_symbol(info.kernel_id);
|
||||
|
||||
CHECK(sym != nullptr);
|
||||
|
||||
auto name = std::string_view{sym->kernel_name};
|
||||
auto& track = agent_queue_tracks.at(info.agent_id).at(info.queue_id);
|
||||
|
||||
if(demangled.find(name) == demangled.end())
|
||||
{
|
||||
demangled.emplace(name, common::cxx_demangle(name));
|
||||
}
|
||||
|
||||
TRACE_EVENT_BEGIN(
|
||||
sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
|
||||
::perfetto::StaticString(demangled.at(name).c_str()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"kind",
|
||||
itr.kind,
|
||||
"agent",
|
||||
agents_map.at(info.agent_id).logical_node_id,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal,
|
||||
"queue",
|
||||
info.queue_id.handle,
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kernel_id",
|
||||
info.kernel_id,
|
||||
"private_segment_size",
|
||||
info.private_segment_size,
|
||||
"group_segment_size",
|
||||
info.group_segment_size,
|
||||
"workgroup_size",
|
||||
info.workgroup_size.x * info.workgroup_size.y * info.workgroup_size.z,
|
||||
"grid_size",
|
||||
info.grid_size.x * info.grid_size.y * info.grid_size.z);
|
||||
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
|
||||
track,
|
||||
itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
}
|
||||
|
||||
// counter tracks
|
||||
{
|
||||
// memory copy counter track
|
||||
auto mem_cpy_endpoints = std::map<rocprofiler_agent_id_t, std::map<uint64_t, uint64_t>>{};
|
||||
auto mem_cpy_extremes = std::pair<uint64_t, uint64_t>{};
|
||||
for(auto ditr : memory_copy_gen)
|
||||
for(auto itr : memory_copy_gen.get(ditr))
|
||||
{
|
||||
uint64_t _mean_timestamp =
|
||||
itr.start_timestamp + (0.5 * (itr.end_timestamp - itr.start_timestamp));
|
||||
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp - 1000, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(_mean_timestamp, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp + 1000, 0);
|
||||
|
||||
mem_cpy_extremes =
|
||||
std::make_pair(std::min(mem_cpy_extremes.first, itr.start_timestamp),
|
||||
std::max(mem_cpy_extremes.second, itr.end_timestamp));
|
||||
}
|
||||
|
||||
for(auto ditr : memory_copy_gen)
|
||||
for(auto itr : memory_copy_gen.get(ditr))
|
||||
{
|
||||
auto mbeg = mem_cpy_endpoints.at(itr.dst_agent_id).lower_bound(itr.start_timestamp);
|
||||
auto mend = mem_cpy_endpoints.at(itr.dst_agent_id).upper_bound(itr.end_timestamp);
|
||||
|
||||
LOG_IF(FATAL, mbeg == mend)
|
||||
<< "Missing range for timestamp [" << itr.start_timestamp << ", "
|
||||
<< itr.end_timestamp << "]";
|
||||
|
||||
for(auto mitr = mbeg; mitr != mend; ++mitr)
|
||||
mitr->second += itr.bytes;
|
||||
}
|
||||
|
||||
constexpr auto bytes_multiplier = 1024;
|
||||
|
||||
auto mem_cpy_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t, ::perfetto::CounterTrack>{};
|
||||
auto mem_cpy_cnt_names = std::vector<std::string>{};
|
||||
mem_cpy_cnt_names.reserve(mem_cpy_endpoints.size());
|
||||
for(auto& mitr : mem_cpy_endpoints)
|
||||
{
|
||||
mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.first - 5000, 0);
|
||||
mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.second + 5000, 0);
|
||||
|
||||
auto _track_name = std::stringstream{};
|
||||
const auto* _agent = _get_agent(mitr.first);
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (GPU)";
|
||||
|
||||
constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES;
|
||||
auto& _name = mem_cpy_cnt_names.emplace_back(_track_name.str());
|
||||
mem_cpy_tracks.emplace(mitr.first,
|
||||
::perfetto::CounterTrack{_name.c_str()}
|
||||
.set_unit(_unit)
|
||||
.set_unit_multiplier(bytes_multiplier)
|
||||
.set_is_incremental(false));
|
||||
}
|
||||
|
||||
for(auto& mitr : mem_cpy_endpoints)
|
||||
{
|
||||
for(auto itr : mitr.second)
|
||||
{
|
||||
TRACE_COUNTER(sdk::perfetto_category<sdk::category::memory_copy>::name,
|
||||
mem_cpy_tracks.at(mitr.first),
|
||||
itr.first,
|
||||
itr.second / bytes_multiplier);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
::perfetto::TrackEvent::Flush();
|
||||
tracing_session->FlushBlocking();
|
||||
tracing_session->StopBlocking();
|
||||
|
||||
auto filename = std::string{"results"};
|
||||
auto ofs = get_output_stream(ocfg, filename, ".pftrace");
|
||||
|
||||
auto amount_read = std::atomic<size_t>{0};
|
||||
auto is_done = std::promise<void>{};
|
||||
auto _mtx = std::mutex{};
|
||||
auto _reader = [&ofs, &_mtx, &is_done, &amount_read](
|
||||
::perfetto::TracingSession::ReadTraceCallbackArgs _args) {
|
||||
auto _lk = std::unique_lock<std::mutex>{_mtx};
|
||||
if(_args.data && _args.size > 0)
|
||||
{
|
||||
ROCP_TRACE << "Writing " << _args.size << " B to trace...";
|
||||
// Write the trace data into file
|
||||
ofs.stream->write(_args.data, _args.size);
|
||||
amount_read += _args.size;
|
||||
}
|
||||
ROCP_INFO_IF(!_args.has_more && amount_read > 0)
|
||||
<< "Wrote " << amount_read << " B to perfetto trace file";
|
||||
if(!_args.has_more) is_done.set_value();
|
||||
};
|
||||
|
||||
for(size_t i = 0; i < 2; ++i)
|
||||
{
|
||||
ROCP_TRACE << "Reading trace...";
|
||||
amount_read = 0;
|
||||
is_done = std::promise<void>{};
|
||||
tracing_session->ReadTrace(_reader);
|
||||
is_done.get_future().wait();
|
||||
}
|
||||
|
||||
ROCP_TRACE << "Destroying tracing session...";
|
||||
tracing_session.reset();
|
||||
|
||||
ROCP_TRACE << "Flushing trace output stream...";
|
||||
(*ofs.stream) << std::flush;
|
||||
|
||||
ROCP_TRACE << "Destroying trace output stream...";
|
||||
ofs.close();
|
||||
}
|
||||
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
|
||||
@@ -0,0 +1,50 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "agent_info.hpp"
|
||||
#include "generator.hpp"
|
||||
#include "metadata.hpp"
|
||||
#include "output_config.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
void
|
||||
write_perfetto(
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
std::vector<agent_info> agent_data,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& kernel_dispatch_gen,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& memory_copy_gen,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& scratch_memory_gen,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
+99
-68
@@ -21,11 +21,10 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "generateStats.hpp"
|
||||
#include "config.hpp"
|
||||
#include "domain_type.hpp"
|
||||
#include "helper.hpp"
|
||||
#include "output_file.hpp"
|
||||
#include "output_stream.hpp"
|
||||
#include "statistics.hpp"
|
||||
#include "timestamps.hpp"
|
||||
|
||||
#include "lib/common/logging.hpp"
|
||||
|
||||
@@ -63,120 +62,149 @@ get_stats(const stats_map_t& data_v)
|
||||
} // namespace
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data)
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data)
|
||||
{
|
||||
auto kernel_stats = stats_map_t{};
|
||||
for(const auto& record : data)
|
||||
for(auto ditr : data)
|
||||
{
|
||||
auto kernel_name = tool_functions->tool_get_kernel_name_fn(
|
||||
record.dispatch_info.kernel_id, record.correlation_id.external.value);
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto kernel_name = tool_metadata.get_kernel_name(record.dispatch_info.kernel_id,
|
||||
record.correlation_id.external.value);
|
||||
|
||||
kernel_stats[kernel_name] += (record.end_timestamp - record.start_timestamp);
|
||||
kernel_stats[kernel_name] += (record.end_timestamp - record.start_timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
return get_stats(kernel_stats);
|
||||
}
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_hip_api_record_t>& data)
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& data)
|
||||
{
|
||||
auto hip_stats = stats_map_t{};
|
||||
for(const auto& record : data)
|
||||
for(auto ditr : data)
|
||||
{
|
||||
auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
hip_stats[api_name] += (record.end_timestamp - record.start_timestamp);
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
hip_stats[api_name] += (record.end_timestamp - record.start_timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
return get_stats(hip_stats);
|
||||
}
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>& data)
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& data)
|
||||
{
|
||||
auto hsa_stats = stats_map_t{};
|
||||
for(const auto& record : data)
|
||||
for(auto ditr : data)
|
||||
{
|
||||
auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
hsa_stats[api_name] += (record.end_timestamp - record.start_timestamp);
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
hsa_stats[api_name] += (record.end_timestamp - record.start_timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
return get_stats(hsa_stats);
|
||||
}
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>& data)
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data)
|
||||
{
|
||||
auto memory_copy_stats = stats_map_t{};
|
||||
for(const auto& record : data)
|
||||
for(auto ditr : data)
|
||||
{
|
||||
auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
memory_copy_stats[api_name] += (record.end_timestamp - record.start_timestamp);
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
memory_copy_stats[api_name] += (record.end_timestamp - record.start_timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
return get_stats(memory_copy_stats);
|
||||
}
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_marker_api_record_t>& data)
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& data)
|
||||
{
|
||||
auto marker_stats = stats_map_t{};
|
||||
for(const auto& record : data)
|
||||
for(auto ditr : data)
|
||||
{
|
||||
auto _name = std::string_view{};
|
||||
|
||||
if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
|
||||
(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA ||
|
||||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA ||
|
||||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA))
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
_name = tool_functions->tool_get_roctx_msg_fn(record.correlation_id.internal);
|
||||
}
|
||||
else
|
||||
{
|
||||
_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
}
|
||||
auto _name = std::string_view{};
|
||||
|
||||
marker_stats[_name] += (record.end_timestamp - record.start_timestamp);
|
||||
if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
|
||||
(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA ||
|
||||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA ||
|
||||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA))
|
||||
{
|
||||
_name = tool_metadata.get_marker_message(record.correlation_id.internal);
|
||||
}
|
||||
else
|
||||
{
|
||||
_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
}
|
||||
|
||||
marker_stats[_name] += (record.end_timestamp - record.start_timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
return get_stats(marker_stats);
|
||||
}
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* /*tool_functions*/,
|
||||
const std::deque<rocprofiler_tool_counter_collection_record_t>& /*data*/)
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& /*tool_metadata*/,
|
||||
const generator<tool_counter_record_t>& /*data*/)
|
||||
{
|
||||
return stats_entry_t{};
|
||||
}
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>& data)
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& data)
|
||||
{
|
||||
auto scratch_memory_stats = stats_map_t{};
|
||||
for(const auto& record : data)
|
||||
for(auto ditr : data)
|
||||
{
|
||||
auto op_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
scratch_memory_stats[op_name] += (record.end_timestamp - record.start_timestamp);
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto op_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
scratch_memory_stats[op_name] += (record.end_timestamp - record.start_timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
return get_stats(scratch_memory_stats);
|
||||
}
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>& data)
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& data)
|
||||
{
|
||||
auto rccl_stats = stats_map_t{};
|
||||
for(const auto& record : data)
|
||||
for(auto ditr : data)
|
||||
{
|
||||
auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
rccl_stats[api_name] += (record.end_timestamp - record.start_timestamp);
|
||||
for(auto record : data.get(ditr))
|
||||
{
|
||||
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
|
||||
rccl_stats[api_name] += (record.end_timestamp - record.start_timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
return get_stats(rccl_stats);
|
||||
@@ -185,7 +213,8 @@ generate_stats(tool_table* t
|
||||
namespace
|
||||
{
|
||||
void
|
||||
generate_stats(output_stream_t& os,
|
||||
generate_stats(const output_config& cfg,
|
||||
output_stream& os,
|
||||
std::string_view label,
|
||||
const domain_stats_vec_t& data_v,
|
||||
std::string_view indent_v)
|
||||
@@ -237,11 +266,11 @@ generate_stats(output_stream_t& os,
|
||||
"DOMAIN",
|
||||
domain_width,
|
||||
"CALLS",
|
||||
fmt::format("DURATION ({})", tool::get_config().stats_summary_unit),
|
||||
fmt::format("AVERAGE ({})", tool::get_config().stats_summary_unit),
|
||||
fmt::format("DURATION ({})", cfg.stats_summary_unit),
|
||||
fmt::format("AVERAGE ({})", cfg.stats_summary_unit),
|
||||
"PERCENT (INC)",
|
||||
fmt::format("MIN ({})", tool::get_config().stats_summary_unit),
|
||||
fmt::format("MAX ({})", tool::get_config().stats_summary_unit),
|
||||
fmt::format("MIN ({})", cfg.stats_summary_unit),
|
||||
fmt::format("MAX ({})", cfg.stats_summary_unit),
|
||||
"STDDEV");
|
||||
(*os.stream) << indent_v << _header << "\n" << std::flush;
|
||||
|
||||
@@ -265,9 +294,9 @@ generate_stats(output_stream_t& os,
|
||||
|
||||
auto _row = std::string{};
|
||||
|
||||
if(tool::get_config().stats_summary_unit_value > 1)
|
||||
if(cfg.stats_summary_unit_value > 1)
|
||||
{
|
||||
auto _unit_div = static_cast<double>(tool::get_config().stats_summary_unit_value);
|
||||
auto _unit_div = static_cast<double>(cfg.stats_summary_unit_value);
|
||||
_row = fmt::format("{}| {:<{}} | {:<{}} | {:15} | {:15} | {:15.3e} | {:>13} | {:15} | "
|
||||
"{:15} | {:15.3e} |",
|
||||
indent_v,
|
||||
@@ -309,7 +338,9 @@ generate_stats(output_stream_t& os,
|
||||
} // namespace
|
||||
|
||||
void
|
||||
generate_stats(tool_table* /*tool_functions*/, const domain_stats_vec_t& inp_data)
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& /*tool_metadata*/,
|
||||
const domain_stats_vec_t& inp_data)
|
||||
{
|
||||
auto data_v = inp_data;
|
||||
|
||||
@@ -317,10 +348,10 @@ generate_stats(tool_table* /*tool_functions*/, const domain_stats_vec_t& inp_dat
|
||||
return lhs.first < rhs.first;
|
||||
});
|
||||
|
||||
output_stream_t _os = get_output_stream(tool::get_config().stats_summary_file, ".txt");
|
||||
auto _indent = (_os.writes_to_file()) ? std::string_view{} : std::string_view{" "};
|
||||
output_stream _os = get_output_stream(cfg, cfg.stats_summary_file, ".txt");
|
||||
auto _indent = (_os.writes_to_file()) ? std::string_view{} : std::string_view{" "};
|
||||
|
||||
if(tool::get_config().stats_summary_per_domain)
|
||||
if(cfg.stats_summary_per_domain)
|
||||
{
|
||||
for(const auto& itr : data_v)
|
||||
{
|
||||
@@ -329,14 +360,14 @@ generate_stats(tool_table* /*tool_functions*/, const domain_stats_vec_t& inp_dat
|
||||
auto _name = fmt::format("{} SUMMARY", get_domain_column_name(itr.first));
|
||||
auto _tmp = domain_stats_vec_t{};
|
||||
_tmp.emplace_back(itr.first, itr.second);
|
||||
generate_stats(_os, _name, _tmp, _indent);
|
||||
generate_stats(cfg, _os, _name, _tmp, _indent);
|
||||
}
|
||||
}
|
||||
|
||||
if(!tool::get_config().stats_summary_groups.empty())
|
||||
if(!cfg.stats_summary_groups.empty())
|
||||
{
|
||||
auto domain_groups = std::vector<domain_stats_vec_t>{};
|
||||
for(const auto& itr : tool::get_config().stats_summary_groups)
|
||||
for(const auto& itr : cfg.stats_summary_groups)
|
||||
{
|
||||
auto _names = std::vector<std::string>{};
|
||||
auto _tmp = domain_stats_vec_t{};
|
||||
@@ -356,11 +387,11 @@ generate_stats(tool_table* /*tool_functions*/, const domain_stats_vec_t& inp_dat
|
||||
<< "summary group regex '" << itr << "' matched with zero domain groups";
|
||||
|
||||
auto _name = fmt::format("{} SUMMARY", fmt::join(_names.begin(), _names.end(), " + "));
|
||||
generate_stats(_os, _name, _tmp, _indent);
|
||||
generate_stats(cfg, _os, _name, _tmp, _indent);
|
||||
}
|
||||
}
|
||||
|
||||
if(tool::get_config().stats_summary) generate_stats(_os, "SUMMARY", data_v, _indent);
|
||||
if(cfg.stats_summary) generate_stats(cfg, _os, "SUMMARY", data_v, _indent);
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,78 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "generator.hpp"
|
||||
#include "metadata.hpp"
|
||||
#include "statistics.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_memory_copy_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_counter_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& data);
|
||||
|
||||
void
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const domain_stats_vec_t& data);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,153 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "tmp_file_buffer.hpp"
|
||||
|
||||
#include "lib/common/container/ring_buffer.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <iosfwd>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
/// converts a container of ring buffers of element Tp into a single container of elements
|
||||
template <typename Tp, template <typename, typename...> class ContainerT, typename... ParamsT>
|
||||
ContainerT<Tp>
|
||||
get_buffer_elements(ContainerT<common::container::ring_buffer<Tp>, ParamsT...>&& data)
|
||||
{
|
||||
auto ret = ContainerT<Tp>{};
|
||||
for(auto& buf : data)
|
||||
{
|
||||
Tp* record = nullptr;
|
||||
do
|
||||
{
|
||||
record = buf.retrieve();
|
||||
if(record) ret.emplace_back(*record);
|
||||
} while(record != nullptr);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
std::vector<Tp>
|
||||
get_buffer_elements(common::container::ring_buffer<Tp>&& buf)
|
||||
{
|
||||
auto ret = std::vector<Tp>{};
|
||||
Tp* record = nullptr;
|
||||
ret.reserve(buf.count());
|
||||
do
|
||||
{
|
||||
record = buf.retrieve();
|
||||
if(record) ret.emplace_back(*record);
|
||||
} while(record != nullptr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename Tp, domain_type DomainT>
|
||||
struct buffered_output;
|
||||
|
||||
template <typename Tp>
|
||||
struct generator
|
||||
{
|
||||
template <typename Up, domain_type DomainT>
|
||||
friend struct buffered_output;
|
||||
|
||||
generator() = delete;
|
||||
~generator() = default;
|
||||
|
||||
generator(const generator&) = delete;
|
||||
generator(generator&&) = delete;
|
||||
generator& operator=(const generator&) = delete;
|
||||
generator& operator=(generator&&) = delete;
|
||||
|
||||
auto begin() { return file_pos.begin(); }
|
||||
auto begin() const { return file_pos.begin(); }
|
||||
auto cbegin() const { return file_pos.cbegin(); }
|
||||
|
||||
auto end() { return file_pos.end(); }
|
||||
auto end() const { return file_pos.end(); }
|
||||
auto cend() const { return file_pos.cend(); }
|
||||
|
||||
auto size() const { return file_pos.size(); }
|
||||
auto empty() const { return file_pos.empty(); }
|
||||
|
||||
std::vector<Tp> get(std::streampos itr) const;
|
||||
|
||||
private:
|
||||
generator(file_buffer<Tp>* fbuf);
|
||||
|
||||
file_buffer<Tp>* filebuf = nullptr;
|
||||
std::lock_guard<std::mutex> lk_guard;
|
||||
std::set<std::streampos> file_pos = {};
|
||||
};
|
||||
|
||||
template <typename Tp>
|
||||
generator<Tp>::generator(file_buffer<Tp>* fbuf)
|
||||
: filebuf{fbuf}
|
||||
, lk_guard{filebuf->file.file_mutex}
|
||||
, file_pos{filebuf->file.file_pos}
|
||||
{}
|
||||
|
||||
template <typename Tp>
|
||||
std::vector<Tp>
|
||||
generator<Tp>::get(std::streampos itr) const
|
||||
{
|
||||
auto _data = std::vector<Tp>{};
|
||||
auto& _fs = filebuf->file.stream;
|
||||
_fs.seekg(itr); // set to the absolute position
|
||||
if(!_fs.eof())
|
||||
{
|
||||
auto _buffer = ring_buffer_t<Tp>{};
|
||||
_buffer.load(_fs);
|
||||
_data = get_buffer_elements(std::move(_buffer));
|
||||
}
|
||||
return _data;
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
namespace cereal
|
||||
{
|
||||
template <typename ArchiveT, typename Tp>
|
||||
void
|
||||
save(ArchiveT& ar, const rocprofiler::tool::generator<Tp>& data)
|
||||
{
|
||||
ar.makeArray();
|
||||
for(auto itr : data)
|
||||
{
|
||||
auto dat = data.get(itr);
|
||||
for(auto ditr : dat)
|
||||
ar(ditr);
|
||||
}
|
||||
}
|
||||
} // namespace cereal
|
||||
@@ -0,0 +1,98 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/demangle.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/cxx/hash.hpp>
|
||||
#include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
#include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
using rocprofiler_code_object_info_t = rocprofiler_callback_tracing_code_object_load_data_t;
|
||||
using code_object_info = rocprofiler_code_object_info_t;
|
||||
using code_object_data_vec_t = std::vector<code_object_info>;
|
||||
using code_object_data_map_t = std::unordered_map<uint64_t, code_object_info>;
|
||||
|
||||
using rocprofiler_kernel_symbol_info_t =
|
||||
rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
|
||||
|
||||
struct kernel_symbol_info : rocprofiler_kernel_symbol_info_t
|
||||
{
|
||||
using base_type = rocprofiler_kernel_symbol_info_t;
|
||||
|
||||
template <typename FuncT>
|
||||
kernel_symbol_info(const base_type& _base, FuncT&& _formatter)
|
||||
: base_type{_base}
|
||||
, formatted_kernel_name{_formatter(CHECK_NOTNULL(_base.kernel_name))}
|
||||
, demangled_kernel_name{common::cxx_demangle(CHECK_NOTNULL(_base.kernel_name))}
|
||||
, truncated_kernel_name{common::truncate_name(demangled_kernel_name)}
|
||||
{}
|
||||
|
||||
kernel_symbol_info();
|
||||
~kernel_symbol_info() = default;
|
||||
kernel_symbol_info(const kernel_symbol_info&) = default;
|
||||
kernel_symbol_info(kernel_symbol_info&&) noexcept = default;
|
||||
kernel_symbol_info& operator=(const kernel_symbol_info&) = default;
|
||||
kernel_symbol_info& operator=(kernel_symbol_info&&) noexcept = default;
|
||||
|
||||
std::string formatted_kernel_name = {};
|
||||
std::string demangled_kernel_name = {};
|
||||
std::string truncated_kernel_name = {};
|
||||
};
|
||||
|
||||
using kernel_symbol_data_vec_t = std::vector<kernel_symbol_info>;
|
||||
using kernel_symbol_data_map_t = std::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_info>;
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
namespace cereal
|
||||
{
|
||||
#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD))
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, const ::rocprofiler::tool::kernel_symbol_info& data)
|
||||
{
|
||||
cereal::save(ar,
|
||||
static_cast<const ::rocprofiler::tool::rocprofiler_kernel_symbol_info_t&>(data));
|
||||
SAVE_DATA_FIELD(formatted_kernel_name);
|
||||
SAVE_DATA_FIELD(demangled_kernel_name);
|
||||
SAVE_DATA_FIELD(truncated_kernel_name);
|
||||
}
|
||||
|
||||
#undef SAVE_DATA_FIELD
|
||||
} // namespace cereal
|
||||
@@ -0,0 +1,412 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "metadata.hpp"
|
||||
|
||||
#include "lib/common/string_entry.hpp"
|
||||
#include "lib/output/agent_info.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Callback handed to rocprofiler_iterate_counter_dimensions: appends the
// `num_dims` entries of `dim_info` to the counter_dimension_info_vec_t that
// `user_data` points to. Always reports success.
rocprofiler_status_t
dimensions_info_callback(rocprofiler_counter_id_t /*id*/,
                         const rocprofiler_record_dimension_info_t* dim_info,
                         long unsigned int                          num_dims,
                         void*                                      user_data)
{
    auto& _dims = *static_cast<counter_dimension_info_vec_t*>(user_data);

    _dims.reserve(num_dims);
    for(const auto* _itr = dim_info; _itr != dim_info + num_dims; ++_itr)
    {
        _dims.emplace_back(*_itr);
    }

    return ROCPROFILER_STATUS_SUCCESS;
}
|
||||
} // namespace
|
||||
|
||||
kernel_symbol_info::kernel_symbol_info()
|
||||
: base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}
|
||||
{}
|
||||
|
||||
metadata::metadata(inprocess)
|
||||
: buffer_names{sdk::get_buffer_tracing_names()}
|
||||
, callback_names{sdk::get_callback_tracing_names()}
|
||||
{
|
||||
ROCPROFILER_CHECK(rocprofiler_query_available_agents(
|
||||
ROCPROFILER_AGENT_INFO_VERSION_0,
|
||||
[](rocprofiler_agent_version_t, const void** _agents, size_t _num_agents, void* _data) {
|
||||
auto* _agents_v = static_cast<agent_info_vec_t*>(_data);
|
||||
_agents_v->reserve(_num_agents);
|
||||
for(size_t i = 0; i < _num_agents; ++i)
|
||||
{
|
||||
auto* agent = static_cast<const rocprofiler_agent_v0_t*>(_agents[i]);
|
||||
_agents_v->emplace_back(*agent);
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
sizeof(rocprofiler_agent_v0_t),
|
||||
&agents));
|
||||
|
||||
{
|
||||
auto _gpu_agents = std::vector<agent_info*>{};
|
||||
|
||||
_gpu_agents.reserve(agents.size());
|
||||
for(auto& itr : agents)
|
||||
{
|
||||
if(itr.type == ROCPROFILER_AGENT_TYPE_GPU) _gpu_agents.emplace_back(&itr);
|
||||
}
|
||||
|
||||
// make sure they are sorted by node id
|
||||
std::sort(_gpu_agents.begin(), _gpu_agents.end(), [](const auto& lhs, const auto& rhs) {
|
||||
return CHECK_NOTNULL(lhs)->node_id < CHECK_NOTNULL(rhs)->node_id;
|
||||
});
|
||||
|
||||
int64_t _dev_id = 0;
|
||||
for(auto& itr : _gpu_agents)
|
||||
itr->gpu_index = _dev_id++;
|
||||
}
|
||||
|
||||
for(auto itr : agents)
|
||||
agents_map.emplace(itr.id, itr);
|
||||
}
|
||||
|
||||
// One-time in-process initialization: for every non-CPU agent, queries the
// supported counters together with their info and dimensions and stores them
// in agent_counter_info. Subsequent calls return immediately.
void metadata::init(inprocess)
{
    if(inprocess_init) return;

    inprocess_init = true;

    // iterate by reference: only `type` and `id` are read, no need to copy the agent
    for(const auto& itr : agents)
    {
        if(itr.type == ROCPROFILER_AGENT_TYPE_CPU) continue;

        ROCPROFILER_CHECK(rocprofiler_iterate_agent_supported_counters(
            itr.id,
            [](rocprofiler_agent_id_t    id,
               rocprofiler_counter_id_t* counters,
               size_t                    num_counters,
               void*                     user_data) {
                auto* data_v = static_cast<agent_counter_info_map_t*>(user_data);
                data_v->emplace(id, counter_info_vec_t{});
                for(size_t i = 0; i < num_counters; ++i)
                {
                    auto _info     = rocprofiler_counter_info_v0_t{};
                    auto _dim_ids  = std::vector<rocprofiler_counter_dimension_id_t>{};
                    auto _dim_info = std::vector<rocprofiler_record_dimension_info_t>{};

                    ROCPROFILER_CHECK(rocprofiler_query_counter_info(
                        counters[i],
                        ROCPROFILER_COUNTER_INFO_VERSION_0,
                        &static_cast<rocprofiler_counter_info_v0_t&>(_info)));

                    ROCPROFILER_CHECK(rocprofiler_iterate_counter_dimensions(
                        counters[i], dimensions_info_callback, &_dim_info));

                    _dim_ids.reserve(_dim_info.size());
                    for(auto ditr : _dim_info)
                        _dim_ids.emplace_back(ditr.id);

                    data_v->at(id).emplace_back(
                        id, _info, std::move(_dim_ids), std::move(_dim_info));
                }
                return ROCPROFILER_STATUS_SUCCESS;
            },
            &agent_counter_info));
    }
}
|
||||
|
||||
// Linear search of `agents` for the entry whose id equals `_val`.
// Returns a pointer to that entry, or nullptr when no agent matches.
const agent_info*
metadata::get_agent(rocprofiler_agent_id_t _val) const
{
    const agent_info* _match = nullptr;
    for(const auto& _agent : agents)
    {
        if(_agent.id == _val)
        {
            _match = &_agent;
            break;
        }
    }
    return _match;
}
|
||||
|
||||
// Looks up the code object registered under `code_obj_id` while holding the
// read lock on code_objects.
// Returns a pointer to the stored record, or nullptr when the id is unknown
// (consistent with get_agent) instead of throwing std::out_of_range.
const code_object_info*
metadata::get_code_object(uint64_t code_obj_id) const
{
    return code_objects.rlock([code_obj_id](const auto& _data) -> const code_object_info* {
        auto _pos = _data.find(code_obj_id);
        return (_pos != _data.end()) ? &_pos->second : nullptr;
    });
}
|
||||
|
||||
// Looks up the kernel symbol registered under `kernel_id` while holding the
// read lock on kernel_symbols.
// Returns a pointer to the stored record, or nullptr when the id is unknown
// (consistent with get_agent) instead of throwing std::out_of_range, so that
// null checks performed by callers are actually reachable.
const kernel_symbol_info*
metadata::get_kernel_symbol(uint64_t kernel_id) const
{
    return kernel_symbols.rlock([kernel_id](const auto& _data) -> const kernel_symbol_info* {
        auto _pos = _data.find(kernel_id);
        return (_pos != _data.end()) ? &_pos->second : nullptr;
    });
}
|
||||
|
||||
// Resolves the counter id belonging to a record `instance_id` via the SDK and
// forwards to the lookup by counter id.
const tool_counter_info*
metadata::get_counter_info(uint64_t instance_id) const
{
    rocprofiler_counter_id_t _counter_id = {.handle = 0};

    ROCPROFILER_CHECK(rocprofiler_query_record_counter_id(instance_id, &_counter_id));

    return get_counter_info(_counter_id);
}
|
||||
|
||||
// Searches the counters of every agent for the one whose id equals `id`.
// Returns a pointer to the first match, or nullptr when there is none.
const tool_counter_info*
metadata::get_counter_info(rocprofiler_counter_id_t id) const
{
    for(const auto& _agent_entry : agent_counter_info)
    {
        const auto& _counters = _agent_entry.second;
        for(const auto& _counter : _counters)
        {
            if(_counter.id == id) return &_counter;
        }
    }

    return nullptr;
}
|
||||
|
||||
// Returns the address of the dimension list of the counter that the record
// `instance_id` belongs to. The counter lookup result is checked to be non-null.
const counter_dimension_info_vec_t*
metadata::get_counter_dimension_info(uint64_t instance_id) const
{
    const auto* _info = CHECK_NOTNULL(get_counter_info(instance_id));
    return &_info->dimensions;
}
|
||||
|
||||
// Returns the registered code objects as a vector indexed by code object id.
// The vector has (largest id + 1) entries; slots without a registered code
// object hold a value-initialized record.
code_object_data_vec_t
metadata::get_code_objects() const
{
    // copy the records out of the map while holding the read lock
    auto _snapshot = code_objects.rlock([](const auto& _map) {
        auto _values = std::vector<code_object_info>{};
        _values.reserve(_map.size());
        for(const auto& _entry : _map)
        {
            _values.emplace_back(_entry.second);
        }
        return _values;
    });

    // the largest id determines the size of the indexed vector
    uint64_t _max_id = 0;
    for(const auto& _obj : _snapshot)
    {
        _max_id = std::max(_max_id, _obj.code_object_id);
    }

    // index by the code object id
    auto _indexed = std::vector<code_object_info>(_max_id + 1, code_object_info{});
    for(auto& _obj : _snapshot)
    {
        _indexed.at(_obj.code_object_id) = _obj;
    }

    return _indexed;
}
|
||||
|
||||
// Returns the registered kernel symbols as a vector indexed by kernel id.
// The vector has (largest id + 1) entries; slots without a registered symbol
// hold a default-constructed record.
kernel_symbol_data_vec_t
metadata::get_kernel_symbols() const
{
    // copy the records out of the map while holding the read lock
    auto _snapshot = kernel_symbols.rlock([](const auto& _map) {
        auto _values = std::vector<kernel_symbol_info>{};
        _values.reserve(_map.size());
        for(const auto& _entry : _map)
        {
            _values.emplace_back(_entry.second);
        }
        return _values;
    });

    // the largest id determines the size of the indexed vector
    uint64_t _max_id = 0;
    for(const auto& _sym : _snapshot)
    {
        _max_id = std::max(_max_id, _sym.kernel_id);
    }

    // index by the kernel id
    auto _indexed = std::vector<kernel_symbol_info>(_max_id + 1, kernel_symbol_info{});
    for(auto& _sym : _snapshot)
    {
        _indexed.at(_sym.kernel_id) = std::move(_sym);
    }

    return _indexed;
}
|
||||
|
||||
// Collects pointers to all entries of `agents` whose type is GPU, in the
// order in which they are stored.
metadata::agent_info_ptr_vec_t
metadata::get_gpu_agents() const
{
    auto _gpus = metadata::agent_info_ptr_vec_t{};

    for(const auto& _agent : agents)
    {
        if(_agent.type != ROCPROFILER_AGENT_TYPE_GPU) continue;
        _gpus.emplace_back(&_agent);
    }

    return _gpus;
}
|
||||
|
||||
// Flattens the per-agent counter lists into a single vector containing a copy
// of every counter of every agent.
counter_info_vec_t
metadata::get_counter_info() const
{
    auto _all = std::vector<tool_counter_info>{};

    for(const auto& _agent_entry : agent_counter_info)
    {
        const auto& _counters = _agent_entry.second;
        _all.insert(_all.end(), _counters.begin(), _counters.end());
    }

    return _all;
}
|
||||
|
||||
// Gathers the dimensions of every counter of every agent and returns them
// sorted by (id, instance_size) with duplicates of that pair removed.
counter_dimension_vec_t
metadata::get_counter_dimension_info() const
{
    using dim_info_t = rocprofiler_record_dimension_info_t;

    auto _dims = counter_dimension_vec_t{};
    for(const auto& _agent_entry : agent_counter_info)
    {
        for(const auto& _counter : _agent_entry.second)
        {
            for(const auto& _dim : _counter.dimensions)
            {
                _dims.emplace_back(_dim);
            }
        }
    }

    // both the ordering and the equivalence are defined on (id, instance_size)
    auto _key = [](const dim_info_t& _v) { return std::tie(_v.id, _v.instance_size); };

    std::sort(_dims.begin(), _dims.end(), [&_key](const dim_info_t& lhs, const dim_info_t& rhs) {
        return _key(lhs) < _key(rhs);
    });

    auto _last =
        std::unique(_dims.begin(), _dims.end(), [&_key](const dim_info_t& lhs, const dim_info_t& rhs) {
            return _key(lhs) == _key(rhs);
        });
    _dims.erase(_last, _dims.end());

    return _dims;
}
|
||||
|
||||
bool
|
||||
metadata::add_marker_message(uint64_t corr_id, std::string&& msg)
|
||||
{
|
||||
return marker_messages.wlock(
|
||||
[](auto& _data, uint64_t _cid_v, std::string&& _msg) -> bool {
|
||||
return _data.emplace(_cid_v, std::move(_msg)).second;
|
||||
},
|
||||
corr_id,
|
||||
std::move(msg));
|
||||
}
|
||||
|
||||
bool
|
||||
metadata::add_code_object(code_object_info obj)
|
||||
{
|
||||
return code_objects.wlock(
|
||||
[](code_object_data_map_t& _data_v, code_object_info _obj_v) -> bool {
|
||||
return _data_v.emplace(_obj_v.code_object_id, _obj_v).second;
|
||||
},
|
||||
obj);
|
||||
}
|
||||
|
||||
bool
|
||||
metadata::add_kernel_symbol(kernel_symbol_info&& sym)
|
||||
{
|
||||
return kernel_symbols.wlock(
|
||||
[](kernel_symbol_data_map_t& _data_v, kernel_symbol_info&& _sym_v) -> bool {
|
||||
return _data_v.emplace(_sym_v.kernel_id, std::move(_sym_v)).second;
|
||||
},
|
||||
std::move(sym));
|
||||
}
|
||||
|
||||
bool
|
||||
metadata::add_string_entry(size_t key, std::string_view str)
|
||||
{
|
||||
return string_entries.ulock(
|
||||
[](const auto& _data, size_t _key, std::string_view) { return (_data.count(_key) > 0); },
|
||||
[](auto& _data, size_t _key, std::string_view _str) {
|
||||
_data.emplace(_key, new std::string{_str});
|
||||
return true;
|
||||
},
|
||||
key,
|
||||
str);
|
||||
}
|
||||
|
||||
bool
|
||||
metadata::add_external_correlation_id(uint64_t val)
|
||||
{
|
||||
return external_corr_ids.wlock(
|
||||
[](auto& _data, uint64_t _val) { return _data.emplace(_val).second; }, val);
|
||||
}
|
||||
|
||||
std::string_view
|
||||
metadata::get_marker_message(uint64_t corr_id) const
|
||||
{
|
||||
return marker_messages.rlock(
|
||||
[](const auto& _data, uint64_t _corr_id_v) -> std::string_view {
|
||||
return _data.at(_corr_id_v);
|
||||
},
|
||||
corr_id);
|
||||
}
|
||||
|
||||
std::string_view
|
||||
metadata::get_kernel_name(uint64_t kernel_id, uint64_t rename_id) const
|
||||
{
|
||||
if(rename_id > 0)
|
||||
{
|
||||
if(const auto* _name = common::get_string_entry(rename_id)) return std::string_view{*_name};
|
||||
}
|
||||
|
||||
const auto* _kernel_data = get_kernel_symbol(kernel_id);
|
||||
return CHECK_NOTNULL(_kernel_data)->formatted_kernel_name;
|
||||
}
|
||||
|
||||
std::string_view
|
||||
metadata::get_kind_name(rocprofiler_callback_tracing_kind_t kind) const
|
||||
{
|
||||
return callback_names.at(kind);
|
||||
}
|
||||
|
||||
std::string_view
|
||||
metadata::get_kind_name(rocprofiler_buffer_tracing_kind_t kind) const
|
||||
{
|
||||
return buffer_names.at(kind);
|
||||
}
|
||||
|
||||
std::string_view
|
||||
metadata::get_operation_name(rocprofiler_callback_tracing_kind_t kind,
|
||||
rocprofiler_tracing_operation_t op) const
|
||||
{
|
||||
return callback_names.at(kind, op);
|
||||
}
|
||||
|
||||
std::string_view
|
||||
metadata::get_operation_name(rocprofiler_buffer_tracing_kind_t kind,
|
||||
rocprofiler_tracing_operation_t op) const
|
||||
{
|
||||
return buffer_names.at(kind, op);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
metadata::get_node_id(rocprofiler_agent_id_t _val) const
|
||||
{
|
||||
return CHECK_NOTNULL(get_agent(_val))->logical_node_id;
|
||||
}
|
||||
|
||||
const std::string*
|
||||
metadata::get_string_entry(size_t key) const
|
||||
{
|
||||
const auto* ret = string_entries.rlock(
|
||||
[](const auto& _data, size_t _key) -> const std::string* {
|
||||
if(_data.count(_key) > 0) return _data.at(_key).get();
|
||||
return nullptr;
|
||||
},
|
||||
key);
|
||||
|
||||
if(!ret) ret = common::get_string_entry(key);
|
||||
|
||||
return ret;
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,160 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "agent_info.hpp"
|
||||
#include "counter_info.hpp"
|
||||
#include "kernel_symbol_info.hpp"
|
||||
|
||||
#include "lib/common/container/small_vector.hpp"
|
||||
#include "lib/common/demangle.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
#include <rocprofiler-sdk/buffer_tracing.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
#include <rocprofiler-sdk/cxx/hash.hpp>
|
||||
#include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
// Evaluates RESULT (a rocprofiler_status_t expression) once and, when it is not
// ROCPROFILER_STATUS_SUCCESS, emits a ROCP_FATAL message containing the calling
// function, the stringified expression, the status code and its status string.
// VAR is only used to build a unique name for the local status variable.
// NOTE: expands to a braced block, not a single statement.
#define ROCPROFILER_CHECK_NESTED(VAR, RESULT)                                                      \
    {                                                                                              \
        rocprofiler_status_t ROCPROFILER_VARIABLE(CHECKSTATUS, VAR) = RESULT;                      \
        if(ROCPROFILER_VARIABLE(CHECKSTATUS, VAR) != ROCPROFILER_STATUS_SUCCESS)                   \
        {                                                                                          \
            std::string_view status_msg =                                                          \
                rocprofiler_get_status_string(ROCPROFILER_VARIABLE(CHECKSTATUS, VAR));             \
            ROCP_FATAL << "[" << __FUNCTION__ << "] " << #RESULT << " failed with error code "     \
                       << ROCPROFILER_VARIABLE(CHECKSTATUS, VAR) << " :: " << status_msg;          \
        }                                                                                          \
    }

// Convenience wrapper: uses __COUNTER__ to give each check its own variable name
#define ROCPROFILER_CHECK(RESULT) ROCPROFILER_CHECK_NESTED(__COUNTER__, RESULT)
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
using marker_message_map_t = std::unordered_map<uint64_t, std::string>;
|
||||
using marker_message_ordered_map_t = std::map<uint64_t, std::string>;
|
||||
using string_entry_map_t = std::unordered_map<size_t, std::unique_ptr<std::string>>;
|
||||
using counter_dimension_vec_t = std::vector<rocprofiler_record_dimension_info_t>;
|
||||
using external_corr_id_set_t = std::unordered_set<uint64_t>;
|
||||
|
||||
template <typename Tp>
|
||||
using synced_map = common::Synchronized<Tp, true>;
|
||||
|
||||
struct metadata
|
||||
{
|
||||
using agent_info_ptr_vec_t = common::container::small_vector<const agent_info*, 16>;
|
||||
|
||||
struct inprocess
|
||||
{};
|
||||
|
||||
pid_t process_id = 0;
|
||||
uint64_t process_start_ns = 0;
|
||||
uint64_t process_end_ns = 0;
|
||||
agent_info_vec_t agents = {};
|
||||
agent_info_map_t agents_map = {};
|
||||
agent_counter_info_map_t agent_counter_info = {};
|
||||
sdk::buffer_name_info buffer_names = {};
|
||||
sdk::callback_name_info callback_names = {};
|
||||
synced_map<code_object_data_map_t> code_objects = {};
|
||||
synced_map<kernel_symbol_data_map_t> kernel_symbols = {};
|
||||
synced_map<marker_message_map_t> marker_messages = {};
|
||||
synced_map<string_entry_map_t> string_entries = {};
|
||||
synced_map<external_corr_id_set_t> external_corr_ids = {};
|
||||
|
||||
metadata() = default;
|
||||
metadata(inprocess);
|
||||
|
||||
~metadata() = default;
|
||||
metadata(const metadata&) = delete;
|
||||
metadata(metadata&&) noexcept = delete;
|
||||
metadata& operator=(const metadata&) = delete;
|
||||
metadata& operator=(metadata&&) noexcept = delete;
|
||||
|
||||
void init(inprocess);
|
||||
|
||||
const agent_info* get_agent(rocprofiler_agent_id_t _val) const;
|
||||
const code_object_info* get_code_object(uint64_t code_obj_id) const;
|
||||
const kernel_symbol_info* get_kernel_symbol(uint64_t kernel_id) const;
|
||||
const tool_counter_info* get_counter_info(uint64_t instance_id) const;
|
||||
const tool_counter_info* get_counter_info(rocprofiler_counter_id_t id) const;
|
||||
const counter_dimension_info_vec_t* get_counter_dimension_info(uint64_t instance_id) const;
|
||||
|
||||
code_object_data_vec_t get_code_objects() const;
|
||||
kernel_symbol_data_vec_t get_kernel_symbols() const;
|
||||
agent_info_ptr_vec_t get_gpu_agents() const;
|
||||
counter_info_vec_t get_counter_info() const;
|
||||
counter_dimension_vec_t get_counter_dimension_info() const;
|
||||
|
||||
template <typename Tp>
|
||||
Tp get_marker_messages(Tp&&);
|
||||
|
||||
bool add_marker_message(uint64_t corr_id, std::string&& msg);
|
||||
bool add_code_object(code_object_info obj);
|
||||
bool add_kernel_symbol(kernel_symbol_info&& sym);
|
||||
bool add_string_entry(size_t key, std::string_view str);
|
||||
bool add_external_correlation_id(uint64_t);
|
||||
|
||||
std::string_view get_marker_message(uint64_t corr_id) const;
|
||||
std::string_view get_kernel_name(uint64_t kernel_id, uint64_t rename_id) const;
|
||||
std::string_view get_kind_name(rocprofiler_callback_tracing_kind_t kind) const;
|
||||
std::string_view get_kind_name(rocprofiler_buffer_tracing_kind_t kind) const;
|
||||
std::string_view get_operation_name(rocprofiler_callback_tracing_kind_t kind,
|
||||
rocprofiler_tracing_operation_t op) const;
|
||||
std::string_view get_operation_name(rocprofiler_buffer_tracing_kind_t kind,
|
||||
rocprofiler_tracing_operation_t op) const;
|
||||
uint64_t get_node_id(rocprofiler_agent_id_t _val) const;
|
||||
const std::string* get_string_entry(size_t key) const;
|
||||
|
||||
private:
|
||||
bool inprocess_init = false;
|
||||
};
|
||||
|
||||
template <typename Tp>
|
||||
Tp
|
||||
metadata::get_marker_messages(Tp&& _inp)
|
||||
{
|
||||
return marker_messages.rlock(
|
||||
[](const auto& _data_v, auto&& _inp_v) {
|
||||
for(const auto& itr : _data_v)
|
||||
_inp_v.emplace(itr.first, itr.second);
|
||||
return _inp_v;
|
||||
},
|
||||
std::move(_inp));
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,124 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "output_config.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
// Builds a configuration from the in-class defaults and then applies the
// environment via parse_env().
output_config
output_config::load_from_env()
{
    output_config cfg{};
    cfg.parse_env();
    return cfg;
}
|
||||
|
||||
// Applies the environment (parse_env) on top of the values already present in
// `cfg` and returns the resulting configuration by value.
// NOTE(review): `cfg` is a named rvalue-reference parameter, so before C++20
// the return statement copy-constructs the result -- confirm whether a move
// was intended.
output_config
output_config::load_from_env(output_config&& cfg)
{
    cfg.parse_env();

    return cfg;
}
|
||||
|
||||
void
|
||||
output_config::parse_env()
|
||||
{
|
||||
stats = common::get_env("ROCPROF_STATS", stats);
|
||||
stats_summary = common::get_env("ROCPROF_STATS_SUMMARY", stats_summary);
|
||||
stats_summary_per_domain =
|
||||
common::get_env("ROCPROF_STATS_SUMMARY_PER_DOMAIN", stats_summary_per_domain);
|
||||
stats_summary_unit = common::get_env("ROCPROF_STATS_SUMMARY_UNITS", stats_summary_unit);
|
||||
stats_summary_file = common::get_env("ROCPROF_STATS_SUMMARY_OUTPUT", stats_summary_file);
|
||||
|
||||
perfetto_backend = common::get_env("ROCPROF_PERFETTO_BACKEND", perfetto_backend);
|
||||
perfetto_buffer_fill_policy =
|
||||
common::get_env("ROCPROF_PERFETTO_BUFFER_FILL_POLICY", perfetto_buffer_fill_policy);
|
||||
perfetto_shmem_size_hint =
|
||||
common::get_env("ROCPROF_PERFETTO_SHMEM_SIZE_HINT_KB", perfetto_shmem_size_hint);
|
||||
perfetto_buffer_size = common::get_env("ROCPROF_PERFETTO_BUFFER_SIZE_KB", perfetto_buffer_size);
|
||||
|
||||
output_path = common::get_env("ROCPROF_OUTPUT_PATH", output_path);
|
||||
output_file = common::get_env("ROCPROF_OUTPUT_FILE_NAME", output_file);
|
||||
tmp_directory = common::get_env("ROCPROF_TMPDIR", tmp_directory);
|
||||
kernel_rename = common::get_env("ROCPROF_KERNEL_RENAME", false);
|
||||
|
||||
auto to_upper = [](std::string val) {
|
||||
for(auto& vitr : val)
|
||||
vitr = toupper(vitr);
|
||||
return val;
|
||||
};
|
||||
|
||||
output_format = common::get_env("ROCPROF_OUTPUT_FORMAT", output_format);
|
||||
auto entries = std::set<std::string>{};
|
||||
for(const auto& itr : sdk::parse::tokenize(output_format, " \t,;:"))
|
||||
entries.emplace(to_upper(itr));
|
||||
|
||||
csv_output = entries.count("CSV") > 0 || entries.empty();
|
||||
json_output = entries.count("JSON") > 0;
|
||||
pftrace_output = entries.count("PFTRACE") > 0;
|
||||
otf2_output = entries.count("OTF2") > 0;
|
||||
|
||||
const auto supported_formats =
|
||||
std::set<std::string_view>{"CSV", "JSON", "PFTRACE", "OTF2", "ROCPD"};
|
||||
for(const auto& itr : entries)
|
||||
{
|
||||
LOG_IF(FATAL, supported_formats.count(itr) == 0)
|
||||
<< "Unsupported output format type: " << itr;
|
||||
}
|
||||
|
||||
const auto supported_perfetto_backends = std::set<std::string_view>{"inprocess", "system"};
|
||||
LOG_IF(FATAL, supported_perfetto_backends.count(perfetto_backend) == 0)
|
||||
<< "Unsupported perfetto backend type: " << perfetto_backend;
|
||||
|
||||
if(stats_summary_unit == "sec")
|
||||
stats_summary_unit_value = common::units::sec;
|
||||
else if(stats_summary_unit == "msec")
|
||||
stats_summary_unit_value = common::units::msec;
|
||||
else if(stats_summary_unit == "usec")
|
||||
stats_summary_unit_value = common::units::usec;
|
||||
else if(stats_summary_unit == "nsec")
|
||||
stats_summary_unit_value = common::units::nsec;
|
||||
else
|
||||
{
|
||||
ROCP_FATAL << "Unsupported summary units value: " << stats_summary_unit;
|
||||
}
|
||||
|
||||
if(auto _summary_grps = common::get_env("ROCPROF_STATS_SUMMARY_GROUPS", "");
|
||||
!_summary_grps.empty())
|
||||
{
|
||||
stats_summary_groups =
|
||||
sdk::parse::tokenize(_summary_grps, std::vector<std::string_view>{"##@@##"});
|
||||
|
||||
// remove any empty strings (just in case these slipped through)
|
||||
stats_summary_groups.erase(std::remove_if(stats_summary_groups.begin(),
|
||||
stats_summary_groups.end(),
|
||||
[](const auto& itr) { return itr.empty(); }),
|
||||
stats_summary_groups.end());
|
||||
}
|
||||
|
||||
// enable summary output if any of these are enabled
|
||||
summary_output = (stats_summary || stats_summary_per_domain || !stats_summary_groups.empty());
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,128 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "format_path.hpp"
|
||||
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/mpl.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/cxx/details/tokenize.hpp>
|
||||
#include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
namespace defaults
|
||||
{
|
||||
constexpr auto perfetto_buffer_size_kb = (1 * common::units::GiB) / common::units::KiB;
|
||||
constexpr auto perfetto_shmem_size_hint_kb = 64;
|
||||
} // namespace defaults
|
||||
|
||||
struct output_config
|
||||
{
|
||||
output_config() = default;
|
||||
~output_config() = default;
|
||||
output_config(const output_config&) = default;
|
||||
output_config(output_config&&) noexcept = default;
|
||||
output_config& operator=(const output_config&) = default;
|
||||
output_config& operator=(output_config&&) noexcept = default;
|
||||
|
||||
bool stats = false;
|
||||
bool stats_summary = false;
|
||||
bool stats_summary_per_domain = false;
|
||||
bool csv_output = false;
|
||||
bool json_output = false;
|
||||
bool pftrace_output = false;
|
||||
bool otf2_output = false;
|
||||
bool summary_output = false;
|
||||
bool kernel_rename = false;
|
||||
uint64_t stats_summary_unit_value = 1;
|
||||
size_t perfetto_shmem_size_hint = defaults::perfetto_shmem_size_hint_kb;
|
||||
size_t perfetto_buffer_size = defaults::perfetto_buffer_size_kb;
|
||||
std::string stats_summary_unit = "nsec";
|
||||
std::string output_path = "%cwd%";
|
||||
std::string output_file = "%hostname%/%pid%";
|
||||
std::string tmp_directory = output_path;
|
||||
std::string stats_summary_file = "stderr";
|
||||
std::string perfetto_backend = "inprocess";
|
||||
std::string perfetto_buffer_fill_policy = "discard";
|
||||
std::vector<std::string> stats_summary_groups = {};
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT&) const;
|
||||
|
||||
template <typename ArchiveT>
|
||||
void load(ArchiveT&)
|
||||
{}
|
||||
|
||||
static output_config load_from_env();
|
||||
static output_config load_from_env(output_config&&);
|
||||
|
||||
private:
|
||||
void parse_env();
|
||||
|
||||
std::string output_format = "ROCPD";
|
||||
};
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
output_config::save(ArchiveT& ar) const
|
||||
{
|
||||
#define CFG_SERIALIZE_MEMBER(VAR) ar(cereal::make_nvp(#VAR, VAR))
|
||||
#define CFG_SERIALIZE_NAMED_MEMBER(NAME, VAR) ar(cereal::make_nvp(NAME, VAR))
|
||||
|
||||
CFG_SERIALIZE_NAMED_MEMBER("output_path", format_path(output_path));
|
||||
CFG_SERIALIZE_NAMED_MEMBER("output_file", format_path(output_file));
|
||||
CFG_SERIALIZE_NAMED_MEMBER("tmp_directory", format_path(tmp_directory));
|
||||
CFG_SERIALIZE_NAMED_MEMBER("raw_output_path", output_path);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("raw_output_file", output_file);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("raw_tmp_directory", tmp_directory);
|
||||
|
||||
CFG_SERIALIZE_MEMBER(perfetto_shmem_size_hint);
|
||||
CFG_SERIALIZE_MEMBER(perfetto_buffer_size);
|
||||
CFG_SERIALIZE_MEMBER(perfetto_buffer_fill_policy);
|
||||
CFG_SERIALIZE_MEMBER(perfetto_backend);
|
||||
|
||||
CFG_SERIALIZE_NAMED_MEMBER("summary", stats_summary);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("summary_per_domain", stats_summary_per_domain);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("summary_groups", stats_summary_groups);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("summary_unit", stats_summary_unit);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("summary_file", stats_summary_file);
|
||||
|
||||
#undef CFG_SERIALIZE_MEMBER
|
||||
#undef CFG_SERIALIZE_NAMED_MEMBER
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,280 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
#include "output_key.hpp"
|
||||
#include "format_path.hpp"
|
||||
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
#include <linux/limits.h>
|
||||
#include <array>
|
||||
#include <fstream>
|
||||
#include <regex>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
namespace
|
||||
{
|
||||
namespace fs = common::filesystem;
|
||||
|
||||
// Copy the given value into a freshly heap-allocated object and return the pointer.
// The caller owns the allocation. Only usable with rvalue arguments: for an lvalue,
// Tp would be deduced as a reference type, which cannot be allocated with new.
template <typename Tp>
auto
as_pointer(Tp&& _val)
{
    Tp* _heap_copy = new Tp{_val};
    return _heap_copy;
}
|
||||
|
||||
std::string*
|
||||
get_local_datetime(const std::string& dt_format, std::time_t*& dt_curr);
|
||||
|
||||
std::time_t* launch_time = nullptr;
|
||||
const auto* launch_clock = as_pointer(std::chrono::system_clock::now());
|
||||
const auto* launch_datetime =
|
||||
get_local_datetime(common::get_env("ROCPROF_TIME_FORMAT", "%F_%H.%M"), launch_time);
|
||||
const auto* launch_date =
|
||||
get_local_datetime(common::get_env("ROCPROF_DATE_FORMAT", "%F"), launch_time);
|
||||
|
||||
// Format a point in time as local time using a strftime-style format string.
//
// dt_format : strftime format specification
// _dt_curr  : in/out pointer to the time to format. When null, it is set to a new
//             heap-allocated std::time_t holding the current time, so later calls that
//             pass the same pointer format the same instant.
//
// Returns a heap-allocated string owned by the caller, or nullptr when the time cannot
// be converted to local time or strftime produces no output (e.g. an empty format or a
// result exceeding the internal 512-byte buffer).
std::string*
get_local_datetime(const std::string& dt_format, std::time_t*& _dt_curr)
{
    constexpr auto strsize = 512;

    if(!_dt_curr) _dt_curr = new std::time_t{std::time(nullptr)};

    // std::localtime may return a null pointer; handing that to strftime is undefined
    const auto* _local_tm = std::localtime(_dt_curr);
    if(!_local_tm) return nullptr;

    // value-initialization already zero-fills the buffer
    char mbstr[strsize] = {};

    // one byte is held back so the buffer is always NUL-terminated
    if(std::strftime(mbstr, sizeof(mbstr) - 1, dt_format.c_str(), _local_tm) != 0)
        return new std::string{mbstr};

    return nullptr;
}
|
||||
|
||||
// Default predicate for the trim helpers: true for any character that is not whitespace.
bool
not_is_space(int ch)
{
    // std::isspace is only defined for EOF and values representable as unsigned char;
    // a plain char promoted to int may be negative, so normalize it first
    return std::isspace(static_cast<unsigned char>(ch)) == 0;
}

// Return a copy of s without its leading characters, up to the first one for which f
// returns true. With the default predicate this strips leading whitespace.
std::string
ltrim(std::string s, bool (*f)(int) = not_is_space)
{
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), f));
    return s;
}

// Return a copy of s without its trailing characters, back to the last one for which f
// returns true. With the default predicate this strips trailing whitespace.
std::string
rtrim(std::string s, bool (*f)(int) = not_is_space)
{
    s.erase(std::find_if(s.rbegin(), s.rend(), f).base(), s.end());
    return s;
}

// Return a copy of s trimmed at both ends.
std::string
trim(std::string s, bool (*f)(int) = not_is_space)
{
    // ltrim/rtrim take their argument by value and return the result, so the returned
    // strings must be chained; calling them for side effects leaves s untouched
    return rtrim(ltrim(std::move(s), f), f);
}
|
||||
|
||||
std::string
|
||||
get_hostname()
|
||||
{
|
||||
auto _hostname_buff = std::array<char, PATH_MAX>{};
|
||||
_hostname_buff.fill('\0');
|
||||
if(gethostname(_hostname_buff.data(), _hostname_buff.size() - 1) != 0)
|
||||
{
|
||||
auto _err = errno;
|
||||
ROCP_WARNING << "Hostname unknown. gethostname failed with error code " << _err << ": "
|
||||
<< strerror(_err);
|
||||
return std::string{"UNKNOWN_HOSTNAME"};
|
||||
}
|
||||
|
||||
return std::string{_hostname_buff.data()};
|
||||
}
|
||||
|
||||
// Read the process ids listed in /proc/<id>/task/<id>/children.
//
// _id defaults to the parent of the calling process, in which case the result is the
// set of that parent's children. An unreadable file yields an empty vector; reading
// stops at the first token that is not a positive integer.
std::vector<pid_t>
get_siblings(pid_t _id = getppid())
{
    const auto _id_str   = std::to_string(_id);
    auto       _children = std::ifstream{"/proc/" + _id_str + "/task/" + _id_str + "/children"};
    auto       _pids     = std::vector<pid_t>{};

    for(pid_t _child = 0; (_children >> _child) && _child > 0;)
        _pids.emplace_back(_child);

    return _pids;
}
|
||||
|
||||
auto
|
||||
get_num_siblings(pid_t _id = getppid())
|
||||
{
|
||||
return get_siblings(_id).size();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Build a key from its placeholder text, its substitution value, and a human-readable
// description. Each argument is taken by value and moved into the matching member.
output_key::output_key(std::string _key, std::string _val, std::string _desc)
: key(std::move(_key))
, value(std::move(_val))
, description(std::move(_desc))
{}
|
||||
|
||||
std::vector<output_key>
|
||||
output_keys(std::string _tag)
|
||||
{
|
||||
using strpair_t = std::pair<std::string, std::string>;
|
||||
|
||||
auto _cmdline = common::read_command_line(getpid());
|
||||
|
||||
if(_tag.empty() && !_cmdline.empty()) _tag = ::basename(_cmdline.front().c_str());
|
||||
|
||||
std::string _argv_string = {}; // entire argv cmd
|
||||
std::string _args_string = {}; // cmdline args
|
||||
std::string _argt_string = _tag; // prefix + cmdline args
|
||||
const std::string& _tag0_string = _tag; // only the basic prefix
|
||||
auto _options = std::vector<output_key>{};
|
||||
|
||||
auto _replace = [](auto& _v, const strpair_t& pitr) {
|
||||
auto pos = std::string::npos;
|
||||
while((pos = _v.find(pitr.first)) != std::string::npos)
|
||||
_v.replace(pos, pitr.first.length(), pitr.second);
|
||||
};
|
||||
|
||||
if(_cmdline.size() > 1 && _cmdline.at(1) == "--") _cmdline.erase(_cmdline.begin() + 1);
|
||||
|
||||
for(auto& itr : _cmdline)
|
||||
{
|
||||
itr = trim(itr);
|
||||
_replace(itr, {"/", "_"});
|
||||
while(!itr.empty() && itr.at(0) == '.')
|
||||
itr = itr.substr(1);
|
||||
while(!itr.empty() && itr.at(0) == '_')
|
||||
itr = itr.substr(1);
|
||||
}
|
||||
|
||||
if(!_cmdline.empty())
|
||||
{
|
||||
for(size_t i = 0; i < _cmdline.size(); ++i)
|
||||
{
|
||||
const auto _l = std::string{(i == 0) ? "" : "_"};
|
||||
auto _v = _cmdline.at(i);
|
||||
_argv_string += _l + _v;
|
||||
if(i > 0)
|
||||
{
|
||||
_argt_string += (i > 1) ? (_l + _v) : _v;
|
||||
_args_string += (i > 1) ? (_l + _v) : _v;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto _mpi_size = get_mpi_size();
|
||||
auto _mpi_rank = get_mpi_rank();
|
||||
|
||||
auto _dmp_size = fmt::format("{}", (_mpi_size) > 0 ? _mpi_size : 1);
|
||||
auto _dmp_rank = fmt::format("{}", (_mpi_rank) > 0 ? _mpi_rank : 0);
|
||||
auto _proc_id = fmt::format("{}", getpid());
|
||||
auto _parent_id = fmt::format("{}", getppid());
|
||||
auto _pgroup_id = fmt::format("{}", getpgid(getpid()));
|
||||
auto _session_id = fmt::format("{}", getsid(getpid()));
|
||||
auto _proc_size = fmt::format("{}", get_num_siblings());
|
||||
auto _pwd_string = common::get_env<std::string>("PWD", ".");
|
||||
auto _slurm_job_id = common::get_env<std::string>("SLURM_JOB_ID", "0");
|
||||
auto _slurm_proc_id = common::get_env("SLURM_PROCID", _dmp_rank);
|
||||
|
||||
auto _uniq_id = _proc_id;
|
||||
if(common::get_env<int32_t>("SLURM_PROCID", -1) >= 0)
|
||||
{
|
||||
_uniq_id = _slurm_proc_id;
|
||||
}
|
||||
else if(_mpi_size > 0 || _mpi_rank >= 0)
|
||||
{
|
||||
_uniq_id = _dmp_rank;
|
||||
}
|
||||
|
||||
for(auto&& itr : std::initializer_list<output_key>{
|
||||
{"%argv%", _argv_string, "Entire command-line condensed into a single string"},
|
||||
{"%argt%",
|
||||
_argt_string,
|
||||
"Similar to `%argv%` except basename of first command line argument"},
|
||||
{"%args%", _args_string, "All command line arguments condensed into a single string"},
|
||||
{"%tag%", _tag0_string, "Basename of first command line argument"}})
|
||||
{
|
||||
_options.emplace_back(itr);
|
||||
}
|
||||
|
||||
if(!_cmdline.empty())
|
||||
{
|
||||
for(size_t i = 0; i < _cmdline.size(); ++i)
|
||||
{
|
||||
auto _v = _cmdline.at(i);
|
||||
_options.emplace_back(fmt::format("%arg{}%", i), _v, fmt::format("Argument #{}", i));
|
||||
}
|
||||
}
|
||||
|
||||
auto _launch_time = (launch_datetime) ? *launch_datetime : std::string{".UNKNOWN_LAUNCH_TIME."};
|
||||
auto _launch_date = (launch_date) ? *launch_date : std::string{".UNKNOWN_LAUNCH_DATE."};
|
||||
auto _hostname = get_hostname();
|
||||
|
||||
for(auto&& itr : std::initializer_list<output_key>{
|
||||
{"%hostname%", _hostname, "Network hostname"},
|
||||
{"%pid%", _proc_id, "Process identifier"},
|
||||
{"%ppid%", _parent_id, "Parent process identifier"},
|
||||
{"%pgid%", _pgroup_id, "Process group identifier"},
|
||||
{"%psid%", _session_id, "Process session identifier"},
|
||||
{"%psize%", _proc_size, "Number of sibling process"},
|
||||
{"%job%", _slurm_job_id, "SLURM_JOB_ID env variable"},
|
||||
{"%rank%", _slurm_proc_id, "MPI/UPC++ rank"},
|
||||
{"%size%", _dmp_size, "MPI/UPC++ size"},
|
||||
{"%nid%", _uniq_id, "%rank% if possible, otherwise %pid%"},
|
||||
{"%cwd%", fs::current_path().string(), "Current working path"},
|
||||
{"%launch_date%", _launch_date, "Date according to date format ROCPROF_DATE_FORMAT"},
|
||||
{"%launch_time%", _launch_time, "Date and/or time according to ROCPROF_TIME_FORMAT"},
|
||||
})
|
||||
{
|
||||
_options.emplace_back(itr);
|
||||
}
|
||||
|
||||
for(auto&& itr : std::initializer_list<output_key>{
|
||||
{"%h", _hostname, "Shorthand for %hostname%"},
|
||||
{"%p", _proc_id, "Shorthand for %pid%"},
|
||||
{"%j", _slurm_job_id, "Shorthand for %job%"},
|
||||
{"%r", _slurm_proc_id, "Shorthand for %rank%"},
|
||||
{"%s", _dmp_size, "Shorthand for %size"},
|
||||
})
|
||||
{
|
||||
_options.emplace_back(itr);
|
||||
}
|
||||
|
||||
return _options;
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,64 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/mpl.hpp"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
struct output_key
|
||||
{
|
||||
output_key(std::string _key, std::string _val, std::string _desc = {});
|
||||
|
||||
template <typename Tp,
|
||||
typename Up = Tp,
|
||||
std::enable_if_t<!common::mpl::is_string_type<Up>::value, int> = 0>
|
||||
output_key(std::string _key, Tp&& _val, std::string _desc = {});
|
||||
|
||||
operator std::pair<std::string, std::string>() const;
|
||||
|
||||
std::string key = {};
|
||||
std::string value = {};
|
||||
std::string description = {};
|
||||
};
|
||||
|
||||
template <typename Tp, typename Up, std::enable_if_t<!common::mpl::is_string_type<Up>::value, int>>
|
||||
output_key::output_key(std::string _key, Tp&& _val, std::string _desc)
|
||||
: key{std::move(_key)}
|
||||
, value{fmt::format("{}", std::forward<Tp>(_val))}
|
||||
, description{std::move(_desc)}
|
||||
{}
|
||||
|
||||
std::vector<output_key>
|
||||
output_keys(std::string _tag = {});
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
+13
-20
@@ -20,8 +20,7 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "output_file.hpp"
|
||||
#include "config.hpp"
|
||||
#include "output_stream.hpp"
|
||||
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
@@ -29,6 +28,9 @@
|
||||
#include <fmt/core.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <string_view>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
@@ -42,9 +44,9 @@ const auto stderr_names = std::unordered_set<std::string_view>{"stderr", "STDERR
|
||||
} // namespace
|
||||
|
||||
std::string
|
||||
get_output_filename(std::string_view fname, std::string_view ext)
|
||||
get_output_filename(const output_config& cfg, std::string_view fname, std::string_view ext)
|
||||
{
|
||||
auto cfg_output_path = tool::format(tool::get_config().output_path);
|
||||
auto cfg_output_path = tool::format_path(cfg.output_path);
|
||||
|
||||
// add a period to provided file extension if necessary
|
||||
constexpr auto period = std::string_view{"."};
|
||||
@@ -53,7 +55,7 @@ get_output_filename(std::string_view fname, std::string_view ext)
|
||||
fmt::format("{}{}", (!ext.empty() && ext.find('.') != 0) ? period : noperiod, ext);
|
||||
|
||||
auto output_path = fs::path{cfg_output_path};
|
||||
auto output_prefix = tool::format(tool::get_config().output_file);
|
||||
auto output_prefix = tool::format_path(cfg.output_file);
|
||||
|
||||
if(fs::exists(output_path) && !fs::is_directory(fs::status(output_path)))
|
||||
{
|
||||
@@ -66,7 +68,8 @@ get_output_filename(std::string_view fname, std::string_view ext)
|
||||
fs::create_directories(output_path);
|
||||
}
|
||||
|
||||
auto _ofname = tool::format(output_path / fmt::format("{}_{}{}", output_prefix, fname, _ext));
|
||||
auto _ofname =
|
||||
tool::format_path(output_path / fmt::format("{}_{}{}", output_prefix, fname, _ext));
|
||||
|
||||
// the prefix may contain a subdirectory
|
||||
if(auto _ofname_path = fs::path{_ofname}.parent_path(); !fs::exists(_ofname_path))
|
||||
@@ -83,10 +86,10 @@ get_output_filename(std::string_view fname, std::string_view ext)
|
||||
return _ofname;
|
||||
}
|
||||
|
||||
output_stream_t
|
||||
get_output_stream(std::string_view fname, std::string_view ext)
|
||||
output_stream
|
||||
get_output_stream(const output_config& cfg, std::string_view fname, std::string_view ext)
|
||||
{
|
||||
auto cfg_output_path = tool::format(tool::get_config().output_path);
|
||||
auto cfg_output_path = tool::format_path(cfg.output_path);
|
||||
|
||||
if(stdout_names.count(cfg_output_path) > 0 || stdout_names.count(fname) > 0)
|
||||
return {&std::cout, [](auto*&) {}};
|
||||
@@ -95,7 +98,7 @@ get_output_stream(std::string_view fname, std::string_view ext)
|
||||
else if(cfg_output_path.empty() || fname.empty())
|
||||
return {&std::clog, [](auto*&) {}};
|
||||
|
||||
auto output_file = get_output_filename(fname, ext);
|
||||
auto output_file = get_output_filename(cfg, fname, ext);
|
||||
auto* _ofs = new std::ofstream{output_file};
|
||||
|
||||
LOG_IF(FATAL, !_ofs && !*_ofs) << fmt::format("Failed to open {} for output", output_file);
|
||||
@@ -107,15 +110,5 @@ get_output_stream(std::string_view fname, std::string_view ext)
|
||||
v = nullptr;
|
||||
}};
|
||||
}
|
||||
|
||||
output_file::~output_file()
|
||||
{
|
||||
if(m_os.stream)
|
||||
ROCP_INFO << "Closing result file: " << m_name;
|
||||
else
|
||||
ROCP_WARNING << "output_file::~output_file does not have a output stream instance!";
|
||||
|
||||
m_os.close();
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,85 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "output_config.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
// Cleanup callback for a stream. It receives the pointer by reference so that it can
// release the stream and reset the caller's pointer.
using ostream_dtor_t = void (*)(std::ostream*&);

using output_stream_pair_t = std::pair<std::ostream*, ostream_dtor_t>;

// Move-only handle pairing an output stream with the callback that releases it.
// The callback runs from close(), which the destructor invokes.
struct output_stream
{
    output_stream() = default;
    output_stream(std::ostream* _os, ostream_dtor_t _dtor)
    : stream{_os}
    , dtor{_dtor}
    {}

    ~output_stream() { close(); }
    output_stream(const output_stream&) = delete;

    // Transfer ownership. The source is reset so that its destructor does not close
    // (and possibly delete) the stream now held by this object.
    output_stream(output_stream&& rhs) noexcept
    : stream{rhs.stream}
    , dtor{rhs.dtor}
    {
        rhs.stream = nullptr;
        rhs.dtor   = nullptr;
    }

    output_stream& operator=(const output_stream&) = delete;

    // Release whatever this object currently holds, then take over rhs' stream.
    output_stream& operator=(output_stream&& rhs) noexcept
    {
        if(this != &rhs)
        {
            close();
            stream     = rhs.stream;
            dtor       = rhs.dtor;
            rhs.stream = nullptr;
            rhs.dtor   = nullptr;
        }
        return *this;
    }

    // True when a stream is attached.
    explicit operator bool() const { return stream != nullptr; }

    // Write a value and flush immediately. Falls back to std::cerr when no stream is
    // attached. Returns the underlying std::ostream so further insertions can be chained.
    template <typename Tp>
    std::ostream& operator<<(Tp&& value)
    {
        return ((stream) ? *stream : std::cerr) << std::forward<Tp>(value) << std::flush;
    }

    // Flush the stream (if any) and hand it to the cleanup callback (if any). Whether
    // the pointer is reset afterwards is up to the callback.
    void close()
    {
        if(stream) (*stream) << std::flush;
        if(dtor) dtor(stream);
    }

    // True when the attached stream is a std::ofstream.
    bool writes_to_file() const { return (dynamic_cast<std::ofstream*>(stream) != nullptr); }

    std::ostream*  stream = nullptr;
    ostream_dtor_t dtor   = nullptr;
};
|
||||
|
||||
std::string
|
||||
get_output_filename(const output_config& cfg, std::string_view fname, std::string_view ext);
|
||||
|
||||
output_stream
|
||||
get_output_stream(const output_config& cfg, std::string_view fname, std::string_view ext);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
+10
-20
@@ -20,28 +20,18 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "helper.hpp"
|
||||
#include "config.hpp"
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
::rocprofiler::sdk::buffer_name_info_t<std::string_view>
|
||||
get_buffer_id_names()
|
||||
namespace rocprofiler
|
||||
{
|
||||
return ::rocprofiler::sdk::get_buffer_tracing_names();
|
||||
}
|
||||
|
||||
::rocprofiler::sdk::callback_name_info_t<std::string_view>
|
||||
get_callback_id_names()
|
||||
namespace tool
|
||||
{
|
||||
return ::rocprofiler::sdk::get_callback_tracing_names();
|
||||
}
|
||||
struct timestamps_t
|
||||
{
|
||||
rocprofiler_timestamp_t app_start_time;
|
||||
rocprofiler_timestamp_t app_end_time;
|
||||
};
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
+9
-1
@@ -21,9 +21,9 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "tmp_file.hpp"
|
||||
#include "config.hpp"
|
||||
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
|
||||
namespace fs = ::rocprofiler::common::filesystem;
|
||||
|
||||
@@ -38,6 +38,8 @@ tmp_file::fopen(const char* _mode)
|
||||
// if the filepath does not exist, open in out mode to create it
|
||||
std::ofstream _ofs{filename};
|
||||
}
|
||||
|
||||
ROCP_INFO << "opening (via fopen) temporary file: '" << filename << "'...";
|
||||
file = std::fopen(filename.c_str(), _mode);
|
||||
if(file) fd = ::fileno(file);
|
||||
|
||||
@@ -59,10 +61,12 @@ tmp_file::flush()
|
||||
{
|
||||
if(stream.is_open())
|
||||
{
|
||||
ROCP_INFO << "flushing temporary file: '" << filename << "'...";
|
||||
stream.flush();
|
||||
}
|
||||
else if(file != nullptr)
|
||||
{
|
||||
ROCP_INFO << "flushing temporary file: '" << filename << "'...";
|
||||
int _ret = fflush(file);
|
||||
int _cnt = 0;
|
||||
while(_ret == EAGAIN || _ret == EINTR)
|
||||
@@ -84,11 +88,13 @@ tmp_file::close()
|
||||
|
||||
if(stream.is_open())
|
||||
{
|
||||
ROCP_INFO << "closing temporary file: '" << filename << "'...";
|
||||
stream.close();
|
||||
return !stream.is_open();
|
||||
}
|
||||
else if(file != nullptr)
|
||||
{
|
||||
ROCP_INFO << "closing temporary file: '" << filename << "'...";
|
||||
auto _ret = fclose(file);
|
||||
if(_ret == 0)
|
||||
{
|
||||
@@ -114,6 +120,7 @@ tmp_file::open(std::ios::openmode _mode)
|
||||
_ofs.open(filename, std::ofstream::binary | std::ofstream::out);
|
||||
}
|
||||
|
||||
ROCP_INFO << "opening temporary file: '" << filename << "'...";
|
||||
stream.open(filename, _mode);
|
||||
return (stream.is_open() && stream.good());
|
||||
}
|
||||
@@ -124,6 +131,7 @@ tmp_file::remove()
|
||||
close();
|
||||
if(fs::exists(filename))
|
||||
{
|
||||
ROCP_INFO << "removing temporary file: '" << filename << "'...";
|
||||
auto _ret = ::remove(filename.c_str());
|
||||
return (_ret == 0);
|
||||
}
|
||||
+24
-15
@@ -20,27 +20,36 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include "tmp_file_buffer.hpp"
|
||||
#include "domain_type.hpp"
|
||||
|
||||
#include "helper.hpp"
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <deque>
|
||||
#include <utility>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
void
|
||||
write_perfetto(
|
||||
tool_table* tool_functions,
|
||||
uint64_t pid,
|
||||
std::vector<rocprofiler_agent_v0_t> agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>* kernel_dispatch_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>* memory_copy_data,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* scratch_memory_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data);
|
||||
std::string
|
||||
compose_tmp_file_name(const output_config& cfg, domain_type buffer_type)
|
||||
{
|
||||
return rocprofiler::tool::format_path(fmt::format("{}/.rocprofv3/{}-{}.dat",
|
||||
cfg.tmp_directory,
|
||||
"%ppid%-%pid%",
|
||||
get_domain_trace_file_name(buffer_type)));
|
||||
}
|
||||
|
||||
// Access the process-wide callback that maps a domain to its temporary file name.
// A mutable reference is returned so callers can install their own callback. The
// initial callback logs a warning and derives the name from an environment-loaded
// configuration.
tmp_file_name_callback_t&
get_tmp_file_name_callback()
{
    static auto _callback = tmp_file_name_callback_t([](domain_type _domain) -> std::string {
        ROCP_CI_LOG(WARNING) << "rocprofv3 does not have a tmp file name callback defined for "
                             << get_domain_column_name(_domain) << ".";
        const auto _env_cfg = output_config::load_from_env();
        return compose_tmp_file_name(_env_cfg, _domain);
    });

    return _callback;
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,217 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "domain_type.hpp"
|
||||
#include "output_config.hpp"
|
||||
#include "tmp_file.hpp"
|
||||
|
||||
#include "lib/common/container/ring_buffer.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <deque>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
template <typename Tp>
|
||||
using ring_buffer_t = rocprofiler::common::container::ring_buffer<Tp>;
|
||||
|
||||
using tmp_file_name_callback_t = std::function<std::string(domain_type)>;
|
||||
|
||||
std::string
|
||||
compose_tmp_file_name(const output_config& cfg, domain_type buffer_type);
|
||||
|
||||
tmp_file_name_callback_t&
|
||||
get_tmp_file_name_callback();
|
||||
|
||||
template <typename Tp>
|
||||
struct file_buffer
|
||||
{
|
||||
file_buffer() = delete;
|
||||
file_buffer(domain_type _domain)
|
||||
: domain{_domain}
|
||||
, buffer{16 * static_cast<uint64_t>(::rocprofiler::common::units::get_page_size())}
|
||||
, file{get_tmp_file_name_callback()(_domain)}
|
||||
{}
|
||||
|
||||
~file_buffer() = default;
|
||||
file_buffer(const file_buffer&) = delete;
|
||||
file_buffer(file_buffer&&) noexcept = default;
|
||||
file_buffer& operator=(const file_buffer&) = delete;
|
||||
file_buffer& operator=(file_buffer&&) noexcept = default;
|
||||
|
||||
domain_type domain = {};
|
||||
ring_buffer_t<Tp> buffer = {};
|
||||
tmp_file file;
|
||||
};
|
||||
|
||||
template <typename Tp>
|
||||
struct file_buffer<ring_buffer_t<Tp>>
|
||||
{
|
||||
static_assert(std::is_void<Tp>::value && std::is_empty<Tp>::value,
|
||||
"error! instantiated with ring_buffer_t<Tp> instead of Tp");
|
||||
};
|
||||
|
||||
template <typename Tp>
|
||||
file_buffer<Tp>*&
|
||||
get_tmp_file_buffer(domain_type type)
|
||||
{
|
||||
static file_buffer<Tp>* val = new file_buffer<Tp>{type};
|
||||
return val;
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
void
|
||||
offload_buffer(domain_type type)
|
||||
{
|
||||
auto* filebuf = get_tmp_file_buffer<Tp>(type);
|
||||
|
||||
if(!filebuf)
|
||||
{
|
||||
ROCP_CI_LOG(WARNING) << "rocprofv3 cannot offload buffer for "
|
||||
<< get_domain_column_name(type) << ". Buffer has been destroyed.";
|
||||
return;
|
||||
}
|
||||
|
||||
auto _lk = std::lock_guard<std::mutex>(filebuf->file.file_mutex);
|
||||
[[maybe_unused]] static auto _success = filebuf->file.open();
|
||||
auto& _fs = filebuf->file.stream;
|
||||
filebuf->file.file_pos.emplace(_fs.tellg());
|
||||
filebuf->buffer.save(_fs);
|
||||
filebuf->buffer.clear();
|
||||
CHECK(filebuf->buffer.is_empty() == true);
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
void
|
||||
write_ring_buffer(Tp _v, domain_type type)
|
||||
{
|
||||
auto* filebuf = get_tmp_file_buffer<Tp>(type);
|
||||
|
||||
if(!filebuf)
|
||||
{
|
||||
ROCP_CI_LOG(WARNING) << "rocprofv3 is dropping record from domain "
|
||||
<< get_domain_column_name(type) << ". Buffer has been destroyed.";
|
||||
return;
|
||||
}
|
||||
else if(filebuf->buffer.capacity() == 0)
|
||||
{
|
||||
ROCP_CI_LOG(WARNING) << "rocprofv3 is dropping record from domain "
|
||||
<< get_domain_column_name(type) << ". Buffer has a capacity of zero.";
|
||||
return;
|
||||
}
|
||||
|
||||
auto* ptr = filebuf->buffer.request(false);
|
||||
if(ptr == nullptr)
|
||||
{
|
||||
offload_buffer<Tp>(type);
|
||||
ptr = filebuf->buffer.request(false);
|
||||
|
||||
// if failed, try again
|
||||
if(!ptr) ptr = filebuf->buffer.request(false);
|
||||
|
||||
// after second failure, emit warning message
|
||||
ROCP_CI_LOG_IF(WARNING, !ptr)
|
||||
<< "rocprofv3 is dropping record from domain " << get_domain_column_name(type)
|
||||
<< ". No space in buffer: "
|
||||
<< fmt::format(
|
||||
"capacity={}, record_size={}, used_count={}, free_count={} | raw_info=[{}]",
|
||||
filebuf->buffer.capacity(),
|
||||
filebuf->buffer.data_size(),
|
||||
filebuf->buffer.count(),
|
||||
filebuf->buffer.free(),
|
||||
filebuf->buffer.as_string());
|
||||
}
|
||||
|
||||
if(ptr)
|
||||
{
|
||||
if constexpr(std::is_move_constructible<Tp>::value)
|
||||
{
|
||||
new(ptr) Tp{std::move(_v)};
|
||||
}
|
||||
else if constexpr(std::is_move_assignable<Tp>::value)
|
||||
{
|
||||
*ptr = std::move(_v);
|
||||
}
|
||||
else if constexpr(std::is_copy_constructible<Tp>::value)
|
||||
{
|
||||
new(ptr) Tp{_v};
|
||||
}
|
||||
else if constexpr(std::is_copy_assignable<Tp>::value)
|
||||
{
|
||||
*ptr = _v;
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(std::is_void<Tp>::value,
|
||||
"data type is neither move/copy constructible nor move/copy assignable");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
void
|
||||
flush_tmp_buffer(domain_type type)
|
||||
{
|
||||
auto* filebuf = get_tmp_file_buffer<Tp>(type);
|
||||
if(filebuf && !filebuf->buffer.is_empty()) offload_buffer<Tp>(type);
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
void
|
||||
read_tmp_file(domain_type type)
|
||||
{
|
||||
auto* filebuf = get_tmp_file_buffer<Tp>(type);
|
||||
|
||||
if(!filebuf)
|
||||
{
|
||||
ROCP_CI_LOG(WARNING) << "rocprofv3 cannot read tmp file for "
|
||||
<< get_domain_column_name(type) << ". Buffer has been destroyed.";
|
||||
return;
|
||||
}
|
||||
|
||||
auto _lk = std::lock_guard<std::mutex>{filebuf->file.file_mutex};
|
||||
auto& _fs = filebuf->file.stream;
|
||||
if(_fs.is_open()) _fs.close();
|
||||
filebuf->file.open(std::ios::binary | std::ios::in);
|
||||
// for(auto itr : filebuf->file.file_pos)
|
||||
// {
|
||||
// _fs.seekg(itr); // set to the absolute position
|
||||
// if(_fs.eof()) break;
|
||||
// auto _buffer = ring_buffer_t<Tp>{};
|
||||
// _buffer.load(_fs);
|
||||
// _data.emplace_back(std::move(_buffer));
|
||||
// }
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -4,37 +4,9 @@
|
||||
|
||||
rocprofiler_activate_clang_tidy()
|
||||
|
||||
set(TOOL_HEADERS
|
||||
buffered_output.hpp
|
||||
config.hpp
|
||||
csv.hpp
|
||||
domain_type.hpp
|
||||
generateCSV.hpp
|
||||
generateJSON.hpp
|
||||
generateOTF2.hpp
|
||||
generatePerfetto.hpp
|
||||
generateStats.hpp
|
||||
helper.hpp
|
||||
output_file.hpp
|
||||
statistics.hpp
|
||||
tmp_file_buffer.hpp
|
||||
tmp_file.hpp)
|
||||
set(TOOL_HEADERS config.hpp helper.hpp)
|
||||
|
||||
set(TOOL_SOURCES
|
||||
config.cpp
|
||||
domain_type.cpp
|
||||
generateCSV.cpp
|
||||
generateJSON.cpp
|
||||
generateOTF2.cpp
|
||||
generatePerfetto.cpp
|
||||
generateStats.cpp
|
||||
helper.cpp
|
||||
main.c
|
||||
output_file.cpp
|
||||
statistics.cpp
|
||||
tmp_file_buffer.cpp
|
||||
tmp_file.cpp
|
||||
tool.cpp)
|
||||
set(TOOL_SOURCES config.cpp main.c tool.cpp)
|
||||
|
||||
add_library(rocprofiler-sdk-tool SHARED)
|
||||
target_sources(rocprofiler-sdk-tool PRIVATE ${TOOL_SOURCES} ${TOOL_HEADERS})
|
||||
@@ -46,6 +18,7 @@ target_link_libraries(
|
||||
rocprofiler-sdk::rocprofiler-sdk-build-flags
|
||||
rocprofiler-sdk::rocprofiler-sdk-memcheck
|
||||
rocprofiler-sdk::rocprofiler-sdk-common-library
|
||||
rocprofiler-sdk::rocprofiler-sdk-output-library
|
||||
rocprofiler-sdk::rocprofiler-sdk-cereal
|
||||
rocprofiler-sdk::rocprofiler-sdk-perfetto
|
||||
rocprofiler-sdk::rocprofiler-sdk-otf2)
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/output/output_key.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/cxx/details/tokenize.hpp>
|
||||
|
||||
@@ -55,20 +56,6 @@ namespace tool
|
||||
{
|
||||
namespace
|
||||
{
|
||||
/// Heap-allocates a copy of `_val` and returns the owning raw pointer.
///
/// `Tp&&` is a forwarding reference, so an lvalue argument deduces `Tp` as a reference
/// type; `new auto(...)` allocates the decayed value type and therefore works for
/// lvalues and rvalues alike. The argument is forwarded, so rvalues are moved instead
/// of copied. The caller owns the returned object.
template <typename Tp>
auto
as_pointer(Tp&& _val)
{
    return new auto(std::forward<Tp>(_val));
}
|
||||
|
||||
std::string*
|
||||
get_local_datetime(const std::string& dt_format, std::time_t*& dt_curr);
|
||||
|
||||
std::time_t* launch_time = nullptr;
|
||||
const auto* launch_clock = as_pointer(std::chrono::system_clock::now());
|
||||
const auto* launch_datetime =
|
||||
get_local_datetime(get_env("ROCP_TIME_FORMAT", "%F_%H.%M"), launch_time);
|
||||
const auto env_regexes =
|
||||
new std::array<std::regex, 3>{std::regex{"(.*)%(env|ENV)\\{([A-Z0-9_]+)\\}%(.*)"},
|
||||
std::regex{"(.*)\\$(env|ENV)\\{([A-Z0-9_]+)\\}(.*)"},
|
||||
@@ -79,38 +66,6 @@ const auto env_regexes =
|
||||
// - %q{USER} Compatibility with NVIDIA
|
||||
//
|
||||
|
||||
/// Formats a point in time as local time according to `dt_format` (strftime syntax).
///
/// \param dt_format  strftime-style format string
/// \param _dt_curr   time to format; when null, it is set to a newly allocated
///                   std::time_t holding the current time (ownership passes to the caller)
/// \return heap-allocated formatted string (owned by the caller), or nullptr when the
///         time cannot be converted to local time or strftime produces no output
std::string*
get_local_datetime(const std::string& dt_format, std::time_t*& _dt_curr)
{
    constexpr auto strsize = 512;

    if(!_dt_curr) _dt_curr = new std::time_t{std::time(nullptr)};

    // std::localtime reports failure with a null pointer; handing that to strftime
    // would be undefined behavior
    const std::tm* _local = std::localtime(_dt_curr);
    if(_local == nullptr) return nullptr;

    // value-initialization already zero-fills the buffer
    char mbstr[strsize] = {};

    if(std::strftime(mbstr, sizeof(mbstr) - 1, dt_format.c_str(), _local) != 0)
        return new std::string{mbstr};

    return nullptr;
}
|
||||
|
||||
std::string
|
||||
get_hostname()
|
||||
{
|
||||
auto _hostname_buff = std::array<char, PATH_MAX>{};
|
||||
_hostname_buff.fill('\0');
|
||||
if(gethostname(_hostname_buff.data(), _hostname_buff.size() - 1) != 0)
|
||||
{
|
||||
auto _err = errno;
|
||||
ROCP_WARNING << "Hostname unknown. gethostname failed with error code " << _err << ": "
|
||||
<< strerror(_err);
|
||||
return std::string{"UNKNOWN_HOSTNAME"};
|
||||
}
|
||||
|
||||
return std::string{_hostname_buff.data()};
|
||||
}
|
||||
|
||||
inline bool
|
||||
not_is_space(int ch)
|
||||
{
|
||||
@@ -139,29 +94,6 @@ trim(std::string s, bool (*f)(int) = not_is_space)
|
||||
return s;
|
||||
}
|
||||
|
||||
/// Reads /proc/<_id>/task/<_id>/children and returns the positive process ids listed
/// there. `_id` defaults to the parent of the calling process, which is why the result
/// is treated as the caller's siblings. Returns an empty vector when the file cannot
/// be read.
inline std::vector<pid_t>
get_siblings(pid_t _id = getppid())
{
    const auto id_str   = std::to_string(_id);
    std::ifstream children{"/proc/" + id_str + "/task/" + id_str + "/children"};

    auto pids = std::vector<pid_t>{};

    // stop at the first failed extraction or the first non-positive value
    for(pid_t child = 0; (children >> child) && child > 0; child = 0)
        pids.emplace_back(child);

    return pids;
}
|
||||
|
||||
inline auto
|
||||
get_num_siblings(pid_t _id = getppid())
|
||||
{
|
||||
return get_siblings(_id).size();
|
||||
}
|
||||
|
||||
// replace unsupported special chars with space
|
||||
void
|
||||
handle_special_chars(std::string& str)
|
||||
@@ -256,295 +188,13 @@ parse_counters(std::string line)
|
||||
}
|
||||
} // namespace
|
||||
|
||||
int
|
||||
get_mpi_size()
|
||||
{
|
||||
static int _v = get_env<int>("OMPI_COMM_WORLD_SIZE",
|
||||
get_env<int>("MV2_COMM_WORLD_SIZE", get_env<int>("MPI_SIZE", 0)));
|
||||
return _v;
|
||||
}
|
||||
|
||||
int
|
||||
get_mpi_rank()
|
||||
{
|
||||
static int _v = get_env<int>("OMPI_COMM_WORLD_RANK",
|
||||
get_env<int>("MV2_COMM_WORLD_RANK", get_env<int>("MPI_RANK", -1)));
|
||||
return _v;
|
||||
}
|
||||
|
||||
config::config()
|
||||
: kernel_filter_range{get_kernel_filter_range(
|
||||
: base_type{base_type::load_from_env()}
|
||||
, kernel_filter_range{get_kernel_filter_range(
|
||||
get_env("ROCPROF_KERNEL_FILTER_RANGE", std::string{}))}
|
||||
, counters{parse_counters(get_env("ROCPROF_COUNTERS", std::string{}))}
|
||||
{
|
||||
auto to_upper = [](std::string val) {
|
||||
for(auto& vitr : val)
|
||||
vitr = toupper(vitr);
|
||||
return val;
|
||||
};
|
||||
|
||||
auto output_format = get_env("ROCPROF_OUTPUT_FORMAT", "CSV");
|
||||
auto entries = std::set<std::string>{};
|
||||
for(const auto& itr : sdk::parse::tokenize(output_format, " \t,;:"))
|
||||
entries.emplace(to_upper(itr));
|
||||
|
||||
csv_output = entries.count("CSV") > 0 || entries.empty();
|
||||
json_output = entries.count("JSON") > 0;
|
||||
pftrace_output = entries.count("PFTRACE") > 0;
|
||||
otf2_output = entries.count("OTF2") > 0;
|
||||
|
||||
const auto supported_formats = std::set<std::string_view>{"CSV", "JSON", "PFTRACE", "OTF2"};
|
||||
for(const auto& itr : entries)
|
||||
{
|
||||
LOG_IF(FATAL, supported_formats.count(itr) == 0)
|
||||
<< "Unsupported output format type: " << itr;
|
||||
}
|
||||
if(kernel_filter_include.empty()) kernel_filter_include = std::string(".*");
|
||||
|
||||
const auto supported_perfetto_backends = std::set<std::string_view>{"inprocess", "system"};
|
||||
LOG_IF(FATAL, supported_perfetto_backends.count(perfetto_backend) == 0)
|
||||
<< "Unsupported perfetto backend type: " << perfetto_backend;
|
||||
|
||||
if(stats_summary_unit == "sec")
|
||||
stats_summary_unit_value = common::units::sec;
|
||||
else if(stats_summary_unit == "msec")
|
||||
stats_summary_unit_value = common::units::msec;
|
||||
else if(stats_summary_unit == "usec")
|
||||
stats_summary_unit_value = common::units::usec;
|
||||
else if(stats_summary_unit == "nsec")
|
||||
stats_summary_unit_value = common::units::nsec;
|
||||
else
|
||||
{
|
||||
ROCP_FATAL << "Unsupported summary units value: " << stats_summary_unit;
|
||||
}
|
||||
|
||||
if(auto _summary_grps = get_env("ROCPROF_STATS_SUMMARY_GROUPS", ""); !_summary_grps.empty())
|
||||
{
|
||||
stats_summary_groups =
|
||||
sdk::parse::tokenize(_summary_grps, std::vector<std::string_view>{"##@@##"});
|
||||
|
||||
// remove any empty strings (just in case these slipped through)
|
||||
stats_summary_groups.erase(std::remove_if(stats_summary_groups.begin(),
|
||||
stats_summary_groups.end(),
|
||||
[](const auto& itr) { return itr.empty(); }),
|
||||
stats_summary_groups.end());
|
||||
}
|
||||
|
||||
// enable summary output if any of these are enabled
|
||||
summary_output = (stats_summary || stats_summary_per_domain || !stats_summary_groups.empty());
|
||||
}
|
||||
|
||||
std::vector<output_key>
|
||||
output_keys(std::string _tag)
|
||||
{
|
||||
using strpair_t = std::pair<std::string, std::string>;
|
||||
|
||||
auto _cmdline = common::read_command_line(getpid());
|
||||
|
||||
if(_tag.empty() && !_cmdline.empty()) _tag = ::basename(_cmdline.front().c_str());
|
||||
|
||||
std::string _argv_string = {}; // entire argv cmd
|
||||
std::string _args_string = {}; // cmdline args
|
||||
std::string _argt_string = _tag; // prefix + cmdline args
|
||||
const std::string& _tag0_string = _tag; // only the basic prefix
|
||||
auto _options = std::vector<output_key>{};
|
||||
|
||||
auto _replace = [](auto& _v, const strpair_t& pitr) {
|
||||
auto pos = std::string::npos;
|
||||
while((pos = _v.find(pitr.first)) != std::string::npos)
|
||||
_v.replace(pos, pitr.first.length(), pitr.second);
|
||||
};
|
||||
|
||||
if(_cmdline.size() > 1 && _cmdline.at(1) == "--") _cmdline.erase(_cmdline.begin() + 1);
|
||||
|
||||
for(auto& itr : _cmdline)
|
||||
{
|
||||
itr = trim(itr);
|
||||
_replace(itr, {"/", "_"});
|
||||
while(!itr.empty() && itr.at(0) == '.')
|
||||
itr = itr.substr(1);
|
||||
while(!itr.empty() && itr.at(0) == '_')
|
||||
itr = itr.substr(1);
|
||||
}
|
||||
|
||||
if(!_cmdline.empty())
|
||||
{
|
||||
for(size_t i = 0; i < _cmdline.size(); ++i)
|
||||
{
|
||||
const auto _l = std::string{(i == 0) ? "" : "_"};
|
||||
auto _v = _cmdline.at(i);
|
||||
_argv_string += _l + _v;
|
||||
if(i > 0)
|
||||
{
|
||||
_argt_string += (i > 1) ? (_l + _v) : _v;
|
||||
_args_string += (i > 1) ? (_l + _v) : _v;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto _mpi_size = get_mpi_size();
|
||||
auto _mpi_rank = get_mpi_rank();
|
||||
|
||||
auto _dmp_size = fmt::format("{}", (_mpi_size) > 0 ? _mpi_size : 1);
|
||||
auto _dmp_rank = fmt::format("{}", (_mpi_rank) > 0 ? _mpi_rank : 0);
|
||||
auto _proc_id = fmt::format("{}", getpid());
|
||||
auto _parent_id = fmt::format("{}", getppid());
|
||||
auto _pgroup_id = fmt::format("{}", getpgid(getpid()));
|
||||
auto _session_id = fmt::format("{}", getsid(getpid()));
|
||||
auto _proc_size = fmt::format("{}", get_num_siblings());
|
||||
auto _pwd_string = get_env<std::string>("PWD", ".");
|
||||
auto _slurm_job_id = get_env<std::string>("SLURM_JOB_ID", "0");
|
||||
auto _slurm_proc_id = get_env("SLURM_PROCID", _dmp_rank);
|
||||
|
||||
auto _uniq_id = _proc_id;
|
||||
if(get_env<int32_t>("SLURM_PROCID", -1) >= 0)
|
||||
{
|
||||
_uniq_id = _slurm_proc_id;
|
||||
}
|
||||
else if(_mpi_size > 0 || _mpi_rank >= 0)
|
||||
{
|
||||
_uniq_id = _dmp_rank;
|
||||
}
|
||||
|
||||
for(auto&& itr : std::initializer_list<output_key>{
|
||||
{"%argv%", _argv_string, "Entire command-line condensed into a single string"},
|
||||
{"%argt%",
|
||||
_argt_string,
|
||||
"Similar to `%argv%` except basename of first command line argument"},
|
||||
{"%args%", _args_string, "All command line arguments condensed into a single string"},
|
||||
{"%tag%", _tag0_string, "Basename of first command line argument"}})
|
||||
{
|
||||
_options.emplace_back(itr);
|
||||
}
|
||||
|
||||
if(!_cmdline.empty())
|
||||
{
|
||||
for(size_t i = 0; i < _cmdline.size(); ++i)
|
||||
{
|
||||
auto _v = _cmdline.at(i);
|
||||
_options.emplace_back(fmt::format("%arg{}%", i), _v, fmt::format("Argument #{}", i));
|
||||
}
|
||||
}
|
||||
|
||||
auto _launch_time = (launch_datetime) ? *launch_datetime : std::string{".UNKNOWN_LAUNCH_TIME."};
|
||||
auto _hostname = get_hostname();
|
||||
|
||||
for(auto&& itr : std::initializer_list<output_key>{
|
||||
{"%hostname%", _hostname, "Network hostname"},
|
||||
{"%pid%", _proc_id, "Process identifier"},
|
||||
{"%ppid%", _parent_id, "Parent process identifier"},
|
||||
{"%pgid%", _pgroup_id, "Process group identifier"},
|
||||
{"%psid%", _session_id, "Process session identifier"},
|
||||
{"%psize%", _proc_size, "Number of sibling process"},
|
||||
{"%job%", _slurm_job_id, "SLURM_JOB_ID env variable"},
|
||||
{"%rank%", _slurm_proc_id, "MPI/UPC++ rank"},
|
||||
{"%size%", _dmp_size, "MPI/UPC++ size"},
|
||||
{"%nid%", _uniq_id, "%rank% if possible, otherwise %pid%"},
|
||||
{"%launch_time%", _launch_time, "Data and/or time of run according to time format"},
|
||||
})
|
||||
{
|
||||
_options.emplace_back(itr);
|
||||
}
|
||||
|
||||
for(auto&& itr : std::initializer_list<output_key>{
|
||||
{"%h", _hostname, "Shorthand for %hostname%"},
|
||||
{"%p", _proc_id, "Shorthand for %pid%"},
|
||||
{"%j", _slurm_job_id, "Shorthand for %job%"},
|
||||
{"%r", _slurm_proc_id, "Shorthand for %rank%"},
|
||||
{"%s", _dmp_size, "Shorthand for %size"},
|
||||
})
|
||||
{
|
||||
_options.emplace_back(itr);
|
||||
}
|
||||
|
||||
return _options;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
std::string
|
||||
format_impl(std::string _fpath, const std::vector<output_key>& _keys)
|
||||
{
|
||||
if(_fpath.find('%') == std::string::npos && _fpath.find('$') == std::string::npos)
|
||||
return _fpath;
|
||||
|
||||
auto _replace = [](auto& _v, const output_key& pitr) {
|
||||
auto pos = std::string::npos;
|
||||
while((pos = _v.find(pitr.key)) != std::string::npos)
|
||||
_v.replace(pos, pitr.key.length(), pitr.value);
|
||||
};
|
||||
|
||||
for(auto&& itr : _keys)
|
||||
_replace(_fpath, itr);
|
||||
|
||||
// environment and configuration variables
|
||||
try
|
||||
{
|
||||
auto strip_leading_and_replace =
|
||||
[](std::string_view inp_v, std::initializer_list<char> keys, const char* val) {
|
||||
auto inp = std::string{inp_v};
|
||||
for(auto key : keys)
|
||||
{
|
||||
auto pos = std::string::npos;
|
||||
while((pos = inp.find(key)) == 0)
|
||||
inp = inp.substr(pos + 1);
|
||||
|
||||
while((pos = inp.find(key)) != std::string::npos)
|
||||
inp = inp.replace(pos, 1, val);
|
||||
}
|
||||
return inp;
|
||||
};
|
||||
|
||||
for(const auto& _re : *env_regexes)
|
||||
{
|
||||
while(std::regex_search(_fpath, _re))
|
||||
{
|
||||
auto _var = std::regex_replace(_fpath, _re, "$3");
|
||||
std::string _val = get_env<std::string>(_var, "");
|
||||
_val = strip_leading_and_replace(_val, {'\t', ' ', '/'}, "_");
|
||||
auto _beg = std::regex_replace(_fpath, _re, "$1");
|
||||
auto _end = std::regex_replace(_fpath, _re, "$4");
|
||||
_fpath = fmt::format("{}{}{}", _beg, _val, _end);
|
||||
}
|
||||
}
|
||||
} catch(std::exception& _e)
|
||||
{
|
||||
ROCP_WARNING << "[rocprofiler] " << __FUNCTION__ << " threw an exception :: " << _e.what()
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
// remove %arg<N>% where N >= argc
|
||||
try
|
||||
{
|
||||
std::regex _re{"(.*)%(arg[0-9]+)%([-/_]*)(.*)"};
|
||||
while(std::regex_search(_fpath, _re))
|
||||
_fpath = std::regex_replace(_fpath, _re, "$1$4");
|
||||
} catch(std::exception& _e)
|
||||
{
|
||||
ROCP_WARNING << "[rocprofiler] " << __FUNCTION__ << " threw an exception :: " << _e.what()
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
return _fpath;
|
||||
}
|
||||
|
||||
std::string
|
||||
format(std::string _fpath, const std::vector<output_key>& _keys)
|
||||
{
|
||||
if(_fpath.find('%') == std::string::npos && _fpath.find('$') == std::string::npos)
|
||||
return _fpath;
|
||||
|
||||
auto _ref = _fpath;
|
||||
_fpath = format_impl(std::move(_fpath), _keys);
|
||||
|
||||
return (_fpath == _ref) ? _fpath : format(std::move(_fpath), _keys);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::string
|
||||
format(std::string _fpath, const std::string& _tag)
|
||||
{
|
||||
return format(std::move(_fpath), output_keys(_tag));
|
||||
if(kernel_filter_include.empty()) kernel_filter_include = std::string{".*"};
|
||||
}
|
||||
|
||||
std::string
|
||||
@@ -566,11 +216,5 @@ initialize()
|
||||
{
|
||||
(void) get_config<config_context::global>();
|
||||
}
|
||||
|
||||
output_key::output_key(std::string _key, std::string _val, std::string _desc)
|
||||
: key{std::move(_key)}
|
||||
, value{std::move(_val)}
|
||||
, description{std::move(_desc)}
|
||||
{}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -26,6 +26,9 @@
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/mpl.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
#include "lib/output/format_path.hpp"
|
||||
#include "lib/output/output_config.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
|
||||
@@ -41,7 +44,6 @@ namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
namespace fs = common::filesystem;
|
||||
using common::get_env;
|
||||
|
||||
struct config;
|
||||
@@ -49,11 +51,6 @@ struct config;
|
||||
enum class config_context
|
||||
{
|
||||
global = 0,
|
||||
att_plugin,
|
||||
cli_plugin,
|
||||
ctf_plugin,
|
||||
file_plugin,
|
||||
perfetto_plugin,
|
||||
};
|
||||
|
||||
void
|
||||
@@ -66,66 +63,43 @@ get_config();
|
||||
std::string
|
||||
format_name(std::string_view _name, const config& = get_config<>());
|
||||
|
||||
std::string
|
||||
format(std::string _fpath, const std::string& _tag = {});
|
||||
|
||||
int
|
||||
get_mpi_size();
|
||||
|
||||
int
|
||||
get_mpi_rank();
|
||||
|
||||
struct config
|
||||
struct config : output_config
|
||||
{
|
||||
using base_type = output_config;
|
||||
|
||||
config();
|
||||
|
||||
bool demangle = get_env("ROCPROF_DEMANGLE_KERNELS", true);
|
||||
bool truncate = get_env("ROCPROF_TRUNCATE_KERNELS", false);
|
||||
bool kernel_trace = get_env("ROCPROF_KERNEL_TRACE", false);
|
||||
bool hsa_core_api_trace = get_env("ROCPROF_HSA_CORE_API_TRACE", false);
|
||||
bool hsa_amd_ext_api_trace = get_env("ROCPROF_HSA_AMD_EXT_API_TRACE", false);
|
||||
bool hsa_image_ext_api_trace = get_env("ROCPROF_HSA_IMAGE_EXT_API_TRACE", false);
|
||||
bool hsa_finalizer_ext_api_trace = get_env("ROCPROF_HSA_FINALIZER_EXT_API_TRACE", false);
|
||||
bool marker_api_trace = get_env("ROCPROF_MARKER_API_TRACE", false);
|
||||
bool memory_copy_trace = get_env("ROCPROF_MEMORY_COPY_TRACE", false);
|
||||
bool scratch_memory_trace = get_env("ROCPROF_SCRATCH_MEMORY_TRACE", false);
|
||||
bool counter_collection = get_env("ROCPROF_COUNTER_COLLECTION", false);
|
||||
bool hip_runtime_api_trace = get_env("ROCPROF_HIP_RUNTIME_API_TRACE", false);
|
||||
bool hip_compiler_api_trace = get_env("ROCPROF_HIP_COMPILER_API_TRACE", false);
|
||||
bool rccl_api_trace = get_env("ROCPROF_RCCL_API_TRACE", false);
|
||||
bool list_metrics = get_env("ROCPROF_LIST_METRICS", false);
|
||||
bool list_metrics_output_file = get_env("ROCPROF_OUTPUT_LIST_METRICS_FILE", false);
|
||||
bool stats = get_env("ROCPROF_STATS", false);
|
||||
bool stats_summary = get_env("ROCPROF_STATS_SUMMARY", false);
|
||||
bool stats_summary_per_domain = get_env("ROCPROF_STATS_SUMMARY_PER_DOMAIN", false);
|
||||
bool csv_output = false;
|
||||
bool json_output = false;
|
||||
bool pftrace_output = false;
|
||||
bool otf2_output = false;
|
||||
bool summary_output = false;
|
||||
bool kernel_rename = get_env("ROCPROF_KERNEL_RENAME", false);
|
||||
int mpi_size = get_mpi_size();
|
||||
int mpi_rank = get_mpi_rank();
|
||||
size_t perfetto_shmem_size_hint = get_env("ROCPROF_PERFETTO_SHMEM_SIZE_HINT_KB", 64);
|
||||
size_t perfetto_buffer_size = get_env("ROCPROF_PERFETTO_BUFFER_SIZE_KB", 1024000);
|
||||
uint64_t stats_summary_unit_value = 1;
|
||||
std::string stats_summary_unit = get_env("ROCPROF_STATS_SUMMARY_UNITS", "nsec");
|
||||
std::string output_path = get_env("ROCPROF_OUTPUT_PATH", fs::current_path().string());
|
||||
std::string output_file =
|
||||
get_env("ROCPROF_OUTPUT_FILE_NAME", fmt::format("%hostname%/{}", getpid()));
|
||||
std::string tmp_directory = get_env("ROCPROF_TMPDIR", output_path);
|
||||
std::string stats_summary_file = get_env("ROCPROF_STATS_SUMMARY_OUTPUT", "stderr");
|
||||
~config() = default;
|
||||
config(const config&) = default;
|
||||
config(config&&) noexcept = default;
|
||||
config& operator=(const config&) = default;
|
||||
config& operator=(config&&) noexcept = default;
|
||||
|
||||
std::string kernel_filter_include =
|
||||
get_env("ROCPROF_KERNEL_FILTER_INCLUDE_REGEX", std::string{".*"});
|
||||
std::string kernel_filter_exclude =
|
||||
get_env("ROCPROF_KERNEL_FILTER_EXCLUDE_REGEX", std::string{});
|
||||
std::string perfetto_buffer_fill_policy =
|
||||
get_env("ROCPROF_PERFETTO_BUFFER_FILL_POLICY", std::string{"discard"});
|
||||
std::string perfetto_backend = get_env("ROCPROF_PERFETTO_BACKEND", std::string{"inprocess"});
|
||||
std::unordered_set<uint32_t> kernel_filter_range = {};
|
||||
std::set<std::string> counters = {};
|
||||
std::vector<std::string> stats_summary_groups = {};
|
||||
bool demangle = get_env("ROCPROF_DEMANGLE_KERNELS", true);
|
||||
bool truncate = get_env("ROCPROF_TRUNCATE_KERNELS", false);
|
||||
bool kernel_trace = get_env("ROCPROF_KERNEL_TRACE", false);
|
||||
bool hsa_core_api_trace = get_env("ROCPROF_HSA_CORE_API_TRACE", false);
|
||||
bool hsa_amd_ext_api_trace = get_env("ROCPROF_HSA_AMD_EXT_API_TRACE", false);
|
||||
bool hsa_image_ext_api_trace = get_env("ROCPROF_HSA_IMAGE_EXT_API_TRACE", false);
|
||||
bool hsa_finalizer_ext_api_trace = get_env("ROCPROF_HSA_FINALIZER_EXT_API_TRACE", false);
|
||||
bool marker_api_trace = get_env("ROCPROF_MARKER_API_TRACE", false);
|
||||
bool memory_copy_trace = get_env("ROCPROF_MEMORY_COPY_TRACE", false);
|
||||
bool scratch_memory_trace = get_env("ROCPROF_SCRATCH_MEMORY_TRACE", false);
|
||||
bool counter_collection = get_env("ROCPROF_COUNTER_COLLECTION", false);
|
||||
bool hip_runtime_api_trace = get_env("ROCPROF_HIP_RUNTIME_API_TRACE", false);
|
||||
bool hip_compiler_api_trace = get_env("ROCPROF_HIP_COMPILER_API_TRACE", false);
|
||||
bool rccl_api_trace = get_env("ROCPROF_RCCL_API_TRACE", false);
|
||||
bool list_metrics = get_env("ROCPROF_LIST_METRICS", false);
|
||||
bool list_metrics_output_file = get_env("ROCPROF_OUTPUT_LIST_METRICS_FILE", false);
|
||||
|
||||
int mpi_size = get_mpi_size();
|
||||
int mpi_rank = get_mpi_rank();
|
||||
|
||||
std::string kernel_filter_include = get_env("ROCPROF_KERNEL_FILTER_INCLUDE_REGEX", ".*");
|
||||
std::string kernel_filter_exclude = get_env("ROCPROF_KERNEL_FILTER_EXCLUDE_REGEX", "");
|
||||
|
||||
std::unordered_set<uint32_t> kernel_filter_range = {};
|
||||
std::set<std::string> counters = {};
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT&) const;
|
||||
@@ -142,8 +116,6 @@ config::save(ArchiveT& ar) const
|
||||
#define CFG_SERIALIZE_MEMBER(VAR) ar(cereal::make_nvp(#VAR, VAR))
|
||||
#define CFG_SERIALIZE_NAMED_MEMBER(NAME, VAR) ar(cereal::make_nvp(NAME, VAR))
|
||||
|
||||
CFG_SERIALIZE_MEMBER(demangle);
|
||||
CFG_SERIALIZE_MEMBER(truncate);
|
||||
CFG_SERIALIZE_MEMBER(kernel_trace);
|
||||
CFG_SERIALIZE_MEMBER(hsa_core_api_trace);
|
||||
CFG_SERIALIZE_MEMBER(hsa_amd_ext_api_trace);
|
||||
@@ -156,29 +128,14 @@ config::save(ArchiveT& ar) const
|
||||
CFG_SERIALIZE_MEMBER(hip_runtime_api_trace);
|
||||
CFG_SERIALIZE_MEMBER(hip_compiler_api_trace);
|
||||
CFG_SERIALIZE_MEMBER(kernel_rename);
|
||||
|
||||
CFG_SERIALIZE_NAMED_MEMBER("summary", stats_summary);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("summary_per_domain", stats_summary_per_domain);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("summary_groups", stats_summary_groups);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("summary_unit", stats_summary_unit);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("summary_file", stats_summary_file);
|
||||
|
||||
CFG_SERIALIZE_MEMBER(perfetto_shmem_size_hint);
|
||||
CFG_SERIALIZE_MEMBER(perfetto_buffer_size);
|
||||
CFG_SERIALIZE_MEMBER(perfetto_buffer_fill_policy);
|
||||
CFG_SERIALIZE_MEMBER(perfetto_backend);
|
||||
|
||||
CFG_SERIALIZE_NAMED_MEMBER("raw_tmp_directory", tmp_directory);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("raw_output_path", output_path);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("raw_output_file", output_file);
|
||||
CFG_SERIALIZE_NAMED_MEMBER("tmp_directory", format(tmp_directory));
|
||||
CFG_SERIALIZE_NAMED_MEMBER("output_path", format(output_path));
|
||||
CFG_SERIALIZE_NAMED_MEMBER("output_file", format(output_file));
|
||||
|
||||
CFG_SERIALIZE_MEMBER(counters);
|
||||
CFG_SERIALIZE_MEMBER(kernel_filter_include);
|
||||
CFG_SERIALIZE_MEMBER(kernel_filter_exclude);
|
||||
CFG_SERIALIZE_MEMBER(kernel_filter_range);
|
||||
CFG_SERIALIZE_MEMBER(demangle);
|
||||
CFG_SERIALIZE_MEMBER(truncate);
|
||||
|
||||
static_cast<const base_type&>(*this).save(ar);
|
||||
|
||||
#undef CFG_SERIALIZE_MEMBER
|
||||
#undef CFG_SERIALIZE_NAMED_MEMBER
|
||||
@@ -200,31 +157,5 @@ get_config()
|
||||
return *_v;
|
||||
}
|
||||
}
|
||||
|
||||
struct output_key
|
||||
{
|
||||
output_key(std::string _key, std::string _val, std::string _desc = {});
|
||||
|
||||
template <typename Tp,
|
||||
typename Up = Tp,
|
||||
std::enable_if_t<!common::mpl::is_string_type<Up>::value, int> = 0>
|
||||
output_key(std::string _key, Tp&& _val, std::string _desc = {});
|
||||
|
||||
operator std::pair<std::string, std::string>() const;
|
||||
|
||||
std::string key = {};
|
||||
std::string value = {};
|
||||
std::string description = {};
|
||||
};
|
||||
|
||||
template <typename Tp, typename Up, std::enable_if_t<!common::mpl::is_string_type<Up>::value, int>>
|
||||
output_key::output_key(std::string _key, Tp&& _val, std::string _desc)
|
||||
: key{std::move(_key)}
|
||||
, value{fmt::format("{}", std::forward<Tp>(_val))}
|
||||
, description{std::move(_desc)}
|
||||
{}
|
||||
|
||||
std::vector<output_key>
|
||||
output_keys(std::string _tag = {});
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -1,662 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "generateCSV.hpp"
|
||||
#include "config.hpp"
|
||||
#include "csv.hpp"
|
||||
#include "generateStats.hpp"
|
||||
#include "helper.hpp"
|
||||
#include "statistics.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/marker/api_id.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <iomanip>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
namespace
|
||||
{
|
||||
/// Creates the output file used for a statistics table: the given name, the stats CSV
/// encoder, and the fixed list of eight column headers below.
tool::output_file
get_stats_output_file(std::string name)
{
    return tool::output_file{
        std::move(name),
        tool::csv::stats_csv_encoder{},
        {"Name", "Calls", "TotalDurationNs", "AverageNs", "Percentage", "MinNs", "MaxNs", "StdDev"}};
}
|
||||
|
||||
void
|
||||
write_stats(output_file&& ofs, const stats_entry_vec_t& data_v)
|
||||
{
|
||||
auto data = stats_entry_vec_t{};
|
||||
auto _duration = stats_data_t{};
|
||||
for(const auto& [id, value] : data_v)
|
||||
{
|
||||
data.emplace_back(id, value);
|
||||
_duration += value;
|
||||
}
|
||||
|
||||
std::sort(data.begin(), data.end(), [](const auto& lhs, const auto& rhs) {
|
||||
return (lhs.second.get_sum() > rhs.second.get_sum());
|
||||
});
|
||||
|
||||
constexpr float_type one_hundred = 100.0;
|
||||
|
||||
const float_type _total_duration = _duration.get_sum();
|
||||
for(const auto& [name, value] : data)
|
||||
{
|
||||
auto duration_ns = value.get_sum();
|
||||
auto calls = value.get_count();
|
||||
float_type avg_ns = value.get_mean();
|
||||
float_type percent_v = (duration_ns / _total_duration) * one_hundred;
|
||||
|
||||
auto _row = std::stringstream{};
|
||||
rocprofiler::tool::csv::stats_csv_encoder::write_row<stats_formatter>(_row,
|
||||
name,
|
||||
calls,
|
||||
duration_ns,
|
||||
avg_ns,
|
||||
percentage{percent_v},
|
||||
value.get_min(),
|
||||
value.get_max(),
|
||||
value.get_stddev());
|
||||
ofs << _row.str() << std::flush;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
generate_csv(tool_table* /*tool_functions*/, std::vector<rocprofiler_agent_v0_t>& data)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
std::sort(data.begin(), data.end(), [](rocprofiler_agent_v0_t lhs, rocprofiler_agent_v0_t rhs) {
|
||||
return lhs.node_id < rhs.node_id;
|
||||
});
|
||||
|
||||
auto ofs = tool::output_file{"agent_info",
|
||||
tool::csv::agent_info_csv_encoder{},
|
||||
{"Node_Id",
|
||||
"Logical_Node_Id",
|
||||
"Agent_Type",
|
||||
"Cpu_Cores_Count",
|
||||
"Simd_Count",
|
||||
"Cpu_Core_Id_Base",
|
||||
"Simd_Id_Base",
|
||||
"Max_Waves_Per_Simd",
|
||||
"Lds_Size_In_Kb",
|
||||
"Gds_Size_In_Kb",
|
||||
"Num_Gws",
|
||||
"Wave_Front_Size",
|
||||
"Num_Xcc",
|
||||
"Cu_Count",
|
||||
"Array_Count",
|
||||
"Num_Shader_Banks",
|
||||
"Simd_Arrays_Per_Engine",
|
||||
"Cu_Per_Simd_Array",
|
||||
"Simd_Per_Cu",
|
||||
"Max_Slots_Scratch_Cu",
|
||||
"Gfx_Target_Version",
|
||||
"Vendor_Id",
|
||||
"Device_Id",
|
||||
"Location_Id",
|
||||
"Domain",
|
||||
"Drm_Render_Minor",
|
||||
"Num_Sdma_Engines",
|
||||
"Num_Sdma_Xgmi_Engines",
|
||||
"Num_Sdma_Queues_Per_Engine",
|
||||
"Num_Cp_Queues",
|
||||
"Max_Engine_Clk_Ccompute",
|
||||
"Max_Engine_Clk_Fcompute",
|
||||
"Sdma_Fw_Version",
|
||||
"Fw_Version",
|
||||
"Capability",
|
||||
"Cu_Per_Engine",
|
||||
"Max_Waves_Per_Cu",
|
||||
"Family_Id",
|
||||
"Workgroup_Max_Size",
|
||||
"Grid_Max_Size",
|
||||
"Local_Mem_Size",
|
||||
"Hive_Id",
|
||||
"Gpu_Id",
|
||||
"Workgroup_Max_Dim_X",
|
||||
"Workgroup_Max_Dim_Y",
|
||||
"Workgroup_Max_Dim_Z",
|
||||
"Grid_Max_Dim_X",
|
||||
"Grid_Max_Dim_Y",
|
||||
"Grid_Max_Dim_Z",
|
||||
"Name",
|
||||
"Vendor_Name",
|
||||
"Product_Name",
|
||||
"Model_Name"}};
|
||||
|
||||
for(auto& itr : data)
|
||||
{
|
||||
auto _type = std::string_view{};
|
||||
if(itr.type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_type = "CPU";
|
||||
else if(itr.type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_type = "GPU";
|
||||
else
|
||||
_type = "UNK";
|
||||
|
||||
auto row_ss = std::stringstream{};
|
||||
rocprofiler::tool::csv::agent_info_csv_encoder::write_row(row_ss,
|
||||
itr.node_id,
|
||||
itr.logical_node_id,
|
||||
_type,
|
||||
itr.cpu_cores_count,
|
||||
itr.simd_count,
|
||||
itr.cpu_core_id_base,
|
||||
itr.simd_id_base,
|
||||
itr.max_waves_per_simd,
|
||||
itr.lds_size_in_kb,
|
||||
itr.gds_size_in_kb,
|
||||
itr.num_gws,
|
||||
itr.wave_front_size,
|
||||
itr.num_xcc,
|
||||
itr.cu_count,
|
||||
itr.array_count,
|
||||
itr.num_shader_banks,
|
||||
itr.simd_arrays_per_engine,
|
||||
itr.cu_per_simd_array,
|
||||
itr.simd_per_cu,
|
||||
itr.max_slots_scratch_cu,
|
||||
itr.gfx_target_version,
|
||||
itr.vendor_id,
|
||||
itr.device_id,
|
||||
itr.location_id,
|
||||
itr.domain,
|
||||
itr.drm_render_minor,
|
||||
itr.num_sdma_engines,
|
||||
itr.num_sdma_xgmi_engines,
|
||||
itr.num_sdma_queues_per_engine,
|
||||
itr.num_cp_queues,
|
||||
itr.max_engine_clk_ccompute,
|
||||
itr.max_engine_clk_fcompute,
|
||||
itr.sdma_fw_version.Value,
|
||||
itr.fw_version.Value,
|
||||
itr.capability.Value,
|
||||
itr.cu_per_engine,
|
||||
itr.max_waves_per_cu,
|
||||
itr.family_id,
|
||||
itr.workgroup_max_size,
|
||||
itr.grid_max_size,
|
||||
itr.local_mem_size,
|
||||
itr.hive_id,
|
||||
itr.gpu_id,
|
||||
itr.workgroup_max_dim.x,
|
||||
itr.workgroup_max_dim.y,
|
||||
itr.workgroup_max_dim.z,
|
||||
itr.grid_max_dim.x,
|
||||
itr.grid_max_dim.y,
|
||||
itr.grid_max_dim.z,
|
||||
itr.name,
|
||||
itr.vendor_name,
|
||||
itr.product_name,
|
||||
itr.model_name);
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(tool::get_config().stats && stats)
|
||||
write_stats(get_stats_output_file("kernel_stats"), stats.entries);
|
||||
|
||||
auto ofs = tool::output_file{"kernel_trace",
|
||||
tool::csv::kernel_trace_csv_encoder{},
|
||||
{"Kind",
|
||||
"Agent_Id",
|
||||
"Queue_Id",
|
||||
"Thread_Id",
|
||||
"Dispatch_Id",
|
||||
"Kernel_Id",
|
||||
"Kernel_Name",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp",
|
||||
"Private_Segment_Size",
|
||||
"Group_Segment_Size",
|
||||
"Workgroup_Size_X",
|
||||
"Workgroup_Size_Y",
|
||||
"Workgroup_Size_Z",
|
||||
"Grid_Size_X",
|
||||
"Grid_Size_Y",
|
||||
"Grid_Size_Z"}};
|
||||
|
||||
for(const auto& record : data)
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto kernel_name = tool_functions->tool_get_kernel_name_fn(
|
||||
record.dispatch_info.kernel_id, record.correlation_id.external.value);
|
||||
rocprofiler::tool::csv::kernel_trace_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_functions->tool_get_domain_name_fn(record.kind),
|
||||
tool_functions->tool_get_agent_node_id_fn(record.dispatch_info.agent_id),
|
||||
record.dispatch_info.queue_id.handle,
|
||||
record.thread_id,
|
||||
record.dispatch_info.dispatch_id,
|
||||
record.dispatch_info.kernel_id,
|
||||
kernel_name,
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp,
|
||||
record.dispatch_info.private_segment_size,
|
||||
record.dispatch_info.group_segment_size,
|
||||
record.dispatch_info.workgroup_size.x,
|
||||
record.dispatch_info.workgroup_size.y,
|
||||
record.dispatch_info.workgroup_size.z,
|
||||
record.dispatch_info.grid_size.x,
|
||||
record.dispatch_info.grid_size.y,
|
||||
record.dispatch_info.grid_size.z);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_hip_api_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(tool::get_config().stats && stats)
|
||||
write_stats(get_stats_output_file("hip_api_stats"), stats.entries);
|
||||
|
||||
auto ofs = tool::output_file{"hip_api_trace",
|
||||
tool::csv::api_csv_encoder{},
|
||||
{"Domain",
|
||||
"Function",
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
for(const auto& record : data)
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
rocprofiler::tool::csv::api_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_functions->tool_get_domain_name_fn(record.kind),
|
||||
api_name,
|
||||
getpid(),
|
||||
record.thread_id,
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(tool::get_config().stats && stats)
|
||||
write_stats(get_stats_output_file("hsa_api_stats"), stats.entries);
|
||||
|
||||
auto ofs = tool::output_file{"hsa_api_trace",
|
||||
tool::csv::api_csv_encoder{},
|
||||
{"Domain",
|
||||
"Function",
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
|
||||
for(const auto& record : data)
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
rocprofiler::tool::csv::api_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_functions->tool_get_domain_name_fn(record.kind),
|
||||
api_name,
|
||||
getpid(),
|
||||
record.thread_id,
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(tool::get_config().stats && stats)
|
||||
write_stats(get_stats_output_file("memory_copy_stats"), stats.entries);
|
||||
|
||||
auto ofs = tool::output_file{"memory_copy_trace",
|
||||
tool::csv::memory_copy_csv_encoder{},
|
||||
{"Kind",
|
||||
"Direction",
|
||||
"Source_Agent_Id",
|
||||
"Destination_Agent_Id",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
for(const auto& record : data)
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
rocprofiler::tool::csv::memory_copy_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_functions->tool_get_domain_name_fn(record.kind),
|
||||
api_name,
|
||||
tool_functions->tool_get_agent_node_id_fn(record.src_agent_id),
|
||||
tool_functions->tool_get_agent_node_id_fn(record.dst_agent_id),
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_marker_api_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(tool::get_config().stats && stats)
|
||||
write_stats(get_stats_output_file("marker_api_stats"), stats.entries);
|
||||
|
||||
auto ofs = tool::output_file{"marker_api_trace",
|
||||
tool::csv::marker_csv_encoder{},
|
||||
{"Domain",
|
||||
"Function",
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
for(const auto& record : data)
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto _name = std::string_view{};
|
||||
|
||||
if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
|
||||
(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA ||
|
||||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA ||
|
||||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA))
|
||||
{
|
||||
_name = tool_functions->tool_get_roctx_msg_fn(record.correlation_id.internal);
|
||||
}
|
||||
else
|
||||
{
|
||||
_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
}
|
||||
|
||||
tool::csv::marker_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_functions->tool_get_domain_name_fn(record.kind),
|
||||
_name,
|
||||
getpid(),
|
||||
record.thread_id,
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_tool_counter_collection_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(tool::get_config().stats && stats)
|
||||
write_stats(get_stats_output_file("counter_collection_stats"), stats.entries);
|
||||
|
||||
auto ofs = tool::output_file{"counter_collection",
|
||||
tool::csv::counter_collection_csv_encoder{},
|
||||
{"Correlation_Id",
|
||||
"Dispatch_Id",
|
||||
"Agent_Id",
|
||||
"Queue_Id",
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Grid_Size",
|
||||
"Kernel_Id",
|
||||
"Kernel_Name",
|
||||
"Workgroup_Size",
|
||||
"LDS_Block_Size",
|
||||
"Scratch_Size",
|
||||
"VGPR_Count",
|
||||
"SGPR_Count",
|
||||
"Counter_Name",
|
||||
"Counter_Value",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
for(const auto& record : data)
|
||||
{
|
||||
auto kernel_id = record.dispatch_data.dispatch_info.kernel_id;
|
||||
auto counter_name_value = std::map<std::string, double>{};
|
||||
for(uint64_t i = 0; i < record.counter_count; i++)
|
||||
{
|
||||
const auto& count = record.records.at(i);
|
||||
auto rec = count.record_counter;
|
||||
std::string counter_name = tool_functions->tool_get_counter_info_name_fn(rec.id);
|
||||
auto search = counter_name_value.find(counter_name);
|
||||
if(search == counter_name_value.end())
|
||||
counter_name_value.emplace(
|
||||
std::pair<std::string, double>{counter_name, rec.counter_value});
|
||||
else
|
||||
search->second = search->second + rec.counter_value;
|
||||
}
|
||||
|
||||
const auto& correlation_id = record.dispatch_data.correlation_id;
|
||||
|
||||
auto magnitude = [](rocprofiler_dim3_t dims) { return (dims.x * dims.y * dims.z); };
|
||||
auto row_ss = std::stringstream{};
|
||||
for(auto& itr : counter_name_value)
|
||||
{
|
||||
tool::csv::counter_collection_csv_encoder::write_row(
|
||||
row_ss,
|
||||
correlation_id.internal,
|
||||
record.dispatch_data.dispatch_info.dispatch_id,
|
||||
tool_functions->tool_get_agent_node_id_fn(
|
||||
record.dispatch_data.dispatch_info.agent_id),
|
||||
record.dispatch_data.dispatch_info.queue_id.handle,
|
||||
getpid(),
|
||||
record.thread_id,
|
||||
magnitude(record.dispatch_data.dispatch_info.grid_size),
|
||||
record.dispatch_data.dispatch_info.kernel_id,
|
||||
tool_functions->tool_get_kernel_name_fn(kernel_id, correlation_id.external.value),
|
||||
magnitude(record.dispatch_data.dispatch_info.workgroup_size),
|
||||
record.lds_block_size_v,
|
||||
record.dispatch_data.dispatch_info.private_segment_size,
|
||||
record.arch_vgpr_count,
|
||||
record.sgpr_count,
|
||||
itr.first,
|
||||
itr.second,
|
||||
record.dispatch_data.start_timestamp,
|
||||
record.dispatch_data.end_timestamp);
|
||||
}
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(tool::get_config().stats && stats)
|
||||
write_stats(get_stats_output_file("scratch_memory_stats"), stats.entries);
|
||||
|
||||
auto ofs = tool::output_file{"scratch_memory_trace",
|
||||
tool::csv::scratch_memory_encoder{},
|
||||
{
|
||||
"Kind",
|
||||
"Operation",
|
||||
"Agent_Id",
|
||||
"Queue_Id",
|
||||
"Thread_Id",
|
||||
"Alloc_flags",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp",
|
||||
}};
|
||||
|
||||
for(const auto& record : data)
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto kind_name = tool_functions->tool_get_domain_name_fn(record.kind);
|
||||
auto op_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
|
||||
tool::csv::scratch_memory_encoder::write_row(
|
||||
row_ss,
|
||||
kind_name,
|
||||
op_name,
|
||||
tool_functions->tool_get_agent_node_id_fn(record.agent_id),
|
||||
record.queue_id.handle,
|
||||
record.thread_id,
|
||||
record.flags,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(tool::get_config().stats && stats)
|
||||
write_stats(get_stats_output_file("rccl_api_stats"), stats.entries);
|
||||
|
||||
auto ofs = tool::output_file{"rccl_api_trace",
|
||||
tool::csv::api_csv_encoder{},
|
||||
{"Domain",
|
||||
"Function",
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Correlation_Id",
|
||||
"Start_Timestamp",
|
||||
"End_Timestamp"}};
|
||||
for(const auto& record : data)
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto api_name = tool_functions->tool_get_operation_name_fn(record.kind, record.operation);
|
||||
rocprofiler::tool::csv::api_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_functions->tool_get_domain_name_fn(record.kind),
|
||||
api_name,
|
||||
getpid(),
|
||||
record.thread_id,
|
||||
record.correlation_id.internal,
|
||||
record.start_timestamp,
|
||||
record.end_timestamp);
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(tool_table* /*tool_functions*/, const domain_stats_vec_t& data_v)
|
||||
{
|
||||
using csv_encoder_t = rocprofiler::tool::csv::stats_csv_encoder;
|
||||
|
||||
if(!tool::get_config().stats) return;
|
||||
|
||||
auto _data = data_v;
|
||||
auto _total_stats = stats_data_t{};
|
||||
for(const auto& itr : _data)
|
||||
_total_stats += itr.second.total;
|
||||
|
||||
if(_total_stats.get_count() == 0) return;
|
||||
|
||||
std::sort(_data.begin(), _data.end(), [](const auto& lhs, const auto& rhs) {
|
||||
return (lhs.second.total.get_sum() > rhs.second.total.get_sum());
|
||||
});
|
||||
|
||||
auto ofs = get_stats_output_file("domain_stats");
|
||||
|
||||
const float_type _total_duration = _total_stats.get_sum();
|
||||
for(const auto& [type, value] : _data)
|
||||
{
|
||||
auto name = get_domain_column_name(type);
|
||||
auto duration_ns = value.total.get_sum();
|
||||
auto calls = value.total.get_count();
|
||||
auto avg_ns = value.total.get_mean();
|
||||
auto percent_v = value.total.get_percent(_total_duration);
|
||||
|
||||
auto _row = std::stringstream{};
|
||||
csv_encoder_t::write_row<stats_formatter>(_row,
|
||||
name,
|
||||
calls,
|
||||
duration_ns,
|
||||
avg_ns,
|
||||
percentage{percent_v},
|
||||
value.total.get_min(),
|
||||
value.total.get_max(),
|
||||
value.total.get_stddev());
|
||||
ofs << _row.str() << std::flush;
|
||||
}
|
||||
}
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -1,82 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "generateStats.hpp"
|
||||
#include "helper.hpp"
|
||||
#include "rocprofiler-sdk/buffer_tracing.h"
|
||||
#include "statistics.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
void
|
||||
generate_csv(tool_table* tool_functions, std::vector<rocprofiler_agent_v0_t>& data);
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_hip_api_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_marker_api_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_tool_counter_collection_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(tool_table* tool_functions, const domain_stats_vec_t& data);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -1,189 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "generateJSON.hpp"
|
||||
#include "config.hpp"
|
||||
#include "helper.hpp"
|
||||
#include "output_file.hpp"
|
||||
#include "statistics.hpp"
|
||||
|
||||
#include "lib/common/string_entry.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/marker/api_id.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
void
|
||||
write_json(tool_table* tool_functions,
|
||||
uint64_t pid,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
std::vector<rocprofiler_agent_v0_t> agent_data,
|
||||
std::vector<rocprofiler_tool_counter_info_t> counter_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>* kernel_dispatch_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>* memory_copy_deque,
|
||||
std::deque<rocprofiler_tool_counter_collection_record_t>* counter_collection_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* scratch_memory_deque,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_deque)
|
||||
|
||||
{
|
||||
using JSONOutputArchive = cereal::MinimalJSONOutputArchive;
|
||||
|
||||
constexpr auto json_prec = 32;
|
||||
constexpr auto json_indent = JSONOutputArchive::Options::IndentChar::space;
|
||||
auto json_opts = JSONOutputArchive::Options{json_prec, json_indent, 1};
|
||||
auto filename = std::string_view{"results"};
|
||||
auto ofs = get_output_stream(filename, ".json");
|
||||
|
||||
{
|
||||
auto json_ar = JSONOutputArchive{*ofs.stream, json_opts};
|
||||
json_ar.setNextName("rocprofiler-sdk-tool");
|
||||
json_ar.startNode();
|
||||
|
||||
json_ar.makeArray();
|
||||
json_ar.startNode();
|
||||
|
||||
// metadata
|
||||
{
|
||||
json_ar.setNextName("metadata");
|
||||
json_ar.startNode();
|
||||
auto* timestamps = tool_functions->tool_get_app_timestamps_fn();
|
||||
json_ar(cereal::make_nvp("pid", pid));
|
||||
json_ar(cereal::make_nvp("init_time", timestamps->app_start_time));
|
||||
json_ar(cereal::make_nvp("fini_time", timestamps->app_end_time));
|
||||
json_ar(cereal::make_nvp("config", get_config()));
|
||||
json_ar(cereal::make_nvp("command", common::read_command_line(getpid())));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
// summary
|
||||
{
|
||||
json_ar.setNextName("summary");
|
||||
json_ar.startNode();
|
||||
json_ar.makeArray();
|
||||
|
||||
for(const auto& itr : domain_stats)
|
||||
{
|
||||
auto _name = get_domain_column_name(itr.first);
|
||||
json_ar.startNode();
|
||||
|
||||
json_ar(cereal::make_nvp("domain", std::string{_name}));
|
||||
json_ar(cereal::make_nvp("stats", itr.second));
|
||||
// itr.second.serialize(json_ar, 0);
|
||||
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
json_ar(cereal::make_nvp("agents", agent_data));
|
||||
json_ar(cereal::make_nvp("counters", counter_data));
|
||||
|
||||
{
|
||||
auto callback_name_info = get_callback_id_names();
|
||||
auto buffer_name_info = get_buffer_id_names();
|
||||
auto counter_dims = get_tool_counter_dimension_info();
|
||||
auto marker_msg_data = get_callback_roctx_msg();
|
||||
|
||||
json_ar.setNextName("strings");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("callback_records", callback_name_info));
|
||||
json_ar(cereal::make_nvp("buffer_records", buffer_name_info));
|
||||
json_ar(cereal::make_nvp("marker_api", marker_msg_data));
|
||||
|
||||
{
|
||||
auto _extern_corr_id_strings = std::map<size_t, std::string>{};
|
||||
if(tool::get_config().kernel_rename)
|
||||
{
|
||||
for(auto itr : *kernel_dispatch_deque)
|
||||
{
|
||||
auto _value = itr.correlation_id.external.value;
|
||||
if(_value > 0)
|
||||
{
|
||||
const auto* _str = common::get_string_entry(_value);
|
||||
if(_str) _extern_corr_id_strings.emplace(_value, *_str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json_ar.setNextName("correlation_id");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("external", _extern_corr_id_strings));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
{
|
||||
json_ar.setNextName("counters");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("dimension_ids", counter_dims));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
{
|
||||
auto kern_sym_data = get_kernel_symbol_data();
|
||||
auto code_obj_data = get_code_object_data();
|
||||
|
||||
json_ar(cereal::make_nvp("code_objects", code_obj_data));
|
||||
json_ar(cereal::make_nvp("kernel_symbols", kern_sym_data));
|
||||
}
|
||||
|
||||
{
|
||||
json_ar.setNextName("callback_records");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("counter_collection", *counter_collection_deque));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
{
|
||||
json_ar.setNextName("buffer_records");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("kernel_dispatch", *kernel_dispatch_deque));
|
||||
json_ar(cereal::make_nvp("hip_api", *hip_api_deque));
|
||||
json_ar(cereal::make_nvp("hsa_api", *hsa_api_deque));
|
||||
json_ar(cereal::make_nvp("marker_api", *marker_api_deque));
|
||||
json_ar(cereal::make_nvp("rccl_api", *rccl_api_deque));
|
||||
json_ar(cereal::make_nvp("memory_copy", *memory_copy_deque));
|
||||
json_ar(cereal::make_nvp("scratch_memory", *scratch_memory_deque));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
json_ar.finishNode(); // end array
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
ofs.close();
|
||||
}
|
||||
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -1,593 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "generatePerfetto.hpp"
|
||||
#include "config.hpp"
|
||||
#include "helper.hpp"
|
||||
#include "output_file.hpp"
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/marker/api_id.h>
|
||||
#include <atomic>
|
||||
#include <future>
|
||||
#include <rocprofiler-sdk/cxx/hash.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
#include <rocprofiler-sdk/cxx/perfetto.hpp>
|
||||
|
||||
#include <map>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Thread id returned by common::get_tid() when this namespace-scope variable is
// initialized. NOTE(review): presumably this is the main thread's id, as the
// name suggests -- confirm that static initialization runs on that thread.
auto main_tid = common::get_tid();
|
||||
|
||||
// Compute a hash for `_val`.
//
//  - non-pointer values are hashed with std::hash of their decayed type
//  - C strings (const char*, char*, and character arrays, which decay to
//    pointers) are hashed by content via std::string_view
//  - any other pointer is dereferenced and the pointee is hashed; the pointer
//    must therefore be non-null
//
// `Tp` is a forwarding reference, so for lvalue arguments it deduces to a
// reference type. The type traits are applied to the decayed type; otherwise
// an lvalue argument would try to instantiate std::hash<T&> and a named
// pointer would not be recognized as a pointer at all.
template <typename Tp>
size_t
get_hash_id(Tp&& _val)
{
    using value_type = std::decay_t<Tp>;

    if constexpr(!std::is_pointer<value_type>::value)
        return std::hash<value_type>{}(std::forward<Tp>(_val));
    else if constexpr(std::is_same<value_type, const char*>::value ||
                      std::is_same<value_type, char*>::value)
        return get_hash_id(std::string_view{_val});
    else
        return get_hash_id(*_val);
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
write_perfetto(
|
||||
tool_table* tool_functions,
|
||||
uint64_t /*pid*/,
|
||||
std::vector<rocprofiler_agent_v0_t> agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>* kernel_dispatch_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>* memory_copy_data,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* /*scratch_memory_data*/,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data)
|
||||
{
|
||||
namespace sdk = ::rocprofiler::sdk;
|
||||
|
||||
auto agents_map = std::unordered_map<rocprofiler_agent_id_t, rocprofiler_agent_t>{};
|
||||
for(auto itr : agent_data)
|
||||
agents_map.emplace(itr.id, itr);
|
||||
|
||||
auto args = ::perfetto::TracingInitArgs{};
|
||||
auto track_event_cfg = ::perfetto::protos::gen::TrackEventConfig{};
|
||||
auto cfg = ::perfetto::TraceConfig{};
|
||||
|
||||
// environment settings
|
||||
auto shmem_size_hint = get_config().perfetto_shmem_size_hint;
|
||||
auto buffer_size_kb = get_config().perfetto_buffer_size;
|
||||
|
||||
auto* buffer_config = cfg.add_buffers();
|
||||
buffer_config->set_size_kb(buffer_size_kb);
|
||||
|
||||
if(get_config().perfetto_buffer_fill_policy == "discard" ||
|
||||
get_config().perfetto_buffer_fill_policy.empty())
|
||||
buffer_config->set_fill_policy(
|
||||
::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD);
|
||||
else if(get_config().perfetto_buffer_fill_policy == "ring_buffer")
|
||||
buffer_config->set_fill_policy(
|
||||
::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_RING_BUFFER);
|
||||
else
|
||||
ROCP_FATAL << "Unsupport perfetto buffer fill policy: '"
|
||||
<< get_config().perfetto_buffer_fill_policy
|
||||
<< "'. Supported: discard, ring_buffer";
|
||||
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("track_event"); // this MUST be track_event
|
||||
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
|
||||
args.shmem_size_hint_kb = shmem_size_hint;
|
||||
|
||||
if(get_config().perfetto_backend == "inprocess" || get_config().perfetto_backend.empty())
|
||||
args.backends |= ::perfetto::kInProcessBackend;
|
||||
else if(get_config().perfetto_backend == "system")
|
||||
args.backends |= ::perfetto::kSystemBackend;
|
||||
else
|
||||
ROCP_FATAL << "Unsupport perfetto backend: '" << get_config().perfetto_backend
|
||||
<< "'. Supported: inprocess, system";
|
||||
|
||||
::perfetto::Tracing::Initialize(args);
|
||||
::perfetto::TrackEvent::Register();
|
||||
|
||||
auto tracing_session = ::perfetto::Tracing::NewTrace();
|
||||
|
||||
tracing_session->Setup(cfg);
|
||||
tracing_session->StartBlocking();
|
||||
|
||||
auto tids = std::set<rocprofiler_thread_id_t>{};
|
||||
auto demangled = std::unordered_map<std::string_view, std::string>{};
|
||||
auto agent_thread_ids = std::unordered_map<rocprofiler_agent_id_t, std::set<uint64_t>>{};
|
||||
auto agent_queue_ids =
|
||||
std::unordered_map<rocprofiler_agent_id_t, std::unordered_set<rocprofiler_queue_id_t>>{};
|
||||
auto thread_indexes = std::unordered_map<rocprofiler_thread_id_t, uint64_t>{};
|
||||
auto kernel_sym_data = get_kernel_symbol_data();
|
||||
|
||||
auto thread_tracks = std::unordered_map<rocprofiler_thread_id_t, ::perfetto::Track>{};
|
||||
auto agent_thread_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::unordered_map<uint64_t, ::perfetto::Track>>{};
|
||||
auto agent_queue_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::unordered_map<rocprofiler_queue_id_t, ::perfetto::Track>>{};
|
||||
|
||||
auto _get_agent = [&agent_data](rocprofiler_agent_id_t _id) -> const rocprofiler_agent_t* {
|
||||
for(const auto& itr : agent_data)
|
||||
{
|
||||
if(_id == itr.id) return &itr;
|
||||
}
|
||||
return CHECK_NOTNULL(nullptr);
|
||||
};
|
||||
|
||||
{
|
||||
for(auto itr : *hsa_api_data)
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto itr : *hip_api_data)
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto itr : *marker_api_data)
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto itr : *rccl_api_data)
|
||||
tids.emplace(itr.thread_id);
|
||||
|
||||
for(auto itr : *memory_copy_data)
|
||||
{
|
||||
tids.emplace(itr.thread_id);
|
||||
agent_thread_ids[itr.dst_agent_id].emplace(itr.thread_id);
|
||||
}
|
||||
|
||||
for(auto itr : *kernel_dispatch_data)
|
||||
{
|
||||
tids.emplace(itr.thread_id);
|
||||
agent_queue_ids[itr.dispatch_info.agent_id].emplace(itr.dispatch_info.queue_id);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t nthrn = 0;
|
||||
for(auto itr : tids)
|
||||
{
|
||||
if(itr == main_tid)
|
||||
{
|
||||
thread_indexes.emplace(main_tid, 0);
|
||||
thread_tracks.emplace(main_tid, ::perfetto::ThreadTrack::Current());
|
||||
}
|
||||
else
|
||||
{
|
||||
auto _idx = ++nthrn;
|
||||
thread_indexes.emplace(itr, _idx);
|
||||
auto _track = ::perfetto::Track{itr};
|
||||
auto _desc = _track.Serialize();
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "THREAD " << _idx << " (" << itr << ")";
|
||||
_desc.set_name(_namess.str());
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
thread_tracks.emplace(itr, _track);
|
||||
}
|
||||
}
|
||||
|
||||
for(const auto& itr : agent_thread_ids)
|
||||
{
|
||||
const auto* _agent = _get_agent(itr.first);
|
||||
|
||||
for(auto titr : itr.second)
|
||||
{
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "COPY to AGENT [" << _agent->logical_node_id << "] THREAD ["
|
||||
<< thread_indexes.at(titr) << "] ";
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << "(CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << "(GPU)";
|
||||
else
|
||||
_namess << "(UNK)";
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
_desc.set_name(_namess.str());
|
||||
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
agent_thread_tracks[itr.first].emplace(titr, _track);
|
||||
}
|
||||
}
|
||||
|
||||
for(const auto& aitr : agent_queue_ids)
|
||||
{
|
||||
uint32_t nqueue = 0;
|
||||
for(auto qitr : aitr.second)
|
||||
{
|
||||
const auto* _agent = _get_agent(aitr.first);
|
||||
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "COMPUTE AGENT [" << _agent->logical_node_id << "] QUEUE [" << nqueue++
|
||||
<< "] ";
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << "(CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << "(GPU)";
|
||||
else
|
||||
_namess << "(UNK)";
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
_desc.set_name(_namess.str());
|
||||
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
agent_queue_tracks[aitr.first].emplace(qitr, _track);
|
||||
}
|
||||
}
|
||||
|
||||
// trace events
|
||||
{
|
||||
auto buffer_names = sdk::get_buffer_tracing_names();
|
||||
auto callbk_name_info = sdk::get_callback_tracing_names();
|
||||
|
||||
for(auto itr : *hsa_api_data)
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::hsa_api>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal);
|
||||
TRACE_EVENT_END(
|
||||
sdk::perfetto_category<sdk::category::hsa_api>::name, track, itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto itr : *hip_api_data)
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::hip_api>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal);
|
||||
TRACE_EVENT_END(
|
||||
sdk::perfetto_category<sdk::category::hip_api>::name, track, itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto itr : *marker_api_data)
|
||||
{
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
auto name = (itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
|
||||
itr.operation != ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId)
|
||||
? tool_functions->tool_get_roctx_msg_fn(itr.correlation_id.internal)
|
||||
: buffer_names.at(itr.kind, itr.operation);
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::marker_api>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal);
|
||||
TRACE_EVENT_END(
|
||||
sdk::perfetto_category<sdk::category::marker_api>::name, track, itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto itr : *rccl_api_data)
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::rccl_api>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal);
|
||||
TRACE_EVENT_END(
|
||||
sdk::perfetto_category<sdk::category::rccl_api>::name, track, itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto itr : *memory_copy_data)
|
||||
{
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = agent_thread_tracks.at(itr.dst_agent_id).at(itr.thread_id);
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::memory_copy>::name,
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"src_agent",
|
||||
agents_map.at(itr.src_agent_id).logical_node_id,
|
||||
"dst_agent",
|
||||
agents_map.at(itr.dst_agent_id).logical_node_id,
|
||||
"copy_bytes",
|
||||
itr.bytes,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal,
|
||||
"tid",
|
||||
itr.thread_id);
|
||||
TRACE_EVENT_END(
|
||||
sdk::perfetto_category<sdk::category::memory_copy>::name, track, itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
|
||||
for(auto itr : *kernel_dispatch_data)
|
||||
{
|
||||
const auto& info = itr.dispatch_info;
|
||||
const kernel_symbol_data* sym = nullptr;
|
||||
for(const auto& kitr : kernel_sym_data)
|
||||
{
|
||||
if(kitr.kernel_id == info.kernel_id)
|
||||
{
|
||||
sym = &kitr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
CHECK(sym != nullptr);
|
||||
|
||||
auto name = std::string_view{sym->kernel_name};
|
||||
auto& track = agent_queue_tracks.at(info.agent_id).at(info.queue_id);
|
||||
|
||||
if(demangled.find(name) == demangled.end())
|
||||
{
|
||||
demangled.emplace(name, common::cxx_demangle(name));
|
||||
}
|
||||
|
||||
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
|
||||
::perfetto::StaticString(demangled.at(name).c_str()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp,
|
||||
"delta_ns",
|
||||
(itr.end_timestamp - itr.start_timestamp),
|
||||
"kind",
|
||||
itr.kind,
|
||||
"agent",
|
||||
agents_map.at(info.agent_id).logical_node_id,
|
||||
"corr_id",
|
||||
itr.correlation_id.internal,
|
||||
"queue",
|
||||
info.queue_id.handle,
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kernel_id",
|
||||
info.kernel_id,
|
||||
"private_segment_size",
|
||||
info.private_segment_size,
|
||||
"group_segment_size",
|
||||
info.group_segment_size,
|
||||
"workgroup_size",
|
||||
info.workgroup_size.x * info.workgroup_size.y * info.workgroup_size.z,
|
||||
"grid_size",
|
||||
info.grid_size.x * info.grid_size.y * info.grid_size.z);
|
||||
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
|
||||
track,
|
||||
itr.end_timestamp);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
}
|
||||
|
||||
// counter tracks
|
||||
{
|
||||
// memory copy counter track
|
||||
auto mem_cpy_endpoints = std::map<rocprofiler_agent_id_t, std::map<uint64_t, uint64_t>>{};
|
||||
auto mem_cpy_extremes = std::pair<uint64_t, uint64_t>{};
|
||||
for(auto itr : *memory_copy_data)
|
||||
{
|
||||
uint64_t _mean_timestamp =
|
||||
itr.start_timestamp + (0.5 * (itr.end_timestamp - itr.start_timestamp));
|
||||
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp - 1000, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(_mean_timestamp, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp, 0);
|
||||
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp + 1000, 0);
|
||||
|
||||
mem_cpy_extremes = std::make_pair(std::min(mem_cpy_extremes.first, itr.start_timestamp),
|
||||
std::max(mem_cpy_extremes.second, itr.end_timestamp));
|
||||
}
|
||||
|
||||
for(auto itr : *memory_copy_data)
|
||||
{
|
||||
auto mbeg = mem_cpy_endpoints.at(itr.dst_agent_id).lower_bound(itr.start_timestamp);
|
||||
auto mend = mem_cpy_endpoints.at(itr.dst_agent_id).upper_bound(itr.end_timestamp);
|
||||
|
||||
LOG_IF(FATAL, mbeg == mend) << "Missing range for timestamp [" << itr.start_timestamp
|
||||
<< ", " << itr.end_timestamp << "]";
|
||||
|
||||
for(auto mitr = mbeg; mitr != mend; ++mitr)
|
||||
mitr->second += itr.bytes;
|
||||
}
|
||||
|
||||
constexpr auto bytes_multiplier = 1024;
|
||||
|
||||
auto mem_cpy_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t, ::perfetto::CounterTrack>{};
|
||||
auto mem_cpy_cnt_names = std::vector<std::string>{};
|
||||
mem_cpy_cnt_names.reserve(mem_cpy_endpoints.size());
|
||||
for(auto& mitr : mem_cpy_endpoints)
|
||||
{
|
||||
mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.first - 5000, 0);
|
||||
mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.second + 5000, 0);
|
||||
|
||||
auto _track_name = std::stringstream{};
|
||||
const auto* _agent = _get_agent(mitr.first);
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (GPU)";
|
||||
|
||||
constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES;
|
||||
auto& _name = mem_cpy_cnt_names.emplace_back(_track_name.str());
|
||||
mem_cpy_tracks.emplace(mitr.first,
|
||||
::perfetto::CounterTrack{_name.c_str()}
|
||||
.set_unit(_unit)
|
||||
.set_unit_multiplier(bytes_multiplier)
|
||||
.set_is_incremental(false));
|
||||
}
|
||||
|
||||
for(auto& mitr : mem_cpy_endpoints)
|
||||
{
|
||||
for(auto itr : mitr.second)
|
||||
{
|
||||
TRACE_COUNTER(sdk::perfetto_category<sdk::category::memory_copy>::name,
|
||||
mem_cpy_tracks.at(mitr.first),
|
||||
itr.first,
|
||||
itr.second / bytes_multiplier);
|
||||
tracing_session->FlushBlocking();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
::perfetto::TrackEvent::Flush();
|
||||
tracing_session->FlushBlocking();
|
||||
tracing_session->StopBlocking();
|
||||
|
||||
auto filename = std::string{"results"};
|
||||
auto ofs = get_output_stream(filename, ".pftrace");
|
||||
|
||||
auto amount_read = std::atomic<size_t>{0};
|
||||
auto is_done = std::promise<void>{};
|
||||
auto _mtx = std::mutex{};
|
||||
auto _reader = [&ofs, &_mtx, &is_done, &amount_read](
|
||||
::perfetto::TracingSession::ReadTraceCallbackArgs _args) {
|
||||
auto _lk = std::unique_lock<std::mutex>{_mtx};
|
||||
if(_args.data && _args.size > 0)
|
||||
{
|
||||
ROCP_TRACE << "Writing " << _args.size << " B to trace...";
|
||||
// Write the trace data into file
|
||||
ofs.stream->write(_args.data, _args.size);
|
||||
amount_read += _args.size;
|
||||
}
|
||||
ROCP_INFO_IF(!_args.has_more && amount_read > 0)
|
||||
<< "Wrote " << amount_read << " B to perfetto trace file";
|
||||
if(!_args.has_more) is_done.set_value();
|
||||
};
|
||||
|
||||
for(size_t i = 0; i < 2; ++i)
|
||||
{
|
||||
ROCP_TRACE << "Reading trace...";
|
||||
amount_read = 0;
|
||||
is_done = std::promise<void>{};
|
||||
tracing_session->ReadTrace(_reader);
|
||||
is_done.get_future().wait();
|
||||
}
|
||||
|
||||
ROCP_TRACE << "Destroying tracing session...";
|
||||
tracing_session.reset();
|
||||
|
||||
ROCP_TRACE << "Flushing trace output stream...";
|
||||
(*ofs.stream) << std::flush;
|
||||
|
||||
ROCP_TRACE << "Destroying trace output stream...";
|
||||
ofs.close();
|
||||
}
|
||||
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
|
||||
@@ -1,67 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "helper.hpp"
|
||||
#include "statistics.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_hip_api_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_marker_api_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_tool_counter_collection_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(tool_table* tool_functions,
|
||||
const std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>& data);
|
||||
|
||||
void
|
||||
generate_stats(tool_table* tool_functions, const domain_stats_vec_t& data);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
@@ -22,20 +22,22 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "domain_type.hpp"
|
||||
#include "config.hpp"
|
||||
|
||||
#include "lib/common/container/ring_buffer.hpp"
|
||||
#include "lib/common/container/small_vector.hpp"
|
||||
#include "lib/common/defines.hpp"
|
||||
#include "lib/common/demangle.hpp"
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "output_file.hpp"
|
||||
#include "lib/output/domain_type.hpp"
|
||||
#include "lib/output/metadata.hpp"
|
||||
#include "lib/output/output_stream.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/registration.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
#include <cstdint>
|
||||
#include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
#include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
|
||||
@@ -53,6 +55,7 @@
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
@@ -80,96 +83,10 @@
|
||||
constexpr size_t BUFFER_SIZE_BYTES = 4096;
|
||||
constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 2);
|
||||
|
||||
using rocprofiler_tool_buffer_kind_names_t =
|
||||
std::unordered_map<rocprofiler_buffer_tracing_kind_t, std::string>;
|
||||
using rocprofiler_tool_buffer_kind_operation_names_t =
|
||||
std::unordered_map<rocprofiler_buffer_tracing_kind_t,
|
||||
std::unordered_map<uint32_t, std::string>>;
|
||||
|
||||
using marker_message_map_t = std::unordered_map<uint64_t, std::string>;
|
||||
using rocprofiler_kernel_symbol_data_t =
|
||||
rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
|
||||
|
||||
namespace common = ::rocprofiler::common;
|
||||
namespace tool = ::rocprofiler::tool;
|
||||
|
||||
struct kernel_symbol_data : rocprofiler_kernel_symbol_data_t
|
||||
{
|
||||
using base_type = rocprofiler_kernel_symbol_data_t;
|
||||
|
||||
kernel_symbol_data(const base_type& _base)
|
||||
: base_type{_base}
|
||||
, formatted_kernel_name{tool::format_name(CHECK_NOTNULL(_base.kernel_name))}
|
||||
, demangled_kernel_name{common::cxx_demangle(CHECK_NOTNULL(_base.kernel_name))}
|
||||
, truncated_kernel_name{common::truncate_name(demangled_kernel_name)}
|
||||
{}
|
||||
|
||||
kernel_symbol_data();
|
||||
~kernel_symbol_data() = default;
|
||||
kernel_symbol_data(const kernel_symbol_data&) = default;
|
||||
kernel_symbol_data(kernel_symbol_data&&) noexcept = default;
|
||||
kernel_symbol_data& operator=(const kernel_symbol_data&) = default;
|
||||
kernel_symbol_data& operator=(kernel_symbol_data&&) noexcept = default;
|
||||
|
||||
std::string formatted_kernel_name = {};
|
||||
std::string demangled_kernel_name = {};
|
||||
std::string truncated_kernel_name = {};
|
||||
};
|
||||
|
||||
inline kernel_symbol_data::kernel_symbol_data()
|
||||
: base_type{0, 0, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}
|
||||
{}
|
||||
|
||||
using kernel_symbol_data_map_t = std::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data>;
|
||||
|
||||
struct rocprofiler_tool_counter_info_t : rocprofiler_counter_info_v0_t
|
||||
{
|
||||
using parent_type = rocprofiler_counter_info_v0_t;
|
||||
using dimension_id_vec_t = std::vector<rocprofiler_counter_dimension_id_t>;
|
||||
using dimension_info_vec_t = std::vector<rocprofiler_record_dimension_info_t>;
|
||||
|
||||
rocprofiler_tool_counter_info_t(rocprofiler_agent_id_t _agent_id,
|
||||
parent_type _info,
|
||||
dimension_id_vec_t&& _dim_ids,
|
||||
dimension_info_vec_t&& _dim_info)
|
||||
: parent_type{_info}
|
||||
, agent_id{_agent_id}
|
||||
, dimension_ids{std::move(_dim_ids)}
|
||||
, dimension_info{std::move(_dim_info)}
|
||||
{}
|
||||
|
||||
~rocprofiler_tool_counter_info_t() = default;
|
||||
rocprofiler_tool_counter_info_t(const rocprofiler_tool_counter_info_t&) = default;
|
||||
rocprofiler_tool_counter_info_t(rocprofiler_tool_counter_info_t&&) noexcept = default;
|
||||
rocprofiler_tool_counter_info_t& operator=(const rocprofiler_tool_counter_info_t&) = default;
|
||||
rocprofiler_tool_counter_info_t& operator=(rocprofiler_tool_counter_info_t&&) noexcept =
|
||||
default;
|
||||
|
||||
rocprofiler_agent_id_t agent_id = {};
|
||||
std::vector<rocprofiler_counter_dimension_id_t> dimension_ids = {};
|
||||
std::vector<rocprofiler_record_dimension_info_t> dimension_info = {};
|
||||
};
|
||||
|
||||
rocprofiler::sdk::buffer_name_info_t<std::string_view>
|
||||
get_buffer_id_names();
|
||||
|
||||
::rocprofiler::sdk::callback_name_info_t<std::string_view>
|
||||
get_callback_id_names();
|
||||
|
||||
std::map<uint64_t, std::string>
|
||||
get_callback_roctx_msg();
|
||||
|
||||
std::vector<kernel_symbol_data>
|
||||
get_kernel_symbol_data();
|
||||
|
||||
std::vector<rocprofiler_callback_tracing_code_object_load_data_t>
|
||||
get_code_object_data();
|
||||
|
||||
std::vector<rocprofiler_tool_counter_info_t>
|
||||
get_tool_counter_info();
|
||||
|
||||
std::vector<rocprofiler_record_dimension_info_t>
|
||||
get_tool_counter_dimension_info();
|
||||
using marker_message_map_t = std::unordered_map<uint64_t, std::string>;
|
||||
using tool_counter_info = ::rocprofiler::tool::tool_counter_info;
|
||||
using kernel_symbol_info = ::rocprofiler::tool::kernel_symbol_info;
|
||||
using rocprofiler_kernel_symbol_info_t = ::rocprofiler::tool::rocprofiler_kernel_symbol_info_t;
|
||||
|
||||
enum tracing_marker_kind
|
||||
{
|
||||
@@ -237,167 +154,3 @@ convert_marker_tracing_kind(TracingKindT val)
|
||||
{
|
||||
return convert_marker_tracing_kind(val, std::make_index_sequence<MARKER_API_LAST>{});
|
||||
}
|
||||
|
||||
struct rocprofiler_tool_dimension_pos_t
|
||||
{
|
||||
uint64_t dimension_id;
|
||||
size_t instance;
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
ar(cereal::make_nvp("dimension_id", dimension_id));
|
||||
ar(cereal::make_nvp("instance", instance));
|
||||
}
|
||||
};
|
||||
|
||||
struct rocprofiler_tool_record_counter_t
|
||||
{
|
||||
rocprofiler_counter_id_t counter_id = {};
|
||||
rocprofiler_record_counter_t record_counter = {};
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
ar(cereal::make_nvp("counter_id", counter_id));
|
||||
ar(cereal::make_nvp("value", record_counter.counter_value));
|
||||
}
|
||||
};
|
||||
|
||||
struct rocprofiler_tool_counter_collection_record_t
|
||||
{
|
||||
rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
|
||||
std::array<rocprofiler_tool_record_counter_t, 512> records = {};
|
||||
uint64_t thread_id = 0;
|
||||
uint64_t arch_vgpr_count = 0;
|
||||
uint64_t sgpr_count = 0;
|
||||
uint64_t lds_block_size_v = 0;
|
||||
uint64_t counter_count = 0;
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
ar(cereal::make_nvp("dispatch_data", dispatch_data));
|
||||
// should be removed when moving to buffered tracing
|
||||
std::vector<rocprofiler_tool_record_counter_t> tmp{records.begin(),
|
||||
records.begin() + counter_count};
|
||||
ar(cereal::make_nvp("records", tmp));
|
||||
ar(cereal::make_nvp("thread_id", thread_id));
|
||||
ar(cereal::make_nvp("arch_vgpr_count", arch_vgpr_count));
|
||||
ar(cereal::make_nvp("sgpr_count", sgpr_count));
|
||||
ar(cereal::make_nvp("lds_block_size_v", lds_block_size_v));
|
||||
}
|
||||
};
|
||||
|
||||
struct timestamps_t
|
||||
{
|
||||
rocprofiler_timestamp_t app_start_time;
|
||||
rocprofiler_timestamp_t app_end_time;
|
||||
};
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
template <typename Tp, domain_type DomainT>
|
||||
struct buffered_output;
|
||||
}
|
||||
} // namespace rocprofiler
|
||||
|
||||
using hip_buffered_output_t =
|
||||
::rocprofiler::tool::buffered_output<rocprofiler_buffer_tracing_hip_api_record_t,
|
||||
domain_type::HIP>;
|
||||
using hsa_buffered_output_t =
|
||||
::rocprofiler::tool::buffered_output<rocprofiler_buffer_tracing_hsa_api_record_t,
|
||||
domain_type::HSA>;
|
||||
using kernel_dispatch_buffered_output_t =
|
||||
::rocprofiler::tool::buffered_output<rocprofiler_buffer_tracing_kernel_dispatch_record_t,
|
||||
domain_type::KERNEL_DISPATCH>;
|
||||
using memory_copy_buffered_output_t =
|
||||
::rocprofiler::tool::buffered_output<rocprofiler_buffer_tracing_memory_copy_record_t,
|
||||
domain_type::MEMORY_COPY>;
|
||||
using marker_buffered_output_t =
|
||||
::rocprofiler::tool::buffered_output<rocprofiler_buffer_tracing_marker_api_record_t,
|
||||
domain_type::MARKER>;
|
||||
using rccl_buffered_output_t =
|
||||
::rocprofiler::tool::buffered_output<rocprofiler_buffer_tracing_rccl_api_record_t,
|
||||
domain_type::RCCL>;
|
||||
using counter_collection_buffered_output_t =
|
||||
::rocprofiler::tool::buffered_output<rocprofiler_tool_counter_collection_record_t,
|
||||
domain_type::COUNTER_COLLECTION>;
|
||||
using scratch_memory_buffered_output_t =
|
||||
::rocprofiler::tool::buffered_output<rocprofiler_buffer_tracing_scratch_memory_record_t,
|
||||
domain_type::SCRATCH_MEMORY>;
|
||||
|
||||
using tool_get_agent_node_id_fn_t = uint64_t (*)(rocprofiler_agent_id_t);
|
||||
using tool_get_app_timestamps_fn_t = timestamps_t* (*) ();
|
||||
using tool_get_kernel_name_fn_t = std::string_view (*)(uint64_t, uint64_t);
|
||||
using tool_get_domain_name_fn_t = std::string_view (*)(rocprofiler_buffer_tracing_kind_t);
|
||||
using tool_get_operation_name_fn_t = std::string_view (*)(rocprofiler_buffer_tracing_kind_t,
|
||||
rocprofiler_tracing_operation_t);
|
||||
using tool_get_callback_kind_name_fn_t = std::string_view (*)(rocprofiler_callback_tracing_kind_t);
|
||||
using tool_get_callback_op_name_fn_t = std::string_view (*)(rocprofiler_callback_tracing_kind_t,
|
||||
uint32_t);
|
||||
using tool_get_roctx_msg_fn_t = std::string_view (*)(uint64_t);
|
||||
using tool_get_counter_info_name_fn_t = std::string (*)(uint64_t);
|
||||
|
||||
struct tool_table
|
||||
{
|
||||
// node id
|
||||
tool_get_agent_node_id_fn_t tool_get_agent_node_id_fn = nullptr;
|
||||
// timestamps
|
||||
tool_get_app_timestamps_fn_t tool_get_app_timestamps_fn = nullptr;
|
||||
// names and messages
|
||||
tool_get_kernel_name_fn_t tool_get_kernel_name_fn = nullptr;
|
||||
tool_get_domain_name_fn_t tool_get_domain_name_fn = nullptr;
|
||||
tool_get_operation_name_fn_t tool_get_operation_name_fn = nullptr;
|
||||
tool_get_counter_info_name_fn_t tool_get_counter_info_name_fn = nullptr;
|
||||
tool_get_callback_kind_name_fn_t tool_get_callback_kind_fn = nullptr;
|
||||
tool_get_callback_op_name_fn_t tool_get_callback_op_name_fn = nullptr;
|
||||
tool_get_roctx_msg_fn_t tool_get_roctx_msg_fn = nullptr;
|
||||
};
|
||||
|
||||
/// converts a container of ring buffers of element Tp into a single container of elements
|
||||
template <typename Tp, template <typename, typename...> class ContainerT, typename... ParamsT>
|
||||
ContainerT<Tp>
|
||||
get_buffer_elements(ContainerT<rocprofiler::common::container::ring_buffer<Tp>, ParamsT...>&& data)
|
||||
{
|
||||
auto ret = ContainerT<Tp>{};
|
||||
for(auto& buf : data)
|
||||
{
|
||||
Tp* record = nullptr;
|
||||
do
|
||||
{
|
||||
record = buf.retrieve();
|
||||
if(record) ret.emplace_back(*record);
|
||||
} while(record != nullptr);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
namespace cereal
|
||||
{
|
||||
#define SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD))
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, const kernel_symbol_data& data)
|
||||
{
|
||||
cereal::save(ar, static_cast<const rocprofiler_kernel_symbol_data_t&>(data));
|
||||
SAVE_DATA_FIELD(formatted_kernel_name);
|
||||
SAVE_DATA_FIELD(demangled_kernel_name);
|
||||
SAVE_DATA_FIELD(truncated_kernel_name);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, const rocprofiler_tool_counter_info_t& data)
|
||||
{
|
||||
SAVE_DATA_FIELD(agent_id);
|
||||
cereal::save(ar, static_cast<const rocprofiler_counter_info_v0_t&>(data));
|
||||
SAVE_DATA_FIELD(dimension_ids);
|
||||
}
|
||||
|
||||
#undef SAVE_DATA_FIELD
|
||||
} // namespace cereal
|
||||
|
||||
@@ -0,0 +1,335 @@
|
||||
# rocprofv3 Multi-Node Profiling Data
|
||||
|
||||
## Overview
|
||||
|
||||
- rocprofv3 adds support for a `--output-format rocpd` option which enables writing a SQLite database file (one per process) with the collected data
|
||||
- Use the SQL schema from `rocpd` initially in order to support rocpd post-processing analysis
|
||||
- In order to visualize the data, users will convert the database(s) to their desired visualization formats
|
||||
- SQL has a relatively easy way to treat multiple separate databases as one database via views
|
||||
- rocprofv3 provides some command-line tools built on top of a python package designed for post-processing our databases
|
||||
|
||||
### Skills Required for Tasks
|
||||
|
||||
1. Rework rocprofv3 tool library output functions
|
||||
- __C++__: output functions written in C++ (`^/source/lib/rocprofiler-sdk-tool/generate*`)
|
||||
- __CMake__: move the output functions into stand-alone library
|
||||
2. Create Python package skeleton in `^/source/lib/python`
|
||||
- __Python__: organizing a Python package to be importable (`import rocpd`) and executable (i.e. `python -m rocpd --help`)
|
||||
3. Adding rocprofv3 SQLite support
|
||||
- __C++__: just a general skill requirement for working with rocprofiler-sdk
|
||||
- __CMake__: for integrating SQLite and python bindings into rocprofiler-sdk build
|
||||
- __SQL__: understanding of SQL statement meanings, knowledge of `rocpd` SQL schema
|
||||
4. Python bindings for output functions
|
||||
- __C++__: just a general skill requirement for working with rocprofiler-sdk
|
||||
- __PyBind11__: for writing Python bindings
|
||||
|
||||
#### Task #1: Rework `rocprofv3` Tool Library Output Functions
|
||||
|
||||
The problems with most of the output functions are:
|
||||
|
||||
- Problem: Access global memory via `tool_table` functions
|
||||
- Global memory access won't work well for invocation of these functions via Python bindings
|
||||
- Ideally, these functions should be written in the (pseudo-) functional programming style, i.e., function only accesses memory of arguments, communicates via return value, and avoids concepts like shared states but without restrictions such as immutable data arguments
|
||||
- Problem: Require all the profiling data to be loaded into memory
|
||||
- During runtime, rocprofv3 writes data to buffer and when buffer is full, writes the binary blob to a temporary intermediate binary file
|
||||
- During finalization, rocprofv3 reads _all_ of this data back into memory from the intermediate binary file and then writes to various output forms
|
||||
- This approach will not work when amount of collected data exceeds amount of available RAM, especially on systems with swap disabled; e.g., 1 TB of profiling data on system with 128 GB of RAM
|
||||
- We need to be able to stream data in chunks to these output functions
|
||||
- Proposed approach: function which creates a file handle, function which writes chunk of data to file (invoked multiple times), function which closes file handle
|
||||
|
||||
> Assigned: Markus, Olha, Jin, Araceli (i.e. onboarding group task) + Jonathan (CMake part)
|
||||
|
||||
##### Tasks
|
||||
|
||||
1. Move the `source/lib/rocprofiler-sdk-tool/generate*.{hpp,cpp}` functions into standalone (static) library: `source/lib/tool-data`
|
||||
- May require `source/lib/tool-common` (static) library if something is needed by both `tool-data` and `rocprofiler-sdk-tool` libraries
|
||||
- Please consult if you have any questions about where to put things and/or naming conventions
|
||||
- Pay attention to existing CMake and use similar style
|
||||
- We will link this library into `rocprofiler-sdk-tool` and link it into Python bindings library
|
||||
2. Solve global memory access problem
|
||||
- Probably need some additional data structures which represent the data currently stored/accessed from global memory and which will be passed into the functions.
|
||||
|
||||
### Python Package for Converting Databases to Other Output Formats
|
||||
|
||||
> __Note__: We could potentially reuse `rocpd` for the Python package name since "ROCm Profiling Data" is a pretty appropriate name.
|
||||
|
||||
rocprofv3 will need to rework the output functions within the `librocprofiler-sdk-tool.so` library (underlying library used by `rocprofv3`) in order to support Python bindings.
|
||||
For example, `generateJSON(...)` currently fetches info from global memory stored during the run, we need these functions to be pure: the only memory operated on is from the function arguments.
|
||||
Furthermore, these output functions need to support partial writes: invocations with only a subset of the data so that all the data need not be loaded into memory at one time.
|
||||
|
||||
> __Example__ (workflow): get handle to output format, e.g. a Perfetto session, invoke `generatePerfetto(...)` with some of the data, repeat until all data has been passed, close handle to the output format.
|
||||
|
||||
These reworked functions should be moved to another library, e.g. `librocprofiler-sdk-tool-io.(a|so)`.
|
||||
Once the output functions are isolated and functional, we need to generate python bindings (via PyBind11) so that a python package can be built on top of them.
|
||||
Various command-line tools can be provided using `__main__.py` file(s) within our python package.
|
||||
Users can use the python package to write their own scripts.
|
||||
|
||||
> __Example__ (two databases, one Perfetto trace): `rocprofv3-merge --output-format pftrace --out mybenchmark.pftrace --in results-1000.db results-1001.db`
|
||||
|
||||
### Treating multiple SQL databases as one database
|
||||
|
||||
```python
|
||||
conn = sqlite3.connect('db1.db')
|
||||
conn.execute("ATTACH DATABASE 'db2.db' AS db2;")
|
||||
conn.execute("ATTACH DATABASE 'db3.db' AS db3;")
|
||||
|
||||
# Create a view that unifies the 'users' table from all three databases
|
||||
conn.execute("""
|
||||
CREATE VIEW all_users AS
|
||||
SELECT * FROM users
|
||||
UNION ALL
|
||||
SELECT * FROM db2.users
|
||||
UNION ALL
|
||||
SELECT * FROM db3.users;
|
||||
""")
|
||||
|
||||
# Now you can query the view as if it were a single table
|
||||
cursor = conn.execute("SELECT * FROM all_users;")
|
||||
for row in cursor:
|
||||
print(row)
|
||||
|
||||
# Close the connection
|
||||
conn.close()
|
||||
```
|
||||
|
||||
## Proposed SQL Schema
|
||||
|
||||
A more comprehensive SQL Schema is proposed below. This schema is intended to be more comprehensive with respect to the
|
||||
various types of data that profilers can collect (such as Omnitrace/RSP).
|
||||
|
||||
The schema consists of multiple interrelated tables to capture different categories of profiling data.
|
||||
Below is a high-level schema with the primary tables and relationships.
|
||||
|
||||
__*Please note, this is a very preliminary sketch of the schema*__.
|
||||
If you want to weigh in, please restrict comments to the high-level organization, comments that it doesn't contain
|
||||
fields for correlation IDs or something like that are not particularly helpful at the moment.
|
||||
|
||||
```sql
|
||||
CREATE TABLE strings (
|
||||
id SERIAL PRIMARY KEY,
|
||||
value VARCHAR(1024) UNIQUE
|
||||
);
|
||||
|
||||
CREATE TABLE process (
|
||||
id INT PRIMARY KEY,
|
||||
pid INT,
|
||||
process_name_id INT,
|
||||
executable_path_id INT,
|
||||
start_time BIGINT,
|
||||
end_time BIGINT,
|
||||
FOREIGN KEY (process_name_id) REFERENCES strings(id),
|
||||
FOREIGN KEY (executable_path_id) REFERENCES strings(id)
|
||||
);
|
||||
|
||||
CREATE TABLE thread (
|
||||
id INT PRIMARY KEY,
|
||||
tid INT,
|
||||
process_id INT,
|
||||
thread_name_id INT,
|
||||
start_time BIGINT,
|
||||
end_time BIGINT,
|
||||
FOREIGN KEY (process_id) REFERENCES process(process_id),
|
||||
FOREIGN KEY (thread_name_id) REFERENCES strings(id)
|
||||
);
|
||||
|
||||
CREATE TABLE cpu_info (
|
||||
id SERIAL PRIMARY KEY,
|
||||
core_id INT,
|
||||
socket_id INT,
|
||||
frequency_hz INT,
|
||||
model_id INT,
|
||||
cache_size_kb INT,
|
||||
FOREIGN KEY (model_id) REFERENCES strings(id)
|
||||
);
|
||||
|
||||
CREATE TABLE gpu_info (
|
||||
id SERIAL PRIMARY KEY,
|
||||
device_name_id INT,
|
||||
compute_capability_id INT,
|
||||
memory_size_mb INT,
|
||||
multiprocessor_count INT,
|
||||
clock_rate_hz INT,
|
||||
FOREIGN KEY (device_name_id) REFERENCES strings(id),
|
||||
FOREIGN KEY (compute_capability_id) REFERENCES strings(id)
|
||||
);
|
||||
|
||||
CREATE TABLE instrumentation_regions (
|
||||
id SERIAL PRIMARY KEY,
|
||||
process_id INT,
|
||||
thread_id INT,
|
||||
region_name_id INT,
|
||||
start_time BIGINT,
|
||||
end_time BIGINT,
|
||||
parent_region_id INT,
|
||||
duration_ns BIGINT GENERATED ALWAYS AS (end_time - start_time) STORED,
|
||||
file_id INT,
|
||||
line_number INT,
|
||||
additional_info JSONB,
|
||||
FOREIGN KEY (process_id) REFERENCES process(process_id),
|
||||
FOREIGN KEY (thread_id) REFERENCES thread(thread_id),
|
||||
FOREIGN KEY (region_name_id) REFERENCES strings(id),
|
||||
FOREIGN KEY (file_id) REFERENCES strings(id)
|
||||
);
|
||||
|
||||
CREATE TABLE call_stacks (
|
||||
id SERIAL PRIMARY KEY,
|
||||
process_id INT,
|
||||
thread_id INT,
|
||||
timestamp BIGINT,
|
||||
stack_depth INT,
|
||||
function_id INT,
|
||||
file_id INT,
|
||||
line_number INT,
|
||||
parent_sample_id INT,
|
||||
call_site VARCHAR(1024),
|
||||
additional_info JSONB,
|
||||
FOREIGN KEY (process_id) REFERENCES process(process_id),
|
||||
FOREIGN KEY (thread_id) REFERENCES thread(thread_id),
|
||||
FOREIGN KEY (function_id) REFERENCES strings(id),
|
||||
FOREIGN KEY (file_id) REFERENCES strings(id)
|
||||
);
|
||||
|
||||
CREATE TABLE hardware_counters (
|
||||
id SERIAL PRIMARY KEY,
|
||||
process_id INT,
|
||||
thread_id INT,
|
||||
timestamp BIGINT,
|
||||
event_id INT,
|
||||
value BIGINT,
|
||||
FOREIGN KEY (process_id) REFERENCES process(process_id),
|
||||
FOREIGN KEY (thread_id) REFERENCES thread(thread_id),
|
||||
FOREIGN KEY (event_id) REFERENCES strings(id)
|
||||
);
|
||||
|
||||
CREATE TABLE memory_operations (
|
||||
id SERIAL PRIMARY KEY,
|
||||
process_id INT,
|
||||
thread_id INT,
|
||||
timestamp BIGINT,
|
||||
operation_type VARCHAR(50) CHECK (operation_type IN ('ALLOC', 'FREE', 'COPY')),
|
||||
source_address BIGINT,
|
||||
destination_address BIGINT,
|
||||
size_bytes BIGINT,
|
||||
duration_us BIGINT,
|
||||
additional_info JSONB,
|
||||
FOREIGN KEY (process_id) REFERENCES process(process_id),
|
||||
FOREIGN KEY (thread_id) REFERENCES thread(thread_id)
|
||||
);
|
||||
|
||||
CREATE TABLE gpu_kernel_launches (
|
||||
id SERIAL PRIMARY KEY,
|
||||
process_id INT,
|
||||
thread_id INT,
|
||||
gpu_id INT,
|
||||
kernel_id INT,
|
||||
dispatch_id INT,
|
||||
launch_time BIGINT,
|
||||
start_time BIGINT,
|
||||
end_time BIGINT,
|
||||
grid_size_x INT,
|
||||
grid_size_y INT,
|
||||
grid_size_z INT,
|
||||
block_size_x INT,
|
||||
block_size_y INT,
|
||||
block_size_z INT,
|
||||
shared_mem_bytes INT,
|
||||
duration_ns BIGINT GENERATED ALWAYS AS (end_time - start_time) STORED,
|
||||
FOREIGN KEY (process_id) REFERENCES process(process_id),
|
||||
FOREIGN KEY (thread_id) REFERENCES thread(thread_id),
|
||||
FOREIGN KEY (gpu_id) REFERENCES gpu_info(gpu_id),
|
||||
FOREIGN KEY (kernel_id) REFERENCES strings(id)
|
||||
);
|
||||
|
||||
CREATE TABLE binary_analysis_info (
|
||||
id SERIAL PRIMARY KEY,
|
||||
process_id INT,
|
||||
binary_name VARCHAR(1024),
|
||||
function_id INT,
|
||||
start_address BIGINT,
|
||||
end_address BIGINT,
|
||||
instruction_count INT,
|
||||
file_id INT,
|
||||
line_number INT,
|
||||
call_sites JSONB,
|
||||
additional_info JSONB,
|
||||
FOREIGN KEY (process_id) REFERENCES process(process_id),
|
||||
FOREIGN KEY (function_id) REFERENCES strings(id),
|
||||
FOREIGN KEY (file_id) REFERENCES strings(id)
|
||||
);
|
||||
```
|
||||
|
||||
Explanation of the design considerations:
|
||||
|
||||
1. __Separate String Tables__: Created unique string tables for function names, file names, kernel names, and event names to avoid storing redundant copies.
|
||||
- `function_names`, `file_names`, `kernel_names`, and `event_names` tables are created to hold unique strings. Each table has a surrogate primary key (`function_id`, `file_id`, `kernel_id`, `event_id`) that is referenced by the main tables.
|
||||
- This avoids storing redundant copies of long or frequently repeating strings in different tables, reducing the storage footprint and improving consistency.
|
||||
2. __Foreign Key References__: Main tables reference unique strings using foreign keys for consistency and space efficiency.
|
||||
- Main tables such as `instrumentation_regions`, `call_stacks`, `gpu_kernel_launches`, etc., reference these unique string tables using foreign keys.
|
||||
- This makes querying for specific function names or kernel names more efficient, as the strings are indexed separately.
|
||||
3. __Computed Columns__: Used computed columns for duration fields to facilitate quick analysis.
|
||||
- The `duration_ns` columns are computed from the start and end timestamps, providing useful metrics for quick analysis.
|
||||
4. __Extensibility__: Designed to be easily extensible with additional string categories if needed.
|
||||
- New string types or categories can be added by creating new tables, and the main tables can reference them with minor schema adjustments.
|
||||
5. __JSONB for Additional Metadata__:
|
||||
- JSONB columns (`additional_info`) are used to handle complex or variable metadata that doesn’t fit neatly into the structured schema (e.g., custom annotations, extra debug info).
|
||||
|
||||
### Example Data Insertion and Lookup
|
||||
|
||||
#### Adding a new function
|
||||
|
||||
```sql
|
||||
INSERT INTO function_names (function_name) VALUES ('my_function') ON CONFLICT (function_name) DO NOTHING;
|
||||
```
|
||||
|
||||
#### Linking a function in a call stack
|
||||
|
||||
```sql
|
||||
INSERT INTO call_stacks (process_id, thread_id, timestamp, stack_depth, function_id, file_id)
|
||||
VALUES (123, 456, '2024-09-27 10:00:00', 1, (SELECT function_id FROM function_names WHERE function_name = 'my_function'),
|
||||
(SELECT file_id FROM file_names WHERE file_name = 'my_file.c'));
|
||||
```
|
||||
|
||||
## Q & A
|
||||
|
||||
### All global variables are protected with locks in common synchronized library. How are we sending the data from these variables to the pure functions?
|
||||
|
||||
There is a new `rocprofiler::tool::metadata` struct in `lib/output/metadata.hpp` which will be populated with data from SQL.
|
||||
This struct is passed to the output functions.
|
||||
|
||||
### If we provide the functionality to flush the trace at regular intervals, do we delete the data in global memory after each flush? If not, how do we keep track of data already read at any given point in time during runtime?
|
||||
|
||||
We will probably not delete the metadata (agent info, code objects, kernel symbols, etc.) after a flush.
|
||||
When we flush, we will swap out the temporary binary file with a new temporary binary file and write/append the database with
|
||||
the contents of the old temporary binary file.
|
||||
|
||||
### Can a user collect trace at regular flush interval and ask for counter collection at the end of application?
|
||||
|
||||
I am not sure what you mean here. We can write counter collection data when we flush. If the user is asking for periodic
|
||||
flushing, we will restrict the output format to the database. In other words, I suspect that `--flush-rate X` will only
|
||||
be compatible with `--output-format rocpd` -- any additional or alternative data formats and we will throw an error in the
|
||||
rocprofv3 script. This is for simplicity's sake; supporting periodic flushing to CSV, etc. is unnecessary in my opinion.
|
||||
|
||||
### I think hardware_counters table in database schema should have a dispatch_id field to represent the kernel it belongs to
|
||||
|
||||
Please note, the proposed schema states clearly:
|
||||
|
||||
> __*Please note, this is a very preliminary sketch of the schema*__.
|
||||
> If you want to weigh in, please restrict comments to the high-level organization, comments that it doesn't contain
|
||||
> fields for correlation IDs or something like that are not particularly helpful at the moment.
|
||||
|
||||
However, I will note that the hardware counters table is probably going to be generic, i.e. supporting CPU HW counters, which
|
||||
do not have dispatch IDs. Lastly, I will also note, device counter collection is not associated with a dispatch so even in
|
||||
the case of GPU HW counters, including this field is questionable.
|
||||
|
||||
### What is binary analysis info table?
|
||||
|
||||
More advanced tools such as Omnitrace/Rocprofiler-System do address-to-line translations. This could also potentially
|
||||
include the sort of data related to PC sampling.
|
||||
|
||||
### What is the Key of gpu info table? Node_id/zero based numbering scheme?
|
||||
|
||||
That isn't defined. Very preliminary sketch.
|
||||
|
||||
### When is user allowed to access the database in case of flushing the trace at regular intervals? Is user allowed to read the database only after tool finalization? Or we create a database file for each interval?
|
||||
|
||||
TBD on the exact details but the user will certainly be able to read the database before tool finalization when it is flushed.
|
||||
@@ -1,161 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "domain_type.hpp"
|
||||
#include "helper.hpp"
|
||||
#include "tmp_file.hpp"
|
||||
|
||||
#include "lib/common/container/ring_buffer.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <deque>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
template <typename Tp>
|
||||
using ring_buffer_t = rocprofiler::common::container::ring_buffer<Tp>;
|
||||
|
||||
std::string
|
||||
compose_tmp_file_name(domain_type buffer_type);
|
||||
|
||||
template <typename Tp>
|
||||
std::tuple<Tp*, tmp_file*>
|
||||
get_tmp_file_buffer(domain_type type)
|
||||
{
|
||||
static Tp* _buffer = new Tp(rocprofiler::common::units::get_page_size());
|
||||
static tmp_file* _tmp_file = new tmp_file(compose_tmp_file_name(type));
|
||||
return std::tuple(_buffer, _tmp_file);
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
void
|
||||
offload_buffer(domain_type type)
|
||||
{
|
||||
auto [_tmp_buf, _tmp_file] = get_tmp_file_buffer<Tp>(type);
|
||||
auto _lk = std::lock_guard<std::mutex>(_tmp_file->file_mutex);
|
||||
[[maybe_unused]] static auto _success = _tmp_file->open();
|
||||
auto& _fs = _tmp_file->stream;
|
||||
_tmp_file->file_pos.emplace(_fs.tellg());
|
||||
_tmp_buf->save(_fs);
|
||||
_tmp_buf->clear();
|
||||
CHECK(_tmp_buf->is_empty() == true);
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
void
|
||||
write_ring_buffer(Tp _v, domain_type type)
|
||||
{
|
||||
auto [_tmp_buf, _tmp_file] = get_tmp_file_buffer<ring_buffer_t<Tp>>(type);
|
||||
|
||||
if(_tmp_buf->capacity() == 0)
|
||||
{
|
||||
ROCP_INFO << "rocprofv3 is dropping record from domain " << get_domain_column_name(type)
|
||||
<< ". Buffer has a capacity of zero.";
|
||||
return;
|
||||
}
|
||||
|
||||
auto* ptr = _tmp_buf->request(false);
|
||||
if(ptr == nullptr)
|
||||
{
|
||||
offload_buffer<ring_buffer_t<Tp>>(type);
|
||||
ptr = _tmp_buf->request(false);
|
||||
|
||||
// if failed, try again
|
||||
if(!ptr) ptr = _tmp_buf->request(false);
|
||||
|
||||
// after second failure, emit warning message
|
||||
ROCP_CI_LOG_IF(WARNING, !ptr)
|
||||
<< "rocprofv3 is dropping record from domain " << get_domain_column_name(type)
|
||||
<< ". No space in buffer: "
|
||||
<< fmt::format(
|
||||
"capacity={}, record_size={}, used_count={}, free_count={} | raw_info=[{}]",
|
||||
_tmp_buf->capacity(),
|
||||
_tmp_buf->data_size(),
|
||||
_tmp_buf->count(),
|
||||
_tmp_buf->free(),
|
||||
_tmp_buf->as_string());
|
||||
}
|
||||
|
||||
if(ptr)
|
||||
{
|
||||
if constexpr(std::is_move_constructible<Tp>::value)
|
||||
{
|
||||
new(ptr) Tp{std::move(_v)};
|
||||
}
|
||||
else if constexpr(std::is_move_assignable<Tp>::value)
|
||||
{
|
||||
*ptr = std::move(_v);
|
||||
}
|
||||
else if constexpr(std::is_copy_constructible<Tp>::value)
|
||||
{
|
||||
new(ptr) Tp{_v};
|
||||
}
|
||||
else if constexpr(std::is_copy_assignable<Tp>::value)
|
||||
{
|
||||
*ptr = _v;
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(std::is_void<Tp>::value,
|
||||
"data type is neither move/copy constructible nor move/copy assignable");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
void
|
||||
flush_tmp_buffer(domain_type type)
|
||||
{
|
||||
auto [_tmp_buf, _tmp_file] = get_tmp_file_buffer<Tp>(type);
|
||||
if(!_tmp_buf->is_empty()) offload_buffer<Tp>(type);
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
std::deque<Tp>
|
||||
read_tmp_file(domain_type type)
|
||||
{
|
||||
auto _data = std::deque<Tp>{};
|
||||
|
||||
auto [_tmp_buf, _tmp_file] = get_tmp_file_buffer<Tp>(type);
|
||||
auto _lk = std::lock_guard<std::mutex>{_tmp_file->file_mutex};
|
||||
auto& _fs = _tmp_file->stream;
|
||||
if(_fs.is_open()) _fs.close();
|
||||
_tmp_file->open(std::ios::binary | std::ios::in);
|
||||
for(auto itr : _tmp_file->file_pos)
|
||||
{
|
||||
_fs.seekg(itr); // set to the absolute position
|
||||
if(_fs.eof()) break;
|
||||
Tp _buffer;
|
||||
_buffer.load(_fs);
|
||||
_data.emplace_back(std::move(_buffer));
|
||||
}
|
||||
|
||||
return _data;
|
||||
}
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -257,8 +257,13 @@ active_signals::sync()
|
||||
{
|
||||
if(m_signal.handle == 0) return;
|
||||
|
||||
#if defined(ROCPROFILER_CI_STRICT_TIMESTAMPS) && ROCPROFILER_CI_STRICT_TIMESTAMPS > 0
|
||||
constexpr auto timeout_sec = std::chrono::seconds{5};
|
||||
#else
|
||||
// wait a maximum of thirty seconds
|
||||
constexpr auto timeout_sec = std::chrono::seconds{30};
|
||||
#endif
|
||||
|
||||
constexpr auto timeout =
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(timeout_sec).count();
|
||||
|
||||
|
||||
@@ -105,7 +105,6 @@ adjust_profiling_time(std::string_view _label,
|
||||
_label,
|
||||
_label,
|
||||
_value.start,
|
||||
_label,
|
||||
_bounds.start,
|
||||
(_bounds.start - _value.start));
|
||||
|
||||
@@ -116,7 +115,6 @@ adjust_profiling_time(std::string_view _label,
|
||||
_label,
|
||||
_label,
|
||||
_value.end,
|
||||
_label,
|
||||
_bounds.end,
|
||||
(_value.end - _bounds.end));
|
||||
}
|
||||
|
||||
Исполняемый файл
+533
@@ -0,0 +1,533 @@
|
||||
#!/usr/bin/env python3
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import sqlite3
|
||||
import argparse
|
||||
|
||||
__author__ = "AMD"
|
||||
__copyright__ = "Copyright 2023, Advanced Micro Devices, Inc."
|
||||
__license__ = "MIT"
|
||||
__maintainer__ = "AMD"
|
||||
__status__ = "Development"
|
||||
|
||||
"""
|
||||
This script converts one or more JSON output files from rocprofv3 into a
|
||||
single SQLite database conforming to the rocpd SQL Schema.
|
||||
"""
|
||||
|
||||
# this is the list of APIs whose records are inserted into API table which
|
||||
# needs to be updated whenever tracing support for a new API is added
|
||||
rocprofv3_apis = ("hip_api", "hsa_api", "marker_api", "rccl_api")
|
||||
|
||||
|
||||
class dotdict(dict):
    """Dictionary whose keys can also be read, set and deleted as attributes.

    Attribute reads go through ``dict.get``, so a missing key yields ``None``
    instead of raising ``AttributeError``. Attribute deletion of a missing key
    raises ``KeyError`` (it is ``dict.__delitem__``).

    On construction, nested containers are converted recursively:

    - ``dict`` values become ``dotdict`` instances,
    - ``list``/``tuple`` values become lists whose items are converted the
      same way (so lists of lists keep their shape and dictionaries at any
      depth become ``dotdict``).
    """

    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __init__(self, d):
        """Initialize from mapping (or iterable of pairs) ``d`` and wrap nested values."""
        super(dotdict, self).__init__(d)

        def _wrap(value):
            # Dictionaries get attribute access; sequences are rebuilt as
            # lists with every item wrapped. Sequences must NOT be handed to
            # dotdict(): dict() would either reject them or misread a list of
            # pairs as key/value data.
            if isinstance(value, dict):
                return dotdict(value)
            if isinstance(value, (list, tuple)):
                return [_wrap(item) for item in value]
            return value

        # Only existing keys are reassigned, so the dict size is stable
        # while iterating.
        for k, v in self.items():
            if isinstance(v, (dict, list, tuple)):
                self.__setitem__(k, _wrap(v))
|
||||
|
||||
|
||||
def dump_table(table):
    """Print every row of ``table`` to stdout (debugging helper).

    Uses the module-level ``cursor`` object, which must exist before this is
    called. ``table`` is interpolated directly into the SQL text, so it must
    be a trusted table name.
    """
    cursor.execute(f"SELECT * FROM {table};")
    rows = cursor.fetchall()

    print(f"\n\n##### {table} #####\n")
    for row in rows:
        columns = " | ".join(f"{col}" for col in row)
        print(" | {}".format(columns))
    print("")
|
||||
|
||||
|
||||
def execute_raw_sql_statements(cursor, statements):
    """Execute a sequence of raw SQL statements one at a time.

    ``statements`` is split on ``;`` and every non-blank fragment is executed
    via ``cursor.execute`` with the terminating ``;`` restored. Fragments that
    are empty or contain only whitespace are skipped.

    Note: the split is purely textual, so a ``;`` inside a string literal or
    comment also acts as a statement separator.

    Args:
        cursor: object providing ``execute(sql)`` (e.g. ``sqlite3.Cursor``).
        statements: string holding zero or more ``;``-separated SQL statements.

    Raises:
        sqlite3.Error: re-raised after the failing statement has been written
            to stderr.
    """

    for fragment in statements.split(";"):
        statement = fragment.strip()
        # strip before testing so whitespace-only fragments are not sent to
        # the database as a bare ";"
        if not statement:
            continue

        statement = f"{statement};"
        try:
            cursor.execute(statement)
        except sqlite3.Error as err:
            sys.stderr.write(f"SQLite3 error: {err}\nStatement:\n\t{statement}\n")
            sys.stderr.flush()
            # bare raise keeps the original traceback intact
            raise
|
||||
|
||||
|
||||
def create_schema(cursor):
    """Create the rocpd tables and record the schema version.

    Every table is created with ``IF NOT EXISTS``. A ``schema_version`` row
    with value ``2`` is then inserted into ``rocpd_metadata``; the insert is
    unconditional, so calling this again on the same database adds another row.

    Args:
        cursor: database cursor passed through to
            ``execute_raw_sql_statements``.
    """

    # String values in the INSERT use single quotes: double-quoted string
    # literals are a legacy SQLite quirk that can be disabled at build time.
    table_schema = """
    CREATE TABLE IF NOT EXISTS "rocpd_metadata" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "tag" varchar(4096) NOT NULL, "value" varchar(4096) NOT NULL);
    CREATE TABLE IF NOT EXISTS "rocpd_string" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "string" varchar(4096) NOT NULL UNIQUE ON CONFLICT IGNORE);
    CREATE TABLE IF NOT EXISTS "rocpd_op" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "gpuId" integer NOT NULL, "queueId" integer NOT NULL, "sequenceId" integer NOT NULL, "completionSignal" varchar(18) NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "description_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED, "opType_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED);
    CREATE TABLE IF NOT EXISTS "rocpd_api" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "pid" integer NOT NULL, "tid" integer NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "apiName_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED, "args_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED);
    CREATE TABLE IF NOT EXISTS "rocpd_api_ops" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "api_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED);
    -- optional
    CREATE TABLE IF NOT EXISTS "rocpd_kernelcodeobject" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "vgpr" integer NOT NULL, "sgpr" integer NOT NULL, "fbar" integer NOT NULL, "kernel_id" integer NOT NULL);
    CREATE TABLE IF NOT EXISTS "rocpd_kernelapi" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "api_ptr_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "stream" varchar(18) NOT NULL, "gridX" integer NOT NULL, "gridY" integer NOT NULL, "gridZ" integer NOT NULL, "workgroupX" integer NOT NULL, "workgroupY" integer NOT NULL, "workgroupZ" integer NOT NULL, "groupSegmentSize" integer NOT NULL, "privateSegmentSize" integer NOT NULL, "kernelArgAddress" varchar(18) NOT NULL, "aquireFence" varchar(8) NOT NULL, "releaseFence" varchar(8) NOT NULL, "codeObject_id" integer NOT NULL REFERENCES "rocpd_kernelcodeobject" ("id") DEFERRABLE INITIALLY DEFERRED, "kernelName_id" integer NOT NULL REFERENCES "rocpd_string" ("id") DEFERRABLE INITIALLY DEFERRED);
    CREATE TABLE IF NOT EXISTS "rocpd_copyapi" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "api_ptr_id" integer NOT NULL REFERENCES "rocpd_api" ("id") DEFERRABLE INITIALLY DEFERRED, "stream" varchar(18) NOT NULL, "size" integer NOT NULL, "width" integer NOT NULL, "height" integer NOT NULL, "kind" integer NOT NULL, "dst" varchar(18) NOT NULL, "src" varchar(18) NOT NULL, "dstDevice" integer NOT NULL, "srcDevice" integer NOT NULL, "sync" bool NOT NULL, "pinned" bool NOT NULL);

    INSERT INTO "rocpd_metadata"(tag, value) VALUES ('schema_version', '2');

    --CREATE TABLE IF NOT EXISTS "rocpd_monitor" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "deviceType" varchar(16) NOT NULL, "deviceId" integer NOT NULL, "monitorType" varchar(16) NOT NULL, "start" integer NOT NULL, "end" integer NOT NULL, "value" varchar(255) NOT NULL);
    --CREATE TABLE IF NOT EXISTS "rocpd_barrierop" ("op_ptr_id" integer NOT NULL PRIMARY KEY REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED, "signalCount" integer NOT NULL, "aquireFence" varchar(8) NOT NULL, "releaseFence" varchar(8) NOT NULL);
    --CREATE TABLE IF NOT EXISTS "rocpd_op_inputSignals" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "from_op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED, "to_op_id" integer NOT NULL REFERENCES "rocpd_op" ("id") DEFERRABLE INITIALLY DEFERRED);
    """

    execute_raw_sql_statements(cursor, table_schema)
|
||||
|
||||
|
||||
def finalize_schema(cursor):
    """Create the read-only convenience views on top of the rocpd tables.

    The script below defines the views ``api``, ``op``, ``busy``, ``top``,
    ``ktop``, ``kernel``, ``copy`` and ``copyop``. It is passed, unchanged and
    as a single multi-statement string, to ``execute_raw_sql_statements``
    together with ``cursor``.
    """

    view_definitions = """
        CREATE VIEW api AS SELECT rocpd_api.id,pid,tid,start,end,A.string AS apiName, B.string AS args FROM rocpd_api
            INNER JOIN rocpd_string A ON A.id = rocpd_api.apiName_id
            INNER JOIN rocpd_string B ON B.id = rocpd_api.args_id;
        CREATE VIEW op AS SELECT rocpd_op.id,gpuId,queueId,sequenceId,start,end,A.string AS description, B.string AS opType FROM rocpd_op
            INNER JOIN rocpd_string A ON A.id = rocpd_op.description_id
            INNER JOIN rocpd_string B ON B.id = rocpd_op.opType_id;
        CREATE VIEW busy AS SELECT A.gpuId, GpuTime, WallTime, GpuTime*1.0/WallTime AS Busy FROM (SELECT gpuId, sum(end-start) AS GpuTime FROM rocpd_op GROUP BY gpuId) A
            INNER JOIN (SELECT max(end) - min(start) AS WallTime FROM rocpd_op);
        CREATE VIEW top AS SELECT C.string AS Name, count(C.string) AS TotalCalls, sum(A.end-A.start) / 1000 AS TotalDuration, (sum(A.end-A.start)/count(C.string))/ 1000 AS Ave, sum(A.end-A.start) * 100.0 / (SELECT sum(A.end-A.start) FROM rocpd_op A) AS Percentage FROM (SELECT opType_id AS name_id, start, end FROM rocpd_op WHERE description_id in (SELECT id FROM rocpd_string WHERE string='')
            UNION SELECT description_id, start, end FROM rocpd_op WHERE description_id not in (SELECT id FROM rocpd_string WHERE string='')) A
            JOIN rocpd_string C on C.id = A.name_id GROUP BY Name ORDER BY TotalDuration desc;
        CREATE VIEW ktop AS SELECT C.string AS Name, count(C.string) AS TotalCalls, sum(A.end-A.start) / 1000 AS TotalDuration, (sum(A.end-A.start)/count(C.string))/ 1000 AS Ave, sum(A.end-A.start) * 100.0 / (SELECT sum(A.end-A.start) FROM rocpd_api A
            JOIN rocpd_kernelapi B on B.api_ptr_id = A.id) AS Percentage FROM rocpd_api A
            JOIN rocpd_kernelapi B on B.api_ptr_id = A.id
            JOIN rocpd_string C on C.id = B.kernelname_id GROUP BY Name ORDER BY TotalDuration desc;
        CREATE VIEW kernel AS SELECT B.id, gpuId, queueId, sequenceId, start, end, (end-start) AS duration, stream, gridX, gridY, gridz, workgroupX, workgroupY, workgroupZ, groupSegmentSize, privateSegmentSize, D.string AS kernelName FROM rocpd_api_ops A
            JOIN rocpd_op B on B.id = A.op_id
            JOIN rocpd_kernelapi C ON C.api_ptr_id = A.api_id
            JOIN rocpd_string D on D.id = kernelName_id;
        CREATE VIEW copy AS SELECT B.id, pid, tid, start, end, C.string AS apiName, stream, size, width, height, kind, dst, src, dstDevice, srcDevice, sync, pinned FROM rocpd_copyApi A
            JOIN rocpd_api B ON B.id = A.api_ptr_id
            JOIN rocpd_string C on C.id = B.apiname_id;
        CREATE VIEW copyop AS SELECT B.id, gpuId, queueId, sequenceId, B.start, B.end, (B.end-B.start) AS duration, stream, size, width, height, kind, dst, src, dstDevice, srcDevice, sync, pinned, E.string AS apiName FROM rocpd_api_ops A
            JOIN rocpd_op B ON B.id = A.op_id
            JOIN rocpd_copyapi C ON C.api_ptr_id = A.api_id
            JOIN rocpd_api D on D.id = A.api_id
            JOIN rocpd_string E ON E.id = D.apiName_id;
    """

    execute_raw_sql_statements(cursor, view_definitions)
|
||||
|
||||
|
||||
def normalize_timestamps(itr):
    """Make all timestamps relative to the time of rocprofv3 initialization within the application.

    ``itr.metadata.init_time`` is subtracted, in place, from the
    ``start_timestamp`` and ``end_timestamp`` of every record found under the
    API, kernel-dispatch and memory-copy categories of ``itr.buffer_records``.
    Categories that are absent (or empty) are skipped instead of aborting the
    whole conversion.

    Args:
        itr: one parsed rocprofv3 record set; it is modified in place.

    Returns:
        The same ``itr`` object, to allow call chaining.
    """

    categories = (
        "hip_api",
        "hsa_api",
        "marker_api",
        "rccl_api",
        "kernel_dispatch",
        "memory_copy",
    )

    min_val = None
    for category in categories:
        try:
            records = itr.buffer_records[category]
        except KeyError:
            # this record set simply does not contain the category
            continue

        for ritr in records or ():
            init_time = itr.metadata.init_time
            ritr.start_timestamp = ritr.start_timestamp - init_time
            ritr.end_timestamp = ritr.end_timestamp - init_time
            if min_val is None or ritr.start_timestamp < min_val:
                min_val = ritr.start_timestamp

    if min_val is not None:
        print(f"  - starting timestamp normalized down to a minimum of {min_val} nsec")
    else:
        # nothing was normalized; avoid reporting a minimum of "None nsec"
        print("  - no timestamps found to normalize")
    sys.stdout.flush()

    return itr
|
||||
|
||||
|
||||
def insert_strings(cursor, itr):
    """Populate the strings table with all the strings which will be referenced by various records.

    Strings are gathered from the agents, the callback/buffer record name
    tables, marker messages, counter dimension names, external correlation id
    names, kernel symbols and code-object URIs of ``itr``. They are
    de-duplicated, sorted and inserted into ``rocpd_string``.

    Args:
        cursor: database cursor used to execute the INSERT statements.
        itr: one parsed rocprofv3 record set.
    """

    strings = set()

    def append_strings(*args):
        # list arguments contribute their elements, anything else is one string
        for aitr in args:
            if isinstance(aitr, list):
                strings.update(aitr)
            else:
                strings.add(aitr)

    append_strings("UserMarker")

    for aitr in itr.agents:
        append_strings(aitr.name, aitr.vendor_name, aitr.product_name, aitr.model_name)
    for ritr in itr.strings.callback_records:
        append_strings(ritr.kind, ritr.operations)
    for ritr in itr.strings.buffer_records:
        append_strings(ritr.kind, ritr.operations)
    for ritr in itr.strings.marker_api:
        append_strings(ritr.value)
    for ritr in itr.strings.counters.dimension_ids:
        append_strings(ritr.name)
    for ritr in itr.strings.correlation_id.external:
        append_strings(ritr.value)
    for ritr in itr.kernel_symbols:
        append_strings(
            ritr.kernel_name,
            ritr.formatted_kernel_name,
            ritr.demangled_kernel_name,
            ritr.truncated_kernel_name,
        )
    for ritr in itr.code_objects:
        append_strings(ritr.uri)

    # a missing optional field shows up as None: it cannot be sorted together
    # with real strings and is not a string worth storing
    strings.discard(None)

    # bound parameters (instead of string interpolation) keep values that
    # contain quote characters from corrupting the statement
    cursor.executemany(
        "INSERT INTO rocpd_string (string) VALUES (?)",
        ((value,) for value in sorted(strings)),
    )
|
||||
|
||||
|
||||
def insert_api_data(cursor, itr, corr_id_offset, **kwargs):
    """Add all the HIP, HSA, marker, and RCCL API records to the database.

    Eventually we might want to abstract a way to iterate over the APIs covered
    here instead of maintaining an explicit list.

    Args:
        cursor: database cursor used to execute the INSERT statements.
        itr: one parsed rocprofv3 record set; ``metadata.pid``, ``strings`` and
            ``buffer_records`` are read from it.
        corr_id_offset: value added to the internal correlation id of every
            record. The shifted id is written back to the record and used as
            the ``rocpd_api`` row id.
        **kwargs: ``marker_mode`` selects how marker records are named:
            "message" (default) uses the marker message as the name, "api"
            uses the marker function name with the message as args, and
            "generic" uses "UserMarker" with the message as args.

    Returns:
        The largest shifted correlation id that was inserted, or
        ``corr_id_offset`` itself when there was nothing to insert (so a
        running offset kept by the caller never moves backwards).

    Raises:
        ValueError: if a marker record is processed with an unsupported
            ``marker_mode``.
    """

    string_id_query = "(SELECT id FROM rocpd_string WHERE string = ?)"

    # marker messages are keyed by the correlation id as recorded in this
    # record set, i.e. *before* corr_id_offset is applied
    marker_message_strings = {
        eitr.key: eitr.value for eitr in itr.strings.marker_api
    }

    max_corr_id = corr_id_offset
    for aitr in rocprofv3_apis:
        for hitr in itr.buffer_records[aitr]:
            corr_id = hitr.correlation_id
            original_corr_id = corr_id.internal
            corr_id.internal = original_corr_id + corr_id_offset

            name = itr.strings.buffer_records[hitr.kind].operations[hitr.operation]
            # message whose string id becomes args_id; None selects row id 1
            args_message = None

            if aitr == "marker_api":
                message = marker_message_strings.get(original_corr_id, name)
                mode = kwargs.get("marker_mode", "message")
                if mode == "message":
                    name = message
                elif mode == "api":
                    args_message = message
                elif mode == "generic":
                    name = "UserMarker"
                    args_message = message
                else:
                    raise ValueError(
                        f"unsupported marker_mode '{mode}': expected 'message', 'generic', or 'api'"
                    )

            params = [
                corr_id.internal,
                itr.metadata.pid,
                hitr.thread_id,
                hitr.start_timestamp,
                hitr.end_timestamp,
                name,
            ]
            if args_message is None:
                # NOTE(review): args_id 1 refers to the first rocpd_string row,
                # presumably a placeholder string -- confirm against create_schema
                args_sql = "1"
            else:
                args_sql = string_id_query
                params.append(args_message)

            # only constant SQL fragments are formatted into the statement;
            # every value taken from the input is bound as a parameter
            cursor.execute(
                f"""INSERT INTO rocpd_api(id, pid, tid, start, end, apiName_id, args_id)
                    VALUES (?, ?, ?, ?, ?, {string_id_query}, {args_sql});
                """,
                params,
            )
            max_corr_id = max(max_corr_id, corr_id.internal)

    return max_corr_id
|
||||
|
||||
|
||||
def insert_async_data(cursor, itr, corr_id_offset, op_id_offset):
    """Add all the kernel and memory copy records to the database.

    Eventually we might want to handle page-migration, scratch-memory, etc. but,
    at present, rocpd_schema does not support it.

    One ``rocpd_kernelcodeobject`` row is written per kernel symbol. Each
    kernel dispatch then produces a ``rocpd_kernelapi``, a ``rocpd_op`` and a
    ``rocpd_api_ops`` row, and each memory copy produces a ``rocpd_copyapi``,
    a ``rocpd_op`` and a ``rocpd_api_ops`` row.

    Args:
        cursor: database cursor used to execute the INSERT statements.
        itr: one parsed rocprofv3 record set.
        corr_id_offset: value added to the internal correlation id of every
            record; the shifted id is written back to the record.
        op_id_offset: id given to the first ``rocpd_op`` row; subsequent rows
            count up from it.

    Returns:
        The next unused op id (``op_id_offset`` plus the number of ops inserted).
    """

    string_id_query = "(SELECT id FROM rocpd_string WHERE string = ?)"

    external_corr_id_strings = {
        eitr.key: eitr.value for eitr in itr.strings.correlation_id.external
    }

    # agent handle -> node id, built once; the first agent wins on duplicates
    node_ids = {}
    for aitr in itr.agents:
        node_ids.setdefault(aitr.id.handle, aitr.node_id)

    def get_api_name(kind, op=None):
        """Name of operation ``op`` of ``kind``, or of the kind itself without ``op``."""
        entry = itr.strings.buffer_records[kind]
        return entry.operations[op] if op is not None else entry.kind

    def get_kernel_name(ksym, externid):
        """Name registered for the external correlation id, else the kernel's own name."""
        if externid > 0 and externid in external_corr_id_strings:
            return external_corr_id_strings[externid]
        return ksym.formatted_kernel_name

    def insert_op(op_id, gpu_id, queue_id, corr_id, record, description, op_type):
        """Insert one rocpd_op row and link it to its API record."""
        cursor.execute(
            f"""INSERT INTO rocpd_op(id, gpuId, queueId, sequenceId, completionSignal, start, end, description_id, opType_id)
                VALUES (?, ?, ?, ?, '', ?, ?, {string_id_query}, {string_id_query});
            """,
            (
                op_id,
                gpu_id,
                queue_id,
                corr_id,
                record.start_timestamp,
                record.end_timestamp,
                description,
                op_type,
            ),
        )
        cursor.execute(
            "INSERT INTO rocpd_api_ops(api_id, op_id) VALUES (?, ?);",
            (corr_id, op_id),
        )

    # Remember the row created for each kernel id of THIS record set. Looking
    # the row up again by kernel_id would match rows written for earlier
    # record sets, which reuse the same kernel ids.
    # NOTE(review): relies on rocpd_kernelcodeobject.id being the rowid alias
    # (integer primary key), as for the tables whose schema is visible here.
    code_object_ids = {}
    for kitr in itr.kernel_symbols:
        sgpr = kitr.get("sgpr_count", 0)
        vgpr = kitr.get("arch_vgpr_count", 0) + kitr.get("accum_vgpr_count", 0)
        cursor.execute(
            """INSERT INTO rocpd_kernelcodeobject(vgpr, sgpr, fbar, kernel_id)
               VALUES (?, ?, 0, ?);
            """,
            (vgpr, sgpr, kitr.kernel_id),
        )
        code_object_ids.setdefault(kitr.kernel_id, cursor.lastrowid)

    op_id = op_id_offset
    for kitr in itr.buffer_records.kernel_dispatch:
        kind_name = get_api_name(kitr.kind)
        info = kitr.dispatch_info
        queue_id = info.queue_id.handle
        corr_id = kitr.correlation_id
        grid = info.grid_size
        workgroup = info.workgroup_size
        ksym = itr.kernel_symbols[info.kernel_id]
        kern_name = get_kernel_name(ksym, corr_id.external)
        gpu_id = node_ids.get(info.agent_id.handle)
        kernel_arg_addr = "{:#x}".format(ksym.kernel_object)
        corr_id.internal += corr_id_offset

        cursor.execute(
            f"""INSERT INTO rocpd_kernelapi(api_ptr_id,
                                            stream,
                                            gridX, gridY, gridZ,
                                            workgroupX, workgroupY, workgroupZ,
                                            groupSegmentSize, privateSegmentSize,
                                            kernelArgAddress, aquireFence, releaseFence,
                                            codeObject_id, kernelName_id)
                VALUES (?,
                        ?,
                        ?, ?, ?,
                        ?, ?, ?,
                        ?, ?,
                        ?, '', '',
                        ?,
                        {string_id_query});
            """,
            (
                corr_id.internal,
                queue_id,
                grid.x,
                grid.y,
                grid.z,
                workgroup.x,
                workgroup.y,
                workgroup.z,
                info.group_segment_size,
                info.private_segment_size,
                kernel_arg_addr,
                code_object_ids.get(ksym.kernel_id),
                kern_name,
            ),
        )
        insert_op(op_id, gpu_id, queue_id, corr_id.internal, kitr, kern_name, kind_name)
        op_id += 1

    for mitr in itr.buffer_records.memory_copy:
        kind_name = get_api_name(mitr.kind)
        op_name = get_api_name(mitr.kind, mitr.operation)
        dst_id = node_ids.get(mitr.dst_agent_id.handle)
        src_id = node_ids.get(mitr.src_agent_id.handle)
        corr_id = mitr.correlation_id
        # not available from the records: always stored as false
        synced = False
        pinned = False
        corr_id.internal += corr_id_offset

        # width/height are stored as (bytes, 1); stream/src/dst are left empty
        cursor.execute(
            f"""INSERT INTO rocpd_copyapi(api_ptr_id, stream, size, width, height, kind, src, dst, srcDevice, dstDevice, sync, pinned)
                VALUES (?, '', ?, ?, 1, {string_id_query}, '', '', ?, ?, ?, ?);
            """,
            (
                corr_id.internal,
                mitr.bytes,
                mitr.bytes,
                op_name,
                src_id,
                dst_id,
                synced,
                pinned,
            ),
        )
        # the copy is attributed to the destination agent, on queue 0
        insert_op(op_id, dst_id, 0, corr_id.internal, mitr, op_name, kind_name)
        op_id += 1

    return op_id
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: read one or more rocprofv3 JSON files and write
    # their records into a rocpd SQLite3 database.
    #
    # The names below (args, conn, cursor, ...) are intentionally kept at
    # module level: helpers defined elsewhere in this file are called without
    # being handed the cursor (e.g. dump_table) and may rely on these globals.

    rocpd_tables = [
        "metadata",
        "string",
        "api",
        "op",
        "api_ops",
        "copyapi",
        "kernelapi",
        "kernelcodeobject",
    ]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        "--input",
        help="Input rocprofv3 JSON files",
        type=str,
        nargs="+",
        # without any input there is nothing to convert; fail during argument
        # parsing instead of crashing later while iterating over None
        required=True,
    )
    parser.add_argument(
        "-o", "--output", help="Output database name", type=str, default="example.db"
    )
    parser.add_argument(
        "-n",
        "--normalize-timestamps",
        help="Normalize timestamps relative to the app start time",
        action="store_true",
    )
    parser.add_argument(
        "-m",
        "--marker-mode",
        help="'generic' is classical rocpd behavior: all marker regions have 'UserMarker' name with message in args; 'message' uses the message as the region name; 'api' uses the name of the marker function with the message in args",
        choices=("generic", "message", "api"),
        type=str,
        default="message",
    )
    parser.add_argument(
        "-d",
        "--dump-tables",
        help="Dump generate rocpd tables to console (for debugging)",
        type=str,
        default=None,
        nargs="*",
        choices=set(rocpd_tables),
    )

    args = parser.parse_args(sys.argv[1:])

    start = time.monotonic_ns()
    print(f"Opening '{args.output}'...")

    # Connect to an SQLite database (or create it if it doesn't exist)
    conn = sqlite3.connect(args.output)

    try:
        # Create a cursor object using the cursor() method
        cursor = conn.cursor()

        create_schema(cursor)

        corr_id_offset = 0
        op_id_offset = 0
        for itr in args.input:
            print(f"Reading '{itr}'...")
            with open(itr, "rb") as f:
                data = dotdict(json.load(f))["rocprofiler-sdk-tool"]
                for ditr in data:
                    # normalize the timestamps if requested
                    ditr = normalize_timestamps(ditr) if args.normalize_timestamps else ditr

                    # create the strings table
                    insert_strings(cursor, ditr)

                    # insert the api data
                    _corr_id_offset = insert_api_data(
                        cursor, ditr, corr_id_offset, marker_mode=args.marker_mode
                    )

                    # insert the kernel and memory copy data
                    _op_id_offset = insert_async_data(
                        cursor, ditr, corr_id_offset, op_id_offset
                    )

                    # Save (commit) the changes
                    conn.commit()

                    # update the offsets; they must never move backwards,
                    # otherwise ids of a later record set would collide with
                    # ids that were already written
                    corr_id_offset = max(corr_id_offset, _corr_id_offset)
                    op_id_offset = max(op_id_offset, _op_id_offset)

        # "-d" without any table name means: dump every table
        if args.dump_tables is not None and len(args.dump_tables) == 0:
            args.dump_tables = rocpd_tables

        if args.dump_tables is not None:
            for itr in args.dump_tables:
                dump_table(f"rocpd_{itr}")

        finalize_schema(cursor)
        conn.commit()
    finally:
        print(f"Closing '{args.output}'...")
        # Close the connection (also when the conversion failed part-way)
        conn.close()

    end = time.monotonic_ns()
    elapsed_nsec = end - start
    elapsed_sec = elapsed_nsec / 1.0e9
    print(f"Runtime time (nsec): {elapsed_nsec}")
    print(f"Runtime time (sec) : {elapsed_sec}")
|
||||
Ссылка в новой задаче
Block a user