Files
rocm-systems/source/lib/output/buffered_output.hpp
T
Trowbridge, Ian e626df43eb Fix HIP Streams Duplication Error (#313)
* Fix stream duplication and fixed tests

* Added comments to explain the stream.cpp code, moved the stream nullptr check into update table to prevent re-adding a null stream, simplified the hip-streams bin file code, and added destroyStreams to the hip-streams bin file code

* Removed roctx from CMakeLists.txt

* Updated documentation

* Fix documentation

* Removed update_table for HIP compiler table and updated stream.cpp to remove support for HIP compiler table

* Added runtime initialization check for HIP

* Changed tool name, working on fixing memory management

* Added context for counter collection kernel rename combination

* Changed name from map to set and changed description

* Fix documentation description for group-by-queue

* Merged memory copy and kernel operations onto a single track when on the same stream

* Updated perfetto output to remove hardware information from the track name so that all memory copy and kernel operations on the same stream are merged onto the same track

* Addressed most PR comments

* Added filter for counter collection and removed kernel buffer tracing hack

* Added PR comment fixes

---------

Co-authored-by: Madsen, Jonathan <Jonathan.Madsen@amd.com>
2025-05-01 00:56:15 -05:00

193 lines
6.1 KiB
C++

// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "counter_info.hpp"
#include "generator.hpp"
#include "pc_sample_transform.hpp"
#include "statistics.hpp"
#include "stream_info.hpp"
#include "tmp_file_buffer.hpp"
#include "lib/common/container/ring_buffer.hpp"
#include "lib/common/logging.hpp"
#include <fmt/format.h>
#include <deque>
namespace rocprofiler
{
namespace tool
{
// Floating-point type supplied as the second template argument of statistics<> below.
using float_type = double;
// statistics<> instantiated with uint64_t as its first argument and float_type as its second.
// NOTE(review): buffered_output::stats is declared as stats_entry_t, not stats_data_t; the
// relationship between the two is not visible in this header.
using stats_data_t = statistics<uint64_t, float_type>;
/// @brief Per-domain handle for the file buffer returned by get_tmp_file_buffer<Tp>(DomainT).
///
/// The object itself stores only an `enabled` flag and a public `stats` member. The records are
/// reached through get_tmp_file_buffer, flush_tmp_buffer and read_tmp_file, all addressed by the
/// (Tp, DomainT) pair. Instances can be neither copied nor moved.
///
/// @tparam Tp      Record type handled by this output.
/// @tparam DomainT domain_type value selecting which buffer is addressed.
template <typename Tp, domain_type DomainT>
struct buffered_output
{
// Record type, re-exported for use by the member definitions and by callers.
using type = Tp;
// Domain selector, re-exported as a compile-time constant.
static constexpr auto buffer_type_v = DomainT;
/// @param _enabled When false, flush(), read(), clear() and destroy() return immediately and
///                 load_all() returns an empty deque.
explicit buffered_output(bool _enabled);
// Defaulted: destroying the object does not call destroy(), so the file buffer is only
// released when destroy() is invoked explicitly.
~buffered_output() = default;
buffered_output(const buffered_output&) = delete;
buffered_output(buffered_output&&) noexcept = delete;
buffered_output& operator=(const buffered_output&) = delete;
buffered_output& operator=(buffered_output&&) noexcept = delete;
// Reports whether this output is enabled.
// NOTE(review): the conversion is implicit (not `explicit`), so the object also converts to
// bool outside of conditions; tightening it could break callers that rely on that.
operator bool() const { return enabled; }
// Forwards to flush_tmp_buffer<type>(buffer_type_v) when enabled.
void flush();
// Calls flush() and then read_tmp_file<type>(buffer_type_v) when enabled.
void read();
// Currently has no effect beyond checking `enabled`.
void clear();
// Detaches, tears down and deletes the file buffer for this domain when enabled.
void destroy();
// Returns the file buffer's `nbytes`, or 0 when no file buffer exists. Not gated on `enabled`.
uint64_t get_num_bytes() const;
// Builds a generator from the file-buffer pointer for this domain. Not gated on `enabled`, and
// the pointer is passed through without a null check here.
generator<Tp> get_generator() const { return generator<Tp>{get_tmp_file_buffer<Tp>(DomainT)}; }
// Copies every record produced by the generator into a deque (empty when disabled).
std::deque<Tp> load_all();
// Publicly accessible statistics entry; value-initialized. Its type is declared elsewhere.
stats_entry_t stats = {};
private:
// Set once by the constructor; gates the mutating operations above.
bool enabled = false;
};
/// @brief Records whether this output is enabled; performs no other work.
/// @param _enabled Value stored in the `enabled` member.
template <typename Tp, domain_type DomainT>
buffered_output<Tp, DomainT>::buffered_output(bool _enabled)
: enabled{_enabled}
{}
/// @brief Forwards to flush_tmp_buffer for this record type and domain.
///
/// Does nothing when the output is disabled.
template <typename Tp, domain_type DomainT>
void
buffered_output<Tp, DomainT>::flush()
{
    if(enabled)
    {
        flush_tmp_buffer<type>(buffer_type_v);
    }
}
/// @brief Flushes this output and then invokes read_tmp_file for its record type and domain.
///
/// Does nothing when the output is disabled.
template <typename Tp, domain_type DomainT>
void
buffered_output<Tp, DomainT>::read()
{
    if(enabled)
    {
        // Flush first so read_tmp_file runs after any pending flush work has been issued.
        flush();
        read_tmp_file<type>(buffer_type_v);
    }
}
/// @brief Collects every record reachable through the generator into a single deque.
///
/// The outer loop walks the values produced by iterating the generator; each of those values is
/// handed to generator::get(), and every record in the returned range is appended in order.
///
/// @return All records in iteration order, or an empty deque when the output is disabled.
template <typename Tp, domain_type DomainT>
std::deque<Tp>
buffered_output<Tp, DomainT>::load_all()
{
    auto data = std::deque<Tp>{};
    if(!enabled) return data;

    auto gen = get_generator();
    for(auto ditr : gen)
    {
        // Bind each record by const reference: the previous by-value loop variable copied every
        // record once into the loop variable and then a second time into the deque.
        for(const auto& itr : gen.get(ditr))
        {
            data.emplace_back(itr);
        }
    }
    return data;
}
/// @brief Currently a no-op: the body only checks `enabled` and returns in either case.
///
/// destroy() calls this before tearing down the file buffer.
template <typename Tp, domain_type DomainT>
void
buffered_output<Tp, DomainT>::clear()
{
// Nothing follows this guard, so enabled and disabled outputs behave identically here.
if(!enabled) return;
}
/// @brief Tears down and deletes the file buffer registered for this record type and domain.
///
/// Does nothing when the output is disabled or when no file buffer is registered. Otherwise the
/// registered pointer is reset to nullptr before the buffer is destroyed, its file (if it tests
/// true) is closed and removed, and the file_buffer object is deleted.
template <typename Tp, domain_type DomainT>
void
buffered_output<Tp, DomainT>::destroy()
{
    if(!enabled) return;

    clear();

    auto*& slot = get_tmp_file_buffer<type>(buffer_type_v);
    if(slot == nullptr) return;

    // Detach first: the slot holds nullptr for the whole duration of the teardown below.
    file_buffer<type>* detached = slot;
    slot                        = nullptr;

    detached->buffer.destroy();
    if(detached->file)
    {
        detached->file.close();
        detached->file.remove();
    }
    delete detached;
}
/// @brief Reports the `nbytes` value of the file buffer for this record type and domain.
///
/// This query is not gated on `enabled`.
///
/// @return The file buffer's `nbytes`, or 0 when no file buffer is registered.
template <typename Tp, domain_type DomainT>
uint64_t
buffered_output<Tp, DomainT>::get_num_bytes() const
{
    auto* fbuf = get_tmp_file_buffer<type>(buffer_type_v);
    if(fbuf == nullptr) return 0;
    return fbuf->nbytes;
}
// Concrete buffered_output instantiations: each alias pairs the record type stored for a domain
// with the matching domain_type enumerator. All names are resolved from within rocprofiler::tool.
using hip_buffered_output_t =
    buffered_output<rocprofiler_buffer_tracing_hip_api_ext_record_t, domain_type::HIP>;
using hsa_buffered_output_t =
    buffered_output<rocprofiler_buffer_tracing_hsa_api_record_t, domain_type::HSA>;
using marker_buffered_output_t =
    buffered_output<rocprofiler_buffer_tracing_marker_api_record_t, domain_type::MARKER>;
using rccl_buffered_output_t =
    buffered_output<rocprofiler_buffer_tracing_rccl_api_record_t, domain_type::RCCL>;
using counter_collection_buffered_output_t =
    buffered_output<tool_counter_record_t, domain_type::COUNTER_COLLECTION>;
using scratch_memory_buffered_output_t =
    buffered_output<rocprofiler_buffer_tracing_scratch_memory_record_t,
                    domain_type::SCRATCH_MEMORY>;
using memory_allocation_buffered_output_t =
    buffered_output<rocprofiler_buffer_tracing_memory_allocation_record_t,
                    domain_type::MEMORY_ALLOCATION>;
using counter_records_buffered_output_t =
    buffered_output<serialized_counter_record_t, domain_type::COUNTER_VALUES>;
using pc_sampling_host_trap_buffered_output_t =
    buffered_output<rocprofiler_tool_pc_sampling_host_trap_record_t,
                    domain_type::PC_SAMPLING_HOST_TRAP>;
using rocdecode_buffered_output_t =
    buffered_output<rocprofiler_buffer_tracing_rocdecode_api_ext_record_t, domain_type::ROCDECODE>;
using rocjpeg_buffered_output_t =
    buffered_output<rocprofiler_buffer_tracing_rocjpeg_api_record_t, domain_type::ROCJPEG>;
using kernel_dispatch_buffered_output_with_stream_t =
    buffered_output<tool_buffer_tracing_kernel_dispatch_ext_record_t, domain_type::KERNEL_DISPATCH>;
using memory_copy_buffered_output_with_stream_t =
    buffered_output<tool_buffer_tracing_memory_copy_ext_record_t, domain_type::MEMORY_COPY>;
using pc_sampling_stochastic_buffered_output_t =
    buffered_output<rocprofiler_tool_pc_sampling_stochastic_record_t,
                    domain_type::PC_SAMPLING_STOCHASTIC>;
} // namespace tool
} // namespace rocprofiler