Files
rocm-systems/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp
T
Jonathan R. Madsen fafab6b713 Use -sdk suffix and reset VERSION to 0.0.0 (#263)
* Fix find_package(rocprofiler) in build tree

* Move include/rocprofiler to include/rocprofiler-sdk

* Update include/CMakeLists.txt

- add_subdirectory(rocprofiler-sdk)

* Move lib/rocprofiler to lib/rocprofiler-sdk

* Move lib/rocprofiler-tool to lib/rocprofiler-sdk-tool

* Update lib/CMakeLists.txt

- add_subdirectory(rocprofiler-sdk)
- add_subdirectory(rocprofiler-sdk-tool)

* Update lib/rocprofiler-sdk/CMakeLists.txt

* Rename rocprofiler-tool to rocprofiler-sdk-tool

* Replace include rocprofiler/ with include rocprofiler-sdk/

* Replace include lib/rocprofiler/ with include lib/rocprofiler-sdk/

* Set VERSION to 0.0.0 and finish install to rocprofiler-sdk

* More fixes for rocprofiler -> rocprofiler-sdk

- fix issue with rocprofiler-sdk-config.cmake.in
- fix counters xml install path

* Fix documentation generation

* Create rocprofiler_LIB_ROCPROFILER_SDK_DIR for build tree

* cmake formatting (cmake-format) (#264)

Co-authored-by: jrmadsen <jrmadsen@users.noreply.github.com>

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

[ROCm/rocprofiler-sdk commit: 9a0c84efa6]
2023-11-29 20:43:18 -06:00

465 строки
17 KiB
C++

// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "helper.hpp"
#include "trace_buffer.hpp"
#include "lib/common/environment.hpp"
#include "lib/common/filesystem.hpp"
#include <fmt/core.h>
#include <unistd.h>
#include <fstream>
#include <iomanip>
#include <mutex>
#include <shared_mutex>
#include <unordered_map>
#include <vector>
namespace common = ::rocprofiler::common;
namespace fs = common::filesystem;
TRACE_BUFFER_INSTANTIATE();
// static const uint32_t lds_block_size = 128 * 4;
namespace
{
auto tool_buffer = rocprofiler_buffer_id_t{};
auto output_path =
fs::path{common::get_env<std::string>("ROCPROFILER_OUTPUT_PATH", fs::current_path().string())};
auto output_file_name =
common::get_env<std::string>("ROCPROFILER_OUTPUT_FILE_NAME", std::to_string(getpid()) + "-");
std::pair<std::ostream*, void (*)(std::ostream*&)>
get_output_stream(const std::string& fname, const std::string& ext = ".ext")
{
if(output_path.string().empty()) return {&std::clog, [](auto*&) {}};
if(fs::exists(output_path) && !fs::is_directory(fs::status(output_path)))
throw std::runtime_error{
fmt::format("ROCPROFILER_OUTPUT_PATH ({}) already exists and is not a directory",
output_path.string())};
if(!fs::exists(output_path)) fs::create_directory(output_path);
auto output_file = output_path / (output_file_name + fname + ext);
auto* _ofs = new std::ofstream{output_file};
if(!_ofs && !*_ofs)
throw std::runtime_error{
fmt::format("Failed to open {} for output", (output_path / output_file_name).string())};
std::cout << "Results File: " << output_file << std::endl;
return {_ofs, [](std::ostream*& v) {
if(v) dynamic_cast<std::ofstream*>(v)->close();
delete v;
v = nullptr;
}};
}
template <typename Tp>
std::string
as_hex(Tp _v, size_t _width = 16)
{
auto _ss = std::stringstream{};
_ss.fill('0');
_ss << "0x" << std::hex << std::setw(_width) << _v;
return _ss.str();
}
} // namespace
struct output_file
{
output_file(std::string name)
: m_name(std::move(name))
{
std::tie(m_stream, m_dtor) = get_output_stream(name);
}
~output_file() { m_dtor(m_stream); }
output_file(const output_file&) = delete;
output_file& operator=(const output_file&) = delete;
std::string name() const { return m_name; }
template <typename T>
std::ostream& operator<<(T&& value)
{
return (*m_stream) << std::forward<T>(value);
}
std::ostream& operator<<(std::ostream& (*func)(std::ostream&) ) { return (*m_stream) << func; }
private:
using stream_dtor_t = void (*)(std::ostream*&);
const std::string m_name = {};
std::ostream* m_stream = nullptr;
stream_dtor_t m_dtor = [](std::ostream*&) {};
};
auto&
get_hsa_api_file()
{
static auto _v = output_file{"hsa_api_trace"};
return _v;
}
auto&
get_kernel_trace_file()
{
static auto _v = output_file{"kernel_trace"};
return _v;
}
std::shared_mutex kernel_data_mutex;
std::unordered_map<rocprofiler_kernel_id_t, std::string> kernel_data;
struct hsa_api_trace_entry_t
{
std::atomic<uint32_t> valid;
rocprofiler_callback_tracing_record_t record;
rocprofiler_timestamp_t begin_timestamp;
rocprofiler_timestamp_t end_timestamp;
std::string_view api_name;
hsa_api_trace_entry_t(rocprofiler_timestamp_t begin,
rocprofiler_timestamp_t end,
rocprofiler_callback_tracing_record_t tracer_record,
std::string_view name
)
: valid(TRACE_ENTRY_INIT)
, record(tracer_record)
, begin_timestamp(begin)
, end_timestamp(end)
, api_name(name)
{}
};
TraceBuffer<hsa_api_trace_entry_t> hsa_api_buffer("HSA API",
0x200000,
[](hsa_api_trace_entry_t* entry) {
TracerFlushRecord(
entry,
ROCPROFILER_CALLBACK_TRACING_HSA_API);
});
rocprofiler_tool_callback_name_info_t name_info;
void
tool_fini(void* tool_data)
{
(void) (tool_data);
}
void
TracerFlushRecord(void* data, rocprofiler_callback_tracing_kind_t kind)
{
if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API)
{
auto* entry = reinterpret_cast<hsa_api_trace_entry_t*>(data);
get_hsa_api_file() << entry->api_name << " " << entry->begin_timestamp << ":"
<< entry->end_timestamp << " " << entry->record.correlation_id.internal
<< '\n';
}
}
void
rocprofiler_tracing_callback(rocprofiler_callback_tracing_record_t record,
rocprofiler_user_data_t* user_data,
void* data)
{
if(record.kind == ROCPROFILER_CALLBACK_TRACING_HIP_API)
{
// To be implemented
throw std::runtime_error{"not implemented"};
}
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_HSA_API)
{
auto timestamp = rocprofiler_timestamp_t{};
ROCPROFILER_CALL(rocprofiler_get_timestamp(&timestamp), "timestamp failed");
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
{
user_data->value = timestamp;
}
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
{
const auto* info_name_str = name_info.operation_names[record.kind][record.operation];
hsa_api_trace_entry_t& entry =
hsa_api_buffer.Emplace(user_data->value, timestamp, record, info_name_str);
entry.valid.store(TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
}
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API)
{
// To be implemented
throw std::runtime_error{"not implemented"};
}
(void) (data);
}
void
code_object_tracing_callback(rocprofiler_callback_tracing_record_t record,
rocprofiler_user_data_t* user_data,
void* data)
{
if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
record.operation == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT_LOAD)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD)
{
// flush the buffer to ensure that any lookups for the client kernel names for the code
// object are completed
auto flush_status = rocprofiler_flush_buffer(tool_buffer);
if(flush_status != ROCPROFILER_STATUS_ERROR_BUFFER_BUSY)
ROCPROFILER_CALL(flush_status, "buffer flush");
}
}
if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
record.operation == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER)
{
auto* sym_data =
static_cast<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t*>(
record.payload);
if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD)
{
auto kernel_name =
std::regex_replace(sym_data->kernel_name, std::regex{"(\\.kd)$"}, "");
int demangle_status = 0;
kernel_name = cxa_demangle(kernel_name, &demangle_status);
std::unique_lock<std::shared_mutex> lock(kernel_data_mutex);
kernel_data.emplace(sym_data->kernel_id, kernel_name);
}
// The map entry cannot be erased here
// since we are tracing the kernel symbols here not the kernel dispatch
// else if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD)
//{
// kernel_data.erase(data->kernel_id);
//}
}
(void) user_data;
(void) data;
}
void
kernel_tracing_callback(rocprofiler_context_id_t context,
rocprofiler_buffer_id_t buffer_id,
rocprofiler_record_header_t** headers,
size_t num_headers,
void* user_data,
uint64_t /*drop_count*/)
{
if(num_headers == 0)
throw std::runtime_error{
"rocprofiler invoked a buffer callback with no headers. this should never happen"};
else if(headers == nullptr)
throw std::runtime_error{"rocprofiler invoked a buffer callback with a null pointer to the "
"array of headers. this should never happen"};
for(size_t i = 0; i < num_headers; ++i)
{
auto* header = headers[i];
if(header == nullptr)
{
throw std::runtime_error{
"rocprofiler provided a null pointer to header. this should never happen"};
}
else if(header->hash !=
rocprofiler_record_header_compute_hash(header->category, header->kind))
{
throw std::runtime_error{"rocprofiler_record_header_t (category | kind) != hash"};
}
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
header->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_kernel_dispatch_record_t*>(header->payload);
std::string kernel_name;
{
std::shared_lock<std::shared_mutex> lock(kernel_data_mutex);
kernel_name = kernel_data.at(record->kernel_id);
}
get_kernel_trace_file()
<< "agent_id=" << record->agent_id.handle
<< ", queue_id=" << record->queue_id.handle << ", kernel_id=" << record->kernel_id
<< ", kernel=" << kernel_name << ", context=" << context.handle
<< ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.internal
<< ", extern_cid=" << record->correlation_id.external.value
<< ", kind=" << record->kind << ", start=" << record->start_timestamp
<< ", stop=" << record->end_timestamp
<< ", private_segment_size=" << record->private_segment_size
<< ", group_segment_size=" << record->group_segment_size << ", workgroup_size=("
<< record->workgroup_size.x << "," << record->workgroup_size.y << ","
<< record->workgroup_size.z << "), grid_size=(" << record->grid_size.x << ","
<< record->grid_size.y << "," << record->grid_size.z << ")" << '\n';
}
}
(void) (user_data);
}
rocprofiler_tool_callback_name_info_t
get_callback_id_names()
{
auto cb_name_info = rocprofiler_tool_callback_name_info_t{};
//
// callback for each kind operation
//
static auto tracing_kind_operation_cb =
[](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) {
auto* name_info_v = static_cast<rocprofiler_tool_callback_name_info_t*>(data_v);
if(kindv == ROCPROFILER_CALLBACK_TRACING_HSA_API)
{
const char* name = nullptr;
ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name(
kindv, operation, &name, nullptr),
"query callback failed");
if(name) name_info_v->operation_names[kindv][operation] = name;
}
if(kindv == ROCPROFILER_CALLBACK_TRACING_HIP_API)
{
const char* name = nullptr;
ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name(
kindv, operation, &name, nullptr),
"query callback failed");
if(name) name_info_v->operation_names[kindv][operation] = name;
}
return 0;
};
//
// callback for each callback kind (i.e. domain)
//
static auto tracing_kind_cb = [](rocprofiler_callback_tracing_kind_t kind, void* data) {
// store the callback kind name
auto* name_info_v = static_cast<rocprofiler_tool_callback_name_info_t*>(data);
const char* name = nullptr;
ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_name(kind, &name, nullptr),
"query callback failed");
if(name) name_info_v->kind_names[kind] = name;
if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API)
{
ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operations(
kind, tracing_kind_operation_cb, static_cast<void*>(data)),
"query callback failed");
}
return 0;
};
ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kinds(tracing_kind_cb,
static_cast<void*>(&cb_name_info)),
"iterate_callback failed");
return cb_name_info;
}
int
tool_init(rocprofiler_client_finalize_t /*fini_func*/, void* tool_data)
{
// Add the rocporfiler_call macro
rocprofiler_context_id_t context_id;
name_info = get_callback_id_names();
ROCPROFILER_CALL(rocprofiler_create_context(&context_id), "create context failed");
if(common::get_env("ROCPROFILER_KERNEL_TRACE", false))
{
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(context_id,
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT,
nullptr,
0,
code_object_tracing_callback,
nullptr),
"tracing configure failed");
ROCPROFILER_CALL(rocprofiler_create_buffer(context_id,
4096,
2048,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
kernel_tracing_callback,
tool_data,
&tool_buffer),
"buffer creation");
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(
context_id, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0, tool_buffer),
"buffer tracing service for kernel dispatch configure");
}
if(common::get_env("ROCPROFILER_HSA_API_TRACE", false))
{
// Requesting all operations
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(context_id,
ROCPROFILER_CALLBACK_TRACING_HSA_API,
nullptr,
0,
rocprofiler_tracing_callback,
nullptr),
"tracing configure failed");
}
ROCPROFILER_CALL(rocprofiler_start_context(context_id), "start context failed");
return 0;
}
extern "C" rocprofiler_tool_configure_result_t*
rocprofiler_configure(uint32_t /*version*/,
const char* /*runtime_version*/,
uint32_t priority,
rocprofiler_client_id_t* id)
{
// only activate if main tool
if(priority > 0) return nullptr;
// set the client name
id->name = "rocporfiler-tool";
// store client info
// client::client_id = id;
// create configure data
static auto cfg = rocprofiler_tool_configure_result_t{
sizeof(rocprofiler_tool_configure_result_t), &tool_init, &tool_fini, nullptr};
// return pointer to configure data
return &cfg;
// data passed around all the callbacks
}