7b6d3c70bd
* Moved tests/apps to tests/bin * Renamed cmake project in tests/bin * Update samples - Use ROCPROFILER_DEFAULT_FAIL_REGEX - tweaks to stdout messages * Update tests - Use ROCPROFILER_DEFAULT_FAIL_REGEX * Add tests/lib - libraries with HIP code * Update PTL submodule - remove atexit delete of thread_id_map * Update cmake/rocprofiler_options.cmake - Set ROCPROFILER_DEFAULT_FAIL_REGEX * Update common lib: env + logging - improved customization of logging settings - default to disabling logging to files - install failure handler for rocprofv3 - set_env support in environment.* * Add lib/rocprofiler-sdk/shared_library.cpp - shared library constructor * Update lib/rocprofiler-sdk-tool/tool.cpp - destructor thread safety - convert callback_name_info and buffered_name_info to pointers - install failure handler for logging * Add tests/bin/hip-in-libraries - hip-in-libraries is an exe which uses two shared libraries where each shared library contains HIP kernels - used for testing deadlocking within __hipRegisterFatBinary * Update bin/rocprofv3 - reorganized the env variables - use exec to launch command - set ROCPROFILER_LIBRARY_CTOR=1 * Add tests/rocprofv3/tracing-hip-in-libraries - uses hip-in-libraries exe for exe which uses shared libraries to launch HIP kernels * Update bin/rocprofv3 - fix counter collection (no exec) * Update lib/rocprofiler-sdk-tool/tool.cpp - replace "Kernel-Name" with "Kernel_Name" * Update lib/rocprofiler-sdk/registration.cpp Use RTLD_LOCAL instead of RTLD_GLOBAL for env libraries * Update tests/rocprofv3 - replace "Kernel-Name" with "Kernel_Name" * Update tests - vector-ops (bin) stream syncs + runs with 4 queues per device - improve counter-collection/input1 validation - rocprofv3/tracing-hip-in-libraries does not do sys-trace - improved validation script for tracing-hip-in-libraries - updated dispatch_callback in json-tool.cpp following reworking of prototypes for counter collection * Update samples/counter_collection - updated 
dispatch_callback(s) and record_callback(s) following reworking of prototypes * Update bin/rocprofv3 - reorganized help menu - added options for sub-HSA tables - added --hip-runtime-trace - changed --hip-trace to include --hip-compiler-trace * Update lib/rocprofiler-sdk-tool - improved kernel filtering - removed arch_vgpr, accum_vgpr, sgpr code (in rocprofiler-sdk) - fixed issue with counter-collection w/o tracing - added support for fine grained HSA API tracing - removed directly linking to HSA-runtime * Update lib/rocprofiler-sdk/agent.cpp - rocp_agents != hsa_agents is non-fatal when ROCPROFILER_BUILD_CI=OFF (CMake option) * GPR (vector and scalar) info in kernel symbol data - rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t contains general purpose register info * Header include order fix - Include repo headers first - Third party library headers next - standard library headers last * Update dispatch profiling public API - introduce rocprofiler_profile_counting_dispatch_data_t - change signature of rocprofiler_profile_counting_dispatch_callback_t and rocprofiler_profile_counting_record_callback_t - provide rocprofiler_user_data_t pointer in dispatch callback - provide rocprofiler_user_data_t value (from dispatch cb) in record callback * Update tests/bin/CMakeLists.txt - fix add_subdirectory(hip-in-libraries) order * Update VERSION - bump to 0.2.0 in prep for AFAR
285 satır
11 KiB
C++
285 satır
11 KiB
C++
// MIT License
|
|
//
|
|
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
// copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
|
|
#include "client.hpp"
|
|
|
|
#include <fstream>
|
|
#include <functional>
|
|
#include <iostream>
|
|
#include <mutex>
|
|
#include <set>
|
|
#include <shared_mutex>
|
|
#include <sstream>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#include <rocprofiler-sdk/fwd.h>
|
|
#include <rocprofiler-sdk/registration.h>
|
|
#include <rocprofiler-sdk/rocprofiler.h>
|
|
|
|
/**
 * Evaluate a rocprofiler API call exactly once and verify that it succeeded.
 *
 * When the resulting status differs from ROCPROFILER_STATUS_SUCCESS, the stringified call,
 * file, line, message, numeric status and status string are written to std::cerr, and a
 * std::runtime_error carrying "<location> <msg> failure (<status string>)" is thrown.
 *
 * @param result expression yielding a rocprofiler_status_t
 * @param msg    description of the attempted operation; anything that can be streamed
 *               into a std::ostream (not only string literals)
 *
 * The body is wrapped in do { } while(false) so the macro expands to a single statement
 * and is safe inside an unbraced if/else. Callers terminate the invocation with ';'.
 */
#define ROCPROFILER_CALL(result, msg)                                                              \
    do                                                                                             \
    {                                                                                              \
        rocprofiler_status_t CHECKSTATUS = result;                                                 \
        if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS)                                              \
        {                                                                                          \
            std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS);                   \
            std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg            \
                      << " failed with error code " << CHECKSTATUS << ": " << status_msg           \
                      << std::endl;                                                                \
            std::stringstream errmsg{};                                                            \
            errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg               \
                   << " failure (" << status_msg << ")";                                           \
            throw std::runtime_error(errmsg.str());                                                \
        }                                                                                          \
    } while(false)
|
|
|
|
/// Always returns 1.
/// NOTE(review): presumably the definition of a function declared in client.hpp - confirm.
int
start()
{
    constexpr int started = 1;
    return started;
}
|
|
|
|
namespace
|
|
{
|
|
/// Returns a mutable reference to the single, function-local context id used by this tool.
/// The object has static storage duration and starts out zero-initialized.
rocprofiler_context_id_t&
get_client_ctx()
{
    static auto client_ctx = rocprofiler_context_id_t{};
    return client_ctx;
}
|
|
|
|
/// Returns a mutable reference to the single, function-local buffer id used by this tool.
/// The object has static storage duration and starts out value-initialized.
rocprofiler_buffer_id_t&
get_buffer()
{
    static auto buffer_id = rocprofiler_buffer_id_t{};
    return buffer_id;
}
|
|
|
|
/**
|
|
* Buffer callback called when the buffer is full. rocprofiler_record_header_t
|
|
* can contain counter records as well as other records (such as tracing). These
|
|
* records need to be filtered based on the category type.
|
|
*/
|
|
void
|
|
buffered_callback(rocprofiler_context_id_t,
|
|
rocprofiler_buffer_id_t,
|
|
rocprofiler_record_header_t** headers,
|
|
size_t num_headers,
|
|
void* user_data,
|
|
uint64_t)
|
|
{
|
|
static int enter_count = 0;
|
|
enter_count++;
|
|
if(enter_count % 100 != 0) return;
|
|
std::stringstream ss;
|
|
// Iterate through the returned records
|
|
for(size_t i = 0; i < num_headers; ++i)
|
|
{
|
|
auto* header = headers[i];
|
|
if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && header->kind == 0)
|
|
{
|
|
// Print the returned counter data.
|
|
auto* record = static_cast<rocprofiler_record_counter_t*>(header->payload);
|
|
ss << "(Id: " << record->id << " Value [D]: " << record->counter_value
|
|
<< " Corr_Id: " << record->correlation_id.internal << "),";
|
|
}
|
|
}
|
|
|
|
auto* output_stream = static_cast<std::ostream*>(user_data);
|
|
if(!output_stream) throw std::runtime_error{"nullptr to output stream"};
|
|
|
|
*output_stream << "[" << __FUNCTION__ << "] " << ss.str() << "\n";
|
|
}
|
|
|
|
/**
|
|
* Callback from rocprofiler when an kernel dispatch is enqueued into the HSA queue.
|
|
* rocprofiler_profile_config_id_t* is a return to specify what counters to collect
|
|
* for this dispatch (dispatch_packet). This example function creates a profile
|
|
* to collect the counter SQ_WAVES for all kernel dispatch packets.
|
|
*/
|
|
void
|
|
dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data,
|
|
rocprofiler_profile_config_id_t* config,
|
|
rocprofiler_user_data_t* /*user_data*/,
|
|
void* /*callback_data_args*/)
|
|
{
|
|
/**
|
|
* This simple example uses the same profile counter set for all agents.
|
|
* We store this in a cache to prevent constructing many identical profile counter
|
|
* sets. We first check the cache to see if we have already constructed a counter"
|
|
* set for the agent. If we have, return it. Otherwise, construct a new profile counter
|
|
* set.
|
|
*/
|
|
static std::shared_mutex m_mutex = {};
|
|
static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache = {};
|
|
|
|
auto search_cache = [&]() {
|
|
if(auto pos = profile_cache.find(dispatch_data.agent_id.handle); pos != profile_cache.end())
|
|
{
|
|
*config = pos->second;
|
|
return true;
|
|
}
|
|
return false;
|
|
};
|
|
|
|
{
|
|
auto rlock = std::shared_lock{m_mutex};
|
|
if(search_cache()) return;
|
|
}
|
|
|
|
auto wlock = std::unique_lock{m_mutex};
|
|
if(search_cache()) return;
|
|
|
|
// Counters we want to collect (here its SQ_WAVES)
|
|
std::set<std::string> counters_to_collect = {"SQ_WAVES"};
|
|
// GPU Counter IDs
|
|
std::vector<rocprofiler_counter_id_t> gpu_counters;
|
|
|
|
// Iterate through the agents and get the counters available on that agent
|
|
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
|
|
dispatch_data.agent_id,
|
|
[](rocprofiler_agent_id_t,
|
|
rocprofiler_counter_id_t* counters,
|
|
size_t num_counters,
|
|
void* user_data) {
|
|
std::vector<rocprofiler_counter_id_t>* vec =
|
|
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
|
|
for(size_t i = 0; i < num_counters; i++)
|
|
{
|
|
vec->push_back(counters[i]);
|
|
}
|
|
return ROCPROFILER_STATUS_SUCCESS;
|
|
},
|
|
static_cast<void*>(&gpu_counters)),
|
|
"Could not fetch supported counters");
|
|
|
|
std::vector<rocprofiler_counter_id_t> collect_counters;
|
|
// Look for the counters contained in counters_to_collect in gpu_counters
|
|
for(auto& counter : gpu_counters)
|
|
{
|
|
rocprofiler_counter_info_v0_t version;
|
|
ROCPROFILER_CALL(
|
|
rocprofiler_query_counter_info(
|
|
counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast<void*>(&version)),
|
|
"Could not query info for counter");
|
|
if(counters_to_collect.count(std::string(version.name)) > 0)
|
|
{
|
|
std::clog << "Counter: " << counter.handle << " " << version.name << "\n";
|
|
collect_counters.push_back(counter);
|
|
}
|
|
}
|
|
|
|
// Create a colleciton profile for the counters
|
|
rocprofiler_profile_config_id_t profile;
|
|
ROCPROFILER_CALL(
|
|
rocprofiler_create_profile_config(
|
|
dispatch_data.agent_id, collect_counters.data(), collect_counters.size(), &profile),
|
|
"Could not construct profile cfg");
|
|
|
|
profile_cache.emplace(dispatch_data.agent_id.handle, profile);
|
|
// Return the profile to collect those counters for this dispatch
|
|
*config = profile;
|
|
}
|
|
|
|
int
|
|
tool_init(rocprofiler_client_finalize_t, void* user_data)
|
|
{
|
|
ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "context creation failed");
|
|
|
|
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
|
4096,
|
|
2048,
|
|
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
|
buffered_callback,
|
|
user_data,
|
|
&get_buffer()),
|
|
"buffer creation failed");
|
|
|
|
auto client_thread = rocprofiler_callback_thread_t{};
|
|
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread),
|
|
"failure creating callback thread");
|
|
ROCPROFILER_CALL(rocprofiler_assign_callback_thread(get_buffer(), client_thread),
|
|
"failed to assign thread for buffer");
|
|
ROCPROFILER_CALL(rocprofiler_configure_buffered_dispatch_profile_counting_service(
|
|
get_client_ctx(), get_buffer(), dispatch_callback, nullptr),
|
|
"Could not setup buffered service");
|
|
ROCPROFILER_CALL(rocprofiler_start_context(get_client_ctx()), "start context");
|
|
|
|
// no errors
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
tool_fini(void* user_data)
|
|
{
|
|
std::clog << "In tool fini\n";
|
|
ROCPROFILER_CALL(rocprofiler_flush_buffer(get_buffer()), "buffer flush");
|
|
rocprofiler_stop_context(get_client_ctx());
|
|
|
|
auto* output_stream = static_cast<std::ostream*>(user_data);
|
|
*output_stream << std::flush;
|
|
if(output_stream != &std::cout && output_stream != &std::cerr) delete output_stream;
|
|
}
|
|
} // namespace
|
|
|
|
/**
 * Tool registration entry point (C linkage).
 *
 * Names the client, logs the rocprofiler-sdk version in use to std::clog, and returns the
 * configure result holding tool_init, tool_fini and the output stream used as tool data.
 *
 * The output destination is "counter_collection.log" unless the environment variable
 * ROCPROFILER_SAMPLE_OUTPUT_FILE is set; the special values "stdout" and "stderr" select
 * std::cout and std::cerr. If the file cannot be opened, a warning is printed and
 * std::cerr is used so that the sample output is not silently discarded.
 *
 * @param version          version encoded as major * 10000 + minor * 100 + patch
 * @param runtime_version  version string appended to the log line; a null pointer is
 *                         printed as "unknown"
 * @param id               client id whose name field is set to "CounterClientSample"
 * @return pointer to a function-local static configure result; it is initialized by the
 *         first call and the same pointer is returned by every later call
 */
extern "C" rocprofiler_tool_configure_result_t*
rocprofiler_configure(uint32_t                 version,
                      const char*              runtime_version,
                      uint32_t,
                      rocprofiler_client_id_t* id)
{
    // set the client name
    id->name = "CounterClientSample";

    // compute major/minor/patch version info
    uint32_t major = version / 10000;
    uint32_t minor = (version % 10000) / 100;
    uint32_t patch = version % 100;

    // generate info string (streaming a null const char* would put the stream in a failed
    // state, so substitute a placeholder)
    auto info = std::stringstream{};
    info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
         << " (" << (runtime_version ? runtime_version : "unknown") << ")";

    std::clog << info.str() << std::endl;

    // Selects the stream that receives the sample output. File streams are heap-allocated
    // and released again by tool_fini.
    auto open_output_stream = []() -> std::ostream* {
        std::string filename = "counter_collection.log";
        if(auto* outfile = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE"); outfile) filename = outfile;

        if(filename == "stdout") return &std::cout;
        if(filename == "stderr") return &std::cerr;

        auto* file_stream = new std::ofstream{filename};
        if(file_stream->is_open()) return file_stream;

        std::cerr << "[CounterClientSample] unable to open '" << filename
                  << "' for writing; sample output is redirected to stderr\n";
        delete file_stream;
        return &std::cerr;
    };

    // create configure data; the stream is opened as part of the one-time static
    // initialization so that repeated calls do not allocate streams that nothing owns
    static auto cfg =
        rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
                                            &tool_init,
                                            &tool_fini,
                                            static_cast<void*>(open_output_stream())};

    // return pointer to configure data
    return &cfg;
}
|