[2/N] Agent Counter implementation with unit tests to check functionality (#846)
Agent Counter Collection API with tests and samples. --------- Co-authored-by: Benjamin Welton <ben@amd.com>
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
7eeb7376b4
Коммит
28e6430d04
@@ -106,3 +106,29 @@ set_tests_properties(
|
||||
PROPERTIES TIMEOUT 120 LABELS "samples" ENVIRONMENT
|
||||
"${counter-collection-functional-counter-env}" FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
|
||||
|
||||
add_library(counter-collection-agent-profiling-client SHARED)
|
||||
target_sources(counter-collection-agent-profiling-client PRIVATE agent_profiling.cpp
|
||||
client.hpp)
|
||||
target_link_libraries(
|
||||
counter-collection-agent-profiling-client
|
||||
PUBLIC rocprofiler::samples-build-flags
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-common-library)
|
||||
|
||||
add_executable(counter-collection-agent-profiling)
|
||||
target_sources(counter-collection-agent-profiling PRIVATE main.cpp)
|
||||
target_link_libraries(counter-collection-agent-profiling
|
||||
PRIVATE counter-collection-agent-profiling-client Threads::Threads)
|
||||
|
||||
rocprofiler_samples_get_preload_env(PRELOAD_ENV counter-collection-agent-profiling-client)
|
||||
|
||||
set(counter-collection-functional-counter-env "${PRELOAD_ENV}" "${LIBRARY_PATH_ENV}")
|
||||
|
||||
add_test(NAME counter-collection-agent-profiling
|
||||
COMMAND $<TARGET_FILE:counter-collection-agent-profiling>)
|
||||
|
||||
set_tests_properties(
|
||||
counter-collection-agent-profiling
|
||||
PROPERTIES TIMEOUT 120 LABELS "samples" ENVIRONMENT
|
||||
"${counter-collection-functional-counter-env}" FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
|
||||
|
||||
@@ -0,0 +1,347 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "client.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <shared_mutex>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/registration.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#define ROCPROFILER_CALL(result, msg) \
|
||||
{ \
|
||||
rocprofiler_status_t CHECKSTATUS = result; \
|
||||
if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \
|
||||
{ \
|
||||
std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \
|
||||
std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \
|
||||
<< " failed with error code " << CHECKSTATUS << ": " << status_msg \
|
||||
<< std::endl; \
|
||||
std::stringstream errmsg{}; \
|
||||
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \
|
||||
<< status_msg << ")"; \
|
||||
throw std::runtime_error(errmsg.str()); \
|
||||
} \
|
||||
}
|
||||
|
||||
int
|
||||
start()
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
rocprofiler_context_id_t&
|
||||
get_client_ctx()
|
||||
{
|
||||
static rocprofiler_context_id_t ctx;
|
||||
return ctx;
|
||||
}
|
||||
|
||||
rocprofiler_buffer_id_t&
|
||||
get_buffer()
|
||||
{
|
||||
static rocprofiler_buffer_id_t buf = {};
|
||||
return buf;
|
||||
}
|
||||
|
||||
/**
|
||||
* Buffer callback called when the buffer is full. rocprofiler_record_header_t
|
||||
* can contain counter records as well as other records (such as tracing). These
|
||||
* records need to be filtered based on the category type.
|
||||
*/
|
||||
void
|
||||
buffered_callback(rocprofiler_context_id_t,
|
||||
rocprofiler_buffer_id_t,
|
||||
rocprofiler_record_header_t** headers,
|
||||
size_t num_headers,
|
||||
void* user_data,
|
||||
uint64_t)
|
||||
{
|
||||
std::stringstream ss;
|
||||
// Iterate through the returned records
|
||||
for(size_t i = 0; i < num_headers; ++i)
|
||||
{
|
||||
auto* header = headers[i];
|
||||
if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS &&
|
||||
header->kind == ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER)
|
||||
{}
|
||||
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS &&
|
||||
header->kind == ROCPROFILER_COUNTER_RECORD_VALUE)
|
||||
{
|
||||
// Print the returned counter data.
|
||||
auto* record = static_cast<rocprofiler_record_counter_t*>(header->payload);
|
||||
ss << " (Id: " << record->id << " Value [D]: " << record->counter_value << ","
|
||||
<< " user_data: " << record->user_data.value << "),";
|
||||
}
|
||||
}
|
||||
|
||||
auto* output_stream = static_cast<std::ostream*>(user_data);
|
||||
if(!output_stream) throw std::runtime_error{"nullptr to output stream"};
|
||||
|
||||
*output_stream << "[" << __FUNCTION__ << "] " << ss.str() << "\n";
|
||||
}
|
||||
|
||||
std::unordered_map<uint64_t, rocprofiler_profile_config_id_t>&
|
||||
get_profile_cache()
|
||||
{
|
||||
static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache;
|
||||
return profile_cache;
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback from rocprofiler when an kernel dispatch is enqueued into the HSA queue.
|
||||
* rocprofiler_profile_config_id_t* is a return to specify what counters to collect
|
||||
* for this dispatch (dispatch_packet). This example function creates a profile
|
||||
* to collect the counter SQ_WAVES for all kernel dispatch packets.
|
||||
*/
|
||||
void
|
||||
set_profile(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_agent_id_t agent,
|
||||
rocprofiler_agent_set_profile_callback_t set_config,
|
||||
void*)
|
||||
{
|
||||
/**
|
||||
* This simple example uses the same profile counter set for all agents.
|
||||
* We store this in a cache to prevent constructing many identical profile counter
|
||||
* sets. We first check the cache to see if we have already constructed a counter"
|
||||
* set for the agent. If we have, return it. Otherwise, construct a new profile counter
|
||||
* set.
|
||||
*/
|
||||
auto search_cache = [&]() {
|
||||
if(auto pos = get_profile_cache().find(agent.handle); pos != get_profile_cache().end())
|
||||
{
|
||||
set_config(context_id, pos->second);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
if(!search_cache())
|
||||
{
|
||||
std::cerr << "No profile for agent found in cache\n";
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
rocprofiler_profile_config_id_t
|
||||
build_profile_for_agent(rocprofiler_agent_id_t agent)
|
||||
{
|
||||
std::set<std::string> counters_to_collect = {"SQ_WAVES"};
|
||||
std::vector<rocprofiler_counter_id_t> gpu_counters;
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
|
||||
agent,
|
||||
[](rocprofiler_agent_id_t,
|
||||
rocprofiler_counter_id_t* counters,
|
||||
size_t num_counters,
|
||||
void* user_data) {
|
||||
std::vector<rocprofiler_counter_id_t>* vec =
|
||||
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
|
||||
for(size_t i = 0; i < num_counters; i++)
|
||||
{
|
||||
vec->push_back(counters[i]);
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
static_cast<void*>(&gpu_counters)),
|
||||
"Could not fetch supported counters");
|
||||
|
||||
std::vector<rocprofiler_counter_id_t> collect_counters;
|
||||
for(auto& counter : gpu_counters)
|
||||
{
|
||||
rocprofiler_counter_info_v0_t version;
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_query_counter_info(
|
||||
counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast<void*>(&version)),
|
||||
"Could not query info for counter");
|
||||
if(counters_to_collect.count(std::string(version.name)) > 0)
|
||||
{
|
||||
std::clog << "Counter: " << counter.handle << " " << version.name << "\n";
|
||||
collect_counters.push_back(counter);
|
||||
}
|
||||
}
|
||||
|
||||
rocprofiler_profile_config_id_t profile;
|
||||
ROCPROFILER_CALL(rocprofiler_create_profile_config(
|
||||
agent, collect_counters.data(), collect_counters.size(), &profile),
|
||||
"Could not construct profile cfg");
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
std::atomic<bool>&
|
||||
exit_toggle()
|
||||
{
|
||||
static std::atomic<bool> exit_toggle = false;
|
||||
return exit_toggle;
|
||||
}
|
||||
|
||||
int
|
||||
tool_init(rocprofiler_client_finalize_t, void* user_data)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "context creation failed");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
||||
4096,
|
||||
2048,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
buffered_callback,
|
||||
user_data,
|
||||
&get_buffer()),
|
||||
"buffer creation failed");
|
||||
|
||||
std::vector<rocprofiler_agent_v0_t> agents;
|
||||
rocprofiler_query_available_agents_cb_t iterate_cb = [](rocprofiler_agent_version_t agents_ver,
|
||||
const void** agents_arr,
|
||||
size_t num_agents,
|
||||
void* udata) {
|
||||
if(agents_ver != ROCPROFILER_AGENT_INFO_VERSION_0)
|
||||
throw std::runtime_error{"unexpected rocprofiler agent version"};
|
||||
auto* agents_v = static_cast<std::vector<rocprofiler_agent_v0_t>*>(udata);
|
||||
for(size_t i = 0; i < num_agents; ++i)
|
||||
agents_v->emplace_back(*static_cast<const rocprofiler_agent_v0_t*>(agents_arr[i]));
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0,
|
||||
iterate_cb,
|
||||
sizeof(rocprofiler_agent_t),
|
||||
const_cast<void*>(static_cast<const void*>(&agents))),
|
||||
"query available agents");
|
||||
|
||||
auto client_thread = rocprofiler_callback_thread_t{};
|
||||
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread),
|
||||
"failure creating callback thread");
|
||||
ROCPROFILER_CALL(rocprofiler_assign_callback_thread(get_buffer(), client_thread),
|
||||
"failed to assign thread for buffer");
|
||||
|
||||
// Construct the profiles in advance for each agent that is a GPU
|
||||
rocprofiler_agent_id_t agent_id;
|
||||
for(const auto& agent : agents)
|
||||
{
|
||||
if(agent.type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
{
|
||||
get_profile_cache().emplace(agent.id.handle, build_profile_for_agent(agent.id));
|
||||
agent_id = agent.id;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(agents.empty())
|
||||
{
|
||||
std::cerr << "No agents found" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_agent_profile_counting_service(
|
||||
get_client_ctx(), get_buffer(), agent_id, set_profile, nullptr),
|
||||
"Could not setup buffered service");
|
||||
|
||||
std::thread([=]() {
|
||||
size_t count = 1;
|
||||
rocprofiler_start_context(get_client_ctx());
|
||||
while(exit_toggle().load() == false)
|
||||
{
|
||||
rocprofiler_sample_agent_profile_counting_service(
|
||||
get_client_ctx(), {.value = count}, ROCPROFILER_COUNTER_FLAG_NONE);
|
||||
count++;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(50));
|
||||
}
|
||||
exit_toggle().store(false);
|
||||
}).detach();
|
||||
|
||||
// no errors
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
tool_fini(void* user_data)
|
||||
{
|
||||
exit_toggle().store(true);
|
||||
while(exit_toggle().load() == true)
|
||||
{};
|
||||
|
||||
std::clog << "In tool fini\n";
|
||||
rocprofiler_stop_context(get_client_ctx());
|
||||
ROCPROFILER_CALL(rocprofiler_flush_buffer(get_buffer()), "buffer flush");
|
||||
|
||||
auto* output_stream = static_cast<std::ostream*>(user_data);
|
||||
*output_stream << std::flush;
|
||||
if(output_stream != &std::cout && output_stream != &std::cerr) delete output_stream;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
extern "C" rocprofiler_tool_configure_result_t*
|
||||
rocprofiler_configure(uint32_t version,
|
||||
const char* runtime_version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// set the client name
|
||||
id->name = "CounterClientSample";
|
||||
|
||||
// compute major/minor/patch version info
|
||||
uint32_t major = version / 10000;
|
||||
uint32_t minor = (version % 10000) / 100;
|
||||
uint32_t patch = version % 100;
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
std::ostream* output_stream = nullptr;
|
||||
std::string filename = "counter_collection.log";
|
||||
if(auto* outfile = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE"); outfile) filename = outfile;
|
||||
if(filename == "stdout")
|
||||
output_stream = &std::cout;
|
||||
else if(filename == "stderr")
|
||||
output_stream = &std::cerr;
|
||||
else
|
||||
output_stream = new std::ofstream{filename};
|
||||
|
||||
// create configure data
|
||||
static auto cfg =
|
||||
rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
|
||||
&tool_init,
|
||||
&tool_fini,
|
||||
static_cast<void*>(output_stream)};
|
||||
|
||||
// return pointer to configure data
|
||||
return &cfg;
|
||||
}
|
||||
@@ -25,36 +25,98 @@
|
||||
#include <rocprofiler-sdk/defines.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
|
||||
ROCPROFILER_EXTERN_C_INIT
|
||||
|
||||
/**
|
||||
* @defgroup AGENT_PROFILE_COUNTING_SERVICE Agent Profile Counting Service
|
||||
* @brief needs brief description
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
ROCPROFILER_EXTERN_C_INIT
|
||||
|
||||
/**
|
||||
* @brief Configure Profile Counting Service for agent.
|
||||
* @brief Callback to set the profile config for the agent.
|
||||
*
|
||||
* @param [in] context_id context id
|
||||
* @param [in] buffer_id id of the buffer to use for the counting service
|
||||
* @param [in] config_id Profile config detailing the counters to collect for this kernel
|
||||
* @return ::rocprofiler_status_t
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND Returned if the config_id is not found
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID Returned if the ctx is not valid
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED Returned if attempting to make this
|
||||
* call outside of context startup.
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH Agent of profile does not match agent of the
|
||||
* context.
|
||||
* @retval ::ROCPROFILER_STATUS_SUCCESS Returned if succesfully configured
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_profile_config_id_t config_id);
|
||||
typedef rocprofiler_status_t (*rocprofiler_agent_set_profile_callback_t)(
|
||||
rocprofiler_context_id_t context_id,
|
||||
rocprofiler_profile_config_id_t config_id);
|
||||
|
||||
/**
|
||||
* @brief Sample Profile Counting Service for agent.
|
||||
* @brief Configure Profile Counting Service for agent. Called when the context is started.
|
||||
* Selects the counters to be used for agent profiling.
|
||||
*
|
||||
* @param [out] data // It is always a size of one
|
||||
* @return ::rocprofiler_status_t
|
||||
* @param [in] context_id context id
|
||||
* @param [in] agent_id agent id
|
||||
* @param [in] set_config Function to call to set the profile config (see
|
||||
* rocprofiler_agent_set_profile_callback_t)
|
||||
* @param [in] user_data Data supplied to rocprofiler_configure_agent_profile_counting_service
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_sample_agent_profile_counting_service(rocprofiler_context_id_t context_id);
|
||||
typedef void (*rocprofiler_agent_profile_callback_t)(
|
||||
rocprofiler_context_id_t context_id,
|
||||
rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_agent_set_profile_callback_t set_config,
|
||||
void* user_data);
|
||||
|
||||
/**
|
||||
* @brief Configure Profile Counting Service for agent. There may only be one agent profile
|
||||
* configured per context and can be only one active context that is profiling a single agent
|
||||
* at a time. Multiple agent contexts can be started at the same time if they are profiling
|
||||
* different agents.
|
||||
*
|
||||
* @param [in] context_id context id
|
||||
* @param [in] buffer_id id of the buffer to use for the counting service. When
|
||||
* rocprofiler_sample_agent_profile_counting_service is called, counter data will be written
|
||||
* to this buffer.
|
||||
* @param [in] agent_id agent to configure profiling on.
|
||||
* @param [in] cb Callback called when the context is started for the tool to specify what
|
||||
* counters to collect (rocprofiler_profile_config_id_t).
|
||||
* @param [in] user_data User supplied data to be passed to the callback cb when triggered
|
||||
* @param [in] config_id Profile config detailing the counters to collect for this kernel
|
||||
* @return ::rocprofiler_status_t
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID Returned if the context does not exist.
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND Returned if the buffer is not found.
|
||||
* @retval ::ROCPROFILER_STATUS_SUCCESS Returned if succesfully configured
|
||||
*/
|
||||
rocprofiler_status_t
|
||||
rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_agent_profile_callback_t cb,
|
||||
void* user_data)
|
||||
ROCPROFILER_NONNULL(4) ROCPROFILER_API;
|
||||
|
||||
/**
|
||||
* @brief Trigger a read of the counter data for the agent profile. The counter data will be
|
||||
* written to the buffer specified in rocprofiler_configure_agent_profile_counting_service.
|
||||
* The data in rocprofiler_user_data_t will be written to the buffer along with the counter data.
|
||||
* flags can be used to specify if this call should be performed asynchronously (default is
|
||||
* synchronous).
|
||||
*
|
||||
* @param [in] context_id context id
|
||||
* @param [in] user_data User supplied data, included in records outputted to buffer.
|
||||
* @param [in] flags Flags to specify how the counter data should be collected (defaults to sync).
|
||||
* @return ::rocprofiler_status_t
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID Returned if the context does not exist or
|
||||
* the context is not configured for agent profiling.
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_ERROR Returned if another operation is in progress (
|
||||
* start/stop ctx or another read).
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR Returned if HSA has not been initialized yet.
|
||||
* @retval ::ROCPROFILER_STATUS_SUCCESS Returned if read request was successful.
|
||||
*/
|
||||
rocprofiler_status_t
|
||||
rocprofiler_sample_agent_profile_counting_service(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_user_data_t user_data,
|
||||
rocprofiler_counter_flag_t flags) ROCPROFILER_API;
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
@@ -97,6 +97,11 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND, ///< Could not find the counter profile
|
||||
ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT, ///< Cannot enable both agent and dispatch
|
||||
///< counting in the same context.
|
||||
ROCPROFILER_STATUS_INTERNAL_NO_AGENT_CONTEXT, ///< No agent context found, may not be an error
|
||||
ROCPROFILER_STATUS_ERROR_SAMPLE_RATE_EXCEEDED, ///< Sample rate exceeded
|
||||
ROCPROFILER_STATUS_ERROR_NO_PROFILE_QUEUE, ///< Profile queue creation failed
|
||||
ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS, ///< No hardware counters were specified
|
||||
ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH, ///< Agent mismatch between profile and context.
|
||||
ROCPROFILER_STATUS_LAST,
|
||||
} rocprofiler_status_t;
|
||||
|
||||
@@ -385,6 +390,16 @@ typedef enum
|
||||
/// ::rocprofiler_profile_counting_dispatch_record_t
|
||||
} rocprofiler_counter_record_kind_t;
|
||||
|
||||
/**
|
||||
* @brief Enumeration of flags that can be used with some counter api calls
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
ROCPROFILER_COUNTER_FLAG_NONE = 0,
|
||||
ROCPROFILER_COUNTER_FLAG_ASYNC, ///< Do not wait for completion before returning.
|
||||
ROCPROFILER_COUNTER_FLAG_LAST,
|
||||
} rocprofiler_counter_flag_t;
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
//
|
||||
// ALIASES
|
||||
@@ -651,6 +666,7 @@ typedef struct
|
||||
rocprofiler_counter_instance_id_t id; ///< counter identifier
|
||||
double counter_value; ///< counter value
|
||||
rocprofiler_dispatch_id_t dispatch_id;
|
||||
rocprofiler_user_data_t user_data;
|
||||
|
||||
/// @var dispatch_id
|
||||
/// @brief A value greater than zero indicates that this counter record is associated with a
|
||||
|
||||
@@ -656,8 +656,8 @@ get_agent_topology()
|
||||
auto&
|
||||
get_agent_caches()
|
||||
{
|
||||
static auto _v = std::vector<hsa::AgentCache>{};
|
||||
return _v;
|
||||
static auto*& _v = common::static_object<std::vector<hsa::AgentCache>>::construct();
|
||||
return *_v;
|
||||
}
|
||||
|
||||
struct agent_pair
|
||||
@@ -920,7 +920,7 @@ construct_agent_cache(::HsaApiTable* table)
|
||||
try
|
||||
{
|
||||
get_agent_caches().emplace_back(
|
||||
rocp_agent, hsa_agent, itr.first, _nearest_cpu, *table->amd_ext_);
|
||||
rocp_agent, hsa_agent, itr.first, _nearest_cpu, *table->amd_ext_, *table->core_);
|
||||
} catch(std::runtime_error& err)
|
||||
{
|
||||
if(rocp_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
|
||||
@@ -22,14 +22,29 @@
|
||||
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/agent_profiling.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/core.hpp"
|
||||
#include "rocprofiler-sdk/fwd.h"
|
||||
|
||||
extern "C" {
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_profile_config_id_t config_id)
|
||||
rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_agent_profile_callback_t cb,
|
||||
void* user_data)
|
||||
{
|
||||
return rocprofiler::counters::configure_agent_collection(context_id, buffer_id, config_id);
|
||||
return rocprofiler::counters::configure_agent_collection(
|
||||
context_id, buffer_id, agent_id, cb, user_data);
|
||||
}
|
||||
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_sample_agent_profile_counting_service(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_user_data_t user_data,
|
||||
rocprofiler_counter_flag_t flags)
|
||||
{
|
||||
return rocprofiler::counters::read_agent_ctx(
|
||||
rocprofiler::context::get_registered_context(context_id), user_data, flags);
|
||||
}
|
||||
}
|
||||
@@ -26,6 +26,7 @@
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/id_decode.hpp"
|
||||
@@ -111,5 +112,54 @@ get_dim_info(rocprofiler_agent_id_t agent,
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
set_profiler_active_on_queue(const AmdExtTable& api,
|
||||
hsa_amd_memory_pool_t pool,
|
||||
hsa_agent_t hsa_agent,
|
||||
const rocprofiler_profile_pkt_cb& packet_submit)
|
||||
{
|
||||
// Inject packet to enable profiling of other process queues on this queue
|
||||
hsa_ven_amd_aqlprofile_profile_t profile{};
|
||||
profile.agent = hsa_agent;
|
||||
|
||||
// Query for cmd buffer size
|
||||
hsa_ven_amd_aqlprofile_info_type_t info_type =
|
||||
(hsa_ven_amd_aqlprofile_info_type_t)((int) HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD);
|
||||
if(hsa_ven_amd_aqlprofile_get_info(&profile, info_type, nullptr) != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
}
|
||||
// Allocate cmd buffer
|
||||
const size_t mask = 0x1000 - 1;
|
||||
auto size = (profile.command_buffer.size + mask) & ~mask;
|
||||
|
||||
if(api.hsa_amd_memory_pool_allocate_fn(pool, size, 0, &profile.command_buffer.ptr) !=
|
||||
HSA_STATUS_SUCCESS)
|
||||
{
|
||||
ROCP_WARNING << "Failed to allocate memory to enable profile command on agent, some "
|
||||
"counters will be unavailable";
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
}
|
||||
if(api.hsa_amd_agents_allow_access_fn(1, &hsa_agent, nullptr, profile.command_buffer.ptr) !=
|
||||
HSA_STATUS_SUCCESS)
|
||||
{
|
||||
ROCP_WARNING << "Agent cannot access memory, some counters will be unavailable";
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
}
|
||||
|
||||
hsa::rocprofiler_packet packet{};
|
||||
if(hsa_ven_amd_aqlprofile_get_info(&profile, info_type, &packet.ext_amd_aql_pm4) !=
|
||||
HSA_STATUS_SUCCESS)
|
||||
{
|
||||
ROCP_WARNING << "Failed to generate command packet, some counters will be unavailable";
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
}
|
||||
|
||||
packet_submit(packet);
|
||||
api.hsa_amd_memory_pool_free_fn(profile.command_buffer.ptr);
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace aql
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -32,11 +32,13 @@
|
||||
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/rocprofiler_packet.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace aql
|
||||
{
|
||||
using rocprofiler_profile_pkt_cb = std::function<void(hsa::rocprofiler_packet)>;
|
||||
// Query HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID from aqlprofile
|
||||
hsa_ven_amd_aqlprofile_id_query_t
|
||||
get_query_info(rocprofiler_agent_id_t agent, const counters::Metric& metric);
|
||||
@@ -58,5 +60,11 @@ set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id,
|
||||
hsa_agent_t agent,
|
||||
hsa_ven_amd_aqlprofile_event_t event,
|
||||
uint32_t sample_id);
|
||||
|
||||
rocprofiler_status_t
|
||||
set_profiler_active_on_queue(const AmdExtTable& api,
|
||||
hsa_amd_memory_pool_t pool,
|
||||
hsa_agent_t hsa_agent,
|
||||
const rocprofiler_profile_pkt_cb& packet_submit);
|
||||
} // namespace aql
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -317,10 +317,13 @@ start_context(rocprofiler_context_id_t context_id)
|
||||
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_STARTED;
|
||||
}
|
||||
|
||||
auto status = ROCPROFILER_STATUS_SUCCESS;
|
||||
|
||||
if(cfg->counter_collection) rocprofiler::counters::start_context(cfg);
|
||||
if(cfg->thread_trace) cfg->thread_trace->start_context();
|
||||
if(cfg->agent_counter_collection) status = rocprofiler::counters::start_agent_ctx(cfg);
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
return status;
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
@@ -344,9 +347,16 @@ stop_context(rocprofiler_context_id_t idx)
|
||||
if(nactive > 0) get_num_active_contexts().fetch_sub(1, std::memory_order_release);
|
||||
|
||||
if(_expected->counter_collection)
|
||||
{
|
||||
rocprofiler::counters::stop_context(const_cast<context*>(_expected));
|
||||
else if(_expected->thread_trace)
|
||||
_expected->thread_trace->stop_context();
|
||||
}
|
||||
|
||||
if(_expected->thread_trace) _expected->thread_trace->stop_context();
|
||||
|
||||
if(_expected->agent_counter_collection)
|
||||
{
|
||||
rocprofiler::counters::stop_agent_ctx(const_cast<context*>(_expected));
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,9 +30,11 @@
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/correlation_id.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/domain.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/agent_profiling.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/core.hpp"
|
||||
#include "lib/rocprofiler-sdk/external_correlation.hpp"
|
||||
#include "lib/rocprofiler-sdk/thread_trace/att_core.hpp"
|
||||
#include "rocprofiler-sdk/agent.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
@@ -86,12 +88,28 @@ struct dispatch_counter_collection_service
|
||||
|
||||
struct agent_counter_collection_service
|
||||
{
|
||||
rocprofiler::counters::agent_callback_data callback_data;
|
||||
// Signal to manage the startup of the context. Allows us to ensure that
|
||||
// the AQL packet we inject with start_context() completes before returning
|
||||
hsa_signal_t start_signal;
|
||||
std::shared_ptr<rocprofiler::counters::profile_config> profile;
|
||||
rocprofiler_buffer_id_t buffer;
|
||||
rocprofiler_agent_id_t agent_id;
|
||||
rocprofiler_agent_profile_callback_t cb;
|
||||
void* user_data;
|
||||
// A flag to state wether or not the counter set is currently enabled. This is primarily
|
||||
// to protect against multithreaded calls to enable a context (and enabling already enabled
|
||||
// counters).
|
||||
std::atomic<bool> enabled{false};
|
||||
// to protect against multithreaded calls to enable a context (and enabling already
|
||||
// enabled counters).
|
||||
|
||||
enum class state
|
||||
{
|
||||
DISABLED,
|
||||
LOCKED,
|
||||
ENABLED
|
||||
};
|
||||
std::atomic<state> status{state::DISABLED};
|
||||
|
||||
common::Synchronized<bool> enabled{false};
|
||||
};
|
||||
|
||||
struct context
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
set(ROCPROFILER_LIB_COUNTERS_SOURCES metrics.cpp dimensions.cpp evaluate_ast.cpp core.cpp
|
||||
id_decode.cpp dispatch_handlers.cpp controller.cpp)
|
||||
set(ROCPROFILER_LIB_COUNTERS_HEADERS metrics.hpp dimensions.hpp evaluate_ast.hpp core.hpp
|
||||
id_decode.hpp dispatch_handlers.hpp controller.hpp)
|
||||
set(ROCPROFILER_LIB_COUNTERS_SOURCES
|
||||
metrics.cpp dimensions.cpp evaluate_ast.cpp core.cpp id_decode.cpp
|
||||
dispatch_handlers.cpp controller.cpp agent_profiling.cpp)
|
||||
set(ROCPROFILER_LIB_COUNTERS_HEADERS
|
||||
metrics.hpp dimensions.hpp evaluate_ast.hpp core.hpp id_decode.hpp
|
||||
dispatch_handlers.hpp controller.hpp agent_profiling.hpp)
|
||||
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_COUNTERS_SOURCES}
|
||||
${ROCPROFILER_LIB_COUNTERS_HEADERS})
|
||||
|
||||
|
||||
@@ -0,0 +1,492 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/counters/agent_profiling.hpp"
|
||||
#include <cstdint>
|
||||
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/rocprofiler-sdk/buffer.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/controller.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/core.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/rocprofiler_packet.hpp"
|
||||
#include "rocprofiler-sdk/fwd.h"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace counters
|
||||
{
|
||||
std::atomic<bool>&
|
||||
hsa_inited()
|
||||
{
|
||||
static std::atomic<bool> inited{false};
|
||||
return inited;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
submitPacket(const CoreApiTable& table, hsa_queue_t* queue, const void* packet)
|
||||
{
|
||||
const uint32_t pkt_size = 0x40;
|
||||
|
||||
// advance command queue
|
||||
const uint64_t write_idx = table.hsa_queue_add_write_index_scacq_screl_fn(queue, 1);
|
||||
while((write_idx - table.hsa_queue_load_read_index_relaxed_fn(queue)) >= queue->size)
|
||||
{
|
||||
sched_yield();
|
||||
}
|
||||
|
||||
const uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
|
||||
// NOLINTBEGIN(performance-no-int-to-ptr)
|
||||
uint32_t* queue_slot =
|
||||
reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) + (slot_idx * pkt_size));
|
||||
// NOLINTEND(performance-no-int-to-ptr)
|
||||
|
||||
const uint32_t* slot_data = reinterpret_cast<const uint32_t*>(packet);
|
||||
|
||||
// Copy buffered commands into the queue slot.
|
||||
// Overwrite the AQL invalid header (first dword) last.
|
||||
// This prevents the slot from being read until it's fully written.
|
||||
memcpy(&queue_slot[1], &slot_data[1], pkt_size - sizeof(uint32_t));
|
||||
std::atomic<uint32_t>* header_atomic_ptr =
|
||||
reinterpret_cast<std::atomic<uint32_t>*>(&queue_slot[0]);
|
||||
header_atomic_ptr->store(slot_data[0], std::memory_order_release);
|
||||
|
||||
// ringdoor bell
|
||||
table.hsa_signal_store_relaxed_fn(queue->doorbell_signal, write_idx);
|
||||
|
||||
return write_idx;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
uint16_t
|
||||
header_pkt(hsa_packet_type_t type)
|
||||
{
|
||||
uint16_t header = type << HSA_PACKET_HEADER_TYPE;
|
||||
header |= 1 << HSA_PACKET_HEADER_BARRIER;
|
||||
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
|
||||
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
|
||||
return header;
|
||||
}
|
||||
|
||||
std::unique_ptr<hsa::CounterAQLPacket>
|
||||
construct_aql_pkt(const hsa::AgentCache& agent, std::shared_ptr<profile_config>& profile)
|
||||
{
|
||||
if(counter_callback_info::setup_profile_config(agent, profile) != ROCPROFILER_STATUS_SUCCESS)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto pkts = profile->pkt_generator->construct_packet(
|
||||
CHECK_NOTNULL(hsa::get_queue_controller())->get_ext_table());
|
||||
|
||||
pkts->start.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
|
||||
pkts->start.completion_signal.handle = 0;
|
||||
pkts->stop.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
|
||||
pkts->read.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
|
||||
return pkts;
|
||||
}
|
||||
|
||||
bool
|
||||
agent_async_handler(hsa_signal_value_t /*signal_v*/, void* data)
|
||||
{
|
||||
const auto* ctx = context::get_registered_context({.handle = (uint64_t) data});
|
||||
if(!ctx) return false;
|
||||
|
||||
const auto& agent_ctx = *ctx->agent_counter_collection;
|
||||
const auto& prof_config = agent_ctx.profile;
|
||||
|
||||
// Decode the AQL packet data
|
||||
auto decoded_pkt =
|
||||
EvaluateAST::read_pkt(prof_config->pkt_generator.get(), *agent_ctx.callback_data.packet);
|
||||
EvaluateAST::read_special_counters(
|
||||
*prof_config->agent, prof_config->required_special_counters, decoded_pkt);
|
||||
|
||||
auto* buf = buffer::get_buffer(agent_ctx.buffer.handle);
|
||||
if(!buf)
|
||||
{
|
||||
ROCP_FATAL << fmt::format("Buffer {} destroyed before record was written",
|
||||
agent_ctx.buffer.handle);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Write out the AQL data to the buffer
|
||||
for(auto& ast : prof_config->asts)
|
||||
{
|
||||
std::vector<std::unique_ptr<std::vector<rocprofiler_record_counter_t>>> cache;
|
||||
auto* ret = CHECK_NOTNULL(ast.evaluate(decoded_pkt, cache));
|
||||
ast.set_out_id(*ret);
|
||||
for(auto& val : *ret)
|
||||
{
|
||||
val.user_data = agent_ctx.callback_data.user_data;
|
||||
buf->emplace(
|
||||
ROCPROFILER_BUFFER_CATEGORY_COUNTERS, ROCPROFILER_COUNTER_RECORD_VALUE, val);
|
||||
}
|
||||
}
|
||||
|
||||
// reset the signal to allow another sample to start
|
||||
agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn(agent_ctx.callback_data.completion,
|
||||
1);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
init_callback_data(const rocprofiler::context::context& ctx, const hsa::AgentCache& agent)
|
||||
{
|
||||
// Note: Calls to this function should be protected by agent_ctx.status being set
|
||||
// to LOCKED by the caller. This is to prevent multiple threads from trying to
|
||||
// setup the same agent at the same time.
|
||||
auto& agent_ctx = *ctx.agent_counter_collection;
|
||||
if(agent_ctx.callback_data.packet) return;
|
||||
|
||||
agent_ctx.callback_data.packet = construct_aql_pkt(agent, agent_ctx.profile);
|
||||
|
||||
if(agent_ctx.callback_data.completion.handle != 0) return;
|
||||
|
||||
// If we do not have a completion handle, this is our first time profiling this agent.
|
||||
// Setup our shared data structures.
|
||||
agent_ctx.callback_data.queue = agent.profile_queue();
|
||||
|
||||
agent_ctx.callback_data.table = CHECK_NOTNULL(hsa::get_queue_controller())->get_core_table();
|
||||
|
||||
// Tri-state signal
|
||||
// 1: allow next sample to start
|
||||
// 0: sample in progress
|
||||
// -1: sample complete
|
||||
CHECK_EQ(agent_ctx.callback_data.table.hsa_signal_create_fn(
|
||||
1, 0, nullptr, &agent_ctx.callback_data.completion),
|
||||
HSA_STATUS_SUCCESS);
|
||||
|
||||
// Signal to manage the startup of the context. Allows us to ensure that
|
||||
// the AQL packet we inject with start_context() completes before returning
|
||||
CHECK_EQ(
|
||||
agent_ctx.callback_data.table.hsa_signal_create_fn(1, 0, nullptr, &agent_ctx.start_signal),
|
||||
HSA_STATUS_SUCCESS);
|
||||
|
||||
// Setup callback
|
||||
// NOLINTBEGIN(performance-no-int-to-ptr)
|
||||
CHECK_EQ(CHECK_NOTNULL(hsa::get_queue_controller())
|
||||
->get_ext_table()
|
||||
.hsa_amd_signal_async_handler_fn(agent_ctx.callback_data.completion,
|
||||
HSA_SIGNAL_CONDITION_LT,
|
||||
0,
|
||||
agent_async_handler,
|
||||
(void*) ctx.context_idx),
|
||||
HSA_STATUS_SUCCESS);
|
||||
// NOLINTEND(performance-no-int-to-ptr)
|
||||
|
||||
// Set state of the queue to allow profiling (may not be needed since AQL
|
||||
// may do this in the future).
|
||||
aql::set_profiler_active_on_queue(
|
||||
CHECK_NOTNULL(hsa::get_queue_controller())->get_ext_table(),
|
||||
agent.cpu_pool(),
|
||||
agent.get_hsa_agent(),
|
||||
[&](hsa::rocprofiler_packet pkt) {
|
||||
pkt.ext_amd_aql_pm4.completion_signal = agent_ctx.callback_data.completion;
|
||||
submitPacket(
|
||||
agent_ctx.callback_data.table, agent_ctx.callback_data.queue, (void*) &pkt);
|
||||
if(agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn(
|
||||
agent_ctx.callback_data.completion,
|
||||
HSA_SIGNAL_CONDITION_EQ,
|
||||
0,
|
||||
20000000,
|
||||
HSA_WAIT_STATE_ACTIVE) != 0)
|
||||
{
|
||||
ROCP_FATAL << "Could not set agent to be profiled";
|
||||
}
|
||||
agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn(
|
||||
agent_ctx.callback_data.completion, 1);
|
||||
});
|
||||
}
|
||||
} // namespace
|
||||
|
||||
rocprofiler_status_t
|
||||
read_agent_ctx(const context::context* ctx,
|
||||
rocprofiler_user_data_t user_data,
|
||||
rocprofiler_counter_flag_t flags)
|
||||
{
|
||||
if(!ctx->agent_counter_collection || !ctx->agent_counter_collection->profile)
|
||||
{
|
||||
if(!ctx->agent_counter_collection)
|
||||
{
|
||||
ROCP_ERROR << fmt::format("Context {} has no agent counter collection",
|
||||
ctx->context_idx);
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCP_ERROR << fmt::format("Context {} has no profile", ctx->context_idx);
|
||||
}
|
||||
return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID;
|
||||
}
|
||||
|
||||
auto& agent_ctx = *ctx->agent_counter_collection;
|
||||
|
||||
if(hsa_inited().load() == false)
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
}
|
||||
|
||||
const auto* agent = agent::get_agent_cache(agent_ctx.profile->agent);
|
||||
|
||||
// If the agent no longer exists or we don't have a profile queue, reading is an error
|
||||
if(!agent || !agent->profile_queue()) return ROCPROFILER_STATUS_ERROR;
|
||||
|
||||
// Set the state to LOCKED to prevent other calls to start/stop/read.
|
||||
auto expected = rocprofiler::context::agent_counter_collection_service::state::ENABLED;
|
||||
if(!agent_ctx.status.compare_exchange_strong(
|
||||
expected, rocprofiler::context::agent_counter_collection_service::state::LOCKED))
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_CONTEXT_ERROR;
|
||||
}
|
||||
|
||||
CHECK(agent_ctx.callback_data.packet);
|
||||
|
||||
ROCP_TRACE << fmt::format("Agent Infor for Running Counter: Name = {}, XCC = {}, "
|
||||
"SE = {}, CU = {}, SIMD = {}",
|
||||
agent->get_rocp_agent()->name,
|
||||
agent->get_rocp_agent()->num_xcc,
|
||||
agent->get_rocp_agent()->num_shader_banks,
|
||||
agent->get_rocp_agent()->cu_count,
|
||||
agent->get_rocp_agent()->simd_arrays_per_engine);
|
||||
|
||||
// Remove when AQL is updated to not require stop to be called first
|
||||
submitPacket(agent_ctx.callback_data.table,
|
||||
agent->profile_queue(),
|
||||
(void*) &agent_ctx.callback_data.packet->stop);
|
||||
|
||||
// Submit the read packet to the queue
|
||||
submitPacket(agent_ctx.callback_data.table,
|
||||
agent->profile_queue(),
|
||||
(void*) &agent_ctx.callback_data.packet->read);
|
||||
|
||||
// Submit a barrier packet. This is needed to flush hardware caches. Without this
|
||||
// the read packet may not have the correct data.
|
||||
rocprofiler::hsa::rocprofiler_packet barrier{};
|
||||
barrier.barrier_and.header = header_pkt(HSA_PACKET_TYPE_BARRIER_AND);
|
||||
barrier.barrier_and.completion_signal = agent_ctx.callback_data.completion;
|
||||
agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn(agent_ctx.callback_data.completion,
|
||||
0);
|
||||
agent_ctx.callback_data.user_data = user_data;
|
||||
submitPacket(
|
||||
agent_ctx.callback_data.table, agent->profile_queue(), (void*) &barrier.barrier_and);
|
||||
|
||||
// Wait for the barrier/read packet to complete
|
||||
if(flags != ROCPROFILER_COUNTER_FLAG_ASYNC)
|
||||
{
|
||||
// Wait for any inprogress samples to complete before returning
|
||||
agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn(agent_ctx.callback_data.completion,
|
||||
HSA_SIGNAL_CONDITION_EQ,
|
||||
1,
|
||||
UINT64_MAX,
|
||||
HSA_WAIT_STATE_ACTIVE);
|
||||
}
|
||||
|
||||
agent_ctx.status.exchange(
|
||||
rocprofiler::context::agent_counter_collection_service::state::ENABLED);
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
start_agent_ctx(const context::context* ctx)
|
||||
{
|
||||
auto status = ROCPROFILER_STATUS_SUCCESS;
|
||||
if(!ctx->agent_counter_collection)
|
||||
{
|
||||
return status;
|
||||
}
|
||||
|
||||
auto& agent_ctx = *ctx->agent_counter_collection;
|
||||
|
||||
if(hsa_inited().load() == false)
|
||||
{
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
const auto* agent = agent::get_agent_cache(agent::get_agent(agent_ctx.agent_id));
|
||||
// Note: we may not have an AgentCache yet if HSA is not started.
|
||||
// This is not an error and the startup will happen on hsa registration.
|
||||
if(!agent) return ROCPROFILER_STATUS_ERROR;
|
||||
|
||||
// But if we have an agent cache, we need a profile queue.
|
||||
if(!agent->profile_queue())
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_NO_PROFILE_QUEUE;
|
||||
}
|
||||
|
||||
// Set the state to LOCKED to prevent other calls to start/stop/read.
|
||||
auto expected = rocprofiler::context::agent_counter_collection_service::state::DISABLED;
|
||||
if(!agent_ctx.status.compare_exchange_strong(
|
||||
expected, rocprofiler::context::agent_counter_collection_service::state::LOCKED))
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED;
|
||||
}
|
||||
|
||||
// Ask the tool what profile we should use for this agent
|
||||
agent_ctx.cb(
|
||||
{.handle = ctx->context_idx},
|
||||
agent_ctx.agent_id,
|
||||
[](rocprofiler_context_id_t context_id,
|
||||
rocprofiler_profile_config_id_t config_id) -> rocprofiler_status_t {
|
||||
auto* cb_ctx = rocprofiler::context::get_mutable_registered_context(context_id);
|
||||
if(!cb_ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID;
|
||||
|
||||
auto config = rocprofiler::counters::get_profile_config(config_id);
|
||||
if(!config) return ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND;
|
||||
|
||||
if(!cb_ctx->agent_counter_collection)
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID;
|
||||
}
|
||||
|
||||
// Only allow profiles to be set in the locked state
|
||||
if(cb_ctx->agent_counter_collection->status.load() !=
|
||||
rocprofiler::context::agent_counter_collection_service::state::LOCKED)
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
|
||||
}
|
||||
|
||||
// Only update the profile if it has changed. Avoids packet regeneration.
|
||||
if(!cb_ctx->agent_counter_collection->profile ||
|
||||
cb_ctx->agent_counter_collection->profile->id.handle != config_id.handle)
|
||||
{
|
||||
if(cb_ctx->agent_counter_collection->agent_id.handle != config->agent->id.handle)
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH;
|
||||
}
|
||||
|
||||
cb_ctx->agent_counter_collection->profile = config;
|
||||
cb_ctx->agent_counter_collection->callback_data.packet.reset();
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
agent_ctx.user_data);
|
||||
|
||||
// User didn't set a profile
|
||||
if(!agent_ctx.profile)
|
||||
{
|
||||
agent_ctx.status.exchange(
|
||||
rocprofiler::context::agent_counter_collection_service::state::DISABLED);
|
||||
return status;
|
||||
}
|
||||
|
||||
// Generate necessary structures in the context (packet gen, etc) to process
|
||||
// this packet.
|
||||
init_callback_data(*ctx, *agent);
|
||||
|
||||
// No hardware counters were actually asked for (i.e. all constants)
|
||||
if(agent_ctx.profile->reqired_hw_counters.empty())
|
||||
{
|
||||
agent_ctx.status.exchange(
|
||||
rocprofiler::context::agent_counter_collection_service::state::DISABLED);
|
||||
return ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS;
|
||||
}
|
||||
|
||||
// We could not generate AQL packets for some reason
|
||||
if(!agent_ctx.callback_data.packet)
|
||||
{
|
||||
agent_ctx.status.exchange(
|
||||
rocprofiler::context::agent_counter_collection_service::state::DISABLED);
|
||||
return ROCPROFILER_STATUS_ERROR_AST_GENERATION_FAILED;
|
||||
}
|
||||
|
||||
agent_ctx.callback_data.packet->start.completion_signal = agent_ctx.start_signal;
|
||||
agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn(agent_ctx.start_signal, 1);
|
||||
submitPacket(agent_ctx.callback_data.table,
|
||||
agent->profile_queue(),
|
||||
(void*) &agent_ctx.callback_data.packet->start);
|
||||
|
||||
// Wait for startup to finish before continuing
|
||||
agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn(
|
||||
agent_ctx.start_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
|
||||
|
||||
agent_ctx.status.exchange(
|
||||
rocprofiler::context::agent_counter_collection_service::state::ENABLED);
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
stop_agent_ctx(const context::context* ctx)
|
||||
{
|
||||
auto status = ROCPROFILER_STATUS_SUCCESS;
|
||||
if(!ctx->agent_counter_collection || !ctx->agent_counter_collection->profile)
|
||||
{
|
||||
return status;
|
||||
}
|
||||
|
||||
auto& agent_ctx = *ctx->agent_counter_collection;
|
||||
|
||||
if(hsa_inited().load() == false)
|
||||
{
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
const auto* agent = agent::get_agent_cache(agent_ctx.profile->agent);
|
||||
if(!agent || !agent->profile_queue()) return status;
|
||||
|
||||
auto expected = rocprofiler::context::agent_counter_collection_service::state::ENABLED;
|
||||
if(!agent_ctx.status.compare_exchange_strong(
|
||||
expected, rocprofiler::context::agent_counter_collection_service::state::LOCKED))
|
||||
{
|
||||
// Status is already stopped or being enabled elsewhere.
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
CHECK(agent_ctx.callback_data.packet);
|
||||
|
||||
submitPacket(agent_ctx.callback_data.table,
|
||||
agent->profile_queue(),
|
||||
(void*) &agent_ctx.callback_data.packet->stop);
|
||||
|
||||
// Wait for any inprogress samples to complete before returning
|
||||
agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn(agent_ctx.callback_data.completion,
|
||||
HSA_SIGNAL_CONDITION_EQ,
|
||||
1,
|
||||
UINT64_MAX,
|
||||
HSA_WAIT_STATE_ACTIVE);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// If we have ctx's that were started before HSA was initialized, we need to
|
||||
// actually start those contexts now.
|
||||
rocprofiler_status_t
|
||||
agent_profile_hsa_registration()
|
||||
{
|
||||
hsa_inited().store(true);
|
||||
|
||||
for(auto& ctx : context::get_active_contexts())
|
||||
{
|
||||
if(!ctx->agent_counter_collection) continue;
|
||||
start_agent_ctx(ctx);
|
||||
}
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
agent_callback_data::~agent_callback_data()
|
||||
{
|
||||
if(completion.handle != 0) table.hsa_signal_destroy_fn(completion);
|
||||
}
|
||||
} // namespace counters
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,86 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hsa.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include "lib/rocprofiler-sdk/hsa/aql_packet.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace context
|
||||
{
|
||||
struct context;
|
||||
}
|
||||
|
||||
namespace counters
|
||||
{
|
||||
struct agent_callback_data
|
||||
{
|
||||
CoreApiTable table;
|
||||
hsa_queue_t* queue{nullptr};
|
||||
std::unique_ptr<hsa::CounterAQLPacket> packet;
|
||||
|
||||
// Tri-state signal used to know what the current state of processing
|
||||
// a sample is. The states are:
|
||||
// 1: allow next sample to start (i.e. no in progress work)
|
||||
// 0: sample in progress
|
||||
// -1: sample complete (i.e. signal for caller that sample is ready)
|
||||
hsa_signal_t completion{.handle = 0};
|
||||
rocprofiler_user_data_t user_data{.value = 0};
|
||||
~agent_callback_data();
|
||||
};
|
||||
|
||||
// If we have contexts that are started before HSA init. This
|
||||
// function will start those contexts. Should only be called
|
||||
// as part of the HSA init process in rocprofiler.
|
||||
rocprofiler_status_t
|
||||
agent_profile_hsa_registration();
|
||||
|
||||
// Send the AQL start packet to a queue on the agent to start
|
||||
// collecting counter data. This function is synchronous and will
|
||||
// return when the agent has started collecting data (or if there
|
||||
// is an error).
|
||||
rocprofiler_status_t
|
||||
start_agent_ctx(const context::context* ctx);
|
||||
|
||||
// Send the AQL end packet to a queue on the agent to stop
|
||||
// collecting counter data. This function is synchronous and will
|
||||
// return when the agent has stopped collecting data (or if there
|
||||
// is an error).
|
||||
rocprofiler_status_t
|
||||
stop_agent_ctx(const context::context* ctx);
|
||||
|
||||
// Read the counter data from the agent. This function is synchronous
|
||||
// if flags is not set to ASYNC. If ASYNC is set, the function will
|
||||
// return before data has been written to the buffer. Overlapping
|
||||
// read calls are not allowed in ASYNC mode and will result in
|
||||
// this call waiting for the previous sample to complete.
|
||||
rocprofiler_status_t
|
||||
read_agent_ctx(const context::context* ctx,
|
||||
rocprofiler_user_data_t user_data,
|
||||
rocprofiler_counter_flag_t flags);
|
||||
|
||||
} // namespace counters
|
||||
} // namespace rocprofiler
|
||||
@@ -64,9 +64,11 @@ CounterController::destroy_profile(uint64_t id)
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
CounterController::configure_agent_collection(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer,
|
||||
rocprofiler_profile_config_id_t config_id)
|
||||
CounterController::configure_agent_collection(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_agent_profile_callback_t cb,
|
||||
void* user_data)
|
||||
{
|
||||
auto* ctx_p = rocprofiler::context::get_mutable_registered_context(context_id);
|
||||
if(!ctx_p) return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID;
|
||||
@@ -74,13 +76,10 @@ CounterController::configure_agent_collection(rocprofiler_context_id_t co
|
||||
auto& ctx = *ctx_p;
|
||||
|
||||
if(ctx.counter_collection) return ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT;
|
||||
if(!rocprofiler::buffer::get_buffer(buffer.handle))
|
||||
if(!rocprofiler::buffer::get_buffer(buffer_id.handle))
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
|
||||
}
|
||||
auto cfg = get_profile_cfg(config_id);
|
||||
|
||||
if(!cfg) return ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND;
|
||||
|
||||
if(!ctx.agent_counter_collection)
|
||||
{
|
||||
@@ -88,8 +87,10 @@ CounterController::configure_agent_collection(rocprofiler_context_id_t co
|
||||
std::make_unique<rocprofiler::context::agent_counter_collection_service>();
|
||||
}
|
||||
|
||||
ctx.agent_counter_collection->profile = cfg;
|
||||
ctx.agent_counter_collection->buffer = buffer;
|
||||
ctx.agent_counter_collection->agent_id = agent_id;
|
||||
ctx.agent_counter_collection->cb = cb;
|
||||
ctx.agent_counter_collection->user_data = user_data;
|
||||
ctx.agent_counter_collection->buffer = buffer_id;
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -87,9 +87,11 @@ public:
|
||||
void* record_callback_args);
|
||||
std::shared_ptr<profile_config> get_profile_cfg(rocprofiler_profile_config_id_t id);
|
||||
|
||||
rocprofiler_status_t configure_agent_collection(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer,
|
||||
rocprofiler_profile_config_id_t config_id);
|
||||
static rocprofiler_status_t configure_agent_collection(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_agent_profile_callback_t cb,
|
||||
void* user_data);
|
||||
|
||||
private:
|
||||
rocprofiler::common::Synchronized<std::unordered_map<uint64_t, std::shared_ptr<profile_config>>>
|
||||
|
||||
@@ -207,11 +207,14 @@ stop_context(const context::context* ctx)
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
configure_agent_collection(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_profile_config_id_t config_id)
|
||||
configure_agent_collection(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_agent_profile_callback_t cb,
|
||||
void* user_data)
|
||||
{
|
||||
return get_controller().configure_agent_collection(context_id, buffer_id, config_id);
|
||||
return get_controller().configure_agent_collection(
|
||||
context_id, buffer_id, agent_id, cb, user_data);
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
|
||||
@@ -94,9 +94,11 @@ configure_callback_dispatch(rocprofiler_context_id_t con
|
||||
void* record_callback_args);
|
||||
|
||||
rocprofiler_status_t
|
||||
configure_agent_collection(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_profile_config_id_t config_id);
|
||||
configure_agent_collection(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_agent_profile_callback_t cb,
|
||||
void* user_data);
|
||||
|
||||
void
|
||||
start_context(const context::context*);
|
||||
|
||||
@@ -57,7 +57,8 @@ get_reduce_op_type_from_string(const std::string& op)
|
||||
std::vector<rocprofiler_record_counter_t>*
|
||||
perform_reduction(ReduceOperation reduce_op, std::vector<rocprofiler_record_counter_t>* input_array)
|
||||
{
|
||||
rocprofiler_record_counter_t result{.id = 0, .counter_value = 0, .dispatch_id = 0};
|
||||
rocprofiler_record_counter_t result{
|
||||
.id = 0, .counter_value = 0, .dispatch_id = 0, .user_data = {.value = 0}};
|
||||
if(input_array->empty()) return input_array;
|
||||
switch(reduce_op)
|
||||
{
|
||||
@@ -83,12 +84,14 @@ perform_reduction(ReduceOperation reduce_op, std::vector<rocprofiler_record_coun
|
||||
result = std::accumulate(
|
||||
input_array->begin(),
|
||||
input_array->end(),
|
||||
rocprofiler_record_counter_t{.id = 0, .counter_value = 0, .dispatch_id = 0},
|
||||
rocprofiler_record_counter_t{
|
||||
.id = 0, .counter_value = 0, .dispatch_id = 0, .user_data = {.value = 0}},
|
||||
[](auto& a, auto& b) {
|
||||
return rocprofiler_record_counter_t{
|
||||
.id = a.id,
|
||||
.counter_value = a.counter_value + b.counter_value,
|
||||
.dispatch_id = a.dispatch_id};
|
||||
.dispatch_id = a.dispatch_id,
|
||||
.user_data = {.value = 0}};
|
||||
});
|
||||
break;
|
||||
}
|
||||
@@ -97,12 +100,14 @@ perform_reduction(ReduceOperation reduce_op, std::vector<rocprofiler_record_coun
|
||||
result = std::accumulate(
|
||||
input_array->begin(),
|
||||
input_array->end(),
|
||||
rocprofiler_record_counter_t{.id = 0, .counter_value = 0, .dispatch_id = 0},
|
||||
rocprofiler_record_counter_t{
|
||||
.id = 0, .counter_value = 0, .dispatch_id = 0, .user_data = {.value = 0}},
|
||||
[](auto& a, auto& b) {
|
||||
return rocprofiler_record_counter_t{
|
||||
.id = a.id,
|
||||
.counter_value = a.counter_value + b.counter_value,
|
||||
.dispatch_id = a.dispatch_id};
|
||||
.dispatch_id = a.dispatch_id,
|
||||
.user_data = {.value = 0}};
|
||||
});
|
||||
result.counter_value /= input_array->size();
|
||||
break;
|
||||
@@ -219,7 +224,8 @@ EvaluateAST::EvaluateAST(rocprofiler_counter_id_t out_id,
|
||||
_raw_value = std::get<int64_t>(ast.value);
|
||||
_static_value.push_back({.id = 0,
|
||||
.counter_value = static_cast<double>(std::get<int64_t>(ast.value)),
|
||||
.dispatch_id = 0});
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0}});
|
||||
}
|
||||
|
||||
for(const auto& nextAst : ast.counter_set)
|
||||
@@ -596,28 +602,32 @@ EvaluateAST::evaluate(
|
||||
return rocprofiler_record_counter_t{
|
||||
.id = a.id,
|
||||
.counter_value = a.counter_value + b.counter_value,
|
||||
.dispatch_id = a.dispatch_id};
|
||||
.dispatch_id = a.dispatch_id,
|
||||
.user_data = {.value = 0}};
|
||||
});
|
||||
case SUBTRACTION_NODE:
|
||||
return perform_op([](auto& a, auto& b) {
|
||||
return rocprofiler_record_counter_t{
|
||||
.id = a.id,
|
||||
.counter_value = a.counter_value - b.counter_value,
|
||||
.dispatch_id = a.dispatch_id};
|
||||
.dispatch_id = a.dispatch_id,
|
||||
.user_data = {.value = 0}};
|
||||
});
|
||||
case MULTIPLY_NODE:
|
||||
return perform_op([](auto& a, auto& b) {
|
||||
return rocprofiler_record_counter_t{
|
||||
.id = a.id,
|
||||
.counter_value = a.counter_value * b.counter_value,
|
||||
.dispatch_id = a.dispatch_id};
|
||||
.dispatch_id = a.dispatch_id,
|
||||
.user_data = {.value = 0}};
|
||||
});
|
||||
case DIVIDE_NODE:
|
||||
return perform_op([](auto& a, auto& b) {
|
||||
return rocprofiler_record_counter_t{
|
||||
.id = a.id,
|
||||
.counter_value = (b.counter_value == 0 ? 0 : a.counter_value / b.counter_value),
|
||||
.dispatch_id = a.dispatch_id};
|
||||
.dispatch_id = a.dispatch_id,
|
||||
.user_data = {.value = 0}};
|
||||
});
|
||||
case REFERENCE_NODE:
|
||||
{
|
||||
|
||||
@@ -2,12 +2,54 @@ rocprofiler_deactivate_clang_tidy()
|
||||
|
||||
include(GoogleTest)
|
||||
|
||||
set(ROCPROFILER_LIB_COUNTER_TEST_SOURCES metrics_test.cpp evaluate_ast_test.cpp
|
||||
dimension.cpp init_order.cpp core.cpp)
|
||||
find_program(
|
||||
amdclangpp_EXECUTABLE REQUIRED
|
||||
NAMES amdclang++
|
||||
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATH_SUFFIXES bin llvm/bin NO_CACHE)
|
||||
|
||||
function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
|
||||
separate_arguments(
|
||||
CLANG_ARG_LIST
|
||||
UNIX_COMMAND
|
||||
"-O2 -x cl -Xclang -finclude-default-header -cl-denorms-are-zero -cl-std=CL2.0 -Wl,--build-id=sha1
|
||||
-target amdgcn-amd-amdhsa -mcpu=${TARGET_ID} -o ${OUTPUT_FILE} ${INPUT_FILE}")
|
||||
add_custom_command(
|
||||
OUTPUT ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
|
||||
COMMAND ${amdclangpp_EXECUTABLE} ${CLANG_ARG_LIST}
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -E copy
|
||||
${PROJECT_BINARY_DIR}/source/lib/rocprofiler-sdk/counters/tests/${OUTPUT_FILE}
|
||||
${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE}
|
||||
OUTPUT ${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE}
|
||||
${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE}
|
||||
COMMENT "Building ${OUTPUT_FILE}...")
|
||||
set(HSACO_TARGET_LIST
|
||||
${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
|
||||
PARENT_SCOPE)
|
||||
endfunction(generate_hsaco)
|
||||
|
||||
foreach(target_id ${GPU_TARGETS})
|
||||
# generate kernel bitcodes
|
||||
generate_hsaco(${target_id} ${CMAKE_CURRENT_SOURCE_DIR}/agent_kernels.cl
|
||||
${target_id}_agent_kernels.hsaco)
|
||||
endforeach()
|
||||
|
||||
add_custom_target(agent_hasco_targets DEPENDS ${HSACO_TARGET_LIST})
|
||||
|
||||
set(ROCPROFILER_LIB_COUNTER_TEST_SOURCES
|
||||
metrics_test.cpp evaluate_ast_test.cpp dimension.cpp init_order.cpp core.cpp
|
||||
code_object_loader.cpp agent_profiling.cpp)
|
||||
set(ROCPROFILER_LIB_COUNTER_TEST_HEADERS code_object_loader.hpp agent_profiling.hpp)
|
||||
|
||||
add_executable(counter-test)
|
||||
|
||||
target_sources(counter-test PRIVATE ${ROCPROFILER_LIB_COUNTER_TEST_SOURCES})
|
||||
target_sources(counter-test PRIVATE ${ROCPROFILER_LIB_COUNTER_TEST_SOURCES}
|
||||
${ROCPROFILER_LIB_COUNTER_TEST_HEADERS})
|
||||
|
||||
add_dependencies(counter-test agent_hasco_targets)
|
||||
|
||||
target_link_libraries(
|
||||
counter-test
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
__kernel void
|
||||
null_kernel()
|
||||
{
|
||||
int gid = (int)get_global_id(0);
|
||||
int i = 0;
|
||||
while (gid >= 0) {
|
||||
gid = (int)get_global_id(0);
|
||||
i++;
|
||||
if (i > 1000) break;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,483 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/counters/tests/agent_profiling.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/tests/code_object_loader.hpp"
|
||||
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/core.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/dispatch_handlers.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/queue.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
#include "rocprofiler-sdk/buffer.h"
|
||||
|
||||
#include <rocprofiler-sdk/dispatch_profile.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/registration.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <fmt/core.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <sstream>
|
||||
#include <tuple>
|
||||
|
||||
using namespace rocprofiler::counters::testing;
|
||||
using namespace rocprofiler::counters;
|
||||
using namespace rocprofiler;
|
||||
|
||||
#define ROCPROFILER_CALL(result, msg) \
|
||||
{ \
|
||||
rocprofiler_status_t CHECKSTATUS = result; \
|
||||
if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \
|
||||
{ \
|
||||
std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \
|
||||
std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \
|
||||
<< " failed with error code " << CHECKSTATUS << ": " << status_msg \
|
||||
<< std::endl; \
|
||||
std::stringstream errmsg{}; \
|
||||
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \
|
||||
<< status_msg << ")"; \
|
||||
ASSERT_EQ(CHECKSTATUS, ROCPROFILER_STATUS_SUCCESS) << errmsg.str(); \
|
||||
} \
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
AmdExtTable&
|
||||
get_ext_table()
|
||||
{
|
||||
static auto _v = []() {
|
||||
auto val = AmdExtTable{};
|
||||
val.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info;
|
||||
val.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools;
|
||||
val.hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate;
|
||||
val.hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free;
|
||||
val.hsa_amd_agent_memory_pool_get_info_fn = hsa_amd_agent_memory_pool_get_info;
|
||||
val.hsa_amd_agents_allow_access_fn = hsa_amd_agents_allow_access;
|
||||
val.hsa_amd_queue_set_priority_fn = hsa_amd_queue_set_priority;
|
||||
val.hsa_amd_signal_async_handler_fn = hsa_amd_signal_async_handler;
|
||||
return val;
|
||||
}();
|
||||
return _v;
|
||||
}
|
||||
|
||||
CoreApiTable&
|
||||
get_api_table()
|
||||
{
|
||||
static auto _v = []() {
|
||||
auto val = CoreApiTable{};
|
||||
val.hsa_iterate_agents_fn = hsa_iterate_agents;
|
||||
val.hsa_agent_get_info_fn = hsa_agent_get_info;
|
||||
val.hsa_queue_create_fn = hsa_queue_create;
|
||||
val.hsa_queue_destroy_fn = hsa_queue_destroy;
|
||||
val.hsa_signal_create_fn = hsa_signal_create;
|
||||
val.hsa_signal_destroy_fn = hsa_signal_destroy;
|
||||
val.hsa_signal_store_screlease_fn = hsa_signal_store_screlease;
|
||||
val.hsa_signal_load_scacquire_fn = hsa_signal_load_scacquire;
|
||||
val.hsa_signal_add_relaxed_fn = hsa_signal_add_relaxed;
|
||||
val.hsa_signal_subtract_relaxed_fn = hsa_signal_subtract_relaxed;
|
||||
val.hsa_signal_wait_relaxed_fn = hsa_signal_wait_relaxed;
|
||||
val.hsa_queue_create_fn = hsa_queue_create;
|
||||
val.hsa_queue_add_write_index_scacq_screl_fn = hsa_queue_add_write_index_scacq_screl;
|
||||
val.hsa_queue_load_read_index_relaxed_fn = hsa_queue_load_read_index_relaxed;
|
||||
val.hsa_signal_store_relaxed_fn = hsa_signal_store_relaxed;
|
||||
val.hsa_signal_load_relaxed_fn = hsa_signal_load_relaxed;
|
||||
|
||||
return val;
|
||||
}();
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto
|
||||
findDeviceMetrics(const hsa::AgentCache& agent, const std::unordered_set<std::string>& metrics)
|
||||
{
|
||||
std::vector<counters::Metric> ret;
|
||||
auto all_counters = counters::getMetricMap();
|
||||
|
||||
ROCP_ERROR << "Looking up counters for " << std::string(agent.name());
|
||||
auto gfx_metrics = common::get_val(*all_counters, std::string(agent.name()));
|
||||
if(!gfx_metrics)
|
||||
{
|
||||
ROCP_ERROR << "No counters found for " << std::string(agent.name());
|
||||
return ret;
|
||||
}
|
||||
|
||||
for(auto& counter : *gfx_metrics)
|
||||
{
|
||||
if(metrics.count(counter.name()) > 0 || metrics.empty())
|
||||
{
|
||||
ret.push_back(counter);
|
||||
}
|
||||
}
|
||||
ROCP_ERROR << "No counters found for " << std::string(agent.name());
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
test_init()
|
||||
{
|
||||
HsaApiTable table;
|
||||
table.amd_ext_ = &get_ext_table();
|
||||
table.core_ = &get_api_table();
|
||||
agent::construct_agent_cache(&table);
|
||||
ASSERT_TRUE(hsa::get_queue_controller() != nullptr);
|
||||
hsa::get_queue_controller()->init(get_api_table(), get_ext_table());
|
||||
}
|
||||
|
||||
std::vector<rocprofiler_record_counter_t>&
|
||||
global_recs()
|
||||
{
|
||||
static std::vector<rocprofiler_record_counter_t> recs;
|
||||
return recs;
|
||||
}
|
||||
|
||||
void
|
||||
check_output_created(rocprofiler_context_id_t,
|
||||
rocprofiler_buffer_id_t,
|
||||
rocprofiler_record_header_t** headers,
|
||||
size_t num_headers,
|
||||
void* user_data,
|
||||
uint64_t)
|
||||
{
|
||||
// verifies that we got a record containing some data for a counter
|
||||
// does NOT validate the counters values.
|
||||
if(user_data == nullptr) return;
|
||||
|
||||
uint64_t found_value = 0;
|
||||
for(size_t i = 0; i < num_headers; ++i)
|
||||
{
|
||||
auto* header = headers[i];
|
||||
if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS &&
|
||||
header->kind == ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER)
|
||||
{}
|
||||
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS &&
|
||||
header->kind == ROCPROFILER_COUNTER_RECORD_VALUE)
|
||||
{
|
||||
// Print the returned counter data.
|
||||
auto* record = static_cast<rocprofiler_record_counter_t*>(header->payload);
|
||||
if(found_value != 0 && found_value != record->user_data.value)
|
||||
{
|
||||
ROCP_FATAL << "Have records with different user data values we didn't expect";
|
||||
break;
|
||||
}
|
||||
found_value = record->user_data.value;
|
||||
// ROCP_ERROR << fmt::format("Found counter value: {}", record->counter_value);
|
||||
global_recs().push_back(*record);
|
||||
}
|
||||
}
|
||||
|
||||
auto* signal = reinterpret_cast<hsa_signal_t*>(user_data);
|
||||
hsa_signal_store_relaxed(*signal, static_cast<int64_t>(found_value));
|
||||
}
|
||||
|
||||
struct test_kernels
|
||||
{
|
||||
CodeObject obj;
|
||||
|
||||
test_kernels(const rocprofiler::hsa::AgentCache& agent)
|
||||
{
|
||||
CHECK(agent.get_rocp_agent());
|
||||
// Getting hasco Path
|
||||
std::string hasco_file_path =
|
||||
std::string(agent.get_rocp_agent()->name) + std::string("_agent_kernels.hsaco");
|
||||
search_hasco(common::filesystem::current_path(), hasco_file_path);
|
||||
CHECK_EQ(load_code_object(hasco_file_path, agent.get_hsa_agent(), obj), HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
uint64_t load_kernel(const rocprofiler::hsa::AgentCache& agent,
|
||||
const std::string& kernel_name) const
|
||||
{
|
||||
Kernel kern;
|
||||
CHECK_EQ(get_kernel(obj, kernel_name, agent.get_hsa_agent(), kern), HSA_STATUS_SUCCESS);
|
||||
return kern.handle;
|
||||
}
|
||||
};
|
||||
|
||||
uint16_t
|
||||
packet_header(hsa_packet_type_t type)
|
||||
{
|
||||
uint16_t header = type << HSA_PACKET_HEADER_TYPE;
|
||||
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
|
||||
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
|
||||
return header;
|
||||
}
|
||||
|
||||
rocprofiler::hsa::rocprofiler_packet
|
||||
gen_kernel_pkt(uint64_t obj)
|
||||
{
|
||||
rocprofiler::hsa::rocprofiler_packet packet{};
|
||||
memset(((uint8_t*) &packet.kernel_dispatch) + 4, 0, sizeof(hsa_kernel_dispatch_packet_t) - 4);
|
||||
packet.kernel_dispatch.setup = 1;
|
||||
packet.kernel_dispatch.header = packet_header(HSA_PACKET_TYPE_KERNEL_DISPATCH);
|
||||
packet.kernel_dispatch.workgroup_size_x = 1;
|
||||
packet.kernel_dispatch.workgroup_size_y = 1;
|
||||
packet.kernel_dispatch.workgroup_size_z = 1;
|
||||
packet.kernel_dispatch.grid_size_x = 1;
|
||||
packet.kernel_dispatch.grid_size_y = 1;
|
||||
packet.kernel_dispatch.grid_size_z = 1;
|
||||
packet.kernel_dispatch.kernel_object = obj;
|
||||
packet.kernel_dispatch.kernarg_address = nullptr;
|
||||
packet.kernel_dispatch.completion_signal.handle = 0;
|
||||
ROCP_ERROR << fmt::format("{:x}", packet.kernel_dispatch.kernel_object);
|
||||
return packet;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
submitPacket(hsa_queue_t* queue, const void* packet)
|
||||
{
|
||||
const uint32_t slot_size_b = 0x40;
|
||||
|
||||
// advance command queue
|
||||
const uint64_t write_idx = hsa_queue_add_write_index_scacq_screl(queue, 1);
|
||||
while((write_idx - hsa_queue_load_read_index_relaxed(queue)) >= queue->size)
|
||||
{
|
||||
sched_yield();
|
||||
}
|
||||
|
||||
const uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
|
||||
// NOLINTBEGIN(performance-no-int-to-ptr)
|
||||
uint32_t* queue_slot =
|
||||
reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b));
|
||||
const uint32_t* slot_data = reinterpret_cast<const uint32_t*>(packet);
|
||||
|
||||
// Copy buffered commands into the queue slot.
|
||||
// Overwrite the AQL invalid header (first dword) last.
|
||||
// This prevents the slot from being read until it's fully written.
|
||||
memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t));
|
||||
std::atomic<uint32_t>* header_atomic_ptr =
|
||||
reinterpret_cast<std::atomic<uint32_t>*>(&queue_slot[0]);
|
||||
// NOLINTEND(performance-no-int-to-ptr)
|
||||
header_atomic_ptr->store(slot_data[0], std::memory_order_release);
|
||||
|
||||
// ringdoor bell
|
||||
hsa_signal_store_relaxed(queue->doorbell_signal, write_idx);
|
||||
|
||||
return write_idx;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
class agent_profile_test : public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
agent_profile_test() {}
|
||||
|
||||
static void test_run(rocprofiler_counter_flag_t flags = ROCPROFILER_COUNTER_FLAG_NONE,
|
||||
const std::unordered_set<std::string>& test_metrics = {},
|
||||
size_t delay = 1)
|
||||
{
|
||||
hsa_init();
|
||||
registration::init_logging();
|
||||
registration::set_init_status(-1);
|
||||
context::push_client(1);
|
||||
test_init();
|
||||
counters::agent_profile_hsa_registration();
|
||||
|
||||
std::string kernel_name = "null_kernel";
|
||||
|
||||
ASSERT_TRUE(hsa::get_queue_controller() != nullptr);
|
||||
ASSERT_GT(hsa::get_queue_controller()->get_supported_agents().size(), 0);
|
||||
for(const auto& [_, agent] : hsa::get_queue_controller()->get_supported_agents())
|
||||
{
|
||||
auto metrics = findDeviceMetrics(agent, test_metrics);
|
||||
ASSERT_FALSE(metrics.empty());
|
||||
ASSERT_TRUE(agent.get_rocp_agent());
|
||||
test_kernels kernel_loader(agent);
|
||||
auto kernel_handle = kernel_loader.load_kernel(agent, kernel_name);
|
||||
auto kernel_pkt = gen_kernel_pkt(kernel_handle);
|
||||
|
||||
hsa_queue_t* queue;
|
||||
CHECK_EQ(hsa_queue_create(agent.get_hsa_agent(),
|
||||
64,
|
||||
HSA_QUEUE_TYPE_SINGLE,
|
||||
nullptr,
|
||||
nullptr,
|
||||
UINT32_MAX,
|
||||
UINT32_MAX,
|
||||
&queue),
|
||||
HSA_STATUS_SUCCESS);
|
||||
|
||||
rocprofiler::hsa::rocprofiler_packet barrier{};
|
||||
hsa_signal_t completion_signal;
|
||||
hsa_signal_create(1, 0, nullptr, &completion_signal);
|
||||
barrier.barrier_and.header = packet_header(HSA_PACKET_TYPE_BARRIER_AND);
|
||||
barrier.barrier_and.completion_signal = completion_signal;
|
||||
|
||||
hsa_signal_t found_data;
|
||||
hsa_signal_create(0, 0, nullptr, &found_data);
|
||||
size_t track_metric = 0;
|
||||
for(auto& metric : metrics)
|
||||
{
|
||||
// global_recs().clear();
|
||||
track_metric++;
|
||||
ROCP_ERROR << "Testing metric " << metric.name();
|
||||
rocprofiler_context_id_t ctx = {.handle = 0};
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&ctx), "context creation failed");
|
||||
rocprofiler_buffer_id_t opt_buff_id = {.handle = 0};
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(ctx,
|
||||
500 * sizeof(size_t),
|
||||
500 * sizeof(size_t),
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
check_output_created,
|
||||
&found_data,
|
||||
&opt_buff_id),
|
||||
"Could not create buffer");
|
||||
/**
|
||||
* Check profile construction
|
||||
*/
|
||||
rocprofiler_profile_config_id_t cfg_id = {};
|
||||
rocprofiler_counter_id_t id = {.handle = metric.id()};
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_create_profile_config(agent.get_rocp_agent()->id, &id, 1, &cfg_id),
|
||||
"Unable to create profile");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_agent_profile_counting_service(
|
||||
ctx,
|
||||
opt_buff_id,
|
||||
agent.get_rocp_agent()->id,
|
||||
[](rocprofiler_context_id_t context_id,
|
||||
rocprofiler_agent_id_t,
|
||||
rocprofiler_agent_set_profile_callback_t set_config,
|
||||
void* user_data) {
|
||||
CHECK(user_data);
|
||||
if(auto status = set_config(
|
||||
context_id,
|
||||
*static_cast<rocprofiler_profile_config_id_t*>(user_data));
|
||||
status != ROCPROFILER_STATUS_SUCCESS)
|
||||
{
|
||||
ROCP_FATAL << rocprofiler_get_status_string(status);
|
||||
}
|
||||
},
|
||||
static_cast<void*>(&cfg_id)),
|
||||
"Could not create agent collection");
|
||||
|
||||
// This queue will only be present if a context exists when AgentCache is
|
||||
// construction This is a workaround for the test environment since we create
|
||||
// contexts after AgentCache constructed.
|
||||
agent::get_agent_cache(agent.get_rocp_agent())
|
||||
->init_agent_profile_queue(get_api_table(), get_ext_table());
|
||||
|
||||
hsa_signal_store_screlease(completion_signal, 1);
|
||||
hsa_signal_store_screlease(found_data, 0);
|
||||
auto status = rocprofiler_start_context(ctx);
|
||||
if(status == ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS)
|
||||
{
|
||||
ROCP_ERROR << fmt::format("No hardware counters for {}, skipping",
|
||||
metric.name());
|
||||
continue;
|
||||
}
|
||||
else if(status != ROCPROFILER_STATUS_SUCCESS)
|
||||
{
|
||||
ROCP_FATAL << "Failed to start context - "
|
||||
<< rocprofiler_get_status_string(status);
|
||||
}
|
||||
|
||||
ROCPROFILER_CALL(status, "Could not start context");
|
||||
|
||||
// Execute kernel
|
||||
submitPacket(queue, &kernel_pkt);
|
||||
submitPacket(queue, &kernel_pkt);
|
||||
submitPacket(queue, &kernel_pkt);
|
||||
submitPacket(queue, &kernel_pkt);
|
||||
submitPacket(queue, &kernel_pkt);
|
||||
submitPacket(queue, &barrier);
|
||||
usleep(delay);
|
||||
// Wait for completion
|
||||
hsa_signal_wait_relaxed(completion_signal,
|
||||
HSA_SIGNAL_CONDITION_EQ,
|
||||
0,
|
||||
UINT64_MAX,
|
||||
HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
// Sample the counting service.
|
||||
ROCPROFILER_CALL(rocprofiler_sample_agent_profile_counting_service(
|
||||
ctx, {.value = track_metric}, flags),
|
||||
"Could not sample");
|
||||
ROCPROFILER_CALL(rocprofiler_stop_context(ctx), "Could not stop context");
|
||||
rocprofiler_flush_buffer(opt_buff_id);
|
||||
|
||||
if(hsa_signal_wait_relaxed(found_data,
|
||||
HSA_SIGNAL_CONDITION_EQ,
|
||||
track_metric,
|
||||
20000000,
|
||||
HSA_WAIT_STATE_BLOCKED) !=
|
||||
static_cast<int64_t>(track_metric))
|
||||
{
|
||||
ROCP_FATAL << "Failed to get data for " << metric.name();
|
||||
}
|
||||
}
|
||||
hsa_signal_destroy(completion_signal);
|
||||
hsa_signal_destroy(found_data);
|
||||
hsa_queue_destroy(queue);
|
||||
}
|
||||
registration::set_init_status(1);
|
||||
context::pop_client(1);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(agent_profile_test, sync_counters) { test_run(); }
|
||||
TEST_F(agent_profile_test, async_counters) { test_run(ROCPROFILER_COUNTER_FLAG_ASYNC); }
|
||||
TEST_F(agent_profile_test, sync_grbm_verify)
|
||||
{
|
||||
test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"GRBM_COUNT"}, 50000);
|
||||
ROCP_ERROR << global_recs().size();
|
||||
|
||||
for(const auto& val : global_recs())
|
||||
{
|
||||
rocprofiler_counter_id_t id;
|
||||
rocprofiler_query_record_counter_id(val.id, &id);
|
||||
rocprofiler_counter_info_v0_t info;
|
||||
rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info);
|
||||
ROCP_ERROR << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
|
||||
EXPECT_GT(val.counter_value, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(agent_profile_test, sync_gpu_util_verify)
|
||||
{
|
||||
test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"GPU_UTIL"}, 50000);
|
||||
ROCP_ERROR << global_recs().size();
|
||||
|
||||
for(const auto& val : global_recs())
|
||||
{
|
||||
rocprofiler_counter_id_t id;
|
||||
rocprofiler_query_record_counter_id(val.id, &id);
|
||||
rocprofiler_counter_info_v0_t info;
|
||||
rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info);
|
||||
ROCP_ERROR << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
|
||||
EXPECT_GT(val.counter_value, 0.0);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
@@ -0,0 +1,106 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/counters/tests/code_object_loader.hpp"
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "lib/common/logging.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace counters
|
||||
{
|
||||
namespace testing
|
||||
{
|
||||
hsa_status_t
|
||||
load_code_object(const std::string& filename, hsa_agent_t agent, CodeObject& code_object)
|
||||
{
|
||||
hsa_status_t err;
|
||||
code_object.file = open(filename.c_str(), O_RDONLY);
|
||||
if(code_object.file == -1)
|
||||
{
|
||||
ROCP_FATAL << "Could not load code object " << filename;
|
||||
}
|
||||
|
||||
err = hsa_code_object_reader_create_from_file(code_object.file, &code_object.code_obj_rdr);
|
||||
if(err != HSA_STATUS_SUCCESS) return err;
|
||||
|
||||
err = hsa_executable_create_alt(HSA_PROFILE_FULL,
|
||||
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
nullptr,
|
||||
&code_object.executable);
|
||||
if(err != HSA_STATUS_SUCCESS) return err;
|
||||
err = hsa_executable_load_agent_code_object(
|
||||
code_object.executable, agent, code_object.code_obj_rdr, nullptr, nullptr);
|
||||
if(err != HSA_STATUS_SUCCESS) return err;
|
||||
|
||||
err = hsa_executable_freeze(code_object.executable, nullptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
hsa_status_t
|
||||
get_kernel(const CodeObject& code_object,
|
||||
const std::string& kernel,
|
||||
hsa_agent_t agent,
|
||||
Kernel& kern)
|
||||
{
|
||||
hsa_executable_symbol_t symbol;
|
||||
hsa_status_t err =
|
||||
hsa_executable_get_symbol_by_name(code_object.executable, kernel.c_str(), &agent, &symbol);
|
||||
if(err != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
err = hsa_executable_get_symbol_by_name(
|
||||
code_object.executable, (kernel + ".kd").c_str(), &agent, &symbol);
|
||||
if(err != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
return err;
|
||||
}
|
||||
}
|
||||
ROCP_INFO << "kernel-name: " << kernel.c_str() << "\n";
|
||||
err = hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kern.handle);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void
|
||||
search_hasco(const common::filesystem::path& directory, std::string& filename)
|
||||
{
|
||||
for(const auto& entry : common::filesystem::directory_iterator(directory))
|
||||
{
|
||||
if(common::filesystem::is_regular_file(entry))
|
||||
{
|
||||
if(entry.path().filename() == filename)
|
||||
{
|
||||
filename = entry.path();
|
||||
}
|
||||
}
|
||||
else if(common::filesystem::is_directory(entry))
|
||||
{
|
||||
search_hasco(entry, filename); // Recursive call for subdirectories
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace testing
|
||||
} // namespace counters
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,65 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <string>
|
||||
|
||||
#include "lib/common/filesystem.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace counters
|
||||
{
|
||||
namespace testing
|
||||
{
|
||||
struct CodeObject
|
||||
{
|
||||
hsa_file_t file = 0;
|
||||
hsa_code_object_reader_t code_obj_rdr = {};
|
||||
hsa_executable_t executable = {};
|
||||
};
|
||||
|
||||
hsa_status_t
|
||||
load_code_object(const std::string& filename, hsa_agent_t agent, CodeObject& code_object);
|
||||
struct Kernel
|
||||
{
|
||||
uint64_t handle = 0;
|
||||
uint32_t scratch = 0;
|
||||
uint32_t group = 0;
|
||||
uint32_t kernarg_size = 0;
|
||||
uint32_t kernarg_align = 0;
|
||||
};
|
||||
|
||||
hsa_status_t
|
||||
get_kernel(const CodeObject& code_object,
|
||||
const std::string& kernel,
|
||||
hsa_agent_t agent,
|
||||
Kernel& kern);
|
||||
|
||||
void
|
||||
search_hasco(const common::filesystem::path& directory, std::string& filename);
|
||||
} // namespace testing
|
||||
} // namespace counters
|
||||
} // namespace rocprofiler
|
||||
@@ -747,54 +747,3 @@ TEST(core, public_api_iterate_agents)
|
||||
EXPECT_TRUE(from_api.empty());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(core, init_agent_collection)
|
||||
{
|
||||
ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
|
||||
registration::init_logging();
|
||||
registration::set_init_status(-1);
|
||||
context::push_client(1);
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "context creation failed");
|
||||
auto agents = hsa::get_queue_controller()->get_supported_agents();
|
||||
|
||||
rocprofiler_buffer_id_t opt_buff_id = {.handle = 0};
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
||||
500 * sizeof(size_t),
|
||||
500 * sizeof(size_t),
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
null_buffered_callback,
|
||||
nullptr,
|
||||
&opt_buff_id),
|
||||
"Could not create buffer");
|
||||
for(const auto& [_, agent] : agents)
|
||||
{
|
||||
auto metrics = findDeviceMetrics(agent, {});
|
||||
ASSERT_FALSE(metrics.empty());
|
||||
ASSERT_TRUE(agent.get_rocp_agent());
|
||||
for(auto& metric : metrics)
|
||||
{
|
||||
expected_dispatch expected = {};
|
||||
rocprofiler_counter_id_t id = {.handle = metric.id()};
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_create_profile_config(agent.get_rocp_agent()->id, &id, 1, &expected.id),
|
||||
"Unable to create profile");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_agent_profile_counting_service(
|
||||
get_client_ctx(), opt_buff_id, expected.id),
|
||||
"Could not create agent collection");
|
||||
{
|
||||
auto cfg = counters::get_profile_config(expected.id);
|
||||
auto* ctx = rocprofiler::context::get_mutable_registered_context(get_client_ctx());
|
||||
ASSERT_TRUE(ctx);
|
||||
ASSERT_TRUE(ctx->agent_counter_collection);
|
||||
EXPECT_EQ(ctx->agent_counter_collection->profile, cfg);
|
||||
EXPECT_EQ(ctx->agent_counter_collection->buffer.handle, opt_buff_id.handle);
|
||||
}
|
||||
ROCPROFILER_CALL(rocprofiler_destroy_profile_config(expected.id),
|
||||
"Could not delete profile id");
|
||||
}
|
||||
}
|
||||
rocprofiler_destroy_buffer(opt_buff_id);
|
||||
registration::set_init_status(1);
|
||||
context::pop_client(1);
|
||||
}
|
||||
|
||||
@@ -646,17 +646,22 @@ TEST(evaluate_ast, counter_reduction_sum)
|
||||
sum_vec(base_counter_data["KRUEGER"])),
|
||||
2},
|
||||
{"KRAMER",
|
||||
plus_vec(times_vec(
|
||||
std::vector<rocprofiler_record_counter_t>{
|
||||
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}},
|
||||
sum_vec(base_counter_data["VOORHEES"])),
|
||||
sum_vec(base_counter_data["KRUEGER"])),
|
||||
plus_vec(
|
||||
times_vec(std::vector<rocprofiler_record_counter_t>{{.id = 0,
|
||||
.counter_value = 5.0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0}}},
|
||||
sum_vec(base_counter_data["VOORHEES"])),
|
||||
sum_vec(base_counter_data["KRUEGER"])),
|
||||
2},
|
||||
{"GHOSTFACE",
|
||||
plus_vec(sum_vec(base_counter_data["VOORHEES"]),
|
||||
divide_vec(sum_vec(base_counter_data["KRUEGER"]),
|
||||
std::vector<rocprofiler_record_counter_t>{
|
||||
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}})),
|
||||
divide_vec(
|
||||
sum_vec(base_counter_data["KRUEGER"]),
|
||||
std::vector<rocprofiler_record_counter_t>{{.id = 0,
|
||||
.counter_value = 5.0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0}}})),
|
||||
2},
|
||||
};
|
||||
|
||||
@@ -727,17 +732,22 @@ TEST(evaluate_ast, counter_reduction_min)
|
||||
min_vec(base_counter_data["KRUEGER"])),
|
||||
2},
|
||||
{"KRAMER",
|
||||
plus_vec(times_vec(
|
||||
std::vector<rocprofiler_record_counter_t>{
|
||||
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}},
|
||||
min_vec(base_counter_data["VOORHEES"])),
|
||||
min_vec(base_counter_data["KRUEGER"])),
|
||||
plus_vec(
|
||||
times_vec(std::vector<rocprofiler_record_counter_t>{{.id = 0,
|
||||
.counter_value = 5.0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0}}},
|
||||
min_vec(base_counter_data["VOORHEES"])),
|
||||
min_vec(base_counter_data["KRUEGER"])),
|
||||
2},
|
||||
{"GHOSTFACE",
|
||||
plus_vec(min_vec(base_counter_data["VOORHEES"]),
|
||||
divide_vec(min_vec(base_counter_data["KRUEGER"]),
|
||||
std::vector<rocprofiler_record_counter_t>{
|
||||
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}})),
|
||||
divide_vec(
|
||||
min_vec(base_counter_data["KRUEGER"]),
|
||||
std::vector<rocprofiler_record_counter_t>{{.id = 0,
|
||||
.counter_value = 5.0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0}}})),
|
||||
2},
|
||||
};
|
||||
|
||||
@@ -808,17 +818,22 @@ TEST(evaluate_ast, counter_reduction_max)
|
||||
max_vec(base_counter_data["KRUEGER"])),
|
||||
2},
|
||||
{"KRAMER",
|
||||
plus_vec(times_vec(
|
||||
std::vector<rocprofiler_record_counter_t>{
|
||||
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}},
|
||||
max_vec(base_counter_data["VOORHEES"])),
|
||||
max_vec(base_counter_data["KRUEGER"])),
|
||||
plus_vec(
|
||||
times_vec(std::vector<rocprofiler_record_counter_t>{{.id = 0,
|
||||
.counter_value = 5.0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0}}},
|
||||
max_vec(base_counter_data["VOORHEES"])),
|
||||
max_vec(base_counter_data["KRUEGER"])),
|
||||
2},
|
||||
{"GHOSTFACE",
|
||||
plus_vec(max_vec(base_counter_data["VOORHEES"]),
|
||||
divide_vec(max_vec(base_counter_data["KRUEGER"]),
|
||||
std::vector<rocprofiler_record_counter_t>{
|
||||
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}})),
|
||||
divide_vec(
|
||||
max_vec(base_counter_data["KRUEGER"]),
|
||||
std::vector<rocprofiler_record_counter_t>{{.id = 0,
|
||||
.counter_value = 5.0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0}}})),
|
||||
2},
|
||||
};
|
||||
|
||||
@@ -891,17 +906,22 @@ TEST(evaluate_ast, counter_reduction_avg)
|
||||
avg_vec(base_counter_data["KRUEGER"])),
|
||||
2},
|
||||
{"KRAMER",
|
||||
plus_vec(times_vec(
|
||||
std::vector<rocprofiler_record_counter_t>{
|
||||
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}},
|
||||
avg_vec(base_counter_data["VOORHEES"])),
|
||||
avg_vec(base_counter_data["KRUEGER"])),
|
||||
plus_vec(
|
||||
times_vec(std::vector<rocprofiler_record_counter_t>{{.id = 0,
|
||||
.counter_value = 5.0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0}}},
|
||||
avg_vec(base_counter_data["VOORHEES"])),
|
||||
avg_vec(base_counter_data["KRUEGER"])),
|
||||
2},
|
||||
{"GHOSTFACE",
|
||||
plus_vec(avg_vec(base_counter_data["VOORHEES"]),
|
||||
divide_vec(avg_vec(base_counter_data["KRUEGER"]),
|
||||
std::vector<rocprofiler_record_counter_t>{
|
||||
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}})),
|
||||
divide_vec(
|
||||
avg_vec(base_counter_data["KRUEGER"]),
|
||||
std::vector<rocprofiler_record_counter_t>{{.id = 0,
|
||||
.counter_value = 5.0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0}}})),
|
||||
2},
|
||||
};
|
||||
|
||||
@@ -962,18 +982,23 @@ TEST(evaluate_ast, evaluate_mixed_counters)
|
||||
{"BATES",
|
||||
times_vec(
|
||||
std::vector<rocprofiler_record_counter_t>{
|
||||
{.id = 0, .counter_value = 32, .dispatch_id = 0}},
|
||||
{.id = 0, .counter_value = 32, .dispatch_id = 0, .user_data = {.value = 0}}},
|
||||
sum_vec(base_counter_data["VOORHEES"])),
|
||||
2},
|
||||
{"KRAMER",
|
||||
times_vec(sum_vec(base_counter_data["KRUEGER"]),
|
||||
std::vector<rocprofiler_record_counter_t>{
|
||||
{.id = 0, .counter_value = 8.0 / 5.0, .dispatch_id = 0}}),
|
||||
std::vector<rocprofiler_record_counter_t>{{.id = 0,
|
||||
.counter_value = 8.0 / 5.0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0}}}),
|
||||
3},
|
||||
{"TORRANCE",
|
||||
times_vec(sum_vec(base_counter_data["KRUEGER"]),
|
||||
std::vector<rocprofiler_record_counter_t>{
|
||||
{.id = 0, .counter_value = 104.0 / (156.0 * 8.0), .dispatch_id = 0}}),
|
||||
times_vec(
|
||||
sum_vec(base_counter_data["KRUEGER"]),
|
||||
std::vector<rocprofiler_record_counter_t>{{.id = 0,
|
||||
.counter_value = 104.0 / (156.0 * 8.0),
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0}}}),
|
||||
4},
|
||||
};
|
||||
|
||||
|
||||
@@ -166,7 +166,12 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
|
||||
static const std::unordered_map<std::string, std::vector<std::vector<std::string>>> derived_gfx908 =
|
||||
{{"gfx908",
|
||||
{{"SQ_WAVES_sum",
|
||||
{{"GPU_UTIL",
|
||||
"",
|
||||
"",
|
||||
"100*GRBM_GUI_ACTIVE/GRBM_COUNT",
|
||||
"Percentage of the time that GUI is active"},
|
||||
{"SQ_WAVES_sum",
|
||||
"",
|
||||
"",
|
||||
"reduce(SQ_WAVES,sum)",
|
||||
|
||||
@@ -48,7 +48,7 @@
|
||||
<metric name="TCC_EA_WRREQ_sum" expr=reduce(TCC_EA_WRREQ,sum) descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_EA_WRREQ_64B_sum" expr=reduce(TCC_EA_WRREQ_64B,sum) descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
|
||||
<metric name="TCC_WRREQ_STALL_max" expr=reduce(TCC_EA_WRREQ_STALL,max) descr="Number of cycles a write request was stalled. Max over TCC instances."></metric>
|
||||
|
||||
<metric name="GPU_UTIL" expr=100*GRBM_GUI_ACTIVE/GRBM_COUNT descr="Percentage of the time that GUI is active"></metric>
|
||||
<metric name="TCP_TCP_TA_DATA_STALL_CYCLES_sum" expr=reduce(TCP_TCP_TA_DATA_STALL_CYCLES,sum) descr="Total number of TCP stalls TA data interface."></metric>
|
||||
<metric name="TCP_TCP_TA_DATA_STALL_CYCLES_max" expr=reduce(TCP_TCP_TA_DATA_STALL_CYCLES,max) descr="Maximum number of TCP stalls TA data interface."></metric>
|
||||
|
||||
@@ -425,6 +425,7 @@
|
||||
<metric name="TOTAL_16_OPS" expr=(SQ_INSTS_VALU_FMA_F16*2+SQ_INSTS_VALU_ADD_F16+SQ_INSTS_VALU_MUL_F16+SQ_INSTS_VALU_TRANS_F16)*64+((SQ_INSTS_VALU_MFMA_MOPS_F16+SQ_INSTS_VALU_MFMA_MOPS_BF16)*512) descr="The number of 16 bits OPS executed"></metric>
|
||||
<metric name="TOTAL_32_OPS" expr=(SQ_INSTS_VALU_FMA_F32*2+SQ_INSTS_VALU_INT32+SQ_INSTS_VALU_ADD_F32+SQ_INSTS_VALU_MUL_F32+SQ_INSTS_VALU_TRANS_F32)*64+(SQ_INSTS_VALU_MFMA_MOPS_F32*512) descr="The number of 32 bits OPS executed"></metric>
|
||||
<metric name="TOTAL_64_OPS" expr=(SQ_INSTS_VALU_FMA_F64*2+SQ_INSTS_VALU_INT64+SQ_INSTS_VALU_ADD_F64+SQ_INSTS_VALU_MUL_F64)*64+(SQ_INSTS_VALU_MFMA_MOPS_F64*512) descr="The number of 64 bits OPS executed"></metric>
|
||||
<metric name="GPU_UTIL" expr=100*GRBM_GUI_ACTIVE/GRBM_COUNT descr="Percentage of the time that GUI is active"></metric>
|
||||
</gfx940>
|
||||
|
||||
<gfx10>
|
||||
@@ -469,6 +470,9 @@
|
||||
<gfx1031 base="gfx10">
|
||||
</gfx1031>
|
||||
|
||||
<gfx1010 base="gfx10">
|
||||
</gfx1010>
|
||||
|
||||
<gfx1032 base="gfx10">
|
||||
</gfx1032>
|
||||
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
#include <fmt/core.h>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
|
||||
namespace
|
||||
{
|
||||
// This function checks to see if the provided
|
||||
@@ -123,11 +125,48 @@ namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
void
|
||||
AgentCache::init_agent_profile_queue(const CoreApiTable& api, const AmdExtTable& ext) const
|
||||
{
|
||||
static std::mutex m_mutex;
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
using context = rocprofiler::context::context;
|
||||
const auto* agent_ctx = []() -> const context* {
|
||||
for(auto& ctx : rocprofiler::context::get_registered_contexts())
|
||||
{
|
||||
if(ctx->agent_counter_collection) return ctx;
|
||||
}
|
||||
return nullptr;
|
||||
}();
|
||||
|
||||
if(!agent_ctx || m_profile_queue) return;
|
||||
ROCP_ERROR << "Creating Profile Queue";
|
||||
// create the queue and set it to high_priority
|
||||
CHECK(api.hsa_queue_create_fn) << "no hsa_queue_create_fn in api table";
|
||||
auto status = api.hsa_queue_create_fn(get_hsa_agent(),
|
||||
64,
|
||||
HSA_QUEUE_TYPE_SINGLE,
|
||||
nullptr,
|
||||
nullptr,
|
||||
UINT32_MAX,
|
||||
UINT32_MAX,
|
||||
&m_profile_queue);
|
||||
if(status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
{
|
||||
throw std::runtime_error("Error: Queue is not initialized");
|
||||
}
|
||||
|
||||
CHECK(ext.hsa_amd_queue_set_priority_fn) << "no hsa_amd_queue_set_priority_fn in api table";
|
||||
ext.hsa_amd_queue_set_priority_fn(m_profile_queue, HSA_AMD_QUEUE_PRIORITY_HIGH);
|
||||
}
|
||||
|
||||
AgentCache::AgentCache(const rocprofiler_agent_t* rocp_agent,
|
||||
hsa_agent_t hsa_agent,
|
||||
size_t index,
|
||||
hsa_agent_t nearest_cpu,
|
||||
const AmdExtTable& ext_table)
|
||||
const AmdExtTable& ext_table,
|
||||
const CoreApiTable& api)
|
||||
: m_rocp_agent{rocp_agent}
|
||||
, m_index{index}
|
||||
, m_hsa_agent{hsa_agent}
|
||||
@@ -139,6 +178,7 @@ AgentCache::AgentCache(const rocprofiler_agent_t* rocp_agent,
|
||||
{
|
||||
init_cpu_pool(ext_table, *this);
|
||||
init_gpu_pool(ext_table, *this);
|
||||
init_agent_profile_queue(api, ext_table);
|
||||
} catch(std::runtime_error& e)
|
||||
{
|
||||
ROCP_WARNING << fmt::format(
|
||||
|
||||
@@ -52,7 +52,8 @@ public:
|
||||
hsa_agent_t hsa_agent,
|
||||
size_t index,
|
||||
hsa_agent_t nearest_cpu,
|
||||
const AmdExtTable& ext_table);
|
||||
const AmdExtTable& ext_table,
|
||||
const CoreApiTable& api);
|
||||
~AgentCache() = default;
|
||||
AgentCache(const AgentCache&) = default;
|
||||
AgentCache(AgentCache&&) noexcept = default;
|
||||
@@ -67,10 +68,12 @@ public:
|
||||
CONST_NONCONST_ACCESSOR(hsa_agent_t, get_hsa_agent, m_hsa_agent);
|
||||
CONST_NONCONST_ACCESSOR(hsa_agent_t, near_cpu, m_nearest_cpu);
|
||||
|
||||
hsa_queue_t* profile_queue() const { return m_profile_queue; }
|
||||
const rocprofiler_agent_t* get_rocp_agent() const { return m_rocp_agent; }
|
||||
std::string_view name() const { return m_name; }
|
||||
size_t index() const { return m_index; }
|
||||
|
||||
void init_agent_profile_queue(const CoreApiTable& api, const AmdExtTable& ext) const;
|
||||
bool operator==(const rocprofiler_agent_t*) const;
|
||||
bool operator==(hsa_agent_t) const;
|
||||
|
||||
@@ -88,7 +91,8 @@ private:
|
||||
hsa_amd_memory_pool_t m_kernarg_pool{.handle = 0};
|
||||
hsa_amd_memory_pool_t m_gpu_pool{.handle = 0};
|
||||
|
||||
std::string_view m_name = {};
|
||||
std::string_view m_name = {};
|
||||
mutable hsa_queue_t* m_profile_queue = {};
|
||||
};
|
||||
|
||||
inline bool
|
||||
|
||||
@@ -748,6 +748,9 @@ rocprofiler_set_api_table(const char* name,
|
||||
// need to construct agent mappings before initializing the queue controller
|
||||
rocprofiler::agent::construct_agent_cache(hsa_api_table);
|
||||
rocprofiler::hsa::queue_controller_init(hsa_api_table);
|
||||
// Process agent ctx's that were started prior to HSA init
|
||||
rocprofiler::counters::agent_profile_hsa_registration();
|
||||
|
||||
rocprofiler::hsa::async_copy_init(hsa_api_table, lib_instance);
|
||||
rocprofiler::code_object::initialize(hsa_api_table);
|
||||
|
||||
|
||||
@@ -96,7 +96,17 @@ ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND,
|
||||
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT,
|
||||
"Cannot have both an agent counter collection and a dispatch counter "
|
||||
"in the same context")
|
||||
|
||||
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_INTERNAL_NO_AGENT_CONTEXT,
|
||||
"No context has agent profiling enabled, "
|
||||
"error generally not returned to tools")
|
||||
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_SAMPLE_RATE_EXCEEDED,
|
||||
"A sample is in progress and a new sample cannot be started")
|
||||
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_NO_PROFILE_QUEUE,
|
||||
"No profile queue is available for this agent")
|
||||
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS,
|
||||
"Counter set does not include any hardware counters")
|
||||
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH,
|
||||
"Counter profile agent does not match the agent in the context")
|
||||
template <size_t Idx, size_t... Tail>
|
||||
const char*
|
||||
get_status_name(rocprofiler_status_t status, std::index_sequence<Idx, Tail...>)
|
||||
|
||||
Ссылка в новой задаче
Block a user