[2/N] Agent Counter implementation with unit tests to check functionality (#846)

Agent Counter Collection API with tests and samples.
---------

Co-authored-by: Benjamin Welton <ben@amd.com>
Этот коммит содержится в:
Benjamin Welton
2024-05-21 13:34:54 -07:00
коммит произвёл GitHub
родитель 7eeb7376b4
Коммит 28e6430d04
32 изменённых файлов: 2097 добавлений и 157 удалений
+26
Просмотреть файл
@@ -106,3 +106,29 @@ set_tests_properties(
PROPERTIES TIMEOUT 120 LABELS "samples" ENVIRONMENT
"${counter-collection-functional-counter-env}" FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
add_library(counter-collection-agent-profiling-client SHARED)
target_sources(counter-collection-agent-profiling-client PRIVATE agent_profiling.cpp
client.hpp)
target_link_libraries(
counter-collection-agent-profiling-client
PUBLIC rocprofiler::samples-build-flags
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-common-library)
add_executable(counter-collection-agent-profiling)
target_sources(counter-collection-agent-profiling PRIVATE main.cpp)
target_link_libraries(counter-collection-agent-profiling
PRIVATE counter-collection-agent-profiling-client Threads::Threads)
rocprofiler_samples_get_preload_env(PRELOAD_ENV counter-collection-agent-profiling-client)
set(counter-collection-functional-counter-env "${PRELOAD_ENV}" "${LIBRARY_PATH_ENV}")
add_test(NAME counter-collection-agent-profiling
COMMAND $<TARGET_FILE:counter-collection-agent-profiling>)
set_tests_properties(
counter-collection-agent-profiling
PROPERTIES TIMEOUT 120 LABELS "samples" ENVIRONMENT
"${counter-collection-functional-counter-env}" FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
+347
Просмотреть файл
@@ -0,0 +1,347 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "client.hpp"
#include <atomic>
#include <chrono>
#include <fstream>
#include <functional>
#include <iostream>
#include <mutex>
#include <set>
#include <shared_mutex>
#include <sstream>
#include <unordered_map>
#include <vector>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/registration.h>
#include <rocprofiler-sdk/rocprofiler.h>
#define ROCPROFILER_CALL(result, msg) \
{ \
rocprofiler_status_t CHECKSTATUS = result; \
if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \
{ \
std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \
std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \
<< " failed with error code " << CHECKSTATUS << ": " << status_msg \
<< std::endl; \
std::stringstream errmsg{}; \
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \
<< status_msg << ")"; \
throw std::runtime_error(errmsg.str()); \
} \
}
int
start()
{
return 1;
}
namespace
{
rocprofiler_context_id_t&
get_client_ctx()
{
static rocprofiler_context_id_t ctx;
return ctx;
}
rocprofiler_buffer_id_t&
get_buffer()
{
static rocprofiler_buffer_id_t buf = {};
return buf;
}
/**
* Buffer callback called when the buffer is full. rocprofiler_record_header_t
* can contain counter records as well as other records (such as tracing). These
* records need to be filtered based on the category type.
*/
void
buffered_callback(rocprofiler_context_id_t,
rocprofiler_buffer_id_t,
rocprofiler_record_header_t** headers,
size_t num_headers,
void* user_data,
uint64_t)
{
std::stringstream ss;
// Iterate through the returned records
for(size_t i = 0; i < num_headers; ++i)
{
auto* header = headers[i];
if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS &&
header->kind == ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER)
{}
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS &&
header->kind == ROCPROFILER_COUNTER_RECORD_VALUE)
{
// Print the returned counter data.
auto* record = static_cast<rocprofiler_record_counter_t*>(header->payload);
ss << " (Id: " << record->id << " Value [D]: " << record->counter_value << ","
<< " user_data: " << record->user_data.value << "),";
}
}
auto* output_stream = static_cast<std::ostream*>(user_data);
if(!output_stream) throw std::runtime_error{"nullptr to output stream"};
*output_stream << "[" << __FUNCTION__ << "] " << ss.str() << "\n";
}
std::unordered_map<uint64_t, rocprofiler_profile_config_id_t>&
get_profile_cache()
{
static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache;
return profile_cache;
}
/**
* Callback from rocprofiler when an kernel dispatch is enqueued into the HSA queue.
* rocprofiler_profile_config_id_t* is a return to specify what counters to collect
* for this dispatch (dispatch_packet). This example function creates a profile
* to collect the counter SQ_WAVES for all kernel dispatch packets.
*/
void
set_profile(rocprofiler_context_id_t context_id,
rocprofiler_agent_id_t agent,
rocprofiler_agent_set_profile_callback_t set_config,
void*)
{
/**
* This simple example uses the same profile counter set for all agents.
* We store this in a cache to prevent constructing many identical profile counter
* sets. We first check the cache to see if we have already constructed a counter"
* set for the agent. If we have, return it. Otherwise, construct a new profile counter
* set.
*/
auto search_cache = [&]() {
if(auto pos = get_profile_cache().find(agent.handle); pos != get_profile_cache().end())
{
set_config(context_id, pos->second);
return true;
}
return false;
};
if(!search_cache())
{
std::cerr << "No profile for agent found in cache\n";
exit(-1);
}
}
rocprofiler_profile_config_id_t
build_profile_for_agent(rocprofiler_agent_id_t agent)
{
std::set<std::string> counters_to_collect = {"SQ_WAVES"};
std::vector<rocprofiler_counter_id_t> gpu_counters;
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
agent,
[](rocprofiler_agent_id_t,
rocprofiler_counter_id_t* counters,
size_t num_counters,
void* user_data) {
std::vector<rocprofiler_counter_id_t>* vec =
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
for(size_t i = 0; i < num_counters; i++)
{
vec->push_back(counters[i]);
}
return ROCPROFILER_STATUS_SUCCESS;
},
static_cast<void*>(&gpu_counters)),
"Could not fetch supported counters");
std::vector<rocprofiler_counter_id_t> collect_counters;
for(auto& counter : gpu_counters)
{
rocprofiler_counter_info_v0_t version;
ROCPROFILER_CALL(
rocprofiler_query_counter_info(
counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast<void*>(&version)),
"Could not query info for counter");
if(counters_to_collect.count(std::string(version.name)) > 0)
{
std::clog << "Counter: " << counter.handle << " " << version.name << "\n";
collect_counters.push_back(counter);
}
}
rocprofiler_profile_config_id_t profile;
ROCPROFILER_CALL(rocprofiler_create_profile_config(
agent, collect_counters.data(), collect_counters.size(), &profile),
"Could not construct profile cfg");
return profile;
}
std::atomic<bool>&
exit_toggle()
{
static std::atomic<bool> exit_toggle = false;
return exit_toggle;
}
int
tool_init(rocprofiler_client_finalize_t, void* user_data)
{
ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "context creation failed");
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
4096,
2048,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
buffered_callback,
user_data,
&get_buffer()),
"buffer creation failed");
std::vector<rocprofiler_agent_v0_t> agents;
rocprofiler_query_available_agents_cb_t iterate_cb = [](rocprofiler_agent_version_t agents_ver,
const void** agents_arr,
size_t num_agents,
void* udata) {
if(agents_ver != ROCPROFILER_AGENT_INFO_VERSION_0)
throw std::runtime_error{"unexpected rocprofiler agent version"};
auto* agents_v = static_cast<std::vector<rocprofiler_agent_v0_t>*>(udata);
for(size_t i = 0; i < num_agents; ++i)
agents_v->emplace_back(*static_cast<const rocprofiler_agent_v0_t*>(agents_arr[i]));
return ROCPROFILER_STATUS_SUCCESS;
};
ROCPROFILER_CALL(
rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0,
iterate_cb,
sizeof(rocprofiler_agent_t),
const_cast<void*>(static_cast<const void*>(&agents))),
"query available agents");
auto client_thread = rocprofiler_callback_thread_t{};
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread),
"failure creating callback thread");
ROCPROFILER_CALL(rocprofiler_assign_callback_thread(get_buffer(), client_thread),
"failed to assign thread for buffer");
// Construct the profiles in advance for each agent that is a GPU
rocprofiler_agent_id_t agent_id;
for(const auto& agent : agents)
{
if(agent.type == ROCPROFILER_AGENT_TYPE_GPU)
{
get_profile_cache().emplace(agent.id.handle, build_profile_for_agent(agent.id));
agent_id = agent.id;
break;
}
}
if(agents.empty())
{
std::cerr << "No agents found" << std::endl;
return 1;
}
ROCPROFILER_CALL(rocprofiler_configure_agent_profile_counting_service(
get_client_ctx(), get_buffer(), agent_id, set_profile, nullptr),
"Could not setup buffered service");
std::thread([=]() {
size_t count = 1;
rocprofiler_start_context(get_client_ctx());
while(exit_toggle().load() == false)
{
rocprofiler_sample_agent_profile_counting_service(
get_client_ctx(), {.value = count}, ROCPROFILER_COUNTER_FLAG_NONE);
count++;
std::this_thread::sleep_for(std::chrono::milliseconds(50));
}
exit_toggle().store(false);
}).detach();
// no errors
return 0;
}
void
tool_fini(void* user_data)
{
exit_toggle().store(true);
while(exit_toggle().load() == true)
{};
std::clog << "In tool fini\n";
rocprofiler_stop_context(get_client_ctx());
ROCPROFILER_CALL(rocprofiler_flush_buffer(get_buffer()), "buffer flush");
auto* output_stream = static_cast<std::ostream*>(user_data);
*output_stream << std::flush;
if(output_stream != &std::cout && output_stream != &std::cerr) delete output_stream;
}
} // namespace
extern "C" rocprofiler_tool_configure_result_t*
rocprofiler_configure(uint32_t version,
const char* runtime_version,
uint32_t priority,
rocprofiler_client_id_t* id)
{
// set the client name
id->name = "CounterClientSample";
// compute major/minor/patch version info
uint32_t major = version / 10000;
uint32_t minor = (version % 10000) / 100;
uint32_t patch = version % 100;
// generate info string
auto info = std::stringstream{};
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
<< minor << "." << patch << " (" << runtime_version << ")";
std::clog << info.str() << std::endl;
std::ostream* output_stream = nullptr;
std::string filename = "counter_collection.log";
if(auto* outfile = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE"); outfile) filename = outfile;
if(filename == "stdout")
output_stream = &std::cout;
else if(filename == "stderr")
output_stream = &std::cerr;
else
output_stream = new std::ofstream{filename};
// create configure data
static auto cfg =
rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
&tool_init,
&tool_fini,
static_cast<void*>(output_stream)};
// return pointer to configure data
return &cfg;
}
+75 -13
Просмотреть файл
@@ -25,36 +25,98 @@
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/fwd.h>
ROCPROFILER_EXTERN_C_INIT
/**
* @defgroup AGENT_PROFILE_COUNTING_SERVICE Agent Profile Counting Service
* @brief needs brief description
*
* @{
*/
ROCPROFILER_EXTERN_C_INIT
/**
* @brief Configure Profile Counting Service for agent.
* @brief Callback to set the profile config for the agent.
*
* @param [in] context_id context id
* @param [in] buffer_id id of the buffer to use for the counting service
* @param [in] config_id Profile config detailing the counters to collect for this kernel
* @return ::rocprofiler_status_t
* @retval ::ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND Returned if the config_id is not found
* @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID Returned if the ctx is not valid
* @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED Returned if attempting to make this
* call outside of context startup.
* @retval ::ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH Agent of profile does not match agent of the
* context.
* @retval ::ROCPROFILER_STATUS_SUCCESS Returned if succesfully configured
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer_id,
rocprofiler_profile_config_id_t config_id);
typedef rocprofiler_status_t (*rocprofiler_agent_set_profile_callback_t)(
rocprofiler_context_id_t context_id,
rocprofiler_profile_config_id_t config_id);
/**
* @brief Sample Profile Counting Service for agent.
* @brief Configure Profile Counting Service for agent. Called when the context is started.
* Selects the counters to be used for agent profiling.
*
* @param [out] data // It is always a size of one
* @return ::rocprofiler_status_t
* @param [in] context_id context id
* @param [in] agent_id agent id
* @param [in] set_config Function to call to set the profile config (see
* rocprofiler_agent_set_profile_callback_t)
* @param [in] user_data Data supplied to rocprofiler_configure_agent_profile_counting_service
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_sample_agent_profile_counting_service(rocprofiler_context_id_t context_id);
typedef void (*rocprofiler_agent_profile_callback_t)(
rocprofiler_context_id_t context_id,
rocprofiler_agent_id_t agent_id,
rocprofiler_agent_set_profile_callback_t set_config,
void* user_data);
/**
* @brief Configure Profile Counting Service for agent. There may only be one agent profile
* configured per context and can be only one active context that is profiling a single agent
* at a time. Multiple agent contexts can be started at the same time if they are profiling
* different agents.
*
* @param [in] context_id context id
* @param [in] buffer_id id of the buffer to use for the counting service. When
* rocprofiler_sample_agent_profile_counting_service is called, counter data will be written
* to this buffer.
* @param [in] agent_id agent to configure profiling on.
* @param [in] cb Callback called when the context is started for the tool to specify what
* counters to collect (rocprofiler_profile_config_id_t).
* @param [in] user_data User supplied data to be passed to the callback cb when triggered
* @param [in] config_id Profile config detailing the counters to collect for this kernel
* @return ::rocprofiler_status_t
* @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID Returned if the context does not exist.
* @retval ::ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND Returned if the buffer is not found.
* @retval ::ROCPROFILER_STATUS_SUCCESS Returned if succesfully configured
*/
rocprofiler_status_t
rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer_id,
rocprofiler_agent_id_t agent_id,
rocprofiler_agent_profile_callback_t cb,
void* user_data)
ROCPROFILER_NONNULL(4) ROCPROFILER_API;
/**
* @brief Trigger a read of the counter data for the agent profile. The counter data will be
* written to the buffer specified in rocprofiler_configure_agent_profile_counting_service.
* The data in rocprofiler_user_data_t will be written to the buffer along with the counter data.
* flags can be used to specify if this call should be performed asynchronously (default is
* synchronous).
*
* @param [in] context_id context id
* @param [in] user_data User supplied data, included in records outputted to buffer.
* @param [in] flags Flags to specify how the counter data should be collected (defaults to sync).
* @return ::rocprofiler_status_t
* @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID Returned if the context does not exist or
* the context is not configured for agent profiling.
* @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_ERROR Returned if another operation is in progress (
* start/stop ctx or another read).
* @retval ::ROCPROFILER_STATUS_ERROR Returned if HSA has not been initialized yet.
* @retval ::ROCPROFILER_STATUS_SUCCESS Returned if read request was successful.
*/
rocprofiler_status_t
rocprofiler_sample_agent_profile_counting_service(rocprofiler_context_id_t context_id,
rocprofiler_user_data_t user_data,
rocprofiler_counter_flag_t flags) ROCPROFILER_API;
/** @} */
+16
Просмотреть файл
@@ -97,6 +97,11 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND, ///< Could not find the counter profile
ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT, ///< Cannot enable both agent and dispatch
///< counting in the same context.
ROCPROFILER_STATUS_INTERNAL_NO_AGENT_CONTEXT, ///< No agent context found, may not be an error
ROCPROFILER_STATUS_ERROR_SAMPLE_RATE_EXCEEDED, ///< Sample rate exceeded
ROCPROFILER_STATUS_ERROR_NO_PROFILE_QUEUE, ///< Profile queue creation failed
ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS, ///< No hardware counters were specified
ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH, ///< Agent mismatch between profile and context.
ROCPROFILER_STATUS_LAST,
} rocprofiler_status_t;
@@ -385,6 +390,16 @@ typedef enum
/// ::rocprofiler_profile_counting_dispatch_record_t
} rocprofiler_counter_record_kind_t;
/**
* @brief Enumeration of flags that can be used with some counter api calls
*/
typedef enum
{
ROCPROFILER_COUNTER_FLAG_NONE = 0,
ROCPROFILER_COUNTER_FLAG_ASYNC, ///< Do not wait for completion before returning.
ROCPROFILER_COUNTER_FLAG_LAST,
} rocprofiler_counter_flag_t;
//--------------------------------------------------------------------------------------//
//
// ALIASES
@@ -651,6 +666,7 @@ typedef struct
rocprofiler_counter_instance_id_t id; ///< counter identifier
double counter_value; ///< counter value
rocprofiler_dispatch_id_t dispatch_id;
rocprofiler_user_data_t user_data;
/// @var dispatch_id
/// @brief A value greater than zero indicates that this counter record is associated with a
+3 -3
Просмотреть файл
@@ -656,8 +656,8 @@ get_agent_topology()
auto&
get_agent_caches()
{
static auto _v = std::vector<hsa::AgentCache>{};
return _v;
static auto*& _v = common::static_object<std::vector<hsa::AgentCache>>::construct();
return *_v;
}
struct agent_pair
@@ -920,7 +920,7 @@ construct_agent_cache(::HsaApiTable* table)
try
{
get_agent_caches().emplace_back(
rocp_agent, hsa_agent, itr.first, _nearest_cpu, *table->amd_ext_);
rocp_agent, hsa_agent, itr.first, _nearest_cpu, *table->amd_ext_, *table->core_);
} catch(std::runtime_error& err)
{
if(rocp_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
+19 -4
Просмотреть файл
@@ -22,14 +22,29 @@
#include <rocprofiler-sdk/rocprofiler.h>
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/counters/agent_profiling.hpp"
#include "lib/rocprofiler-sdk/counters/core.hpp"
#include "rocprofiler-sdk/fwd.h"
extern "C" {
rocprofiler_status_t ROCPROFILER_API
rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer_id,
rocprofiler_profile_config_id_t config_id)
rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer_id,
rocprofiler_agent_id_t agent_id,
rocprofiler_agent_profile_callback_t cb,
void* user_data)
{
return rocprofiler::counters::configure_agent_collection(context_id, buffer_id, config_id);
return rocprofiler::counters::configure_agent_collection(
context_id, buffer_id, agent_id, cb, user_data);
}
rocprofiler_status_t ROCPROFILER_API
rocprofiler_sample_agent_profile_counting_service(rocprofiler_context_id_t context_id,
rocprofiler_user_data_t user_data,
rocprofiler_counter_flag_t flags)
{
return rocprofiler::counters::read_agent_ctx(
rocprofiler::context::get_registered_context(context_id), user_data, flags);
}
}
+50
Просмотреть файл
@@ -26,6 +26,7 @@
#include <rocprofiler-sdk/fwd.h>
#include "lib/common/logging.hpp"
#include "lib/common/synchronized.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/counters/id_decode.hpp"
@@ -111,5 +112,54 @@ get_dim_info(rocprofiler_agent_id_t agent,
return ROCPROFILER_STATUS_SUCCESS;
}
rocprofiler_status_t
set_profiler_active_on_queue(const AmdExtTable& api,
hsa_amd_memory_pool_t pool,
hsa_agent_t hsa_agent,
const rocprofiler_profile_pkt_cb& packet_submit)
{
// Inject packet to enable profiling of other process queues on this queue
hsa_ven_amd_aqlprofile_profile_t profile{};
profile.agent = hsa_agent;
// Query for cmd buffer size
hsa_ven_amd_aqlprofile_info_type_t info_type =
(hsa_ven_amd_aqlprofile_info_type_t)((int) HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD);
if(hsa_ven_amd_aqlprofile_get_info(&profile, info_type, nullptr) != HSA_STATUS_SUCCESS)
{
return ROCPROFILER_STATUS_ERROR;
}
// Allocate cmd buffer
const size_t mask = 0x1000 - 1;
auto size = (profile.command_buffer.size + mask) & ~mask;
if(api.hsa_amd_memory_pool_allocate_fn(pool, size, 0, &profile.command_buffer.ptr) !=
HSA_STATUS_SUCCESS)
{
ROCP_WARNING << "Failed to allocate memory to enable profile command on agent, some "
"counters will be unavailable";
return ROCPROFILER_STATUS_ERROR;
}
if(api.hsa_amd_agents_allow_access_fn(1, &hsa_agent, nullptr, profile.command_buffer.ptr) !=
HSA_STATUS_SUCCESS)
{
ROCP_WARNING << "Agent cannot access memory, some counters will be unavailable";
return ROCPROFILER_STATUS_ERROR;
}
hsa::rocprofiler_packet packet{};
if(hsa_ven_amd_aqlprofile_get_info(&profile, info_type, &packet.ext_amd_aql_pm4) !=
HSA_STATUS_SUCCESS)
{
ROCP_WARNING << "Failed to generate command packet, some counters will be unavailable";
return ROCPROFILER_STATUS_ERROR;
}
packet_submit(packet);
api.hsa_amd_memory_pool_free_fn(profile.command_buffer.ptr);
return ROCPROFILER_STATUS_SUCCESS;
}
} // namespace aql
} // namespace rocprofiler
+8
Просмотреть файл
@@ -32,11 +32,13 @@
#include "lib/rocprofiler-sdk/agent.hpp"
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
#include "lib/rocprofiler-sdk/hsa/rocprofiler_packet.hpp"
namespace rocprofiler
{
namespace aql
{
using rocprofiler_profile_pkt_cb = std::function<void(hsa::rocprofiler_packet)>;
// Query HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID from aqlprofile
hsa_ven_amd_aqlprofile_id_query_t
get_query_info(rocprofiler_agent_id_t agent, const counters::Metric& metric);
@@ -58,5 +60,11 @@ set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id,
hsa_agent_t agent,
hsa_ven_amd_aqlprofile_event_t event,
uint32_t sample_id);
rocprofiler_status_t
set_profiler_active_on_queue(const AmdExtTable& api,
hsa_amd_memory_pool_t pool,
hsa_agent_t hsa_agent,
const rocprofiler_profile_pkt_cb& packet_submit);
} // namespace aql
} // namespace rocprofiler
+13 -3
Просмотреть файл
@@ -317,10 +317,13 @@ start_context(rocprofiler_context_id_t context_id)
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_STARTED;
}
auto status = ROCPROFILER_STATUS_SUCCESS;
if(cfg->counter_collection) rocprofiler::counters::start_context(cfg);
if(cfg->thread_trace) cfg->thread_trace->start_context();
if(cfg->agent_counter_collection) status = rocprofiler::counters::start_agent_ctx(cfg);
return ROCPROFILER_STATUS_SUCCESS;
return status;
}
rocprofiler_status_t
@@ -344,9 +347,16 @@ stop_context(rocprofiler_context_id_t idx)
if(nactive > 0) get_num_active_contexts().fetch_sub(1, std::memory_order_release);
if(_expected->counter_collection)
{
rocprofiler::counters::stop_context(const_cast<context*>(_expected));
else if(_expected->thread_trace)
_expected->thread_trace->stop_context();
}
if(_expected->thread_trace) _expected->thread_trace->stop_context();
if(_expected->agent_counter_collection)
{
rocprofiler::counters::stop_agent_ctx(const_cast<context*>(_expected));
}
return ROCPROFILER_STATUS_SUCCESS;
}
}
+21 -3
Просмотреть файл
@@ -30,9 +30,11 @@
#include "lib/common/synchronized.hpp"
#include "lib/rocprofiler-sdk/context/correlation_id.hpp"
#include "lib/rocprofiler-sdk/context/domain.hpp"
#include "lib/rocprofiler-sdk/counters/agent_profiling.hpp"
#include "lib/rocprofiler-sdk/counters/core.hpp"
#include "lib/rocprofiler-sdk/external_correlation.hpp"
#include "lib/rocprofiler-sdk/thread_trace/att_core.hpp"
#include "rocprofiler-sdk/agent.h"
#include <array>
#include <cstddef>
@@ -86,12 +88,28 @@ struct dispatch_counter_collection_service
struct agent_counter_collection_service
{
rocprofiler::counters::agent_callback_data callback_data;
// Signal to manage the startup of the context. Allows us to ensure that
// the AQL packet we inject with start_context() completes before returning
hsa_signal_t start_signal;
std::shared_ptr<rocprofiler::counters::profile_config> profile;
rocprofiler_buffer_id_t buffer;
rocprofiler_agent_id_t agent_id;
rocprofiler_agent_profile_callback_t cb;
void* user_data;
// A flag to state wether or not the counter set is currently enabled. This is primarily
// to protect against multithreaded calls to enable a context (and enabling already enabled
// counters).
std::atomic<bool> enabled{false};
// to protect against multithreaded calls to enable a context (and enabling already
// enabled counters).
enum class state
{
DISABLED,
LOCKED,
ENABLED
};
std::atomic<state> status{state::DISABLED};
common::Synchronized<bool> enabled{false};
};
struct context
+6 -4
Просмотреть файл
@@ -1,7 +1,9 @@
set(ROCPROFILER_LIB_COUNTERS_SOURCES metrics.cpp dimensions.cpp evaluate_ast.cpp core.cpp
id_decode.cpp dispatch_handlers.cpp controller.cpp)
set(ROCPROFILER_LIB_COUNTERS_HEADERS metrics.hpp dimensions.hpp evaluate_ast.hpp core.hpp
id_decode.hpp dispatch_handlers.hpp controller.hpp)
set(ROCPROFILER_LIB_COUNTERS_SOURCES
metrics.cpp dimensions.cpp evaluate_ast.cpp core.cpp id_decode.cpp
dispatch_handlers.cpp controller.cpp agent_profiling.cpp)
set(ROCPROFILER_LIB_COUNTERS_HEADERS
metrics.hpp dimensions.hpp evaluate_ast.hpp core.hpp id_decode.hpp
dispatch_handlers.hpp controller.hpp agent_profiling.hpp)
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_COUNTERS_SOURCES}
${ROCPROFILER_LIB_COUNTERS_HEADERS})
+492
Просмотреть файл
@@ -0,0 +1,492 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "lib/rocprofiler-sdk/counters/agent_profiling.hpp"
#include <cstdint>
#include "lib/common/logging.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/counters/controller.hpp"
#include "lib/rocprofiler-sdk/counters/core.hpp"
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp"
#include "lib/rocprofiler-sdk/hsa/rocprofiler_packet.hpp"
#include "rocprofiler-sdk/fwd.h"
namespace rocprofiler
{
namespace counters
{
std::atomic<bool>&
hsa_inited()
{
static std::atomic<bool> inited{false};
return inited;
}
uint64_t
submitPacket(const CoreApiTable& table, hsa_queue_t* queue, const void* packet)
{
const uint32_t pkt_size = 0x40;
// advance command queue
const uint64_t write_idx = table.hsa_queue_add_write_index_scacq_screl_fn(queue, 1);
while((write_idx - table.hsa_queue_load_read_index_relaxed_fn(queue)) >= queue->size)
{
sched_yield();
}
const uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
// NOLINTBEGIN(performance-no-int-to-ptr)
uint32_t* queue_slot =
reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) + (slot_idx * pkt_size));
// NOLINTEND(performance-no-int-to-ptr)
const uint32_t* slot_data = reinterpret_cast<const uint32_t*>(packet);
// Copy buffered commands into the queue slot.
// Overwrite the AQL invalid header (first dword) last.
// This prevents the slot from being read until it's fully written.
memcpy(&queue_slot[1], &slot_data[1], pkt_size - sizeof(uint32_t));
std::atomic<uint32_t>* header_atomic_ptr =
reinterpret_cast<std::atomic<uint32_t>*>(&queue_slot[0]);
header_atomic_ptr->store(slot_data[0], std::memory_order_release);
// ringdoor bell
table.hsa_signal_store_relaxed_fn(queue->doorbell_signal, write_idx);
return write_idx;
}
namespace
{
uint16_t
header_pkt(hsa_packet_type_t type)
{
uint16_t header = type << HSA_PACKET_HEADER_TYPE;
header |= 1 << HSA_PACKET_HEADER_BARRIER;
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
return header;
}
std::unique_ptr<hsa::CounterAQLPacket>
construct_aql_pkt(const hsa::AgentCache& agent, std::shared_ptr<profile_config>& profile)
{
if(counter_callback_info::setup_profile_config(agent, profile) != ROCPROFILER_STATUS_SUCCESS)
{
return nullptr;
}
auto pkts = profile->pkt_generator->construct_packet(
CHECK_NOTNULL(hsa::get_queue_controller())->get_ext_table());
pkts->start.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
pkts->start.completion_signal.handle = 0;
pkts->stop.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
pkts->read.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
return pkts;
}
bool
agent_async_handler(hsa_signal_value_t /*signal_v*/, void* data)
{
const auto* ctx = context::get_registered_context({.handle = (uint64_t) data});
if(!ctx) return false;
const auto& agent_ctx = *ctx->agent_counter_collection;
const auto& prof_config = agent_ctx.profile;
// Decode the AQL packet data
auto decoded_pkt =
EvaluateAST::read_pkt(prof_config->pkt_generator.get(), *agent_ctx.callback_data.packet);
EvaluateAST::read_special_counters(
*prof_config->agent, prof_config->required_special_counters, decoded_pkt);
auto* buf = buffer::get_buffer(agent_ctx.buffer.handle);
if(!buf)
{
ROCP_FATAL << fmt::format("Buffer {} destroyed before record was written",
agent_ctx.buffer.handle);
return false;
}
// Write out the AQL data to the buffer
for(auto& ast : prof_config->asts)
{
std::vector<std::unique_ptr<std::vector<rocprofiler_record_counter_t>>> cache;
auto* ret = CHECK_NOTNULL(ast.evaluate(decoded_pkt, cache));
ast.set_out_id(*ret);
for(auto& val : *ret)
{
val.user_data = agent_ctx.callback_data.user_data;
buf->emplace(
ROCPROFILER_BUFFER_CATEGORY_COUNTERS, ROCPROFILER_COUNTER_RECORD_VALUE, val);
}
}
// reset the signal to allow another sample to start
agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn(agent_ctx.callback_data.completion,
1);
return true;
}
void
init_callback_data(const rocprofiler::context::context& ctx, const hsa::AgentCache& agent)
{
// Note: Calls to this function should be protected by agent_ctx.status being set
// to LOCKED by the caller. This is to prevent multiple threads from trying to
// setup the same agent at the same time.
auto& agent_ctx = *ctx.agent_counter_collection;
if(agent_ctx.callback_data.packet) return;
agent_ctx.callback_data.packet = construct_aql_pkt(agent, agent_ctx.profile);
if(agent_ctx.callback_data.completion.handle != 0) return;
// If we do not have a completion handle, this is our first time profiling this agent.
// Setup our shared data structures.
agent_ctx.callback_data.queue = agent.profile_queue();
agent_ctx.callback_data.table = CHECK_NOTNULL(hsa::get_queue_controller())->get_core_table();
// Tri-state signal
// 1: allow next sample to start
// 0: sample in progress
// -1: sample complete
CHECK_EQ(agent_ctx.callback_data.table.hsa_signal_create_fn(
1, 0, nullptr, &agent_ctx.callback_data.completion),
HSA_STATUS_SUCCESS);
// Signal to manage the startup of the context. Allows us to ensure that
// the AQL packet we inject with start_context() completes before returning
CHECK_EQ(
agent_ctx.callback_data.table.hsa_signal_create_fn(1, 0, nullptr, &agent_ctx.start_signal),
HSA_STATUS_SUCCESS);
// Setup callback
// NOLINTBEGIN(performance-no-int-to-ptr)
CHECK_EQ(CHECK_NOTNULL(hsa::get_queue_controller())
->get_ext_table()
.hsa_amd_signal_async_handler_fn(agent_ctx.callback_data.completion,
HSA_SIGNAL_CONDITION_LT,
0,
agent_async_handler,
(void*) ctx.context_idx),
HSA_STATUS_SUCCESS);
// NOLINTEND(performance-no-int-to-ptr)
// Set state of the queue to allow profiling (may not be needed since AQL
// may do this in the future).
aql::set_profiler_active_on_queue(
CHECK_NOTNULL(hsa::get_queue_controller())->get_ext_table(),
agent.cpu_pool(),
agent.get_hsa_agent(),
[&](hsa::rocprofiler_packet pkt) {
pkt.ext_amd_aql_pm4.completion_signal = agent_ctx.callback_data.completion;
submitPacket(
agent_ctx.callback_data.table, agent_ctx.callback_data.queue, (void*) &pkt);
if(agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn(
agent_ctx.callback_data.completion,
HSA_SIGNAL_CONDITION_EQ,
0,
20000000,
HSA_WAIT_STATE_ACTIVE) != 0)
{
ROCP_FATAL << "Could not set agent to be profiled";
}
agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn(
agent_ctx.callback_data.completion, 1);
});
}
} // namespace
rocprofiler_status_t
read_agent_ctx(const context::context* ctx,
rocprofiler_user_data_t user_data,
rocprofiler_counter_flag_t flags)
{
if(!ctx->agent_counter_collection || !ctx->agent_counter_collection->profile)
{
if(!ctx->agent_counter_collection)
{
ROCP_ERROR << fmt::format("Context {} has no agent counter collection",
ctx->context_idx);
}
else
{
ROCP_ERROR << fmt::format("Context {} has no profile", ctx->context_idx);
}
return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID;
}
auto& agent_ctx = *ctx->agent_counter_collection;
if(hsa_inited().load() == false)
{
return ROCPROFILER_STATUS_ERROR;
}
const auto* agent = agent::get_agent_cache(agent_ctx.profile->agent);
// If the agent no longer exists or we don't have a profile queue, reading is an error
if(!agent || !agent->profile_queue()) return ROCPROFILER_STATUS_ERROR;
// Set the state to LOCKED to prevent other calls to start/stop/read.
auto expected = rocprofiler::context::agent_counter_collection_service::state::ENABLED;
if(!agent_ctx.status.compare_exchange_strong(
expected, rocprofiler::context::agent_counter_collection_service::state::LOCKED))
{
return ROCPROFILER_STATUS_ERROR_CONTEXT_ERROR;
}
CHECK(agent_ctx.callback_data.packet);
ROCP_TRACE << fmt::format("Agent Infor for Running Counter: Name = {}, XCC = {}, "
"SE = {}, CU = {}, SIMD = {}",
agent->get_rocp_agent()->name,
agent->get_rocp_agent()->num_xcc,
agent->get_rocp_agent()->num_shader_banks,
agent->get_rocp_agent()->cu_count,
agent->get_rocp_agent()->simd_arrays_per_engine);
// Remove when AQL is updated to not require stop to be called first
submitPacket(agent_ctx.callback_data.table,
agent->profile_queue(),
(void*) &agent_ctx.callback_data.packet->stop);
// Submit the read packet to the queue
submitPacket(agent_ctx.callback_data.table,
agent->profile_queue(),
(void*) &agent_ctx.callback_data.packet->read);
// Submit a barrier packet. This is needed to flush hardware caches. Without this
// the read packet may not have the correct data.
rocprofiler::hsa::rocprofiler_packet barrier{};
barrier.barrier_and.header = header_pkt(HSA_PACKET_TYPE_BARRIER_AND);
barrier.barrier_and.completion_signal = agent_ctx.callback_data.completion;
agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn(agent_ctx.callback_data.completion,
0);
agent_ctx.callback_data.user_data = user_data;
submitPacket(
agent_ctx.callback_data.table, agent->profile_queue(), (void*) &barrier.barrier_and);
// Wait for the barrier/read packet to complete
if(flags != ROCPROFILER_COUNTER_FLAG_ASYNC)
{
// Wait for any inprogress samples to complete before returning
agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn(agent_ctx.callback_data.completion,
HSA_SIGNAL_CONDITION_EQ,
1,
UINT64_MAX,
HSA_WAIT_STATE_ACTIVE);
}
agent_ctx.status.exchange(
rocprofiler::context::agent_counter_collection_service::state::ENABLED);
return ROCPROFILER_STATUS_SUCCESS;
}
rocprofiler_status_t
start_agent_ctx(const context::context* ctx)
{
auto status = ROCPROFILER_STATUS_SUCCESS;
if(!ctx->agent_counter_collection)
{
return status;
}
auto& agent_ctx = *ctx->agent_counter_collection;
if(hsa_inited().load() == false)
{
return ROCPROFILER_STATUS_SUCCESS;
}
const auto* agent = agent::get_agent_cache(agent::get_agent(agent_ctx.agent_id));
// Note: we may not have an AgentCache yet if HSA is not started.
// This is not an error and the startup will happen on hsa registration.
if(!agent) return ROCPROFILER_STATUS_ERROR;
// But if we have an agent cache, we need a profile queue.
if(!agent->profile_queue())
{
return ROCPROFILER_STATUS_ERROR_NO_PROFILE_QUEUE;
}
// Set the state to LOCKED to prevent other calls to start/stop/read.
auto expected = rocprofiler::context::agent_counter_collection_service::state::DISABLED;
if(!agent_ctx.status.compare_exchange_strong(
expected, rocprofiler::context::agent_counter_collection_service::state::LOCKED))
{
return ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED;
}
// Ask the tool what profile we should use for this agent
agent_ctx.cb(
{.handle = ctx->context_idx},
agent_ctx.agent_id,
[](rocprofiler_context_id_t context_id,
rocprofiler_profile_config_id_t config_id) -> rocprofiler_status_t {
auto* cb_ctx = rocprofiler::context::get_mutable_registered_context(context_id);
if(!cb_ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID;
auto config = rocprofiler::counters::get_profile_config(config_id);
if(!config) return ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND;
if(!cb_ctx->agent_counter_collection)
{
return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID;
}
// Only allow profiles to be set in the locked state
if(cb_ctx->agent_counter_collection->status.load() !=
rocprofiler::context::agent_counter_collection_service::state::LOCKED)
{
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
}
// Only update the profile if it has changed. Avoids packet regeneration.
if(!cb_ctx->agent_counter_collection->profile ||
cb_ctx->agent_counter_collection->profile->id.handle != config_id.handle)
{
if(cb_ctx->agent_counter_collection->agent_id.handle != config->agent->id.handle)
{
return ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH;
}
cb_ctx->agent_counter_collection->profile = config;
cb_ctx->agent_counter_collection->callback_data.packet.reset();
}
return ROCPROFILER_STATUS_SUCCESS;
},
agent_ctx.user_data);
// User didn't set a profile
if(!agent_ctx.profile)
{
agent_ctx.status.exchange(
rocprofiler::context::agent_counter_collection_service::state::DISABLED);
return status;
}
// Generate necessary structures in the context (packet gen, etc) to process
// this packet.
init_callback_data(*ctx, *agent);
// No hardware counters were actually asked for (i.e. all constants)
if(agent_ctx.profile->reqired_hw_counters.empty())
{
agent_ctx.status.exchange(
rocprofiler::context::agent_counter_collection_service::state::DISABLED);
return ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS;
}
// We could not generate AQL packets for some reason
if(!agent_ctx.callback_data.packet)
{
agent_ctx.status.exchange(
rocprofiler::context::agent_counter_collection_service::state::DISABLED);
return ROCPROFILER_STATUS_ERROR_AST_GENERATION_FAILED;
}
agent_ctx.callback_data.packet->start.completion_signal = agent_ctx.start_signal;
agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn(agent_ctx.start_signal, 1);
submitPacket(agent_ctx.callback_data.table,
agent->profile_queue(),
(void*) &agent_ctx.callback_data.packet->start);
// Wait for startup to finish before continuing
agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn(
agent_ctx.start_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
agent_ctx.status.exchange(
rocprofiler::context::agent_counter_collection_service::state::ENABLED);
return ROCPROFILER_STATUS_SUCCESS;
}
rocprofiler_status_t
stop_agent_ctx(const context::context* ctx)
{
auto status = ROCPROFILER_STATUS_SUCCESS;
if(!ctx->agent_counter_collection || !ctx->agent_counter_collection->profile)
{
return status;
}
auto& agent_ctx = *ctx->agent_counter_collection;
if(hsa_inited().load() == false)
{
return ROCPROFILER_STATUS_SUCCESS;
}
const auto* agent = agent::get_agent_cache(agent_ctx.profile->agent);
if(!agent || !agent->profile_queue()) return status;
auto expected = rocprofiler::context::agent_counter_collection_service::state::ENABLED;
if(!agent_ctx.status.compare_exchange_strong(
expected, rocprofiler::context::agent_counter_collection_service::state::LOCKED))
{
// Status is already stopped or being enabled elsewhere.
return ROCPROFILER_STATUS_SUCCESS;
}
CHECK(agent_ctx.callback_data.packet);
submitPacket(agent_ctx.callback_data.table,
agent->profile_queue(),
(void*) &agent_ctx.callback_data.packet->stop);
// Wait for any inprogress samples to complete before returning
agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn(agent_ctx.callback_data.completion,
HSA_SIGNAL_CONDITION_EQ,
1,
UINT64_MAX,
HSA_WAIT_STATE_ACTIVE);
return status;
}
// If we have ctx's that were started before HSA was initialized, we need to
// actually start those contexts now.
rocprofiler_status_t
agent_profile_hsa_registration()
{
hsa_inited().store(true);
for(auto& ctx : context::get_active_contexts())
{
if(!ctx->agent_counter_collection) continue;
start_agent_ctx(ctx);
}
return ROCPROFILER_STATUS_SUCCESS;
}
agent_callback_data::~agent_callback_data()
{
if(completion.handle != 0) table.hsa_signal_destroy_fn(completion);
}
} // namespace counters
} // namespace rocprofiler
+86
Просмотреть файл
@@ -0,0 +1,86 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hsa.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include "lib/rocprofiler-sdk/hsa/aql_packet.hpp"
namespace rocprofiler
{
namespace context
{
struct context;
}
namespace counters
{
struct agent_callback_data
{
CoreApiTable table;
hsa_queue_t* queue{nullptr};
std::unique_ptr<hsa::CounterAQLPacket> packet;
// Tri-state signal used to know what the current state of processing
// a sample is. The states are:
// 1: allow next sample to start (i.e. no in progress work)
// 0: sample in progress
// -1: sample complete (i.e. signal for caller that sample is ready)
hsa_signal_t completion{.handle = 0};
rocprofiler_user_data_t user_data{.value = 0};
~agent_callback_data();
};
// If we have contexts that are started before HSA init. This
// function will start those contexts. Should only be called
// as part of the HSA init process in rocprofiler.
rocprofiler_status_t
agent_profile_hsa_registration();
// Send the AQL start packet to a queue on the agent to start
// collecting counter data. This function is synchronous and will
// return when the agent has started collecting data (or if there
// is an error).
rocprofiler_status_t
start_agent_ctx(const context::context* ctx);
// Send the AQL end packet to a queue on the agent to stop
// collecting counter data. This function is synchronous and will
// return when the agent has stopped collecting data (or if there
// is an error).
rocprofiler_status_t
stop_agent_ctx(const context::context* ctx);
// Read the counter data from the agent. This function is synchronous
// if flags is not set to ASYNC. If ASYNC is set, the function will
// return before data has been written to the buffer. Overlapping
// read calls are not allowed in ASYNC mode and will result in
// this call waiting for the previous sample to complete.
rocprofiler_status_t
read_agent_ctx(const context::context* ctx,
rocprofiler_user_data_t user_data,
rocprofiler_counter_flag_t flags);
} // namespace counters
} // namespace rocprofiler
+10 -9
Просмотреть файл
@@ -64,9 +64,11 @@ CounterController::destroy_profile(uint64_t id)
}
rocprofiler_status_t
CounterController::configure_agent_collection(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer,
rocprofiler_profile_config_id_t config_id)
CounterController::configure_agent_collection(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer_id,
rocprofiler_agent_id_t agent_id,
rocprofiler_agent_profile_callback_t cb,
void* user_data)
{
auto* ctx_p = rocprofiler::context::get_mutable_registered_context(context_id);
if(!ctx_p) return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID;
@@ -74,13 +76,10 @@ CounterController::configure_agent_collection(rocprofiler_context_id_t co
auto& ctx = *ctx_p;
if(ctx.counter_collection) return ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT;
if(!rocprofiler::buffer::get_buffer(buffer.handle))
if(!rocprofiler::buffer::get_buffer(buffer_id.handle))
{
return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
}
auto cfg = get_profile_cfg(config_id);
if(!cfg) return ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND;
if(!ctx.agent_counter_collection)
{
@@ -88,8 +87,10 @@ CounterController::configure_agent_collection(rocprofiler_context_id_t co
std::make_unique<rocprofiler::context::agent_counter_collection_service>();
}
ctx.agent_counter_collection->profile = cfg;
ctx.agent_counter_collection->buffer = buffer;
ctx.agent_counter_collection->agent_id = agent_id;
ctx.agent_counter_collection->cb = cb;
ctx.agent_counter_collection->user_data = user_data;
ctx.agent_counter_collection->buffer = buffer_id;
return ROCPROFILER_STATUS_SUCCESS;
}
+5 -3
Просмотреть файл
@@ -87,9 +87,11 @@ public:
void* record_callback_args);
std::shared_ptr<profile_config> get_profile_cfg(rocprofiler_profile_config_id_t id);
rocprofiler_status_t configure_agent_collection(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer,
rocprofiler_profile_config_id_t config_id);
static rocprofiler_status_t configure_agent_collection(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer_id,
rocprofiler_agent_id_t agent_id,
rocprofiler_agent_profile_callback_t cb,
void* user_data);
private:
rocprofiler::common::Synchronized<std::unordered_map<uint64_t, std::shared_ptr<profile_config>>>
+7 -4
Просмотреть файл
@@ -207,11 +207,14 @@ stop_context(const context::context* ctx)
}
rocprofiler_status_t
configure_agent_collection(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer_id,
rocprofiler_profile_config_id_t config_id)
configure_agent_collection(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer_id,
rocprofiler_agent_id_t agent_id,
rocprofiler_agent_profile_callback_t cb,
void* user_data)
{
return get_controller().configure_agent_collection(context_id, buffer_id, config_id);
return get_controller().configure_agent_collection(
context_id, buffer_id, agent_id, cb, user_data);
}
rocprofiler_status_t
+5 -3
Просмотреть файл
@@ -94,9 +94,11 @@ configure_callback_dispatch(rocprofiler_context_id_t con
void* record_callback_args);
rocprofiler_status_t
configure_agent_collection(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer_id,
rocprofiler_profile_config_id_t config_id);
configure_agent_collection(rocprofiler_context_id_t context_id,
rocprofiler_buffer_id_t buffer_id,
rocprofiler_agent_id_t agent_id,
rocprofiler_agent_profile_callback_t cb,
void* user_data);
void
start_context(const context::context*);
+20 -10
Просмотреть файл
@@ -57,7 +57,8 @@ get_reduce_op_type_from_string(const std::string& op)
std::vector<rocprofiler_record_counter_t>*
perform_reduction(ReduceOperation reduce_op, std::vector<rocprofiler_record_counter_t>* input_array)
{
rocprofiler_record_counter_t result{.id = 0, .counter_value = 0, .dispatch_id = 0};
rocprofiler_record_counter_t result{
.id = 0, .counter_value = 0, .dispatch_id = 0, .user_data = {.value = 0}};
if(input_array->empty()) return input_array;
switch(reduce_op)
{
@@ -83,12 +84,14 @@ perform_reduction(ReduceOperation reduce_op, std::vector<rocprofiler_record_coun
result = std::accumulate(
input_array->begin(),
input_array->end(),
rocprofiler_record_counter_t{.id = 0, .counter_value = 0, .dispatch_id = 0},
rocprofiler_record_counter_t{
.id = 0, .counter_value = 0, .dispatch_id = 0, .user_data = {.value = 0}},
[](auto& a, auto& b) {
return rocprofiler_record_counter_t{
.id = a.id,
.counter_value = a.counter_value + b.counter_value,
.dispatch_id = a.dispatch_id};
.dispatch_id = a.dispatch_id,
.user_data = {.value = 0}};
});
break;
}
@@ -97,12 +100,14 @@ perform_reduction(ReduceOperation reduce_op, std::vector<rocprofiler_record_coun
result = std::accumulate(
input_array->begin(),
input_array->end(),
rocprofiler_record_counter_t{.id = 0, .counter_value = 0, .dispatch_id = 0},
rocprofiler_record_counter_t{
.id = 0, .counter_value = 0, .dispatch_id = 0, .user_data = {.value = 0}},
[](auto& a, auto& b) {
return rocprofiler_record_counter_t{
.id = a.id,
.counter_value = a.counter_value + b.counter_value,
.dispatch_id = a.dispatch_id};
.dispatch_id = a.dispatch_id,
.user_data = {.value = 0}};
});
result.counter_value /= input_array->size();
break;
@@ -219,7 +224,8 @@ EvaluateAST::EvaluateAST(rocprofiler_counter_id_t out_id,
_raw_value = std::get<int64_t>(ast.value);
_static_value.push_back({.id = 0,
.counter_value = static_cast<double>(std::get<int64_t>(ast.value)),
.dispatch_id = 0});
.dispatch_id = 0,
.user_data = {.value = 0}});
}
for(const auto& nextAst : ast.counter_set)
@@ -596,28 +602,32 @@ EvaluateAST::evaluate(
return rocprofiler_record_counter_t{
.id = a.id,
.counter_value = a.counter_value + b.counter_value,
.dispatch_id = a.dispatch_id};
.dispatch_id = a.dispatch_id,
.user_data = {.value = 0}};
});
case SUBTRACTION_NODE:
return perform_op([](auto& a, auto& b) {
return rocprofiler_record_counter_t{
.id = a.id,
.counter_value = a.counter_value - b.counter_value,
.dispatch_id = a.dispatch_id};
.dispatch_id = a.dispatch_id,
.user_data = {.value = 0}};
});
case MULTIPLY_NODE:
return perform_op([](auto& a, auto& b) {
return rocprofiler_record_counter_t{
.id = a.id,
.counter_value = a.counter_value * b.counter_value,
.dispatch_id = a.dispatch_id};
.dispatch_id = a.dispatch_id,
.user_data = {.value = 0}};
});
case DIVIDE_NODE:
return perform_op([](auto& a, auto& b) {
return rocprofiler_record_counter_t{
.id = a.id,
.counter_value = (b.counter_value == 0 ? 0 : a.counter_value / b.counter_value),
.dispatch_id = a.dispatch_id};
.dispatch_id = a.dispatch_id,
.user_data = {.value = 0}};
});
case REFERENCE_NODE:
{
+45 -3
Просмотреть файл
@@ -2,12 +2,54 @@ rocprofiler_deactivate_clang_tidy()
include(GoogleTest)
set(ROCPROFILER_LIB_COUNTER_TEST_SOURCES metrics_test.cpp evaluate_ast_test.cpp
dimension.cpp init_order.cpp core.cpp)
find_program(
amdclangpp_EXECUTABLE REQUIRED
NAMES amdclang++
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATH_SUFFIXES bin llvm/bin NO_CACHE)
function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
separate_arguments(
CLANG_ARG_LIST
UNIX_COMMAND
"-O2 -x cl -Xclang -finclude-default-header -cl-denorms-are-zero -cl-std=CL2.0 -Wl,--build-id=sha1
-target amdgcn-amd-amdhsa -mcpu=${TARGET_ID} -o ${OUTPUT_FILE} ${INPUT_FILE}")
add_custom_command(
OUTPUT ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
COMMAND ${amdclangpp_EXECUTABLE} ${CLANG_ARG_LIST}
COMMAND
${CMAKE_COMMAND} -E copy
${PROJECT_BINARY_DIR}/source/lib/rocprofiler-sdk/counters/tests/${OUTPUT_FILE}
${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE}
OUTPUT ${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE}
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE}
${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE}
COMMENT "Building ${OUTPUT_FILE}...")
set(HSACO_TARGET_LIST
${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
PARENT_SCOPE)
endfunction(generate_hsaco)
foreach(target_id ${GPU_TARGETS})
# generate kernel bitcodes
generate_hsaco(${target_id} ${CMAKE_CURRENT_SOURCE_DIR}/agent_kernels.cl
${target_id}_agent_kernels.hsaco)
endforeach()
add_custom_target(agent_hasco_targets DEPENDS ${HSACO_TARGET_LIST})
set(ROCPROFILER_LIB_COUNTER_TEST_SOURCES
metrics_test.cpp evaluate_ast_test.cpp dimension.cpp init_order.cpp core.cpp
code_object_loader.cpp agent_profiling.cpp)
set(ROCPROFILER_LIB_COUNTER_TEST_HEADERS code_object_loader.hpp agent_profiling.hpp)
add_executable(counter-test)
target_sources(counter-test PRIVATE ${ROCPROFILER_LIB_COUNTER_TEST_SOURCES})
target_sources(counter-test PRIVATE ${ROCPROFILER_LIB_COUNTER_TEST_SOURCES}
${ROCPROFILER_LIB_COUNTER_TEST_HEADERS})
add_dependencies(counter-test agent_hasco_targets)
target_link_libraries(
counter-test
+31
Просмотреть файл
@@ -0,0 +1,31 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
__kernel void
null_kernel()
{
int gid = (int)get_global_id(0);
int i = 0;
while (gid >= 0) {
gid = (int)get_global_id(0);
i++;
if (i > 1000) break;
}
}
+483
Просмотреть файл
@@ -0,0 +1,483 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "lib/rocprofiler-sdk/counters/tests/agent_profiling.hpp"
#include "lib/common/logging.hpp"
#include "lib/rocprofiler-sdk/counters/tests/code_object_loader.hpp"
#include "lib/common/filesystem.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/agent.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/counters/core.hpp"
#include "lib/rocprofiler-sdk/counters/dispatch_handlers.hpp"
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
#include "lib/rocprofiler-sdk/hsa/queue.hpp"
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include "rocprofiler-sdk/buffer.h"
#include <rocprofiler-sdk/dispatch_profile.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/registration.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <fmt/core.h>
#include <gtest/gtest.h>
#include <hsa/hsa.h>
#include <hsa/hsa_api_trace.h>
#include <hsa/hsa_ext_amd.h>
#include <cstddef>
#include <cstdint>
#include <sstream>
#include <tuple>
using namespace rocprofiler::counters::testing;
using namespace rocprofiler::counters;
using namespace rocprofiler;
#define ROCPROFILER_CALL(result, msg) \
{ \
rocprofiler_status_t CHECKSTATUS = result; \
if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \
{ \
std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \
std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \
<< " failed with error code " << CHECKSTATUS << ": " << status_msg \
<< std::endl; \
std::stringstream errmsg{}; \
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \
<< status_msg << ")"; \
ASSERT_EQ(CHECKSTATUS, ROCPROFILER_STATUS_SUCCESS) << errmsg.str(); \
} \
}
namespace
{
AmdExtTable&
get_ext_table()
{
static auto _v = []() {
auto val = AmdExtTable{};
val.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info;
val.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools;
val.hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate;
val.hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free;
val.hsa_amd_agent_memory_pool_get_info_fn = hsa_amd_agent_memory_pool_get_info;
val.hsa_amd_agents_allow_access_fn = hsa_amd_agents_allow_access;
val.hsa_amd_queue_set_priority_fn = hsa_amd_queue_set_priority;
val.hsa_amd_signal_async_handler_fn = hsa_amd_signal_async_handler;
return val;
}();
return _v;
}
CoreApiTable&
get_api_table()
{
static auto _v = []() {
auto val = CoreApiTable{};
val.hsa_iterate_agents_fn = hsa_iterate_agents;
val.hsa_agent_get_info_fn = hsa_agent_get_info;
val.hsa_queue_create_fn = hsa_queue_create;
val.hsa_queue_destroy_fn = hsa_queue_destroy;
val.hsa_signal_create_fn = hsa_signal_create;
val.hsa_signal_destroy_fn = hsa_signal_destroy;
val.hsa_signal_store_screlease_fn = hsa_signal_store_screlease;
val.hsa_signal_load_scacquire_fn = hsa_signal_load_scacquire;
val.hsa_signal_add_relaxed_fn = hsa_signal_add_relaxed;
val.hsa_signal_subtract_relaxed_fn = hsa_signal_subtract_relaxed;
val.hsa_signal_wait_relaxed_fn = hsa_signal_wait_relaxed;
val.hsa_queue_create_fn = hsa_queue_create;
val.hsa_queue_add_write_index_scacq_screl_fn = hsa_queue_add_write_index_scacq_screl;
val.hsa_queue_load_read_index_relaxed_fn = hsa_queue_load_read_index_relaxed;
val.hsa_signal_store_relaxed_fn = hsa_signal_store_relaxed;
val.hsa_signal_load_relaxed_fn = hsa_signal_load_relaxed;
return val;
}();
return _v;
}
auto
findDeviceMetrics(const hsa::AgentCache& agent, const std::unordered_set<std::string>& metrics)
{
std::vector<counters::Metric> ret;
auto all_counters = counters::getMetricMap();
ROCP_ERROR << "Looking up counters for " << std::string(agent.name());
auto gfx_metrics = common::get_val(*all_counters, std::string(agent.name()));
if(!gfx_metrics)
{
ROCP_ERROR << "No counters found for " << std::string(agent.name());
return ret;
}
for(auto& counter : *gfx_metrics)
{
if(metrics.count(counter.name()) > 0 || metrics.empty())
{
ret.push_back(counter);
}
}
ROCP_ERROR << "No counters found for " << std::string(agent.name());
return ret;
}
void
test_init()
{
HsaApiTable table;
table.amd_ext_ = &get_ext_table();
table.core_ = &get_api_table();
agent::construct_agent_cache(&table);
ASSERT_TRUE(hsa::get_queue_controller() != nullptr);
hsa::get_queue_controller()->init(get_api_table(), get_ext_table());
}
std::vector<rocprofiler_record_counter_t>&
global_recs()
{
static std::vector<rocprofiler_record_counter_t> recs;
return recs;
}
void
check_output_created(rocprofiler_context_id_t,
rocprofiler_buffer_id_t,
rocprofiler_record_header_t** headers,
size_t num_headers,
void* user_data,
uint64_t)
{
// verifies that we got a record containing some data for a counter
// does NOT validate the counters values.
if(user_data == nullptr) return;
uint64_t found_value = 0;
for(size_t i = 0; i < num_headers; ++i)
{
auto* header = headers[i];
if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS &&
header->kind == ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER)
{}
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS &&
header->kind == ROCPROFILER_COUNTER_RECORD_VALUE)
{
// Print the returned counter data.
auto* record = static_cast<rocprofiler_record_counter_t*>(header->payload);
if(found_value != 0 && found_value != record->user_data.value)
{
ROCP_FATAL << "Have records with different user data values we didn't expect";
break;
}
found_value = record->user_data.value;
// ROCP_ERROR << fmt::format("Found counter value: {}", record->counter_value);
global_recs().push_back(*record);
}
}
auto* signal = reinterpret_cast<hsa_signal_t*>(user_data);
hsa_signal_store_relaxed(*signal, static_cast<int64_t>(found_value));
}
struct test_kernels
{
CodeObject obj;
test_kernels(const rocprofiler::hsa::AgentCache& agent)
{
CHECK(agent.get_rocp_agent());
// Getting hasco Path
std::string hasco_file_path =
std::string(agent.get_rocp_agent()->name) + std::string("_agent_kernels.hsaco");
search_hasco(common::filesystem::current_path(), hasco_file_path);
CHECK_EQ(load_code_object(hasco_file_path, agent.get_hsa_agent(), obj), HSA_STATUS_SUCCESS);
}
uint64_t load_kernel(const rocprofiler::hsa::AgentCache& agent,
const std::string& kernel_name) const
{
Kernel kern;
CHECK_EQ(get_kernel(obj, kernel_name, agent.get_hsa_agent(), kern), HSA_STATUS_SUCCESS);
return kern.handle;
}
};
uint16_t
packet_header(hsa_packet_type_t type)
{
uint16_t header = type << HSA_PACKET_HEADER_TYPE;
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
return header;
}
rocprofiler::hsa::rocprofiler_packet
gen_kernel_pkt(uint64_t obj)
{
rocprofiler::hsa::rocprofiler_packet packet{};
memset(((uint8_t*) &packet.kernel_dispatch) + 4, 0, sizeof(hsa_kernel_dispatch_packet_t) - 4);
packet.kernel_dispatch.setup = 1;
packet.kernel_dispatch.header = packet_header(HSA_PACKET_TYPE_KERNEL_DISPATCH);
packet.kernel_dispatch.workgroup_size_x = 1;
packet.kernel_dispatch.workgroup_size_y = 1;
packet.kernel_dispatch.workgroup_size_z = 1;
packet.kernel_dispatch.grid_size_x = 1;
packet.kernel_dispatch.grid_size_y = 1;
packet.kernel_dispatch.grid_size_z = 1;
packet.kernel_dispatch.kernel_object = obj;
packet.kernel_dispatch.kernarg_address = nullptr;
packet.kernel_dispatch.completion_signal.handle = 0;
ROCP_ERROR << fmt::format("{:x}", packet.kernel_dispatch.kernel_object);
return packet;
}
uint64_t
submitPacket(hsa_queue_t* queue, const void* packet)
{
const uint32_t slot_size_b = 0x40;
// advance command queue
const uint64_t write_idx = hsa_queue_add_write_index_scacq_screl(queue, 1);
while((write_idx - hsa_queue_load_read_index_relaxed(queue)) >= queue->size)
{
sched_yield();
}
const uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
// NOLINTBEGIN(performance-no-int-to-ptr)
uint32_t* queue_slot =
reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b));
const uint32_t* slot_data = reinterpret_cast<const uint32_t*>(packet);
// Copy buffered commands into the queue slot.
// Overwrite the AQL invalid header (first dword) last.
// This prevents the slot from being read until it's fully written.
memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t));
std::atomic<uint32_t>* header_atomic_ptr =
reinterpret_cast<std::atomic<uint32_t>*>(&queue_slot[0]);
// NOLINTEND(performance-no-int-to-ptr)
header_atomic_ptr->store(slot_data[0], std::memory_order_release);
// ringdoor bell
hsa_signal_store_relaxed(queue->doorbell_signal, write_idx);
return write_idx;
}
} // namespace
class agent_profile_test : public ::testing::Test
{
protected:
agent_profile_test() {}
static void test_run(rocprofiler_counter_flag_t flags = ROCPROFILER_COUNTER_FLAG_NONE,
const std::unordered_set<std::string>& test_metrics = {},
size_t delay = 1)
{
hsa_init();
registration::init_logging();
registration::set_init_status(-1);
context::push_client(1);
test_init();
counters::agent_profile_hsa_registration();
std::string kernel_name = "null_kernel";
ASSERT_TRUE(hsa::get_queue_controller() != nullptr);
ASSERT_GT(hsa::get_queue_controller()->get_supported_agents().size(), 0);
for(const auto& [_, agent] : hsa::get_queue_controller()->get_supported_agents())
{
auto metrics = findDeviceMetrics(agent, test_metrics);
ASSERT_FALSE(metrics.empty());
ASSERT_TRUE(agent.get_rocp_agent());
test_kernels kernel_loader(agent);
auto kernel_handle = kernel_loader.load_kernel(agent, kernel_name);
auto kernel_pkt = gen_kernel_pkt(kernel_handle);
hsa_queue_t* queue;
CHECK_EQ(hsa_queue_create(agent.get_hsa_agent(),
64,
HSA_QUEUE_TYPE_SINGLE,
nullptr,
nullptr,
UINT32_MAX,
UINT32_MAX,
&queue),
HSA_STATUS_SUCCESS);
rocprofiler::hsa::rocprofiler_packet barrier{};
hsa_signal_t completion_signal;
hsa_signal_create(1, 0, nullptr, &completion_signal);
barrier.barrier_and.header = packet_header(HSA_PACKET_TYPE_BARRIER_AND);
barrier.barrier_and.completion_signal = completion_signal;
hsa_signal_t found_data;
hsa_signal_create(0, 0, nullptr, &found_data);
size_t track_metric = 0;
for(auto& metric : metrics)
{
// global_recs().clear();
track_metric++;
ROCP_ERROR << "Testing metric " << metric.name();
rocprofiler_context_id_t ctx = {.handle = 0};
ROCPROFILER_CALL(rocprofiler_create_context(&ctx), "context creation failed");
rocprofiler_buffer_id_t opt_buff_id = {.handle = 0};
ROCPROFILER_CALL(rocprofiler_create_buffer(ctx,
500 * sizeof(size_t),
500 * sizeof(size_t),
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
check_output_created,
&found_data,
&opt_buff_id),
"Could not create buffer");
/**
* Check profile construction
*/
rocprofiler_profile_config_id_t cfg_id = {};
rocprofiler_counter_id_t id = {.handle = metric.id()};
ROCPROFILER_CALL(
rocprofiler_create_profile_config(agent.get_rocp_agent()->id, &id, 1, &cfg_id),
"Unable to create profile");
ROCPROFILER_CALL(
rocprofiler_configure_agent_profile_counting_service(
ctx,
opt_buff_id,
agent.get_rocp_agent()->id,
[](rocprofiler_context_id_t context_id,
rocprofiler_agent_id_t,
rocprofiler_agent_set_profile_callback_t set_config,
void* user_data) {
CHECK(user_data);
if(auto status = set_config(
context_id,
*static_cast<rocprofiler_profile_config_id_t*>(user_data));
status != ROCPROFILER_STATUS_SUCCESS)
{
ROCP_FATAL << rocprofiler_get_status_string(status);
}
},
static_cast<void*>(&cfg_id)),
"Could not create agent collection");
// This queue will only be present if a context exists when AgentCache is
// construction This is a workaround for the test environment since we create
// contexts after AgentCache constructed.
agent::get_agent_cache(agent.get_rocp_agent())
->init_agent_profile_queue(get_api_table(), get_ext_table());
hsa_signal_store_screlease(completion_signal, 1);
hsa_signal_store_screlease(found_data, 0);
auto status = rocprofiler_start_context(ctx);
if(status == ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS)
{
ROCP_ERROR << fmt::format("No hardware counters for {}, skipping",
metric.name());
continue;
}
else if(status != ROCPROFILER_STATUS_SUCCESS)
{
ROCP_FATAL << "Failed to start context - "
<< rocprofiler_get_status_string(status);
}
ROCPROFILER_CALL(status, "Could not start context");
// Execute kernel
submitPacket(queue, &kernel_pkt);
submitPacket(queue, &kernel_pkt);
submitPacket(queue, &kernel_pkt);
submitPacket(queue, &kernel_pkt);
submitPacket(queue, &kernel_pkt);
submitPacket(queue, &barrier);
usleep(delay);
// Wait for completion
hsa_signal_wait_relaxed(completion_signal,
HSA_SIGNAL_CONDITION_EQ,
0,
UINT64_MAX,
HSA_WAIT_STATE_BLOCKED);
// Sample the counting service.
ROCPROFILER_CALL(rocprofiler_sample_agent_profile_counting_service(
ctx, {.value = track_metric}, flags),
"Could not sample");
ROCPROFILER_CALL(rocprofiler_stop_context(ctx), "Could not stop context");
rocprofiler_flush_buffer(opt_buff_id);
if(hsa_signal_wait_relaxed(found_data,
HSA_SIGNAL_CONDITION_EQ,
track_metric,
20000000,
HSA_WAIT_STATE_BLOCKED) !=
static_cast<int64_t>(track_metric))
{
ROCP_FATAL << "Failed to get data for " << metric.name();
}
}
hsa_signal_destroy(completion_signal);
hsa_signal_destroy(found_data);
hsa_queue_destroy(queue);
}
registration::set_init_status(1);
context::pop_client(1);
}
};
TEST_F(agent_profile_test, sync_counters) { test_run(); }
TEST_F(agent_profile_test, async_counters) { test_run(ROCPROFILER_COUNTER_FLAG_ASYNC); }
TEST_F(agent_profile_test, sync_grbm_verify)
{
test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"GRBM_COUNT"}, 50000);
ROCP_ERROR << global_recs().size();
for(const auto& val : global_recs())
{
rocprofiler_counter_id_t id;
rocprofiler_query_record_counter_id(val.id, &id);
rocprofiler_counter_info_v0_t info;
rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info);
ROCP_ERROR << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
EXPECT_GT(val.counter_value, 0.0);
}
}
TEST_F(agent_profile_test, sync_gpu_util_verify)
{
test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"GPU_UTIL"}, 50000);
ROCP_ERROR << global_recs().size();
for(const auto& val : global_recs())
{
rocprofiler_counter_id_t id;
rocprofiler_query_record_counter_id(val.id, &id);
rocprofiler_counter_info_v0_t info;
rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info);
ROCP_ERROR << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
EXPECT_GT(val.counter_value, 0.0);
}
}
+23
Просмотреть файл
@@ -0,0 +1,23 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
+106
Просмотреть файл
@@ -0,0 +1,106 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "lib/rocprofiler-sdk/counters/tests/code_object_loader.hpp"
#include <fcntl.h>
#include "lib/common/logging.hpp"
namespace rocprofiler
{
namespace counters
{
namespace testing
{
hsa_status_t
load_code_object(const std::string& filename, hsa_agent_t agent, CodeObject& code_object)
{
hsa_status_t err;
code_object.file = open(filename.c_str(), O_RDONLY);
if(code_object.file == -1)
{
ROCP_FATAL << "Could not load code object " << filename;
}
err = hsa_code_object_reader_create_from_file(code_object.file, &code_object.code_obj_rdr);
if(err != HSA_STATUS_SUCCESS) return err;
err = hsa_executable_create_alt(HSA_PROFILE_FULL,
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
nullptr,
&code_object.executable);
if(err != HSA_STATUS_SUCCESS) return err;
err = hsa_executable_load_agent_code_object(
code_object.executable, agent, code_object.code_obj_rdr, nullptr, nullptr);
if(err != HSA_STATUS_SUCCESS) return err;
err = hsa_executable_freeze(code_object.executable, nullptr);
return err;
}
hsa_status_t
get_kernel(const CodeObject& code_object,
const std::string& kernel,
hsa_agent_t agent,
Kernel& kern)
{
hsa_executable_symbol_t symbol;
hsa_status_t err =
hsa_executable_get_symbol_by_name(code_object.executable, kernel.c_str(), &agent, &symbol);
if(err != HSA_STATUS_SUCCESS)
{
err = hsa_executable_get_symbol_by_name(
code_object.executable, (kernel + ".kd").c_str(), &agent, &symbol);
if(err != HSA_STATUS_SUCCESS)
{
return err;
}
}
ROCP_INFO << "kernel-name: " << kernel.c_str() << "\n";
err = hsa_executable_symbol_get_info(
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kern.handle);
return err;
}
void
search_hasco(const common::filesystem::path& directory, std::string& filename)
{
for(const auto& entry : common::filesystem::directory_iterator(directory))
{
if(common::filesystem::is_regular_file(entry))
{
if(entry.path().filename() == filename)
{
filename = entry.path();
}
}
else if(common::filesystem::is_directory(entry))
{
search_hasco(entry, filename); // Recursive call for subdirectories
}
}
}
} // namespace testing
} // namespace counters
} // namespace rocprofiler
+65
Просмотреть файл
@@ -0,0 +1,65 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include <string>
#include "lib/common/filesystem.hpp"
namespace rocprofiler
{
namespace counters
{
namespace testing
{
struct CodeObject
{
hsa_file_t file = 0;
hsa_code_object_reader_t code_obj_rdr = {};
hsa_executable_t executable = {};
};
hsa_status_t
load_code_object(const std::string& filename, hsa_agent_t agent, CodeObject& code_object);
struct Kernel
{
uint64_t handle = 0;
uint32_t scratch = 0;
uint32_t group = 0;
uint32_t kernarg_size = 0;
uint32_t kernarg_align = 0;
};
hsa_status_t
get_kernel(const CodeObject& code_object,
const std::string& kernel,
hsa_agent_t agent,
Kernel& kern);
void
search_hasco(const common::filesystem::path& directory, std::string& filename);
} // namespace testing
} // namespace counters
} // namespace rocprofiler
-51
Просмотреть файл
@@ -747,54 +747,3 @@ TEST(core, public_api_iterate_agents)
EXPECT_TRUE(from_api.empty());
}
}
TEST(core, init_agent_collection)
{
ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
registration::init_logging();
registration::set_init_status(-1);
context::push_client(1);
ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "context creation failed");
auto agents = hsa::get_queue_controller()->get_supported_agents();
rocprofiler_buffer_id_t opt_buff_id = {.handle = 0};
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
500 * sizeof(size_t),
500 * sizeof(size_t),
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
null_buffered_callback,
nullptr,
&opt_buff_id),
"Could not create buffer");
for(const auto& [_, agent] : agents)
{
auto metrics = findDeviceMetrics(agent, {});
ASSERT_FALSE(metrics.empty());
ASSERT_TRUE(agent.get_rocp_agent());
for(auto& metric : metrics)
{
expected_dispatch expected = {};
rocprofiler_counter_id_t id = {.handle = metric.id()};
ROCPROFILER_CALL(
rocprofiler_create_profile_config(agent.get_rocp_agent()->id, &id, 1, &expected.id),
"Unable to create profile");
ROCPROFILER_CALL(rocprofiler_configure_agent_profile_counting_service(
get_client_ctx(), opt_buff_id, expected.id),
"Could not create agent collection");
{
auto cfg = counters::get_profile_config(expected.id);
auto* ctx = rocprofiler::context::get_mutable_registered_context(get_client_ctx());
ASSERT_TRUE(ctx);
ASSERT_TRUE(ctx->agent_counter_collection);
EXPECT_EQ(ctx->agent_counter_collection->profile, cfg);
EXPECT_EQ(ctx->agent_counter_collection->buffer.handle, opt_buff_id.handle);
}
ROCPROFILER_CALL(rocprofiler_destroy_profile_config(expected.id),
"Could not delete profile id");
}
}
rocprofiler_destroy_buffer(opt_buff_id);
registration::set_init_status(1);
context::pop_client(1);
}
+63 -38
Просмотреть файл
@@ -646,17 +646,22 @@ TEST(evaluate_ast, counter_reduction_sum)
sum_vec(base_counter_data["KRUEGER"])),
2},
{"KRAMER",
plus_vec(times_vec(
std::vector<rocprofiler_record_counter_t>{
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}},
sum_vec(base_counter_data["VOORHEES"])),
sum_vec(base_counter_data["KRUEGER"])),
plus_vec(
times_vec(std::vector<rocprofiler_record_counter_t>{{.id = 0,
.counter_value = 5.0,
.dispatch_id = 0,
.user_data = {.value = 0}}},
sum_vec(base_counter_data["VOORHEES"])),
sum_vec(base_counter_data["KRUEGER"])),
2},
{"GHOSTFACE",
plus_vec(sum_vec(base_counter_data["VOORHEES"]),
divide_vec(sum_vec(base_counter_data["KRUEGER"]),
std::vector<rocprofiler_record_counter_t>{
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}})),
divide_vec(
sum_vec(base_counter_data["KRUEGER"]),
std::vector<rocprofiler_record_counter_t>{{.id = 0,
.counter_value = 5.0,
.dispatch_id = 0,
.user_data = {.value = 0}}})),
2},
};
@@ -727,17 +732,22 @@ TEST(evaluate_ast, counter_reduction_min)
min_vec(base_counter_data["KRUEGER"])),
2},
{"KRAMER",
plus_vec(times_vec(
std::vector<rocprofiler_record_counter_t>{
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}},
min_vec(base_counter_data["VOORHEES"])),
min_vec(base_counter_data["KRUEGER"])),
plus_vec(
times_vec(std::vector<rocprofiler_record_counter_t>{{.id = 0,
.counter_value = 5.0,
.dispatch_id = 0,
.user_data = {.value = 0}}},
min_vec(base_counter_data["VOORHEES"])),
min_vec(base_counter_data["KRUEGER"])),
2},
{"GHOSTFACE",
plus_vec(min_vec(base_counter_data["VOORHEES"]),
divide_vec(min_vec(base_counter_data["KRUEGER"]),
std::vector<rocprofiler_record_counter_t>{
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}})),
divide_vec(
min_vec(base_counter_data["KRUEGER"]),
std::vector<rocprofiler_record_counter_t>{{.id = 0,
.counter_value = 5.0,
.dispatch_id = 0,
.user_data = {.value = 0}}})),
2},
};
@@ -808,17 +818,22 @@ TEST(evaluate_ast, counter_reduction_max)
max_vec(base_counter_data["KRUEGER"])),
2},
{"KRAMER",
plus_vec(times_vec(
std::vector<rocprofiler_record_counter_t>{
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}},
max_vec(base_counter_data["VOORHEES"])),
max_vec(base_counter_data["KRUEGER"])),
plus_vec(
times_vec(std::vector<rocprofiler_record_counter_t>{{.id = 0,
.counter_value = 5.0,
.dispatch_id = 0,
.user_data = {.value = 0}}},
max_vec(base_counter_data["VOORHEES"])),
max_vec(base_counter_data["KRUEGER"])),
2},
{"GHOSTFACE",
plus_vec(max_vec(base_counter_data["VOORHEES"]),
divide_vec(max_vec(base_counter_data["KRUEGER"]),
std::vector<rocprofiler_record_counter_t>{
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}})),
divide_vec(
max_vec(base_counter_data["KRUEGER"]),
std::vector<rocprofiler_record_counter_t>{{.id = 0,
.counter_value = 5.0,
.dispatch_id = 0,
.user_data = {.value = 0}}})),
2},
};
@@ -891,17 +906,22 @@ TEST(evaluate_ast, counter_reduction_avg)
avg_vec(base_counter_data["KRUEGER"])),
2},
{"KRAMER",
plus_vec(times_vec(
std::vector<rocprofiler_record_counter_t>{
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}},
avg_vec(base_counter_data["VOORHEES"])),
avg_vec(base_counter_data["KRUEGER"])),
plus_vec(
times_vec(std::vector<rocprofiler_record_counter_t>{{.id = 0,
.counter_value = 5.0,
.dispatch_id = 0,
.user_data = {.value = 0}}},
avg_vec(base_counter_data["VOORHEES"])),
avg_vec(base_counter_data["KRUEGER"])),
2},
{"GHOSTFACE",
plus_vec(avg_vec(base_counter_data["VOORHEES"]),
divide_vec(avg_vec(base_counter_data["KRUEGER"]),
std::vector<rocprofiler_record_counter_t>{
{.id = 0, .counter_value = 5.0, .dispatch_id = 0}})),
divide_vec(
avg_vec(base_counter_data["KRUEGER"]),
std::vector<rocprofiler_record_counter_t>{{.id = 0,
.counter_value = 5.0,
.dispatch_id = 0,
.user_data = {.value = 0}}})),
2},
};
@@ -962,18 +982,23 @@ TEST(evaluate_ast, evaluate_mixed_counters)
{"BATES",
times_vec(
std::vector<rocprofiler_record_counter_t>{
{.id = 0, .counter_value = 32, .dispatch_id = 0}},
{.id = 0, .counter_value = 32, .dispatch_id = 0, .user_data = {.value = 0}}},
sum_vec(base_counter_data["VOORHEES"])),
2},
{"KRAMER",
times_vec(sum_vec(base_counter_data["KRUEGER"]),
std::vector<rocprofiler_record_counter_t>{
{.id = 0, .counter_value = 8.0 / 5.0, .dispatch_id = 0}}),
std::vector<rocprofiler_record_counter_t>{{.id = 0,
.counter_value = 8.0 / 5.0,
.dispatch_id = 0,
.user_data = {.value = 0}}}),
3},
{"TORRANCE",
times_vec(sum_vec(base_counter_data["KRUEGER"]),
std::vector<rocprofiler_record_counter_t>{
{.id = 0, .counter_value = 104.0 / (156.0 * 8.0), .dispatch_id = 0}}),
times_vec(
sum_vec(base_counter_data["KRUEGER"]),
std::vector<rocprofiler_record_counter_t>{{.id = 0,
.counter_value = 104.0 / (156.0 * 8.0),
.dispatch_id = 0,
.user_data = {.value = 0}}}),
4},
};
+6 -1
Просмотреть файл
@@ -166,7 +166,12 @@ static const std::unordered_map<std::string, std::vector<std::vector<std::string
static const std::unordered_map<std::string, std::vector<std::vector<std::string>>> derived_gfx908 =
{{"gfx908",
{{"SQ_WAVES_sum",
{{"GPU_UTIL",
"",
"",
"100*GRBM_GUI_ACTIVE/GRBM_COUNT",
"Percentage of the time that GUI is active"},
{"SQ_WAVES_sum",
"",
"",
"reduce(SQ_WAVES,sum)",
+5 -1
Просмотреть файл
@@ -48,7 +48,7 @@
<metric name="TCC_EA_WRREQ_sum" expr=reduce(TCC_EA_WRREQ,sum) descr="Number of transactions (either 32-byte or 64-byte) going over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
<metric name="TCC_EA_WRREQ_64B_sum" expr=reduce(TCC_EA_WRREQ_64B,sum) descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the TC_EA_wrreq interface. Sum over TCC instances."></metric>
<metric name="TCC_WRREQ_STALL_max" expr=reduce(TCC_EA_WRREQ_STALL,max) descr="Number of cycles a write request was stalled. Max over TCC instances."></metric>
<metric name="GPU_UTIL" expr=100*GRBM_GUI_ACTIVE/GRBM_COUNT descr="Percentage of the time that GUI is active"></metric>
<metric name="TCP_TCP_TA_DATA_STALL_CYCLES_sum" expr=reduce(TCP_TCP_TA_DATA_STALL_CYCLES,sum) descr="Total number of TCP stalls TA data interface."></metric>
<metric name="TCP_TCP_TA_DATA_STALL_CYCLES_max" expr=reduce(TCP_TCP_TA_DATA_STALL_CYCLES,max) descr="Maximum number of TCP stalls TA data interface."></metric>
@@ -425,6 +425,7 @@
<metric name="TOTAL_16_OPS" expr=(SQ_INSTS_VALU_FMA_F16*2+SQ_INSTS_VALU_ADD_F16+SQ_INSTS_VALU_MUL_F16+SQ_INSTS_VALU_TRANS_F16)*64+((SQ_INSTS_VALU_MFMA_MOPS_F16+SQ_INSTS_VALU_MFMA_MOPS_BF16)*512) descr="The number of 16 bits OPS executed"></metric>
<metric name="TOTAL_32_OPS" expr=(SQ_INSTS_VALU_FMA_F32*2+SQ_INSTS_VALU_INT32+SQ_INSTS_VALU_ADD_F32+SQ_INSTS_VALU_MUL_F32+SQ_INSTS_VALU_TRANS_F32)*64+(SQ_INSTS_VALU_MFMA_MOPS_F32*512) descr="The number of 32 bits OPS executed"></metric>
<metric name="TOTAL_64_OPS" expr=(SQ_INSTS_VALU_FMA_F64*2+SQ_INSTS_VALU_INT64+SQ_INSTS_VALU_ADD_F64+SQ_INSTS_VALU_MUL_F64)*64+(SQ_INSTS_VALU_MFMA_MOPS_F64*512) descr="The number of 64 bits OPS executed"></metric>
<metric name="GPU_UTIL" expr=100*GRBM_GUI_ACTIVE/GRBM_COUNT descr="Percentage of the time that GUI is active"></metric>
</gfx940>
<gfx10>
@@ -469,6 +470,9 @@
<gfx1031 base="gfx10">
</gfx1031>
<gfx1010 base="gfx10">
</gfx1010>
<gfx1032 base="gfx10">
</gfx1032>
+41 -1
Просмотреть файл
@@ -26,6 +26,8 @@
#include <fmt/core.h>
#include <stdexcept>
#include "lib/rocprofiler-sdk/context/context.hpp"
namespace
{
// This function checks to see if the provided
@@ -123,11 +125,48 @@ namespace rocprofiler
{
namespace hsa
{
void
AgentCache::init_agent_profile_queue(const CoreApiTable& api, const AmdExtTable& ext) const
{
static std::mutex m_mutex;
std::lock_guard<std::mutex> lock(m_mutex);
using context = rocprofiler::context::context;
const auto* agent_ctx = []() -> const context* {
for(auto& ctx : rocprofiler::context::get_registered_contexts())
{
if(ctx->agent_counter_collection) return ctx;
}
return nullptr;
}();
if(!agent_ctx || m_profile_queue) return;
ROCP_ERROR << "Creating Profile Queue";
// create the queue and set it to high_priority
CHECK(api.hsa_queue_create_fn) << "no hsa_queue_create_fn in api table";
auto status = api.hsa_queue_create_fn(get_hsa_agent(),
64,
HSA_QUEUE_TYPE_SINGLE,
nullptr,
nullptr,
UINT32_MAX,
UINT32_MAX,
&m_profile_queue);
if(status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
{
throw std::runtime_error("Error: Queue is not initialized");
}
CHECK(ext.hsa_amd_queue_set_priority_fn) << "no hsa_amd_queue_set_priority_fn in api table";
ext.hsa_amd_queue_set_priority_fn(m_profile_queue, HSA_AMD_QUEUE_PRIORITY_HIGH);
}
AgentCache::AgentCache(const rocprofiler_agent_t* rocp_agent,
hsa_agent_t hsa_agent,
size_t index,
hsa_agent_t nearest_cpu,
const AmdExtTable& ext_table)
const AmdExtTable& ext_table,
const CoreApiTable& api)
: m_rocp_agent{rocp_agent}
, m_index{index}
, m_hsa_agent{hsa_agent}
@@ -139,6 +178,7 @@ AgentCache::AgentCache(const rocprofiler_agent_t* rocp_agent,
{
init_cpu_pool(ext_table, *this);
init_gpu_pool(ext_table, *this);
init_agent_profile_queue(api, ext_table);
} catch(std::runtime_error& e)
{
ROCP_WARNING << fmt::format(
+6 -2
Просмотреть файл
@@ -52,7 +52,8 @@ public:
hsa_agent_t hsa_agent,
size_t index,
hsa_agent_t nearest_cpu,
const AmdExtTable& ext_table);
const AmdExtTable& ext_table,
const CoreApiTable& api);
~AgentCache() = default;
AgentCache(const AgentCache&) = default;
AgentCache(AgentCache&&) noexcept = default;
@@ -67,10 +68,12 @@ public:
CONST_NONCONST_ACCESSOR(hsa_agent_t, get_hsa_agent, m_hsa_agent);
CONST_NONCONST_ACCESSOR(hsa_agent_t, near_cpu, m_nearest_cpu);
hsa_queue_t* profile_queue() const { return m_profile_queue; }
const rocprofiler_agent_t* get_rocp_agent() const { return m_rocp_agent; }
std::string_view name() const { return m_name; }
size_t index() const { return m_index; }
void init_agent_profile_queue(const CoreApiTable& api, const AmdExtTable& ext) const;
bool operator==(const rocprofiler_agent_t*) const;
bool operator==(hsa_agent_t) const;
@@ -88,7 +91,8 @@ private:
hsa_amd_memory_pool_t m_kernarg_pool{.handle = 0};
hsa_amd_memory_pool_t m_gpu_pool{.handle = 0};
std::string_view m_name = {};
std::string_view m_name = {};
mutable hsa_queue_t* m_profile_queue = {};
};
inline bool
+3
Просмотреть файл
@@ -748,6 +748,9 @@ rocprofiler_set_api_table(const char* name,
// need to construct agent mappings before initializing the queue controller
rocprofiler::agent::construct_agent_cache(hsa_api_table);
rocprofiler::hsa::queue_controller_init(hsa_api_table);
// Process agent ctx's that were started prior to HSA init
rocprofiler::counters::agent_profile_hsa_registration();
rocprofiler::hsa::async_copy_init(hsa_api_table, lib_instance);
rocprofiler::code_object::initialize(hsa_api_table);
+11 -1
Просмотреть файл
@@ -96,7 +96,17 @@ ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND,
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT,
"Cannot have both an agent counter collection and a dispatch counter "
"in the same context")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_INTERNAL_NO_AGENT_CONTEXT,
"No context has agent profiling enabled, "
"error generally not returned to tools")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_SAMPLE_RATE_EXCEEDED,
"A sample is in progress and a new sample cannot be started")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_NO_PROFILE_QUEUE,
"No profile queue is available for this agent")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS,
"Counter set does not include any hardware counters")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH,
"Counter profile agent does not match the agent in the context")
template <size_t Idx, size_t... Tail>
const char*
get_status_name(rocprofiler_status_t status, std::index_sequence<Idx, Tail...>)