HSA queue file structure reorganization (#136)
* Update lib/rocprofiler/hsa/queue.*
- split AQLPacket, QueueController into their own headers and impls
- added some aliases to queue-related classes to get data on single line
- added default initializers for class member variables
* Update source/lib/rocprofiler includes after hsa queue reorg
[ROCm/rocprofiler-sdk commit: 0f523fbad1]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
267954fdd3
Коммит
925d1ba1b5
@@ -14,6 +14,7 @@
|
||||
#include "lib/rocprofiler/counters/metrics.hpp"
|
||||
#include "lib/rocprofiler/hsa/agent_cache.hpp"
|
||||
#include "lib/rocprofiler/hsa/queue.hpp"
|
||||
#include "lib/rocprofiler/hsa/queue_controller.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "lib/rocprofiler/counters/metrics.hpp"
|
||||
#include "lib/rocprofiler/hsa/agent_cache.hpp"
|
||||
#include "lib/rocprofiler/hsa/queue.hpp"
|
||||
#include "lib/rocprofiler/hsa/queue_controller.hpp"
|
||||
|
||||
extern "C" {
|
||||
/**
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include "lib/rocprofiler/aql/helpers.hpp"
|
||||
#include "lib/rocprofiler/aql/packet_construct.hpp"
|
||||
#include "lib/rocprofiler/context/context.hpp"
|
||||
#include "lib/rocprofiler/hsa/queue_controller.hpp"
|
||||
#include "lib/rocprofiler/registration.hpp"
|
||||
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp queue.cpp agent_cache.cpp)
|
||||
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp queue.cpp queue_controller.cpp agent_cache.cpp)
|
||||
set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp queue.hpp
|
||||
agent_cache.hpp)
|
||||
queue_controller.hpp agent_cache.hpp aql_packet.hpp)
|
||||
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_HSA_SOURCES}
|
||||
${ROCPROFILER_LIB_HSA_HEADERS})
|
||||
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
// Zero-initialized PM4 packet used as the default value for packet members.
// Declared `inline` so that every translation unit including this header
// refers to one single object: a plain namespace-scope `constexpr` variable has
// internal linkage, which makes the default member initializers that copy it
// from inline class definitions refer to a different object in each TU.
inline constexpr hsa_ext_amd_aql_pm4_packet_t null_amd_aql_pm4_packet = {
    .header            = 0,
    .pm4_command       = {0},
    .completion_signal = {.handle = 0}};
|
||||
|
||||
/**
|
||||
* Struct containing AQL packet information. Including start/stop/read
|
||||
* packets along with allocated buffers
|
||||
*/
|
||||
struct AQLPacket
|
||||
{
|
||||
using memory_pool_free_func_t = decltype(::hsa_amd_memory_pool_free)*;
|
||||
|
||||
AQLPacket(memory_pool_free_func_t func);
|
||||
~AQLPacket();
|
||||
|
||||
// Keep move constuctors (i.e. std::move())
|
||||
AQLPacket(AQLPacket&& other) = default;
|
||||
AQLPacket& operator=(AQLPacket&& other) = default;
|
||||
|
||||
// Do not allow copying this class
|
||||
AQLPacket(const AQLPacket&) = delete;
|
||||
AQLPacket& operator=(const AQLPacket&) = delete;
|
||||
|
||||
hsa_ven_amd_aqlprofile_profile_t profile = {};
|
||||
hsa_ext_amd_aql_pm4_packet_t start = null_amd_aql_pm4_packet;
|
||||
hsa_ext_amd_aql_pm4_packet_t stop = null_amd_aql_pm4_packet;
|
||||
hsa_ext_amd_aql_pm4_packet_t read = null_amd_aql_pm4_packet;
|
||||
bool command_buf_mallocd = false;
|
||||
bool output_buffer_malloced = false;
|
||||
memory_pool_free_func_t free_func = nullptr;
|
||||
};
|
||||
|
||||
inline AQLPacket::AQLPacket(memory_pool_free_func_t func)
|
||||
: free_func{func}
|
||||
{}
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
@@ -21,6 +21,9 @@
|
||||
#include "lib/rocprofiler/hsa/queue.hpp"
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
@@ -89,24 +92,6 @@ AddVendorSpecificPacket(const hsa_ext_amd_aql_pm4_packet_t& packet,
|
||||
{
|
||||
transformed_packets.emplace_back(packet).completion_signal = packet_completion_signal;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
Queue::signal_async_handler(const hsa_signal_t& signal, Queue::queue_info_session_t* data) const
|
||||
{
|
||||
hsa_status_t status = _ext_api.hsa_amd_signal_async_handler_fn(
|
||||
signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandler, static_cast<void*>(data));
|
||||
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
<< "Error: hsa_amd_signal_async_handler failed";
|
||||
}
|
||||
|
||||
void
|
||||
Queue::create_signal(uint32_t attribute, hsa_signal_t* signal) const
|
||||
{
|
||||
hsa_status_t status = _ext_api.hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal);
|
||||
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
<< "Error: hsa_amd_signal_create failed";
|
||||
}
|
||||
|
||||
template <typename Integral = uint64_t>
|
||||
constexpr Integral
|
||||
@@ -258,6 +243,55 @@ WriteInterceptor(const void* packets,
|
||||
|
||||
writer(transformed_packets.data(), transformed_packets.size());
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Releases the command and output buffers referenced by `profile`.
//
// A buffer flagged as malloc'd (command_buf_mallocd / output_buffer_malloced)
// is returned with free(); any other buffer is handed to free_func.
AQLPacket::~AQLPacket()
{
    const auto release = [this](void* buffer, bool from_malloc) {
        // nothing was ever allocated for this slot
        if(buffer == nullptr) return;

        if(from_malloc)
        {
            free(buffer);
            return;
        }

        // free_func defaults to nullptr and the constructor does not reject
        // a null argument; calling through it would be undefined behavior
        if(free_func == nullptr)
        {
            LOG(ERROR) << "AQLPacket buffer leaked: no memory pool free function was provided";
            return;
        }

        const hsa_status_t status = free_func(buffer);
        LOG_IF(ERROR, status != HSA_STATUS_SUCCESS) << "AQLPacket failed to free buffer";
    };

    release(profile.command_buffer.ptr, command_buf_mallocd);
    release(profile.output_buffer.ptr, output_buffer_malloced);
}
|
||||
|
||||
// Blocks until every in-flight async packet has been reported complete,
// polling the counter once per millisecond.
Queue::~Queue()
{
    // Potentially replace with condition variable at some point
    // but performance may not matter here.
    //
    // The load uses acquire ordering so that it synchronizes with the
    // decrement performed in async_complete(): everything the completing
    // thread did before decrementing is then visible here before the members
    // of this object are destroyed. A relaxed load gives no such guarantee.
    while(_active_async_packets.load(std::memory_order_acquire) > 0)
    {
        std::this_thread::sleep_for(std::chrono::milliseconds{1});
    }
}
|
||||
|
||||
void
|
||||
Queue::signal_async_handler(const hsa_signal_t& signal, Queue::queue_info_session_t* data) const
|
||||
{
|
||||
hsa_status_t status = _ext_api.hsa_amd_signal_async_handler_fn(
|
||||
signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandler, static_cast<void*>(data));
|
||||
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
<< "Error: hsa_amd_signal_async_handler failed";
|
||||
}
|
||||
|
||||
void
|
||||
Queue::create_signal(uint32_t attribute, hsa_signal_t* signal) const
|
||||
{
|
||||
hsa_status_t status = _ext_api.hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal);
|
||||
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
<< "Error: hsa_amd_signal_create failed";
|
||||
}
|
||||
|
||||
Queue::Queue(const AgentCache& agent,
|
||||
uint32_t size,
|
||||
@@ -313,159 +347,5 @@ Queue::remove_callback(ClientID id)
|
||||
if(map.erase(id) == 1) _notifiers--;
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
QueueController::add_queue(hsa_queue_t* id, std::unique_ptr<Queue> queue)
|
||||
{
|
||||
CHECK(queue);
|
||||
_callback_cache.wlock([&](auto& callbacks) {
|
||||
_queues.wlock([&](auto& map) {
|
||||
const auto agent_id = queue->get_agent().agent_t().id.handle;
|
||||
map[id] = std::move(queue);
|
||||
for(const auto& [cbid, cb_tuple] : callbacks)
|
||||
{
|
||||
auto& [agent, qcb, ccb] = cb_tuple;
|
||||
if(agent.id.handle == agent_id)
|
||||
{
|
||||
map[id]->register_callback(cbid, qcb, ccb);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
QueueController::destory_queue(hsa_queue_t* id)
|
||||
{
|
||||
_queues.wlock([&](auto& map) { map.erase(id); });
|
||||
}
|
||||
|
||||
ClientID
|
||||
QueueController::add_callback(const rocprofiler_agent_t& agent,
|
||||
Queue::QueueCB qcb,
|
||||
Queue::CompletedCB ccb)
|
||||
{
|
||||
static std::atomic<ClientID> client_id = 1;
|
||||
ClientID return_id;
|
||||
_callback_cache.wlock([&](auto& cb_cache) {
|
||||
return_id = client_id;
|
||||
cb_cache[client_id] = std::tuple(agent, qcb, ccb);
|
||||
client_id++;
|
||||
_queues.wlock([&](auto& map) {
|
||||
for(auto& [_, queue] : map)
|
||||
{
|
||||
if(queue->get_agent().agent_t().id.handle == agent.id.handle)
|
||||
{
|
||||
queue->register_callback(return_id, qcb, ccb);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
return return_id;
|
||||
}
|
||||
|
||||
void
|
||||
QueueController::remove_callback(ClientID id)
|
||||
{
|
||||
_callback_cache.wlock([&](auto& cb_cache) {
|
||||
cb_cache.erase(id);
|
||||
_queues.wlock([&](auto& map) {
|
||||
for(auto& [_, queue] : map)
|
||||
{
|
||||
queue->remove_callback(id);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// HSA Intercept Functions (create_queue/destroy_queue)
|
||||
hsa_status_t
|
||||
create_queue(hsa_agent_t agent,
|
||||
uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
|
||||
void* data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
hsa_queue_t** queue)
|
||||
{
|
||||
for(const auto& [_, agent_info] : get_queue_controller().get_supported_agents())
|
||||
{
|
||||
if(agent_info.get_agent().handle == agent.handle)
|
||||
{
|
||||
auto new_queue = std::make_unique<Queue>(agent_info,
|
||||
size,
|
||||
type,
|
||||
callback,
|
||||
data,
|
||||
private_segment_size,
|
||||
group_segment_size,
|
||||
get_queue_controller().get_core_table(),
|
||||
get_queue_controller().get_ext_table(),
|
||||
queue);
|
||||
get_queue_controller().add_queue(*queue, std::move(new_queue));
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
LOG(FATAL) << "Could not find agent - " << agent.handle;
|
||||
return HSA_STATUS_ERROR_FATAL;
|
||||
}
|
||||
|
||||
hsa_status_t
|
||||
destroy_queue(hsa_queue_t* hsa_queue)
|
||||
{
|
||||
get_queue_controller().destory_queue(hsa_queue);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
QueueController::Init(CoreApiTable& core_table, AmdExtTable& ext_table)
|
||||
{
|
||||
_core_table = core_table;
|
||||
_ext_table = ext_table;
|
||||
|
||||
core_table.hsa_queue_create_fn = create_queue;
|
||||
core_table.hsa_queue_destroy_fn = destroy_queue;
|
||||
|
||||
// Generate supported agents
|
||||
rocprofiler_query_available_agents(
|
||||
[](const rocprofiler_agent_t** agents, size_t num_agents, void* user_data) {
|
||||
CHECK(user_data);
|
||||
QueueController& queue = *reinterpret_cast<QueueController*>(user_data);
|
||||
for(size_t i = 0; i < num_agents; i++)
|
||||
{
|
||||
const auto& agent = *agents[i];
|
||||
if(agent.type != ROCPROFILER_AGENT_TYPE_GPU) continue;
|
||||
try
|
||||
{
|
||||
queue.get_supported_agents().emplace(
|
||||
i, AgentCache{agent, i, queue.get_core_table(), queue.get_ext_table()});
|
||||
} catch(std::runtime_error& error)
|
||||
{
|
||||
LOG(ERROR) << fmt::format("GPU Agent Construction Failed (HSA queue will not "
|
||||
"be intercepted): {} ({})",
|
||||
agent.id.handle,
|
||||
error.what());
|
||||
}
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
sizeof(rocprofiler_agent_t),
|
||||
this);
|
||||
}
|
||||
|
||||
QueueController&
|
||||
get_queue_controller()
|
||||
{
|
||||
static QueueController controller;
|
||||
return controller;
|
||||
}
|
||||
|
||||
void
|
||||
queue_controller_init(HsaApiTable* table)
|
||||
{
|
||||
get_queue_controller().Init(*table->core_, *table->amd_ext_);
|
||||
}
|
||||
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -1,25 +1,37 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler/fwd.h>
|
||||
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/rocprofiler/hsa/agent_cache.hpp"
|
||||
#include "lib/rocprofiler/hsa/aql_packet.hpp"
|
||||
|
||||
#include <hsa/amd_hsa_kernel_code.h>
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
@@ -28,101 +40,52 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <hsa/amd_hsa_kernel_code.h>
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
|
||||
#include <rocprofiler/fwd.h>
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/rocprofiler/hsa/agent_cache.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
/**
|
||||
* Struct containing AQL packet information. Including start/stop/read
|
||||
* packets along with allocated buffers
|
||||
*/
|
||||
struct AQLPacket
|
||||
{
|
||||
hsa_ven_amd_aqlprofile_profile_t profile;
|
||||
hsa_ext_amd_aql_pm4_packet_t start{.header = 0,
|
||||
.pm4_command = {0},
|
||||
.completion_signal = {.handle = 0}};
|
||||
hsa_ext_amd_aql_pm4_packet_t stop{.header = 0,
|
||||
.pm4_command = {0},
|
||||
.completion_signal = {.handle = 0}};
|
||||
hsa_ext_amd_aql_pm4_packet_t read{.header = 0,
|
||||
.pm4_command = {0},
|
||||
.completion_signal = {.handle = 0}};
|
||||
bool command_buf_mallocd{false};
|
||||
bool output_buffer_malloced{false};
|
||||
std::function<decltype(hsa_amd_memory_pool_free)> free_func;
|
||||
AQLPacket(std::function<decltype(hsa_amd_memory_pool_free)> func)
|
||||
: free_func(std::move(func))
|
||||
{}
|
||||
|
||||
~AQLPacket()
|
||||
{
|
||||
if(!command_buf_mallocd)
|
||||
{
|
||||
free_func(profile.command_buffer.ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(profile.command_buffer.ptr);
|
||||
}
|
||||
|
||||
if(!output_buffer_malloced)
|
||||
{
|
||||
free_func(profile.output_buffer.ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(profile.output_buffer.ptr);
|
||||
}
|
||||
}
|
||||
|
||||
// Keep move constructors (i.e. std::move())
|
||||
AQLPacket(AQLPacket&& other) = default;
|
||||
AQLPacket& operator=(AQLPacket&& other) = default;
|
||||
|
||||
// Do not allow copying this class
|
||||
AQLPacket(const AQLPacket&) = delete;
|
||||
AQLPacket& operator=(const AQLPacket&) = delete;
|
||||
};
|
||||
|
||||
using ClientID = int64_t;
|
||||
|
||||
// Interceptor for a single specific queue
|
||||
class Queue
|
||||
{
|
||||
public:
|
||||
using callback_t = void (*)(hsa_status_t status, hsa_queue_t* source, void* data);
|
||||
// Function prototype used to notify consumers that a kernel has been
|
||||
// enqueued. An AQL packet can be returned that will be injected into
|
||||
// the queue.
|
||||
using QueueCB = std::function<
|
||||
std::unique_ptr<AQLPacket>(const Queue&, ClientID, const hsa_ext_amd_aql_pm4_packet_t&)>;
|
||||
// Signals the completion of the kernel packet.
|
||||
using CompletedCB = std::function<void(const Queue&,
|
||||
ClientID,
|
||||
const hsa_ext_amd_aql_pm4_packet_t&,
|
||||
std::unique_ptr<AQLPacket>)>;
|
||||
using callback_map_t = std::unordered_map<ClientID, std::pair<QueueCB, CompletedCB>>;
|
||||
|
||||
// Internal session information that is used by write interceptor
|
||||
// to track state of the intercepted kernel.
|
||||
struct queue_info_session_t
|
||||
{
|
||||
Queue& queue;
|
||||
std::unique_ptr<AQLPacket> inst_pkt;
|
||||
ClientID inst_pkt_id;
|
||||
hsa_ext_amd_aql_pm4_packet_t kernel_pkt;
|
||||
hsa_signal_t interrupt_signal;
|
||||
std::unique_ptr<AQLPacket> inst_pkt = {};
|
||||
ClientID inst_pkt_id = 0;
|
||||
hsa_ext_amd_aql_pm4_packet_t kernel_pkt = null_amd_aql_pm4_packet;
|
||||
hsa_signal_t interrupt_signal = {};
|
||||
};
|
||||
|
||||
Queue(const AgentCache& agent,
|
||||
uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
|
||||
void* data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
CoreApiTable core_api,
|
||||
AmdExtTable ext_api,
|
||||
hsa_queue_t** queue);
|
||||
callback_t callback,
|
||||
void* data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
CoreApiTable core_api,
|
||||
AmdExtTable ext_api,
|
||||
hsa_queue_t** queue);
|
||||
|
||||
~Queue();
|
||||
|
||||
const hsa_queue_t* intercept_queue() const { return _intercept_queue; };
|
||||
const AgentCache& get_agent() const { return _agent; }
|
||||
@@ -150,25 +113,6 @@ public:
|
||||
void async_started() { _active_async_packets++; }
|
||||
void async_complete() { _active_async_packets--; }
|
||||
|
||||
~Queue()
|
||||
{
|
||||
// Potentially replace with condition variable at some point
|
||||
// but performance may not matter here.
|
||||
while(_active_async_packets > 0)
|
||||
{}
|
||||
}
|
||||
|
||||
// Function prototype used to notify consumers that a kernel has been
|
||||
// enqueued. An AQL packet can be returned that will be injected into
|
||||
// the queue.
|
||||
using QueueCB = std::function<
|
||||
std::unique_ptr<AQLPacket>(const Queue&, ClientID, const hsa_ext_amd_aql_pm4_packet_t&)>;
|
||||
// Signals the completion of the kernel packet.
|
||||
using CompletedCB = std::function<void(const Queue&,
|
||||
ClientID,
|
||||
const hsa_ext_amd_aql_pm4_packet_t&,
|
||||
std::unique_ptr<AQLPacket>)>;
|
||||
|
||||
void register_callback(ClientID id, QueueCB enqueue_cb, CompletedCB complete_cb);
|
||||
void remove_callback(ClientID id);
|
||||
|
||||
@@ -176,62 +120,14 @@ public:
|
||||
const AmdExtTable& ext_api() const { return _ext_api; }
|
||||
|
||||
private:
|
||||
std::atomic<int64_t> _active_async_packets{0};
|
||||
CoreApiTable _core_api;
|
||||
AmdExtTable _ext_api;
|
||||
const AgentCache& _agent;
|
||||
std::atomic<int> _notifiers;
|
||||
rocprofiler::common::Synchronized<std::unordered_map<ClientID, std::pair<QueueCB, CompletedCB>>>
|
||||
_callbacks;
|
||||
hsa_queue_t* _intercept_queue;
|
||||
std::atomic<int> _notifiers = {0};
|
||||
std::atomic<int64_t> _active_async_packets = {0};
|
||||
CoreApiTable _core_api = {};
|
||||
AmdExtTable _ext_api = {};
|
||||
const AgentCache& _agent;
|
||||
rocprofiler::common::Synchronized<callback_map_t> _callbacks = {};
|
||||
hsa_queue_t* _intercept_queue = nullptr;
|
||||
};
|
||||
|
||||
// Tracks and manages HSA queues
|
||||
class QueueController
|
||||
{
|
||||
public:
|
||||
QueueController() = default;
|
||||
// Initializes the QueueInterceptor. This must be delayed until
|
||||
// HSA has been inited.
|
||||
void Init(CoreApiTable& core_table, AmdExtTable& ext_table);
|
||||
// Called to add a queue that was created by the user program
|
||||
void add_queue(hsa_queue_t*, std::unique_ptr<Queue>);
|
||||
void destory_queue(hsa_queue_t*);
|
||||
|
||||
// Add callback to queues associated with the agent. Returns a client
|
||||
// id that can be used by callers to remove the callback.
|
||||
ClientID add_callback(const rocprofiler_agent_t&, Queue::QueueCB, Queue::CompletedCB);
|
||||
void remove_callback(ClientID);
|
||||
|
||||
const CoreApiTable& get_core_table() const { return _core_table; }
|
||||
const AmdExtTable& get_ext_table() const { return _ext_table; }
|
||||
|
||||
// Gets the list of supported HSA agents that can be intercepted
|
||||
const std::unordered_map<uint32_t, AgentCache>& get_supported_agents() const
|
||||
{
|
||||
return _supported_agents;
|
||||
}
|
||||
|
||||
std::unordered_map<uint32_t, AgentCache>& get_supported_agents() { return _supported_agents; }
|
||||
|
||||
private:
|
||||
CoreApiTable _core_table;
|
||||
AmdExtTable _ext_table;
|
||||
rocprofiler::common::Synchronized<std::unordered_map<hsa_queue_t*, std::unique_ptr<Queue>>>
|
||||
_queues;
|
||||
rocprofiler::common::Synchronized<
|
||||
std::unordered_map<ClientID,
|
||||
std::tuple<rocprofiler_agent_t, Queue::QueueCB, Queue::CompletedCB>>>
|
||||
_callback_cache;
|
||||
|
||||
std::unordered_map<uint32_t, AgentCache> _supported_agents;
|
||||
};
|
||||
|
||||
QueueController&
|
||||
get_queue_controller();
|
||||
|
||||
void
|
||||
queue_controller_init(HsaApiTable* table);
|
||||
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -0,0 +1,185 @@
|
||||
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler/hsa/queue_controller.hpp"
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// HSA Intercept Functions (create_queue/destroy_queue)
|
||||
// Replacement for the core table's hsa_queue_create_fn entry.
//
// Searches the controller's supported agents for the one whose HSA handle
// matches `agent`, builds a Queue interceptor for it (forwarding all of the
// creation arguments plus the controller's saved API tables) and registers
// the interceptor with the controller under the handle written to `*queue`.
//
// Returns HSA_STATUS_SUCCESS once the queue has been registered. When no
// supported agent matches, a FATAL message is logged; the trailing
// HSA_STATUS_ERROR_FATAL return only exists for the case where that log
// does not terminate the process.
hsa_status_t
create_queue(hsa_agent_t        agent,
             uint32_t           size,
             hsa_queue_type32_t type,
             void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
             void*         data,
             uint32_t      private_segment_size,
             uint32_t      group_segment_size,
             hsa_queue_t** queue)
{
    auto& controller = get_queue_controller();

    for(const auto& [_, candidate] : controller.get_supported_agents())
    {
        // not the agent this queue is being created on
        if(candidate.get_agent().handle != agent.handle) continue;

        auto interceptor = std::make_unique<Queue>(candidate,
                                                   size,
                                                   type,
                                                   callback,
                                                   data,
                                                   private_segment_size,
                                                   group_segment_size,
                                                   controller.get_core_table(),
                                                   controller.get_ext_table(),
                                                   queue);
        controller.add_queue(*queue, std::move(interceptor));
        return HSA_STATUS_SUCCESS;
    }

    LOG(FATAL) << "Could not find agent - " << agent.handle;
    return HSA_STATUS_ERROR_FATAL;
}
|
||||
|
||||
// Replacement for the core table's hsa_queue_destroy_fn entry: asks the
// controller to drop the interceptor registered for `hsa_queue` and always
// reports HSA_STATUS_SUCCESS.
hsa_status_t
destroy_queue(hsa_queue_t* hsa_queue)
{
    auto& controller = get_queue_controller();
    controller.destory_queue(hsa_queue);
    return HSA_STATUS_SUCCESS;
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
QueueController::add_queue(hsa_queue_t* id, std::unique_ptr<Queue> queue)
|
||||
{
|
||||
CHECK(queue);
|
||||
_callback_cache.wlock([&](auto& callbacks) {
|
||||
_queues.wlock([&](auto& map) {
|
||||
const auto agent_id = queue->get_agent().agent_t().id.handle;
|
||||
map[id] = std::move(queue);
|
||||
for(const auto& [cbid, cb_tuple] : callbacks)
|
||||
{
|
||||
auto& [agent, qcb, ccb] = cb_tuple;
|
||||
if(agent.id.handle == agent_id)
|
||||
{
|
||||
map[id]->register_callback(cbid, qcb, ccb);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
QueueController::destory_queue(hsa_queue_t* id)
|
||||
{
|
||||
_queues.wlock([&](auto& map) { map.erase(id); });
|
||||
}
|
||||
|
||||
// Caches the (agent, qcb, ccb) triple under a newly allocated client id and
// registers the two callbacks on every queue currently tracked whose agent
// id matches `agent`.
//
// Returns the allocated client id; passing it to remove_callback() undoes
// the registration. Ids start at 1 and come from a counter shared by the
// whole process.
ClientID
QueueController::add_callback(const rocprofiler_agent_t& agent,
                              Queue::QueueCB             qcb,
                              Queue::CompletedCB         ccb)
{
    static std::atomic<ClientID> client_id = 1;

    // Reserve the id with a single atomic read-modify-write. Reading the
    // counter and incrementing it in separate steps only yields unique ids
    // as long as an external lock happens to serialize all callers.
    const ClientID return_id = client_id.fetch_add(1);

    _callback_cache.wlock([&](auto& cb_cache) {
        cb_cache[return_id] = std::tuple(agent, qcb, ccb);
        _queues.wlock([&](auto& map) {
            for(auto& [_, queue] : map)
            {
                if(queue->get_agent().agent_t().id.handle == agent.id.handle)
                {
                    queue->register_callback(return_id, qcb, ccb);
                }
            }
        });
    });
    return return_id;
}
|
||||
|
||||
void
|
||||
QueueController::remove_callback(ClientID id)
|
||||
{
|
||||
_callback_cache.wlock([&](auto& cb_cache) {
|
||||
cb_cache.erase(id);
|
||||
_queues.wlock([&](auto& map) {
|
||||
for(auto& [_, queue] : map)
|
||||
{
|
||||
queue->remove_callback(id);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table)
|
||||
{
|
||||
_core_table = core_table;
|
||||
_ext_table = ext_table;
|
||||
|
||||
core_table.hsa_queue_create_fn = create_queue;
|
||||
core_table.hsa_queue_destroy_fn = destroy_queue;
|
||||
|
||||
// Generate supported agents
|
||||
rocprofiler_query_available_agents(
|
||||
[](const rocprofiler_agent_t** agents, size_t num_agents, void* user_data) {
|
||||
CHECK(user_data);
|
||||
QueueController& queue = *reinterpret_cast<QueueController*>(user_data);
|
||||
for(size_t i = 0; i < num_agents; i++)
|
||||
{
|
||||
const auto& agent = *agents[i];
|
||||
if(agent.type != ROCPROFILER_AGENT_TYPE_GPU) continue;
|
||||
try
|
||||
{
|
||||
queue.get_supported_agents().emplace(
|
||||
i, AgentCache{agent, i, queue.get_core_table(), queue.get_ext_table()});
|
||||
} catch(std::runtime_error& error)
|
||||
{
|
||||
LOG(ERROR) << fmt::format("GPU Agent Construction Failed (HSA queue will not "
|
||||
"be intercepted): {} ({})",
|
||||
agent.id.handle,
|
||||
error.what());
|
||||
}
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
sizeof(rocprofiler_agent_t),
|
||||
this);
|
||||
}
|
||||
|
||||
// Returns the process-wide QueueController. The instance is a function-local
// static, so it is constructed on the first call and the same object is
// returned by every later call.
QueueController&
get_queue_controller()
{
    static QueueController instance;
    return instance;
}
|
||||
|
||||
void
|
||||
queue_controller_init(HsaApiTable* table)
|
||||
{
|
||||
get_queue_controller().init(*table->core_, *table->amd_ext_);
|
||||
}
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,81 @@
|
||||
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#include "lib/rocprofiler/hsa/queue.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
// Tracks and manages HSA queues
|
||||
class QueueController
|
||||
{
|
||||
public:
|
||||
QueueController() = default;
|
||||
// Initializes the QueueInterceptor. This must be delayed until
|
||||
// HSA has been inited.
|
||||
void init(CoreApiTable& core_table, AmdExtTable& ext_table);
|
||||
|
||||
// Called to add a queue that was created by the user program
|
||||
void add_queue(hsa_queue_t*, std::unique_ptr<Queue>);
|
||||
void destory_queue(hsa_queue_t*);
|
||||
|
||||
// Add callback to queues associated with the agent. Returns a client
|
||||
// id that can be used by callers to remove the callback.
|
||||
ClientID add_callback(const rocprofiler_agent_t&, Queue::QueueCB, Queue::CompletedCB);
|
||||
void remove_callback(ClientID);
|
||||
|
||||
const CoreApiTable& get_core_table() const { return _core_table; }
|
||||
const AmdExtTable& get_ext_table() const { return _ext_table; }
|
||||
|
||||
// Gets the list of supported HSA agents that can be intercepted
|
||||
const auto& get_supported_agents() const { return _supported_agents; }
|
||||
auto& get_supported_agents() { return _supported_agents; }
|
||||
|
||||
private:
|
||||
using agent_callback_tuple_t =
|
||||
std::tuple<rocprofiler_agent_t, Queue::QueueCB, Queue::CompletedCB>;
|
||||
using queue_map_t = std::unordered_map<hsa_queue_t*, std::unique_ptr<Queue>>;
|
||||
using client_id_map_t = std::unordered_map<ClientID, agent_callback_tuple_t>;
|
||||
using agent_cache_map_t = std::unordered_map<uint32_t, AgentCache>;
|
||||
|
||||
CoreApiTable _core_table = {};
|
||||
AmdExtTable _ext_table = {};
|
||||
common::Synchronized<queue_map_t> _queues = {};
|
||||
common::Synchronized<client_id_map_t> _callback_cache = {};
|
||||
agent_cache_map_t _supported_agents = {};
|
||||
};
|
||||
|
||||
QueueController&
|
||||
get_queue_controller();
|
||||
|
||||
void
|
||||
queue_controller_init(HsaApiTable* table);
|
||||
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "lib/rocprofiler/context/context.hpp"
|
||||
#include "lib/rocprofiler/hsa/hsa.hpp"
|
||||
#include "lib/rocprofiler/hsa/queue.hpp"
|
||||
#include "lib/rocprofiler/hsa/queue_controller.hpp"
|
||||
#include "lib/rocprofiler/internal_threading.hpp"
|
||||
|
||||
#include <rocprofiler/context.h>
|
||||
|
||||
Ссылка в новой задаче
Block a user