HSA queue file structure reorganization (#136)

* Update lib/rocprofiler/hsa/queue.*

- split AQLPacket, QueueController into their own headers and impls
- added some aliases to queue-related classes to get data on single line
- added default initializers for class member variables

* Update source/lib/rocprofiler includes after hsa queue reorg
Этот коммит содержится в:
Jonathan R. Madsen
2023-10-18 23:15:56 -05:00
коммит произвёл GitHub
родитель 6a3f79e626
Коммит 0f523fbad1
10 изменённых файлов: 455 добавлений и 342 удалений
+1
Просмотреть файл
@@ -14,6 +14,7 @@
#include "lib/rocprofiler/counters/metrics.hpp"
#include "lib/rocprofiler/hsa/agent_cache.hpp"
#include "lib/rocprofiler/hsa/queue.hpp"
#include "lib/rocprofiler/hsa/queue_controller.hpp"
namespace rocprofiler
{
+1
Просмотреть файл
@@ -6,6 +6,7 @@
#include "lib/rocprofiler/counters/metrics.hpp"
#include "lib/rocprofiler/hsa/agent_cache.hpp"
#include "lib/rocprofiler/hsa/queue.hpp"
#include "lib/rocprofiler/hsa/queue_controller.hpp"
extern "C" {
/**
+1
Просмотреть файл
@@ -4,6 +4,7 @@
#include "lib/rocprofiler/aql/helpers.hpp"
#include "lib/rocprofiler/aql/packet_construct.hpp"
#include "lib/rocprofiler/context/context.hpp"
#include "lib/rocprofiler/hsa/queue_controller.hpp"
#include "lib/rocprofiler/registration.hpp"
#include <rocprofiler/rocprofiler.h>
+2 -2
Просмотреть файл
@@ -1,6 +1,6 @@
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp queue.cpp agent_cache.cpp)
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp queue.cpp queue_controller.cpp agent_cache.cpp)
set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp queue.hpp
agent_cache.hpp)
queue_controller.hpp agent_cache.hpp aql_packet.hpp)
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_HSA_SOURCES}
${ROCPROFILER_LIB_HSA_HEADERS})
+67
Просмотреть файл
@@ -0,0 +1,67 @@
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <hsa/hsa_ext_amd.h>
#include <hsa/hsa_ven_amd_aqlprofile.h>
namespace rocprofiler
{
namespace hsa
{
constexpr hsa_ext_amd_aql_pm4_packet_t null_amd_aql_pm4_packet = {
.header = 0,
.pm4_command = {0},
.completion_signal = {.handle = 0}};
/**
* Struct containing AQL packet information. Including start/stop/read
* packets along with allocated buffers
*/
struct AQLPacket
{
using memory_pool_free_func_t = decltype(::hsa_amd_memory_pool_free)*;
AQLPacket(memory_pool_free_func_t func);
~AQLPacket();
// Keep move constuctors (i.e. std::move())
AQLPacket(AQLPacket&& other) = default;
AQLPacket& operator=(AQLPacket&& other) = default;
// Do not allow copying this class
AQLPacket(const AQLPacket&) = delete;
AQLPacket& operator=(const AQLPacket&) = delete;
hsa_ven_amd_aqlprofile_profile_t profile = {};
hsa_ext_amd_aql_pm4_packet_t start = null_amd_aql_pm4_packet;
hsa_ext_amd_aql_pm4_packet_t stop = null_amd_aql_pm4_packet;
hsa_ext_amd_aql_pm4_packet_t read = null_amd_aql_pm4_packet;
bool command_buf_mallocd = false;
bool output_buffer_malloced = false;
memory_pool_free_func_t free_func = nullptr;
};
// Remembers the routine used to release buffers that were not malloc'd.
inline AQLPacket::AQLPacket(memory_pool_free_func_t func)
{
    free_func = func;
}
} // namespace hsa
} // namespace rocprofiler
+52 -172
Просмотреть файл
@@ -21,6 +21,9 @@
#include "lib/rocprofiler/hsa/queue.hpp"
#include <glog/logging.h>
#include <atomic>
#include <chrono>
#include <thread>
namespace rocprofiler
{
@@ -89,24 +92,6 @@ AddVendorSpecificPacket(const hsa_ext_amd_aql_pm4_packet_t& packet,
{
transformed_packets.emplace_back(packet).completion_signal = packet_completion_signal;
}
} // namespace
void
Queue::signal_async_handler(const hsa_signal_t& signal, Queue::queue_info_session_t* data) const
{
hsa_status_t status = _ext_api.hsa_amd_signal_async_handler_fn(
signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandler, static_cast<void*>(data));
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
<< "Error: hsa_amd_signal_async_handler failed";
}
// Creates an HSA signal with initial value 1 and the given attribute,
// storing it in `signal`. Any status other than SUCCESS / INFO_BREAK is fatal.
void
Queue::create_signal(uint32_t attribute, hsa_signal_t* signal) const
{
    const hsa_status_t status =
        _ext_api.hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal);

    const bool succeeded = (status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK);
    LOG_IF(FATAL, !succeeded) << "Error: hsa_amd_signal_create failed";
}
template <typename Integral = uint64_t>
constexpr Integral
@@ -258,6 +243,55 @@ WriteInterceptor(const void* packets,
writer(transformed_packets.data(), transformed_packets.size());
}
} // namespace
// Releases the command and output buffers referenced by `profile`, each with
// the allocator indicated by its flag: free() for malloc'd buffers, free_func
// otherwise.
AQLPacket::~AQLPacket()
{
    const auto release = [this](void* buffer, bool was_mallocd) {
        // Nothing to do for a buffer that was never allocated (profile is
        // zero-initialized by default).
        if(buffer == nullptr) return;

        if(was_mallocd)
        {
            free(buffer);
        }
        else if(free_func != nullptr)
        {
            free_func(buffer);
        }
        else
        {
            // Calling through a null function pointer would crash; report the
            // leak instead of taking the process down from a destructor.
            LOG(ERROR) << "AQLPacket buffer leaked: no free function was provided";
        }
    };

    release(profile.command_buffer.ptr, command_buf_mallocd);
    release(profile.output_buffer.ptr, output_buffer_malloced);
}
// Blocks until every in-flight async packet has completed, so that no
// completion handler can touch this queue after its members are destroyed.
Queue::~Queue()
{
    // Potentially replace with condition variable at some point
    // but performance may not matter here.
    //
    // The load uses acquire ordering so that it synchronizes with the
    // decrement in async_complete(): everything a handler did before
    // decrementing the counter happens-before the member destruction that
    // follows this loop. A relaxed load gives no such guarantee.
    while(_active_async_packets.load(std::memory_order_acquire) > 0)
    {
        std::this_thread::sleep_for(std::chrono::milliseconds{1});
    }
}
void
Queue::signal_async_handler(const hsa_signal_t& signal, Queue::queue_info_session_t* data) const
{
hsa_status_t status = _ext_api.hsa_amd_signal_async_handler_fn(
signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandler, static_cast<void*>(data));
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
<< "Error: hsa_amd_signal_async_handler failed";
}
// Creates an HSA signal with initial value 1 and the given attribute,
// storing it in `signal`. Any status other than SUCCESS / INFO_BREAK is fatal.
void
Queue::create_signal(uint32_t attribute, hsa_signal_t* signal) const
{
    const hsa_status_t status =
        _ext_api.hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal);

    const bool succeeded = (status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK);
    LOG_IF(FATAL, !succeeded) << "Error: hsa_amd_signal_create failed";
}
Queue::Queue(const AgentCache& agent,
uint32_t size,
@@ -313,159 +347,5 @@ Queue::remove_callback(ClientID id)
if(map.erase(id) == 1) _notifiers--;
});
}
// Takes ownership of `queue`, stores it under `id`, and registers on it every
// cached callback whose agent matches the queue's agent.
void
QueueController::add_queue(hsa_queue_t* id, std::unique_ptr<Queue> queue)
{
    CHECK(queue);
    // Lock order: callback cache first, then the queue map.
    _callback_cache.wlock([&](auto& callbacks) {
        _queues.wlock([&](auto& queues) {
            // read the agent handle before ownership moves into the map
            const auto agent_id = queue->get_agent().agent_t().id.handle;

            auto& slot = queues[id];
            slot       = std::move(queue);

            for(const auto& cb_entry : callbacks)
            {
                const auto& [agent, qcb, ccb] = cb_entry.second;
                if(agent.id.handle != agent_id) continue;
                slot->register_callback(cb_entry.first, qcb, ccb);
            }
        });
    });
}
// Drops the interceptor registered for `id` (if any) from the queue map.
void
QueueController::destory_queue(hsa_queue_t* id)
{
    _queues.wlock([id](auto& queues) { queues.erase(id); });
}
// Caches the callback pair for `agent` under a freshly issued client id and
// registers it on every existing queue that belongs to that agent. Returns
// the issued id.
ClientID
QueueController::add_callback(const rocprofiler_agent_t& agent,
                              Queue::QueueCB             qcb,
                              Queue::CompletedCB         ccb)
{
    static std::atomic<ClientID> client_id = 1;
    ClientID                     return_id;

    // Lock order: callback cache first, then the queue map.
    _callback_cache.wlock([&](auto& cb_cache) {
        return_id           = client_id;
        cb_cache[return_id] = std::tuple(agent, qcb, ccb);
        ++client_id;

        _queues.wlock([&](auto& queues) {
            for(auto& entry : queues)
            {
                auto& queue = entry.second;
                if(queue->get_agent().agent_t().id.handle != agent.id.handle) continue;
                queue->register_callback(return_id, qcb, ccb);
            }
        });
    });
    return return_id;
}
// Forgets the cached callback for `id` and removes it from every queue.
void
QueueController::remove_callback(ClientID id)
{
    // Lock order: callback cache first, then the queue map.
    _callback_cache.wlock([&](auto& cb_cache) {
        cb_cache.erase(id);
        _queues.wlock([&](auto& queues) {
            for(auto& entry : queues)
            {
                entry.second->remove_callback(id);
            }
        });
    });
}
// HSA Intercept Functions (create_queue/destroy_queue)
// Queue-creation hook stored in the core API table's hsa_queue_create_fn.
// Builds a Queue interceptor for the matching supported agent and hands it to
// the controller; aborts if the agent is not among the supported ones.
hsa_status_t
create_queue(hsa_agent_t        agent,
             uint32_t           size,
             hsa_queue_type32_t type,
             void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
             void*         data,
             uint32_t      private_segment_size,
             uint32_t      group_segment_size,
             hsa_queue_t** queue)
{
    auto& controller = get_queue_controller();

    for(const auto& entry : controller.get_supported_agents())
    {
        const auto& agent_info = entry.second;
        if(agent_info.get_agent().handle != agent.handle) continue;

        auto interceptor = std::make_unique<Queue>(agent_info,
                                                   size,
                                                   type,
                                                   callback,
                                                   data,
                                                   private_segment_size,
                                                   group_segment_size,
                                                   controller.get_core_table(),
                                                   controller.get_ext_table(),
                                                   queue);
        controller.add_queue(*queue, std::move(interceptor));
        return HSA_STATUS_SUCCESS;
    }

    LOG(FATAL) << "Could not find agent - " << agent.handle;
    return HSA_STATUS_ERROR_FATAL;
}
// Queue-destruction hook stored in the core API table's hsa_queue_destroy_fn.
// Removes the interceptor for `hsa_queue` from the controller.
hsa_status_t
destroy_queue(hsa_queue_t* hsa_queue)
{
    auto& controller = get_queue_controller();
    controller.destory_queue(hsa_queue);
    return HSA_STATUS_SUCCESS;
}
// Saves copies of the incoming API tables, replaces the queue create/destroy
// entries of `core_table` with the interceptors, and builds the map of
// supported (GPU) agents.
void
QueueController::Init(CoreApiTable& core_table, AmdExtTable& ext_table)
{
    // Copy the tables before the create/destroy entries are overwritten below.
    _core_table = core_table;
    _ext_table  = ext_table;

    core_table.hsa_queue_create_fn  = create_queue;
    core_table.hsa_queue_destroy_fn = destroy_queue;

    // Generate supported agents
    const auto status = rocprofiler_query_available_agents(
        [](const rocprofiler_agent_t** agents, size_t num_agents, void* user_data) {
            CHECK(user_data);
            // user_data is the `this` pointer passed below
            auto& controller = *static_cast<QueueController*>(user_data);
            for(size_t i = 0; i < num_agents; i++)
            {
                const auto& agent = *agents[i];
                if(agent.type != ROCPROFILER_AGENT_TYPE_GPU) continue;
                try
                {
                    controller.get_supported_agents().emplace(
                        i,
                        AgentCache{
                            agent, i, controller.get_core_table(), controller.get_ext_table()});
                } catch(const std::runtime_error& error)
                {
                    LOG(ERROR) << fmt::format("GPU Agent Construction Failed (HSA queue will not "
                                              "be intercepted): {} ({})",
                                              agent.id.handle,
                                              error.what());
                }
            }
            return ROCPROFILER_STATUS_SUCCESS;
        },
        sizeof(rocprofiler_agent_t),
        this);

    // Without the agent list no queue can be matched to an agent, so make the
    // failure visible instead of silently dropping the status.
    LOG_IF(ERROR, status != ROCPROFILER_STATUS_SUCCESS)
        << "rocprofiler_query_available_agents failed (status " << static_cast<int>(status)
        << "); HSA queues will not be intercepted";
}
// Returns the process-wide controller, constructed on first use.
QueueController&
get_queue_controller()
{
    static QueueController instance;
    return instance;
}
// Initializes the global controller with the core and AMD extension tables
// of `table`.
void
queue_controller_init(HsaApiTable* table)
{
    auto& controller = get_queue_controller();
    controller.Init(*table->core_, *table->amd_ext_);
}
} // namespace hsa
} // namespace rocprofiler
+64 -168
Просмотреть файл
@@ -1,25 +1,37 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <rocprofiler/fwd.h>
#include "lib/common/synchronized.hpp"
#include "lib/rocprofiler/hsa/agent_cache.hpp"
#include "lib/rocprofiler/hsa/aql_packet.hpp"
#include <hsa/amd_hsa_kernel_code.h>
#include <hsa/hsa.h>
#include <hsa/hsa_api_trace.h>
#include <hsa/hsa_ext_amd.h>
#include <hsa/hsa_ven_amd_loader.h>
#include <atomic>
#include <functional>
#include <iostream>
@@ -28,101 +40,52 @@
#include <unordered_map>
#include <vector>
#include <hsa/amd_hsa_kernel_code.h>
#include <hsa/hsa.h>
#include <hsa/hsa_api_trace.h>
#include <hsa/hsa_ext_amd.h>
#include <hsa/hsa_ven_amd_aqlprofile.h>
#include <hsa/hsa_ven_amd_loader.h>
#include <rocprofiler/fwd.h>
#include "lib/common/synchronized.hpp"
#include "lib/rocprofiler/hsa/agent_cache.hpp"
namespace rocprofiler
{
namespace hsa
{
/**
* Struct containing AQL packet information. Including start/stop/read
* packets along with allocated buffers
*/
struct AQLPacket
{
hsa_ven_amd_aqlprofile_profile_t profile;
hsa_ext_amd_aql_pm4_packet_t start{.header = 0,
.pm4_command = {0},
.completion_signal = {.handle = 0}};
hsa_ext_amd_aql_pm4_packet_t stop{.header = 0,
.pm4_command = {0},
.completion_signal = {.handle = 0}};
hsa_ext_amd_aql_pm4_packet_t read{.header = 0,
.pm4_command = {0},
.completion_signal = {.handle = 0}};
bool command_buf_mallocd{false};
bool output_buffer_malloced{false};
std::function<decltype(hsa_amd_memory_pool_free)> free_func;
AQLPacket(std::function<decltype(hsa_amd_memory_pool_free)> func)
: free_func(std::move(func))
{}
~AQLPacket()
{
if(!command_buf_mallocd)
{
free_func(profile.command_buffer.ptr);
}
else
{
free(profile.command_buffer.ptr);
}
if(!output_buffer_malloced)
{
free_func(profile.output_buffer.ptr);
}
else
{
free(profile.output_buffer.ptr);
}
}
// Keep move constuctors (i.e. std::move())
AQLPacket(AQLPacket&& other) = default;
AQLPacket& operator=(AQLPacket&& other) = default;
// Do not allow copying this class
AQLPacket(const AQLPacket&) = delete;
AQLPacket& operator=(const AQLPacket&) = delete;
};
using ClientID = int64_t;
// Interceptor for a single specific queue
class Queue
{
public:
using callback_t = void (*)(hsa_status_t status, hsa_queue_t* source, void* data);
// Function prototype used to notify consumers that a kernel has been
// enqueued. An AQL packet can be returned that will be injected into
// the queue.
using QueueCB = std::function<
std::unique_ptr<AQLPacket>(const Queue&, ClientID, const hsa_ext_amd_aql_pm4_packet_t&)>;
// Signals the completion of the kernel packet.
using CompletedCB = std::function<void(const Queue&,
ClientID,
const hsa_ext_amd_aql_pm4_packet_t&,
std::unique_ptr<AQLPacket>)>;
using callback_map_t = std::unordered_map<ClientID, std::pair<QueueCB, CompletedCB>>;
// Internal session information that is used by write interceptor
// to track state of the intercepted kernel.
struct queue_info_session_t
{
Queue& queue;
std::unique_ptr<AQLPacket> inst_pkt;
ClientID inst_pkt_id;
hsa_ext_amd_aql_pm4_packet_t kernel_pkt;
hsa_signal_t interrupt_signal;
std::unique_ptr<AQLPacket> inst_pkt = {};
ClientID inst_pkt_id = 0;
hsa_ext_amd_aql_pm4_packet_t kernel_pkt = null_amd_aql_pm4_packet;
hsa_signal_t interrupt_signal = {};
};
Queue(const AgentCache& agent,
uint32_t size,
hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
void* data,
uint32_t private_segment_size,
uint32_t group_segment_size,
CoreApiTable core_api,
AmdExtTable ext_api,
hsa_queue_t** queue);
callback_t callback,
void* data,
uint32_t private_segment_size,
uint32_t group_segment_size,
CoreApiTable core_api,
AmdExtTable ext_api,
hsa_queue_t** queue);
~Queue();
const hsa_queue_t* intercept_queue() const { return _intercept_queue; };
const AgentCache& get_agent() const { return _agent; }
@@ -150,25 +113,6 @@ public:
void async_started() { _active_async_packets++; }
void async_complete() { _active_async_packets--; }
~Queue()
{
// Potentially replace with condition variable at some point
// but performance may not matter here.
while(_active_async_packets > 0)
{}
}
// Function prototype used to notify consumers that a kernel has been
// enqueued. An AQL packet can be returned that will be injected into
// the queue.
using QueueCB = std::function<
std::unique_ptr<AQLPacket>(const Queue&, ClientID, const hsa_ext_amd_aql_pm4_packet_t&)>;
// Signals the completion of the kernel packet.
using CompletedCB = std::function<void(const Queue&,
ClientID,
const hsa_ext_amd_aql_pm4_packet_t&,
std::unique_ptr<AQLPacket>)>;
void register_callback(ClientID id, QueueCB enqueue_cb, CompletedCB complete_cb);
void remove_callback(ClientID id);
@@ -176,62 +120,14 @@ public:
const AmdExtTable& ext_api() const { return _ext_api; }
private:
std::atomic<int64_t> _active_async_packets{0};
CoreApiTable _core_api;
AmdExtTable _ext_api;
const AgentCache& _agent;
std::atomic<int> _notifiers;
rocprofiler::common::Synchronized<std::unordered_map<ClientID, std::pair<QueueCB, CompletedCB>>>
_callbacks;
hsa_queue_t* _intercept_queue;
std::atomic<int> _notifiers = {0};
std::atomic<int64_t> _active_async_packets = {0};
CoreApiTable _core_api = {};
AmdExtTable _ext_api = {};
const AgentCache& _agent;
rocprofiler::common::Synchronized<callback_map_t> _callbacks = {};
hsa_queue_t* _intercept_queue = nullptr;
};
// Tracks and manages HSA queues
class QueueController
{
public:
QueueController() = default;
// Initializes the QueueInterceptor. This must be delayed until
// HSA has been inited.
void Init(CoreApiTable& core_table, AmdExtTable& ext_table);
// Called to add a queue that was created by the user program
void add_queue(hsa_queue_t*, std::unique_ptr<Queue>);
void destory_queue(hsa_queue_t*);
// Add callback to queues associated with the agent. Returns a client
// id that can be used by callers to remove the callback.
ClientID add_callback(const rocprofiler_agent_t&, Queue::QueueCB, Queue::CompletedCB);
void remove_callback(ClientID);
const CoreApiTable& get_core_table() const { return _core_table; }
const AmdExtTable& get_ext_table() const { return _ext_table; }
// Gets the list of supported HSA agents that can be intercepted
const std::unordered_map<uint32_t, AgentCache>& get_supported_agents() const
{
return _supported_agents;
}
std::unordered_map<uint32_t, AgentCache>& get_supported_agents() { return _supported_agents; }
private:
CoreApiTable _core_table;
AmdExtTable _ext_table;
rocprofiler::common::Synchronized<std::unordered_map<hsa_queue_t*, std::unique_ptr<Queue>>>
_queues;
rocprofiler::common::Synchronized<
std::unordered_map<ClientID,
std::tuple<rocprofiler_agent_t, Queue::QueueCB, Queue::CompletedCB>>>
_callback_cache;
std::unordered_map<uint32_t, AgentCache> _supported_agents;
};
// Accessor for the QueueController instance shared by the whole process.
QueueController&
get_queue_controller();
// Initializes the shared QueueController from the core and AMD extension
// tables of `table`. `table` is dereferenced without a null check.
void
queue_controller_init(HsaApiTable* table);
} // namespace hsa
} // namespace rocprofiler
+185
Просмотреть файл
@@ -0,0 +1,185 @@
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lib/rocprofiler/hsa/queue_controller.hpp"
#include <glog/logging.h>
namespace rocprofiler
{
namespace hsa
{
namespace
{
// HSA Intercept Functions (create_queue/destroy_queue)
// Queue-creation hook stored in the core API table's hsa_queue_create_fn.
// Builds a Queue interceptor for the matching supported agent and hands it to
// the controller; aborts if the agent is not among the supported ones.
hsa_status_t
create_queue(hsa_agent_t        agent,
             uint32_t           size,
             hsa_queue_type32_t type,
             void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
             void*         data,
             uint32_t      private_segment_size,
             uint32_t      group_segment_size,
             hsa_queue_t** queue)
{
    auto& controller = get_queue_controller();

    for(const auto& entry : controller.get_supported_agents())
    {
        const auto& agent_info = entry.second;
        if(agent_info.get_agent().handle != agent.handle) continue;

        auto interceptor = std::make_unique<Queue>(agent_info,
                                                   size,
                                                   type,
                                                   callback,
                                                   data,
                                                   private_segment_size,
                                                   group_segment_size,
                                                   controller.get_core_table(),
                                                   controller.get_ext_table(),
                                                   queue);
        controller.add_queue(*queue, std::move(interceptor));
        return HSA_STATUS_SUCCESS;
    }

    LOG(FATAL) << "Could not find agent - " << agent.handle;
    return HSA_STATUS_ERROR_FATAL;
}
// Queue-destruction hook stored in the core API table's hsa_queue_destroy_fn.
// Removes the interceptor for `hsa_queue` from the controller.
hsa_status_t
destroy_queue(hsa_queue_t* hsa_queue)
{
    auto& controller = get_queue_controller();
    controller.destory_queue(hsa_queue);
    return HSA_STATUS_SUCCESS;
}
} // namespace
// Takes ownership of `queue`, stores it under `id`, and registers on it every
// cached callback whose agent matches the queue's agent.
void
QueueController::add_queue(hsa_queue_t* id, std::unique_ptr<Queue> queue)
{
    CHECK(queue);
    // Lock order: callback cache first, then the queue map.
    _callback_cache.wlock([&](auto& callbacks) {
        _queues.wlock([&](auto& queues) {
            // read the agent handle before ownership moves into the map
            const auto agent_id = queue->get_agent().agent_t().id.handle;

            auto& slot = queues[id];
            slot       = std::move(queue);

            for(const auto& cb_entry : callbacks)
            {
                const auto& [agent, qcb, ccb] = cb_entry.second;
                if(agent.id.handle != agent_id) continue;
                slot->register_callback(cb_entry.first, qcb, ccb);
            }
        });
    });
}
// Drops the interceptor registered for `id` (if any) from the queue map.
void
QueueController::destory_queue(hsa_queue_t* id)
{
    _queues.wlock([id](auto& queues) { queues.erase(id); });
}
// Caches the callback pair for `agent` under a freshly issued client id and
// registers it on every existing queue that belongs to that agent. Returns
// the issued id.
ClientID
QueueController::add_callback(const rocprofiler_agent_t& agent,
                              Queue::QueueCB             qcb,
                              Queue::CompletedCB         ccb)
{
    static std::atomic<ClientID> client_id = 1;
    ClientID                     return_id;

    // Lock order: callback cache first, then the queue map.
    _callback_cache.wlock([&](auto& cb_cache) {
        return_id           = client_id;
        cb_cache[return_id] = std::tuple(agent, qcb, ccb);
        ++client_id;

        _queues.wlock([&](auto& queues) {
            for(auto& entry : queues)
            {
                auto& queue = entry.second;
                if(queue->get_agent().agent_t().id.handle != agent.id.handle) continue;
                queue->register_callback(return_id, qcb, ccb);
            }
        });
    });
    return return_id;
}
// Forgets the cached callback for `id` and removes it from every queue.
void
QueueController::remove_callback(ClientID id)
{
    // Lock order: callback cache first, then the queue map.
    _callback_cache.wlock([&](auto& cb_cache) {
        cb_cache.erase(id);
        _queues.wlock([&](auto& queues) {
            for(auto& entry : queues)
            {
                entry.second->remove_callback(id);
            }
        });
    });
}
// Saves copies of the incoming API tables, replaces the queue create/destroy
// entries of `core_table` with the interceptors, and builds the map of
// supported (GPU) agents.
void
QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table)
{
    // Copy the tables before the create/destroy entries are overwritten below.
    _core_table = core_table;
    _ext_table  = ext_table;

    core_table.hsa_queue_create_fn  = create_queue;
    core_table.hsa_queue_destroy_fn = destroy_queue;

    // Generate supported agents
    const auto status = rocprofiler_query_available_agents(
        [](const rocprofiler_agent_t** agents, size_t num_agents, void* user_data) {
            CHECK(user_data);
            // user_data is the `this` pointer passed below
            auto& controller = *static_cast<QueueController*>(user_data);
            for(size_t i = 0; i < num_agents; i++)
            {
                const auto& agent = *agents[i];
                if(agent.type != ROCPROFILER_AGENT_TYPE_GPU) continue;
                try
                {
                    controller.get_supported_agents().emplace(
                        i,
                        AgentCache{
                            agent, i, controller.get_core_table(), controller.get_ext_table()});
                } catch(const std::runtime_error& error)
                {
                    LOG(ERROR) << fmt::format("GPU Agent Construction Failed (HSA queue will not "
                                              "be intercepted): {} ({})",
                                              agent.id.handle,
                                              error.what());
                }
            }
            return ROCPROFILER_STATUS_SUCCESS;
        },
        sizeof(rocprofiler_agent_t),
        this);

    // Without the agent list no queue can be matched to an agent, so make the
    // failure visible instead of silently dropping the status.
    LOG_IF(ERROR, status != ROCPROFILER_STATUS_SUCCESS)
        << "rocprofiler_query_available_agents failed (status " << static_cast<int>(status)
        << "); HSA queues will not be intercepted";
}
// Returns the process-wide controller, constructed on first use.
QueueController&
get_queue_controller()
{
    static QueueController instance;
    return instance;
}
// Initializes the global controller with the core and AMD extension tables
// of `table`.
void
queue_controller_init(HsaApiTable* table)
{
    auto& controller = get_queue_controller();
    controller.init(*table->core_, *table->amd_ext_);
}
} // namespace hsa
} // namespace rocprofiler
+81
Просмотреть файл
@@ -0,0 +1,81 @@
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <rocprofiler/rocprofiler.h>
#include "lib/rocprofiler/hsa/queue.hpp"
#include <cstdint>
#include <unordered_map>
#include <vector>
namespace rocprofiler
{
namespace hsa
{
// Keeps track of the intercepted HSA queues, the callbacks clients have
// registered per agent, and the agents whose queues can be intercepted.
class QueueController
{
private:
    using agent_callback_tuple_t =
        std::tuple<rocprofiler_agent_t, Queue::QueueCB, Queue::CompletedCB>;
    using queue_map_t       = std::unordered_map<hsa_queue_t*, std::unique_ptr<Queue>>;
    using client_id_map_t   = std::unordered_map<ClientID, agent_callback_tuple_t>;
    using agent_cache_map_t = std::unordered_map<uint32_t, AgentCache>;

public:
    QueueController() = default;

    // Sets up queue interception. Must not be called before HSA has been
    // initialized.
    void init(CoreApiTable& core_table, AmdExtTable& ext_table);

    // Registers a queue that was created by the user program
    void add_queue(hsa_queue_t*, std::unique_ptr<Queue>);
    void destory_queue(hsa_queue_t*);

    // Attaches the callbacks to the queues of the given agent. The returned
    // client id can later be passed to remove_callback().
    ClientID add_callback(const rocprofiler_agent_t&, Queue::QueueCB, Queue::CompletedCB);
    void     remove_callback(ClientID);

    const CoreApiTable& get_core_table() const { return _core_table; }
    const AmdExtTable&  get_ext_table() const { return _ext_table; }

    // HSA agents whose queues can be intercepted
    const agent_cache_map_t& get_supported_agents() const { return _supported_agents; }
    agent_cache_map_t&       get_supported_agents() { return _supported_agents; }

private:
    CoreApiTable                          _core_table       = {};
    AmdExtTable                           _ext_table        = {};
    common::Synchronized<queue_map_t>     _queues           = {};
    common::Synchronized<client_id_map_t> _callback_cache   = {};
    agent_cache_map_t                     _supported_agents = {};
};
// Accessor for the QueueController instance shared by the whole process.
QueueController&
get_queue_controller();
// Initializes the shared QueueController from the core and AMD extension
// tables of `table`. `table` is dereferenced without a null check.
void
queue_controller_init(HsaApiTable* table);
} // namespace hsa
} // namespace rocprofiler
+1
Просмотреть файл
@@ -24,6 +24,7 @@
#include "lib/rocprofiler/context/context.hpp"
#include "lib/rocprofiler/hsa/hsa.hpp"
#include "lib/rocprofiler/hsa/queue.hpp"
#include "lib/rocprofiler/hsa/queue_controller.hpp"
#include "lib/rocprofiler/internal_threading.hpp"
#include <rocprofiler/context.h>