From 0f523fbad1f9c860a94818beeaaa5d979f65304b Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 18 Oct 2023 23:15:56 -0500 Subject: [PATCH] HSA queue file structure reorganization (#136) * Update lib/rocprofiler/hsa/queue.* - split AQLPacket, QueueController into their own headers and impls - added some aliases to queue-related classes to get data on single line - added default initializers for class member variables * Update source/lib/rocprofiler includes after hsa queue reorg --- source/lib/rocprofiler/aql/tests/aql_test.cpp | 1 + source/lib/rocprofiler/counters.cpp | 1 + source/lib/rocprofiler/counters/core.cpp | 1 + source/lib/rocprofiler/hsa/CMakeLists.txt | 4 +- source/lib/rocprofiler/hsa/aql_packet.hpp | 67 +++++ source/lib/rocprofiler/hsa/queue.cpp | 224 ++++------------- source/lib/rocprofiler/hsa/queue.hpp | 232 +++++------------- .../lib/rocprofiler/hsa/queue_controller.cpp | 185 ++++++++++++++ .../lib/rocprofiler/hsa/queue_controller.hpp | 81 ++++++ source/lib/rocprofiler/registration.cpp | 1 + 10 files changed, 455 insertions(+), 342 deletions(-) create mode 100644 source/lib/rocprofiler/hsa/aql_packet.hpp create mode 100644 source/lib/rocprofiler/hsa/queue_controller.cpp create mode 100644 source/lib/rocprofiler/hsa/queue_controller.hpp diff --git a/source/lib/rocprofiler/aql/tests/aql_test.cpp b/source/lib/rocprofiler/aql/tests/aql_test.cpp index bcffd39d90..eccd2d0a5a 100644 --- a/source/lib/rocprofiler/aql/tests/aql_test.cpp +++ b/source/lib/rocprofiler/aql/tests/aql_test.cpp @@ -14,6 +14,7 @@ #include "lib/rocprofiler/counters/metrics.hpp" #include "lib/rocprofiler/hsa/agent_cache.hpp" #include "lib/rocprofiler/hsa/queue.hpp" +#include "lib/rocprofiler/hsa/queue_controller.hpp" namespace rocprofiler { diff --git a/source/lib/rocprofiler/counters.cpp b/source/lib/rocprofiler/counters.cpp index a642072f97..d62b0e9797 100644 --- a/source/lib/rocprofiler/counters.cpp +++ b/source/lib/rocprofiler/counters.cpp @@ -6,6 +6,7 @@ #include 
"lib/rocprofiler/counters/metrics.hpp" #include "lib/rocprofiler/hsa/agent_cache.hpp" #include "lib/rocprofiler/hsa/queue.hpp" +#include "lib/rocprofiler/hsa/queue_controller.hpp" extern "C" { /** diff --git a/source/lib/rocprofiler/counters/core.cpp b/source/lib/rocprofiler/counters/core.cpp index 82b06d6d91..f2176e0013 100644 --- a/source/lib/rocprofiler/counters/core.cpp +++ b/source/lib/rocprofiler/counters/core.cpp @@ -4,6 +4,7 @@ #include "lib/rocprofiler/aql/helpers.hpp" #include "lib/rocprofiler/aql/packet_construct.hpp" #include "lib/rocprofiler/context/context.hpp" +#include "lib/rocprofiler/hsa/queue_controller.hpp" #include "lib/rocprofiler/registration.hpp" #include diff --git a/source/lib/rocprofiler/hsa/CMakeLists.txt b/source/lib/rocprofiler/hsa/CMakeLists.txt index aadef71a80..ae23d01b6b 100644 --- a/source/lib/rocprofiler/hsa/CMakeLists.txt +++ b/source/lib/rocprofiler/hsa/CMakeLists.txt @@ -1,6 +1,6 @@ -set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp queue.cpp agent_cache.cpp) +set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp queue.cpp queue_controller.cpp agent_cache.cpp) set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp queue.hpp - agent_cache.hpp) + queue_controller.hpp agent_cache.hpp aql_packet.hpp) target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_HSA_SOURCES} ${ROCPROFILER_LIB_HSA_HEADERS}) diff --git a/source/lib/rocprofiler/hsa/aql_packet.hpp b/source/lib/rocprofiler/hsa/aql_packet.hpp new file mode 100644 index 0000000000..4548c4eabc --- /dev/null +++ b/source/lib/rocprofiler/hsa/aql_packet.hpp @@ -0,0 +1,67 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include +#include + +namespace rocprofiler +{ +namespace hsa +{ +constexpr hsa_ext_amd_aql_pm4_packet_t null_amd_aql_pm4_packet = { + .header = 0, + .pm4_command = {0}, + .completion_signal = {.handle = 0}}; + +/** + * Struct containing AQL packet information, including start/stop/read + * packets along with allocated buffers + */ +struct AQLPacket +{ + using memory_pool_free_func_t = decltype(::hsa_amd_memory_pool_free)*; + + AQLPacket(memory_pool_free_func_t func); + ~AQLPacket(); + + // Keep move constructors (i.e.
std::move()) + AQLPacket(AQLPacket&& other) = default; + AQLPacket& operator=(AQLPacket&& other) = default; + + // Do not allow copying this class + AQLPacket(const AQLPacket&) = delete; + AQLPacket& operator=(const AQLPacket&) = delete; + + hsa_ven_amd_aqlprofile_profile_t profile = {}; + hsa_ext_amd_aql_pm4_packet_t start = null_amd_aql_pm4_packet; + hsa_ext_amd_aql_pm4_packet_t stop = null_amd_aql_pm4_packet; + hsa_ext_amd_aql_pm4_packet_t read = null_amd_aql_pm4_packet; + bool command_buf_mallocd = false; + bool output_buffer_malloced = false; + memory_pool_free_func_t free_func = nullptr; +}; + +inline AQLPacket::AQLPacket(memory_pool_free_func_t func) +: free_func{func} +{} +} // namespace hsa +} // namespace rocprofiler diff --git a/source/lib/rocprofiler/hsa/queue.cpp b/source/lib/rocprofiler/hsa/queue.cpp index a09b660850..c1fccdbecc 100644 --- a/source/lib/rocprofiler/hsa/queue.cpp +++ b/source/lib/rocprofiler/hsa/queue.cpp @@ -21,6 +21,9 @@ #include "lib/rocprofiler/hsa/queue.hpp" #include +#include +#include +#include namespace rocprofiler { @@ -89,24 +92,6 @@ AddVendorSpecificPacket(const hsa_ext_amd_aql_pm4_packet_t& packet, { transformed_packets.emplace_back(packet).completion_signal = packet_completion_signal; } -} // namespace - -void -Queue::signal_async_handler(const hsa_signal_t& signal, Queue::queue_info_session_t* data) const -{ - hsa_status_t status = _ext_api.hsa_amd_signal_async_handler_fn( - signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandler, static_cast(data)); - LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) - << "Error: hsa_amd_signal_async_handler failed"; -} - -void -Queue::create_signal(uint32_t attribute, hsa_signal_t* signal) const -{ - hsa_status_t status = _ext_api.hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal); - LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) - << "Error: hsa_amd_signal_create failed"; -} template constexpr Integral @@ -258,6 +243,55 
@@ WriteInterceptor(const void* packets, writer(transformed_packets.data(), transformed_packets.size()); } +} // namespace + +AQLPacket::~AQLPacket() +{ + if(!command_buf_mallocd) + { + free_func(profile.command_buffer.ptr); + } + else + { + free(profile.command_buffer.ptr); + } + + if(!output_buffer_malloced) + { + free_func(profile.output_buffer.ptr); + } + else + { + free(profile.output_buffer.ptr); + } +} + +Queue::~Queue() +{ + // Potentially replace with condition variable at some point + // but performance may not matter here. + while(_active_async_packets.load(std::memory_order_relaxed) > 0) + { + std::this_thread::sleep_for(std::chrono::milliseconds{1}); + } +} + +void +Queue::signal_async_handler(const hsa_signal_t& signal, Queue::queue_info_session_t* data) const +{ + hsa_status_t status = _ext_api.hsa_amd_signal_async_handler_fn( + signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandler, static_cast(data)); + LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) + << "Error: hsa_amd_signal_async_handler failed"; +} + +void +Queue::create_signal(uint32_t attribute, hsa_signal_t* signal) const +{ + hsa_status_t status = _ext_api.hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal); + LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) + << "Error: hsa_amd_signal_create failed"; +} Queue::Queue(const AgentCache& agent, uint32_t size, @@ -313,159 +347,5 @@ Queue::remove_callback(ClientID id) if(map.erase(id) == 1) _notifiers--; }); } - -void -QueueController::add_queue(hsa_queue_t* id, std::unique_ptr queue) -{ - CHECK(queue); - _callback_cache.wlock([&](auto& callbacks) { - _queues.wlock([&](auto& map) { - const auto agent_id = queue->get_agent().agent_t().id.handle; - map[id] = std::move(queue); - for(const auto& [cbid, cb_tuple] : callbacks) - { - auto& [agent, qcb, ccb] = cb_tuple; - if(agent.id.handle == agent_id) - { - map[id]->register_callback(cbid, qcb, ccb); - } - } - }); - }); -} - -void 
-QueueController::destory_queue(hsa_queue_t* id) -{ - _queues.wlock([&](auto& map) { map.erase(id); }); -} - -ClientID -QueueController::add_callback(const rocprofiler_agent_t& agent, - Queue::QueueCB qcb, - Queue::CompletedCB ccb) -{ - static std::atomic client_id = 1; - ClientID return_id; - _callback_cache.wlock([&](auto& cb_cache) { - return_id = client_id; - cb_cache[client_id] = std::tuple(agent, qcb, ccb); - client_id++; - _queues.wlock([&](auto& map) { - for(auto& [_, queue] : map) - { - if(queue->get_agent().agent_t().id.handle == agent.id.handle) - { - queue->register_callback(return_id, qcb, ccb); - } - } - }); - }); - return return_id; -} - -void -QueueController::remove_callback(ClientID id) -{ - _callback_cache.wlock([&](auto& cb_cache) { - cb_cache.erase(id); - _queues.wlock([&](auto& map) { - for(auto& [_, queue] : map) - { - queue->remove_callback(id); - } - }); - }); -} - -// HSA Intercept Functions (create_queue/destroy_queue) -hsa_status_t -create_queue(hsa_agent_t agent, - uint32_t size, - hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), - void* data, - uint32_t private_segment_size, - uint32_t group_segment_size, - hsa_queue_t** queue) -{ - for(const auto& [_, agent_info] : get_queue_controller().get_supported_agents()) - { - if(agent_info.get_agent().handle == agent.handle) - { - auto new_queue = std::make_unique(agent_info, - size, - type, - callback, - data, - private_segment_size, - group_segment_size, - get_queue_controller().get_core_table(), - get_queue_controller().get_ext_table(), - queue); - get_queue_controller().add_queue(*queue, std::move(new_queue)); - return HSA_STATUS_SUCCESS; - } - } - LOG(FATAL) << "Could not find agent - " << agent.handle; - return HSA_STATUS_ERROR_FATAL; -} - -hsa_status_t -destroy_queue(hsa_queue_t* hsa_queue) -{ - get_queue_controller().destory_queue(hsa_queue); - return HSA_STATUS_SUCCESS; -} - -void -QueueController::Init(CoreApiTable& core_table, 
AmdExtTable& ext_table) -{ - _core_table = core_table; - _ext_table = ext_table; - - core_table.hsa_queue_create_fn = create_queue; - core_table.hsa_queue_destroy_fn = destroy_queue; - - // Generate supported agents - rocprofiler_query_available_agents( - [](const rocprofiler_agent_t** agents, size_t num_agents, void* user_data) { - CHECK(user_data); - QueueController& queue = *reinterpret_cast(user_data); - for(size_t i = 0; i < num_agents; i++) - { - const auto& agent = *agents[i]; - if(agent.type != ROCPROFILER_AGENT_TYPE_GPU) continue; - try - { - queue.get_supported_agents().emplace( - i, AgentCache{agent, i, queue.get_core_table(), queue.get_ext_table()}); - } catch(std::runtime_error& error) - { - LOG(ERROR) << fmt::format("GPU Agent Construction Failed (HSA queue will not " - "be intercepted): {} ({})", - agent.id.handle, - error.what()); - } - } - return ROCPROFILER_STATUS_SUCCESS; - }, - sizeof(rocprofiler_agent_t), - this); -} - -QueueController& -get_queue_controller() -{ - static QueueController controller; - return controller; -} - -void -queue_controller_init(HsaApiTable* table) -{ - get_queue_controller().Init(*table->core_, *table->amd_ext_); -} - } // namespace hsa } // namespace rocprofiler diff --git a/source/lib/rocprofiler/hsa/queue.hpp b/source/lib/rocprofiler/hsa/queue.hpp index 82a2fbc6b9..1997199410 100644 --- a/source/lib/rocprofiler/hsa/queue.hpp +++ b/source/lib/rocprofiler/hsa/queue.hpp @@ -1,25 +1,37 @@ -/* Copyright (c) 2022 Advanced Micro Devices, Inc. 
- - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. #pragma once +#include + +#include "lib/common/synchronized.hpp" +#include "lib/rocprofiler/hsa/agent_cache.hpp" +#include "lib/rocprofiler/hsa/aql_packet.hpp" + +#include +#include +#include +#include +#include + #include #include #include @@ -28,101 +40,52 @@ #include #include -#include -#include -#include -#include -#include -#include - -#include -#include "lib/common/synchronized.hpp" -#include "lib/rocprofiler/hsa/agent_cache.hpp" - namespace rocprofiler { namespace hsa { -/** - * Struct containing AQL packet information. Including start/stop/read - * packets along with allocated buffers - */ -struct AQLPacket -{ - hsa_ven_amd_aqlprofile_profile_t profile; - hsa_ext_amd_aql_pm4_packet_t start{.header = 0, - .pm4_command = {0}, - .completion_signal = {.handle = 0}}; - hsa_ext_amd_aql_pm4_packet_t stop{.header = 0, - .pm4_command = {0}, - .completion_signal = {.handle = 0}}; - hsa_ext_amd_aql_pm4_packet_t read{.header = 0, - .pm4_command = {0}, - .completion_signal = {.handle = 0}}; - bool command_buf_mallocd{false}; - bool output_buffer_malloced{false}; - std::function free_func; - AQLPacket(std::function func) - : free_func(std::move(func)) - {} - - ~AQLPacket() - { - if(!command_buf_mallocd) - { - free_func(profile.command_buffer.ptr); - } - else - { - free(profile.command_buffer.ptr); - } - - if(!output_buffer_malloced) - { - free_func(profile.output_buffer.ptr); - } - else - { - free(profile.output_buffer.ptr); - } - } - - // Keep move constuctors (i.e. 
std::move()) - AQLPacket(AQLPacket&& other) = default; - AQLPacket& operator=(AQLPacket&& other) = default; - - // Do not allow copying this class - AQLPacket(const AQLPacket&) = delete; - AQLPacket& operator=(const AQLPacket&) = delete; -}; - using ClientID = int64_t; // Interceptor for a single specific queue class Queue { public: + using callback_t = void (*)(hsa_status_t status, hsa_queue_t* source, void* data); + // Function prototype used to notify consumers that a kernel has been + // enqueued. An AQL packet can be returned that will be injected into + // the queue. + using QueueCB = std::function< + std::unique_ptr(const Queue&, ClientID, const hsa_ext_amd_aql_pm4_packet_t&)>; + // Signals the completion of the kernel packet. + using CompletedCB = std::function)>; + using callback_map_t = std::unordered_map>; + // Internal session information that is used by write interceptor // to track state of the intercepted kernel. struct queue_info_session_t { Queue& queue; - std::unique_ptr inst_pkt; - ClientID inst_pkt_id; - hsa_ext_amd_aql_pm4_packet_t kernel_pkt; - hsa_signal_t interrupt_signal; + std::unique_ptr inst_pkt = {}; + ClientID inst_pkt_id = 0; + hsa_ext_amd_aql_pm4_packet_t kernel_pkt = null_amd_aql_pm4_packet; + hsa_signal_t interrupt_signal = {}; }; Queue(const AgentCache& agent, uint32_t size, hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), - void* data, - uint32_t private_segment_size, - uint32_t group_segment_size, - CoreApiTable core_api, - AmdExtTable ext_api, - hsa_queue_t** queue); + callback_t callback, + void* data, + uint32_t private_segment_size, + uint32_t group_segment_size, + CoreApiTable core_api, + AmdExtTable ext_api, + hsa_queue_t** queue); + + ~Queue(); const hsa_queue_t* intercept_queue() const { return _intercept_queue; }; const AgentCache& get_agent() const { return _agent; } @@ -150,25 +113,6 @@ public: void async_started() { _active_async_packets++; } void async_complete() { 
_active_async_packets--; } - ~Queue() - { - // Potentially replace with condition variable at some point - // but performance may not matter here. - while(_active_async_packets > 0) - {} - } - - // Function prototype used to notify consumers that a kernel has been - // enqueued. An AQL packet can be returned that will be injected into - // the queue. - using QueueCB = std::function< - std::unique_ptr(const Queue&, ClientID, const hsa_ext_amd_aql_pm4_packet_t&)>; - // Signals the completion of the kernel packet. - using CompletedCB = std::function)>; - void register_callback(ClientID id, QueueCB enqueue_cb, CompletedCB complete_cb); void remove_callback(ClientID id); @@ -176,62 +120,14 @@ public: const AmdExtTable& ext_api() const { return _ext_api; } private: - std::atomic _active_async_packets{0}; - CoreApiTable _core_api; - AmdExtTable _ext_api; - const AgentCache& _agent; - std::atomic _notifiers; - rocprofiler::common::Synchronized>> - _callbacks; - hsa_queue_t* _intercept_queue; + std::atomic _notifiers = {0}; + std::atomic _active_async_packets = {0}; + CoreApiTable _core_api = {}; + AmdExtTable _ext_api = {}; + const AgentCache& _agent; + rocprofiler::common::Synchronized _callbacks = {}; + hsa_queue_t* _intercept_queue = nullptr; }; -// Tracks and manages HSA queues -class QueueController -{ -public: - QueueController() = default; - // Initializes the QueueInterceptor. This must be delayed until - // HSA has been inited. - void Init(CoreApiTable& core_table, AmdExtTable& ext_table); - // Called to add a queue that was created by the user program - void add_queue(hsa_queue_t*, std::unique_ptr); - void destory_queue(hsa_queue_t*); - - // Add callback to queues associated with the agent. Returns a client - // id that can be used by callers to remove the callback. 
- ClientID add_callback(const rocprofiler_agent_t&, Queue::QueueCB, Queue::CompletedCB); - void remove_callback(ClientID); - - const CoreApiTable& get_core_table() const { return _core_table; } - const AmdExtTable& get_ext_table() const { return _ext_table; } - - // Gets the list of supported HSA agents that can be intercepted - const std::unordered_map& get_supported_agents() const - { - return _supported_agents; - } - - std::unordered_map& get_supported_agents() { return _supported_agents; } - -private: - CoreApiTable _core_table; - AmdExtTable _ext_table; - rocprofiler::common::Synchronized>> - _queues; - rocprofiler::common::Synchronized< - std::unordered_map>> - _callback_cache; - - std::unordered_map _supported_agents; -}; - -QueueController& -get_queue_controller(); - -void -queue_controller_init(HsaApiTable* table); - } // namespace hsa } // namespace rocprofiler diff --git a/source/lib/rocprofiler/hsa/queue_controller.cpp b/source/lib/rocprofiler/hsa/queue_controller.cpp new file mode 100644 index 0000000000..18ea5730a1 --- /dev/null +++ b/source/lib/rocprofiler/hsa/queue_controller.cpp @@ -0,0 +1,185 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "lib/rocprofiler/hsa/queue_controller.hpp" + +#include + +namespace rocprofiler +{ +namespace hsa +{ +namespace +{ +// HSA Intercept Functions (create_queue/destroy_queue) +hsa_status_t +create_queue(hsa_agent_t agent, + uint32_t size, + hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), + void* data, + uint32_t private_segment_size, + uint32_t group_segment_size, + hsa_queue_t** queue) +{ + for(const auto& [_, agent_info] : get_queue_controller().get_supported_agents()) + { + if(agent_info.get_agent().handle == agent.handle) + { + auto new_queue = std::make_unique(agent_info, + size, + type, + callback, + data, + private_segment_size, + group_segment_size, + get_queue_controller().get_core_table(), + get_queue_controller().get_ext_table(), + queue); + get_queue_controller().add_queue(*queue, std::move(new_queue)); + return HSA_STATUS_SUCCESS; + } + } + LOG(FATAL) << "Could not find agent - " << agent.handle; + return HSA_STATUS_ERROR_FATAL; +} + +hsa_status_t +destroy_queue(hsa_queue_t* hsa_queue) +{ + get_queue_controller().destory_queue(hsa_queue); + return HSA_STATUS_SUCCESS; +} +} // namespace + +void +QueueController::add_queue(hsa_queue_t* id, std::unique_ptr queue) +{ + CHECK(queue); + _callback_cache.wlock([&](auto& callbacks) { + _queues.wlock([&](auto& map) { + const auto agent_id = queue->get_agent().agent_t().id.handle; + map[id] = std::move(queue); + for(const auto& [cbid, cb_tuple] : callbacks) + { + auto& 
[agent, qcb, ccb] = cb_tuple; + if(agent.id.handle == agent_id) + { + map[id]->register_callback(cbid, qcb, ccb); + } + } + }); + }); +} + +void +QueueController::destory_queue(hsa_queue_t* id) +{ + _queues.wlock([&](auto& map) { map.erase(id); }); +} + +ClientID +QueueController::add_callback(const rocprofiler_agent_t& agent, + Queue::QueueCB qcb, + Queue::CompletedCB ccb) +{ + static std::atomic client_id = 1; + ClientID return_id; + _callback_cache.wlock([&](auto& cb_cache) { + return_id = client_id; + cb_cache[client_id] = std::tuple(agent, qcb, ccb); + client_id++; + _queues.wlock([&](auto& map) { + for(auto& [_, queue] : map) + { + if(queue->get_agent().agent_t().id.handle == agent.id.handle) + { + queue->register_callback(return_id, qcb, ccb); + } + } + }); + }); + return return_id; +} + +void +QueueController::remove_callback(ClientID id) +{ + _callback_cache.wlock([&](auto& cb_cache) { + cb_cache.erase(id); + _queues.wlock([&](auto& map) { + for(auto& [_, queue] : map) + { + queue->remove_callback(id); + } + }); + }); +} + +void +QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table) +{ + _core_table = core_table; + _ext_table = ext_table; + + core_table.hsa_queue_create_fn = create_queue; + core_table.hsa_queue_destroy_fn = destroy_queue; + + // Generate supported agents + rocprofiler_query_available_agents( + [](const rocprofiler_agent_t** agents, size_t num_agents, void* user_data) { + CHECK(user_data); + QueueController& queue = *reinterpret_cast(user_data); + for(size_t i = 0; i < num_agents; i++) + { + const auto& agent = *agents[i]; + if(agent.type != ROCPROFILER_AGENT_TYPE_GPU) continue; + try + { + queue.get_supported_agents().emplace( + i, AgentCache{agent, i, queue.get_core_table(), queue.get_ext_table()}); + } catch(std::runtime_error& error) + { + LOG(ERROR) << fmt::format("GPU Agent Construction Failed (HSA queue will not " + "be intercepted): {} ({})", + agent.id.handle, + error.what()); + } + } + return 
ROCPROFILER_STATUS_SUCCESS; + }, + sizeof(rocprofiler_agent_t), + this); +} + +QueueController& +get_queue_controller() +{ + static QueueController controller; + return controller; +} + +void +queue_controller_init(HsaApiTable* table) +{ + get_queue_controller().init(*table->core_, *table->amd_ext_); +} +} // namespace hsa +} // namespace rocprofiler diff --git a/source/lib/rocprofiler/hsa/queue_controller.hpp b/source/lib/rocprofiler/hsa/queue_controller.hpp new file mode 100644 index 0000000000..a8aebb2d26 --- /dev/null +++ b/source/lib/rocprofiler/hsa/queue_controller.hpp @@ -0,0 +1,81 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#include + +#include "lib/rocprofiler/hsa/queue.hpp" + +#include +#include +#include + +namespace rocprofiler +{ +namespace hsa +{ +// Tracks and manages HSA queues +class QueueController +{ +public: + QueueController() = default; + // Initializes the QueueInterceptor. This must be delayed until + // HSA has been inited. + void init(CoreApiTable& core_table, AmdExtTable& ext_table); + + // Called to add a queue that was created by the user program + void add_queue(hsa_queue_t*, std::unique_ptr); + void destory_queue(hsa_queue_t*); + + // Add callback to queues associated with the agent. Returns a client + // id that can be used by callers to remove the callback. + ClientID add_callback(const rocprofiler_agent_t&, Queue::QueueCB, Queue::CompletedCB); + void remove_callback(ClientID); + + const CoreApiTable& get_core_table() const { return _core_table; } + const AmdExtTable& get_ext_table() const { return _ext_table; } + + // Gets the list of supported HSA agents that can be intercepted + const auto& get_supported_agents() const { return _supported_agents; } + auto& get_supported_agents() { return _supported_agents; } + +private: + using agent_callback_tuple_t = + std::tuple; + using queue_map_t = std::unordered_map>; + using client_id_map_t = std::unordered_map; + using agent_cache_map_t = std::unordered_map; + + CoreApiTable _core_table = {}; + AmdExtTable _ext_table = {}; + common::Synchronized _queues = {}; + common::Synchronized _callback_cache = {}; + agent_cache_map_t _supported_agents = {}; +}; + +QueueController& +get_queue_controller(); + +void +queue_controller_init(HsaApiTable* table); + +} // namespace hsa +} // namespace rocprofiler diff --git a/source/lib/rocprofiler/registration.cpp b/source/lib/rocprofiler/registration.cpp index 2e926c55d8..28272d90fc 100644 --- a/source/lib/rocprofiler/registration.cpp +++ b/source/lib/rocprofiler/registration.cpp @@ -24,6 +24,7 @@ #include "lib/rocprofiler/context/context.hpp" #include 
"lib/rocprofiler/hsa/hsa.hpp" #include "lib/rocprofiler/hsa/queue.hpp" +#include "lib/rocprofiler/hsa/queue_controller.hpp" #include "lib/rocprofiler/internal_threading.hpp" #include