From 8ea62f1cea4a4287c40edf1e6cae90e08fada985 Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Mon, 19 Aug 2024 15:40:30 +0000 Subject: [PATCH] rocr/aie: Add initial support for AIE agents This change adds the initial classes for the AIE agent and AIE AQL queue. An AIE agent list is added to the core runtime object. Change-Id: I84b02f52171b80726dfb2c8431582a3ea2986eb3 --- runtime/hsa-runtime/CMakeLists.txt | 2 + runtime/hsa-runtime/core/inc/agent.h | 7 +- runtime/hsa-runtime/core/inc/amd_aie_agent.h | 106 ++++++++ .../hsa-runtime/core/inc/amd_aie_aql_queue.h | 138 +++++++++++ runtime/hsa-runtime/core/inc/runtime.h | 5 + .../core/runtime/amd_aie_agent.cpp | 189 ++++++++++++++ .../core/runtime/amd_aie_aql_queue.cpp | 232 ++++++++++++++++++ .../hsa-runtime/core/runtime/amd_topology.cpp | 53 ++-- .../hsa-runtime/core/runtime/hsa_ext_amd.cpp | 38 ++- runtime/hsa-runtime/core/runtime/runtime.cpp | 10 +- runtime/hsa-runtime/inc/hsa.h | 28 ++- 11 files changed, 762 insertions(+), 46 deletions(-) create mode 100644 runtime/hsa-runtime/core/inc/amd_aie_agent.h create mode 100644 runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h create mode 100644 runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp create mode 100644 runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp diff --git a/runtime/hsa-runtime/CMakeLists.txt b/runtime/hsa-runtime/CMakeLists.txt index e035c774f7..e6edc72403 100644 --- a/runtime/hsa-runtime/CMakeLists.txt +++ b/runtime/hsa-runtime/CMakeLists.txt @@ -154,6 +154,8 @@ set ( SRCS core/driver/driver.cpp core/util/small_heap.cpp core/util/timer.cpp core/util/flag.cpp + core/runtime/amd_aie_agent.cpp + core/runtime/amd_aie_aql_queue.cpp core/runtime/amd_blit_kernel.cpp core/runtime/amd_blit_sdma.cpp core/runtime/amd_cpu_agent.cpp diff --git a/runtime/hsa-runtime/core/inc/agent.h b/runtime/hsa-runtime/core/inc/agent.h index d0b675aded..f7396a8844 100644 --- a/runtime/hsa-runtime/core/inc/agent.h +++ b/runtime/hsa-runtime/core/inc/agent.h @@ -107,7 +107,12 @@ class Agent : public Checked<0xF6BC25EB17E6F917> { } // Lightweight RTTI for vendor specific implementations. - enum DeviceType { kAmdGpuDevice = 0, kAmdCpuDevice = 1, kUnknownDevice = 2 }; + enum DeviceType { + kAmdGpuDevice = 0, + kAmdCpuDevice = 1, + kAmdAieDevice = 2, + kUnknownDevice = 3 + }; // @brief Agent class contructor. // diff --git a/runtime/hsa-runtime/core/inc/amd_aie_agent.h b/runtime/hsa-runtime/core/inc/amd_aie_agent.h new file mode 100644 index 0000000000..c3add8a761 --- /dev/null +++ b/runtime/hsa-runtime/core/inc/amd_aie_agent.h @@ -0,0 +1,106 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2022-2023, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +// AMD specific HSA backend. + +#ifndef HSA_RUNTIME_CORE_INC_AMD_AIE_AGENT_H_ +#define HSA_RUNTIME_CORE_INC_AMD_AIE_AGENT_H_ + +#include "core/inc/agent.h" +#include "core/inc/runtime.h" + +namespace rocr { +namespace AMD { + +class AieAgent : public core::Agent { +public: + /// @brief AIE agent constructor. + /// @param [in] node Node id. + AieAgent(uint32_t node); + + // @brief AIE agent destructor. + ~AieAgent(); + + hsa_status_t VisitRegion(bool include_peer, + hsa_status_t (*callback)(hsa_region_t region, + void *data), + void *data) const; + hsa_status_t IterateRegion(hsa_status_t (*callback)(hsa_region_t region, + void *data), + void *data) const override; + + hsa_status_t IterateCache(hsa_status_t (*callback)(hsa_cache_t cache, + void *data), + void *value) const override; + + hsa_status_t GetInfo(hsa_agent_info_t attribute, void *value) const override; + + hsa_status_t QueueCreate(size_t size, hsa_queue_type32_t queue_type, + core::HsaEventCallback event_callback, void *data, + uint32_t private_segment_size, + uint32_t group_segment_size, + core::Queue **queue) override; + + const core::Isa *isa() const override { return nullptr; } + + const std::vector ®ions() const override { + return regions_; + } + +private: + // @brief Query the driver to get the region list owned by this agent. + void InitRegionList(); + + std::vector regions_; + + const hsa_profile_t profile_ = HSA_PROFILE_BASE; + static const uint32_t maxQueues_ = 8; + static const uint32_t minAqlSize_ = 0x40; + static const uint32_t maxAqlSize_ = 0x40; + uint32_t max_queues_; + uintptr_t device_heap_vaddr_ = 0; +}; + +} // namespace AMD +} // namespace rocr + +#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h b/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h new file mode 100644 index 0000000000..7e59112d5e --- /dev/null +++ b/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h @@ -0,0 +1,138 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HSA_RUNTIME_CORE_INC_AMD_HW_AQL_AIE_COMMAND_PROCESSOR_H_ +#define HSA_RUNTIME_CORE_INC_AMD_HW_AQL_AIE_COMMAND_PROCESSOR_H_ + +#include "core/inc/amd_aie_agent.h" +#include "core/inc/queue.h" +#include "core/inc/runtime.h" +#include "core/inc/signal.h" +#include "core/util/locks.h" + +namespace rocr { +namespace AMD { + +/// @brief Encapsulates HW AIE AQL Command Processor functionality. It +/// provides the interface for things such as doorbells, queue read and +/// write pointers, and a buffer. +class AieAqlQueue : public core::Queue, public core::DoorbellSignal { +public: + static __forceinline bool IsType(core::Signal *signal) { + return signal->IsType(&rtti_id_); + } + + static __forceinline bool IsType(core::Queue *queue) { + return queue->IsType(&rtti_id_); + } + + AieAqlQueue() = delete; + AieAqlQueue(AieAgent *agent, size_t req_size_pkts, uint32_t node_id); + ~AieAqlQueue(); + + hsa_status_t Inactivate() override; + hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) override; + void Destroy() override; + uint64_t LoadReadIndexRelaxed() override; + uint64_t LoadReadIndexAcquire() override; + uint64_t LoadWriteIndexRelaxed() override; + uint64_t LoadWriteIndexAcquire() override; + void StoreReadIndexRelaxed(uint64_t value) override { assert(false); } + void StoreReadIndexRelease(uint64_t value) override { assert(false); } + void StoreWriteIndexRelaxed(uint64_t value) override; + void StoreWriteIndexRelease(uint64_t value) override; + uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) override; + uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) override; + uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) override; + uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) override; + uint64_t AddWriteIndexRelaxed(uint64_t value) override; + uint64_t AddWriteIndexAcquire(uint64_t value) override; + uint64_t AddWriteIndexRelease(uint64_t value) override; + uint64_t AddWriteIndexAcqRel(uint64_t value) override; + void StoreRelaxed(hsa_signal_value_t value) override; + void StoreRelease(hsa_signal_value_t value) override; + + /// @brief Provide information about the queue. + hsa_status_t GetInfo(hsa_queue_info_attribute_t attribute, + void *value) override; + + // GPU-specific queue functions are unsupported. + hsa_status_t GetCUMasking(uint32_t num_cu_mask_count, + uint32_t *cu_mask) override; + hsa_status_t SetCUMasking(uint32_t num_cu_mask_count, + const uint32_t *cu_mask) override; + void ExecutePM4(uint32_t *cmd_data, size_t cmd_size_b, + hsa_fence_scope_t acquireFence = HSA_FENCE_SCOPE_NONE, + hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE, + hsa_signal_t *signal = NULL) override; + + core::SharedQueue *shared_queue_; + core::SharedSignal *shared_signal_; + /// ID of the queue used in communication with the AMD AIR driver. + uint32_t queue_id_; + /// ID of the doorbell used in communication with the AMD AIR driver. + uint32_t doorbell_id_; + /// Pointer to the hardware doorbell for this queue. + uint64_t *hardware_doorbell_ptr_; + /// ID of AIE device on which this queue has been mapped. + uint32_t node_id_; + /// Queue size in bytes. + uint32_t queue_size_bytes_; + +protected: + bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id_; } + +private: + core::SharedQueue *CreateSharedQueue(AieAgent *agent, size_t req_size_pkts, + uint32_t node_id); + core::SharedSignal *CreateSharedSignal(AieAgent *agent); + + AieAgent *agent_; + /// Indicates if queue is active. + std::atomic active_; + static int rtti_id_; +}; + +} // namespace AMD +} // namespace rocr + +#endif // header guard diff --git a/runtime/hsa-runtime/core/inc/runtime.h b/runtime/hsa-runtime/core/inc/runtime.h index c1bc7cdb29..e4fb8f4dee 100644 --- a/runtime/hsa-runtime/core/inc/runtime.h +++ b/runtime/hsa-runtime/core/inc/runtime.h @@ -400,6 +400,8 @@ class Runtime { const std::vector& gpu_agents() { return gpu_agents_; } + const std::vector &aie_agents() { return aie_agents_; } + const std::vector& disabled_gpu_agents() { return disabled_gpu_agents_; } const std::vector& gpu_ids() { return gpu_ids_; } @@ -624,6 +626,9 @@ class Runtime { // Agent list containing all compatible GPU agents in the platform. std::vector gpu_agents_; + // Agent list containing all compatible AIE agents in the platform. + std::vector aie_agents_; + // Agent list containing incompletely initialized GPU agents not to be used by the process. std::vector disabled_gpu_agents_; diff --git a/runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp new file mode 100644 index 0000000000..6e92211a7e --- /dev/null +++ b/runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp @@ -0,0 +1,189 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2022-2023, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#include "core/inc/amd_aie_agent.h" + +#include "core/inc/amd_aie_aql_queue.h" + +namespace rocr { +namespace AMD { + +AieAgent::AieAgent(uint32_t node) + : core::Agent(node, core::Agent::DeviceType::kAmdAieDevice), + max_queues_(core::Runtime::runtime_singleton_->flag().max_queues()) { + InitRegionList(); +} + +AieAgent::~AieAgent() { + std::for_each(regions_.begin(), regions_.end(), DeleteObject()); + regions_.clear(); +} + +hsa_status_t AieAgent::VisitRegion(bool include_peer, + hsa_status_t (*callback)(hsa_region_t region, + void *data), + void *data) const { + AMD::callback_t call(callback); + for (const auto r : regions_) { + hsa_region_t region_handle(core::MemoryRegion::Convert(r)); + call(region_handle, data); + } + return HSA_STATUS_SUCCESS; +} + +hsa_status_t AieAgent::IterateRegion( + hsa_status_t (*callback)(hsa_region_t region, void *data), + void *data) const { + return VisitRegion(false, callback, data); +} + +hsa_status_t AieAgent::IterateCache(hsa_status_t (*callback)(hsa_cache_t cache, + void *data), + void *data) const { + return HSA_STATUS_SUCCESS; +} + +hsa_status_t AieAgent::GetInfo(hsa_agent_info_t attribute, void *value) const { + const size_t attribute_ = static_cast(attribute); + + switch (attribute_) { + case HSA_AGENT_INFO_NAME: { + std::string name_info_("aie2"); + std::strcpy(reinterpret_cast(value), name_info_.c_str()); + break; + } + case HSA_AGENT_INFO_VENDOR_NAME: { + std::string vendor_name_info_("AMD"); + std::strcpy(reinterpret_cast(value), vendor_name_info_.c_str()); + break; + } + case HSA_AGENT_INFO_FEATURE: + *((hsa_agent_feature_t *)value) = HSA_AGENT_FEATURE_AGENT_DISPATCH; + break; + case HSA_AGENT_INFO_MACHINE_MODEL: + *reinterpret_cast(value) = HSA_MACHINE_MODEL_LARGE; + break; + case HSA_AGENT_INFO_PROFILE: + *reinterpret_cast(value) = profile_; + break; + case HSA_AGENT_INFO_WAVEFRONT_SIZE: + case HSA_AGENT_INFO_WORKGROUP_MAX_DIM: + case HSA_AGENT_INFO_WORKGROUP_MAX_SIZE: + case HSA_AGENT_INFO_GRID_MAX_DIM: + case HSA_AGENT_INFO_GRID_MAX_SIZE: + case HSA_AGENT_INFO_FBARRIER_MAX_SIZE: + *reinterpret_cast(value) = 0; + break; + case HSA_AGENT_INFO_QUEUES_MAX: + *reinterpret_cast(value) = maxQueues_; + break; + case HSA_AGENT_INFO_QUEUE_MIN_SIZE: + *reinterpret_cast(value) = minAqlSize_; + break; + case HSA_AGENT_INFO_QUEUE_MAX_SIZE: + *reinterpret_cast(value) = maxAqlSize_; + break; + case HSA_AGENT_INFO_QUEUE_TYPE: + *reinterpret_cast(value) = HSA_QUEUE_TYPE_SINGLE; + break; + case HSA_AGENT_INFO_NODE: + *reinterpret_cast(value) = node_id(); + break; + case HSA_AGENT_INFO_DEVICE: + *reinterpret_cast(value) = HSA_DEVICE_TYPE_AIE; + break; + case HSA_AGENT_INFO_CACHE_SIZE: + *reinterpret_cast(value) = 0; + break; + case HSA_AGENT_INFO_VERSION_MAJOR: + *reinterpret_cast(value) = 1; + break; + case HSA_AGENT_INFO_VERSION_MINOR: + *reinterpret_cast(value) = 0; + break; + case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS: + case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS: + case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS: + case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS: + case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS: + case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS: + case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS: + case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS: + case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS: + *reinterpret_cast(value) = 0; + break; + case HSA_AMD_AGENT_INFO_PRODUCT_NAME: { + std::string product_name_info_("AIE-ML"); + std::strcpy(reinterpret_cast(value), product_name_info_.c_str()); + break; + } + default: + *reinterpret_cast(value) = 0; + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t AieAgent::QueueCreate(size_t size, hsa_queue_type32_t queue_type, + core::HsaEventCallback event_callback, + void *data, uint32_t private_segment_size, + uint32_t group_segment_size, + core::Queue **queue) { + if (!IsPowerOfTwo(size)) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + if (size < minAqlSize_ || size > maxAqlSize_) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + auto aql_queue(new AieAqlQueue(this, size, node_id())); + *queue = aql_queue; + + return HSA_STATUS_SUCCESS; +} + +void AieAgent::InitRegionList() {} + +} // namespace AMD +} // namespace rocr diff --git a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp new file mode 100644 index 0000000000..166415d072 --- /dev/null +++ b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp @@ -0,0 +1,232 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#include "core/inc/amd_aie_aql_queue.h" + +#ifdef __linux__ +#include +#include +#include +#include +#include +#endif + +#ifdef _WIN32 +#include +#endif + +#include +#include +#include + +#include "core/inc/queue.h" +#include "core/inc/runtime.h" +#include "core/inc/signal.h" +#include "core/util/utils.h" + +namespace rocr { +namespace AMD { + +int AieAqlQueue::rtti_id_ = 0; + +AieAqlQueue::AieAqlQueue(AieAgent *agent, size_t req_size_pkts, + uint32_t node_id) + : Queue(0, 0), DoorbellSignal(CreateSharedSignal(agent)), agent_(agent), + active_(false) { + amd_queue_.hsa_queue.doorbell_signal = Signal::Convert(this); + amd_queue_.hsa_queue.size = 0x40; + + signal_.hardware_doorbell_ptr = + reinterpret_cast(hardware_doorbell_ptr_); + signal_.kind = AMD_SIGNAL_KIND_DOORBELL; + signal_.queue_ptr = &amd_queue_; + active_ = true; +} + +AieAqlQueue::~AieAqlQueue() { Inactivate(); } + +hsa_status_t AieAqlQueue::Inactivate() { + bool active(active_.exchange(false, std::memory_order_relaxed)); + return HSA_STATUS_SUCCESS; +} + +hsa_status_t AieAqlQueue::SetPriority(HSA_QUEUE_PRIORITY priority) { + return HSA_STATUS_SUCCESS; +} + +void AieAqlQueue::Destroy() { delete this; } + +// Atomic Reads/Writes +uint64_t AieAqlQueue::LoadReadIndexRelaxed() { + return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_relaxed); +} + +uint64_t AieAqlQueue::LoadReadIndexAcquire() { + return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_acquire); +} + +uint64_t AieAqlQueue::LoadWriteIndexRelaxed() { + return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_relaxed); +} + +uint64_t AieAqlQueue::LoadWriteIndexAcquire() { + return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_acquire); +} + +void AieAqlQueue::StoreWriteIndexRelaxed(uint64_t value) { + atomic::Store(&amd_queue_.write_dispatch_id, value, + std::memory_order_relaxed); +} + +void AieAqlQueue::StoreWriteIndexRelease(uint64_t value) { + atomic::Store(&amd_queue_.write_dispatch_id, value, + std::memory_order_release); +} + +uint64_t AieAqlQueue::CasWriteIndexRelaxed(uint64_t expected, uint64_t value) { + return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, + std::memory_order_relaxed); +} + +uint64_t AieAqlQueue::CasWriteIndexAcquire(uint64_t expected, uint64_t value) { + return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, + std::memory_order_acquire); +} + +uint64_t AieAqlQueue::CasWriteIndexRelease(uint64_t expected, uint64_t value) { + return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, + std::memory_order_release); +} + +uint64_t AieAqlQueue::CasWriteIndexAcqRel(uint64_t expected, uint64_t value) { + return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, + std::memory_order_acq_rel); +} + +uint64_t AieAqlQueue::AddWriteIndexRelaxed(uint64_t value) { + return atomic::Add(&amd_queue_.write_dispatch_id, value, + std::memory_order_relaxed); +} + +uint64_t AieAqlQueue::AddWriteIndexAcquire(uint64_t value) { + return atomic::Add(&amd_queue_.write_dispatch_id, value, + std::memory_order_acquire); +} + +uint64_t AieAqlQueue::AddWriteIndexRelease(uint64_t value) { + return atomic::Add(&amd_queue_.write_dispatch_id, value, + std::memory_order_release); +} + +uint64_t AieAqlQueue::AddWriteIndexAcqRel(uint64_t value) { + return atomic::Add(&amd_queue_.write_dispatch_id, value, + std::memory_order_acq_rel); +} + +void AieAqlQueue::StoreRelaxed(hsa_signal_value_t value) { + atomic::Store(signal_.hardware_doorbell_ptr, uint64_t(value), + std::memory_order_release); +} + +void AieAqlQueue::StoreRelease(hsa_signal_value_t value) { + std::atomic_thread_fence(std::memory_order_release); + StoreRelaxed(value); +} + +hsa_status_t AieAqlQueue::GetInfo(hsa_queue_info_attribute_t attribute, + void *value) { + switch (attribute) { + case HSA_AMD_QUEUE_INFO_AGENT: + *(reinterpret_cast(value)) = agent_->public_handle(); + break; + case HSA_AMD_QUEUE_INFO_DOORBELL_ID: + // Hardware doorbell supports AQL semantics. + *(reinterpret_cast(value)) = + reinterpret_cast(signal_.hardware_doorbell_ptr); + break; + default: + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + return HSA_STATUS_SUCCESS; +} + +core::SharedQueue *AieAqlQueue::CreateSharedQueue(AieAgent *agent, + size_t req_size_pkts, + uint32_t node_id) { + queue_size_bytes_ = req_size_pkts * sizeof(core::AqlPacket); + + if (!IsPowerOfTwo(queue_size_bytes_)) { + throw AMD::hsa_exception( + HSA_STATUS_ERROR_INVALID_QUEUE_CREATION, + "Requested queue with non-power of two packet capacity.\n"); + } + + node_id_ = node_id; + + return nullptr; +} + +core::SharedSignal *AieAqlQueue::CreateSharedSignal(AieAgent *agent) { + return nullptr; +} + +hsa_status_t AieAqlQueue::GetCUMasking(uint32_t num_cu_mask_count, + uint32_t *cu_mask) { + assert(false && "AIE AQL queue does not support CU masking."); + return HSA_STATUS_ERROR; +} + +hsa_status_t AieAqlQueue::SetCUMasking(uint32_t num_cu_mask_count, + const uint32_t *cu_mask) { + assert(false && "AIE AQL queue does not support CU masking."); + return HSA_STATUS_ERROR; +} + +void AieAqlQueue::ExecutePM4(uint32_t *cmd_data, size_t cmd_size_b, + hsa_fence_scope_t acquireFence, + hsa_fence_scope_t releaseFence, + hsa_signal_t *signal) { + assert(false && "AIE AQL queue does not support PM4 packets."); +} + +} // namespace AMD +} // namespace rocr diff --git a/runtime/hsa-runtime/core/runtime/amd_topology.cpp b/runtime/hsa-runtime/core/runtime/amd_topology.cpp index dfe15a936e..ee3e461a8e 100644 --- a/runtime/hsa-runtime/core/runtime/amd_topology.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_topology.cpp @@ -57,10 +57,11 @@ #include "hsakmt/hsakmt.h" -#include "core/inc/runtime.h" +#include "core/inc/amd_aie_agent.h" #include "core/inc/amd_cpu_agent.h" #include "core/inc/amd_gpu_agent.h" #include "core/inc/amd_memory_region.h" +#include "core/inc/runtime.h" #include "core/util/utils.h" extern r_debug _amdgpu_r_debug; @@ -170,6 +171,12 @@ GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnac return gpu; } +AieAgent *DiscoverAie() { + AieAgent *aie = new AieAgent(0); + core::Runtime::runtime_singleton_->RegisterAgent(aie, true); + return aie; +} + void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) { // Register connectivity links for this agent to the runtime. if (num_link == 0) { @@ -412,31 +419,41 @@ void BuildTopology() { } bool Load() { + bool gpu_found = true; + bool aie_found = false; + // Open connection to kernel driver. if (hsaKmtOpenKFD() != HSAKMT_STATUS_SUCCESS) { + gpu_found = false; + } + + if (!(gpu_found || aie_found)) { return false; } - MAKE_NAMED_SCOPE_GUARD(kfd, [&]() { hsaKmtCloseKFD(); }); - // Build topology table. - BuildTopology(); + if (gpu_found) { + MAKE_NAMED_SCOPE_GUARD(kfd, [&]() { hsaKmtCloseKFD(); }); - // Register runtime and optionally enable the debugger - // BuildTopology calls hsaKmtAcquireSystemProperties() causes libhsakmt to cache topology - // information. So we need to call hsaKmtRuntimeEnable() after calling BuildTopology() so that - // Thunk can re-use it's cached copy instead of re-parsing whole system topology. Otherwise - // BuildTopology will cause libhsakmt to destroyed cached copy because it calls - // hsaKmtReleaseSystemProperties() at the beginning. + // Build topology table. + BuildTopology(); - HSAKMT_STATUS err = - hsaKmtRuntimeEnable(&_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug()); - if ((err != HSAKMT_STATUS_SUCCESS) && (err != HSAKMT_STATUS_NOT_SUPPORTED)) return false; - HSAuint32 caps_mask; - hsaKmtGetRuntimeCapabilities(&caps_mask); - core::Runtime::runtime_singleton_->KfdVersion(err != HSAKMT_STATUS_NOT_SUPPORTED, - !!(caps_mask & HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK)); + HSAKMT_STATUS err = hsaKmtRuntimeEnable( + &_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug()); + if ((err != HSAKMT_STATUS_SUCCESS) && (err != HSAKMT_STATUS_NOT_SUPPORTED)) + return false; + HSAuint32 caps_mask; + hsaKmtGetRuntimeCapabilities(&caps_mask); + core::Runtime::runtime_singleton_->KfdVersion( + err != HSAKMT_STATUS_NOT_SUPPORTED, + !!(caps_mask & HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK)); + + kfd.Dismiss(); + } + + if (aie_found) { + DiscoverAie(); + } - kfd.Dismiss(); return true; } diff --git a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index b48c9e4567..cdc0460959 100644 --- a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -49,17 +49,18 @@ #include #include -#include "core/inc/runtime.h" #include "core/inc/agent.h" +#include "core/inc/amd_aie_agent.h" #include "core/inc/amd_cpu_agent.h" #include "core/inc/amd_gpu_agent.h" #include "core/inc/amd_memory_region.h" -#include "core/inc/signal.h" #include "core/inc/default_signal.h" +#include "core/inc/exceptions.h" +#include "core/inc/intercept_queue.h" #include "core/inc/interrupt_signal.h" #include "core/inc/ipc_signal.h" -#include "core/inc/intercept_queue.h" -#include "core/inc/exceptions.h" +#include "core/inc/runtime.h" +#include "core/inc/signal.h" namespace rocr { @@ -741,18 +742,29 @@ hsa_status_t hsa_amd_agent_iterate_memory_pools( const core::Agent* agent = core::Agent::Convert(agent_handle); IS_VALID(agent); - if (agent->device_type() == core::Agent::kAmdCpuDevice) { - return reinterpret_cast(agent)->VisitRegion( - false, reinterpret_cast(callback), + switch (agent->device_type()) { + case core::Agent::kAmdCpuDevice: + return reinterpret_cast(agent)->VisitRegion( + false, + reinterpret_cast(callback), data); + case core::Agent::kAmdAieDevice: + return reinterpret_cast(agent)->VisitRegion( + false, + reinterpret_cast(callback), + data); + case core::Agent::kAmdGpuDevice: + return reinterpret_cast(agent)->VisitRegion( + false, + reinterpret_cast(callback), + data); + default: + return HSA_STATUS_ERROR_INVALID_AGENT; } - return reinterpret_cast(agent)->VisitRegion( - false, - reinterpret_cast( - callback), - data); CATCH; } diff --git a/runtime/hsa-runtime/core/runtime/runtime.cpp b/runtime/hsa-runtime/core/runtime/runtime.cpp index 95dc6ed5d8..fef56b2197 100644 --- a/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -182,7 +182,7 @@ void Runtime::RegisterAgent(Agent* agent, bool Enabled) { // Record the agent in the node-to-agent reverse lookup table. agents_by_node_[agent->node_id()].push_back(agent); - // Process agent as a cpu or gpu device. + // Process agent as a CPU, GPU, or AIE device. if (agent->device_type() == Agent::DeviceType::kAmdCpuDevice) { cpu_agents_.push_back(agent); @@ -239,6 +239,8 @@ void Runtime::RegisterAgent(Agent* agent, bool Enabled) { } else { disabled_gpu_agents_.push_back(agent); } + } else if (agent->device_type() == Agent::DeviceType::kAmdAieDevice) { + aie_agents_.push_back(agent); } } @@ -256,6 +258,9 @@ void Runtime::DestroyAgents() { std::for_each(cpu_agents_.begin(), cpu_agents_.end(), DeleteObject()); cpu_agents_.clear(); + std::for_each(aie_agents_.begin(), aie_agents_.end(), DeleteObject()); + aie_agents_.clear(); + region_gpu_ = NULL; system_regions_fine_.clear(); @@ -296,7 +301,8 @@ hsa_status_t Runtime::IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent, void* data) { AMD::callback_t call(callback); - std::vector* agent_lists[2] = {&cpu_agents_, &gpu_agents_}; + std::vector *agent_lists[3] = {&cpu_agents_, &gpu_agents_, + &aie_agents_}; for (std::vector* agent_list : agent_lists) { for (size_t i = 0; i < agent_list->size(); ++i) { hsa_agent_t agent = Agent::Convert(agent_list->at(i)); diff --git a/runtime/hsa-runtime/inc/hsa.h b/runtime/hsa-runtime/inc/hsa.h index f44327d7a3..73130e3ddb 100644 --- a/runtime/hsa-runtime/inc/hsa.h +++ b/runtime/hsa-runtime/inc/hsa.h @@ -802,18 +802,22 @@ typedef enum { * @brief Hardware device type. */ typedef enum { - /** - * CPU device. - */ - HSA_DEVICE_TYPE_CPU = 0, - /** - * GPU device. - */ - HSA_DEVICE_TYPE_GPU = 1, - /** - * DSP device. - */ - HSA_DEVICE_TYPE_DSP = 2 + /** + * CPU device. + */ + HSA_DEVICE_TYPE_CPU = 0, + /** + * GPU device. + */ + HSA_DEVICE_TYPE_GPU = 1, + /** + * DSP device. + */ + HSA_DEVICE_TYPE_DSP = 2, + /** + * AI Engine (AIE) device. + */ + HSA_DEVICE_TYPE_AIE = 3 } hsa_device_type_t; /**