diff --git a/runtime/hsa-runtime/CMakeLists.txt b/runtime/hsa-runtime/CMakeLists.txt index e035c774f7..e6edc72403 100644 --- a/runtime/hsa-runtime/CMakeLists.txt +++ b/runtime/hsa-runtime/CMakeLists.txt @@ -154,6 +154,8 @@ set ( SRCS core/driver/driver.cpp core/util/small_heap.cpp core/util/timer.cpp core/util/flag.cpp + core/runtime/amd_aie_agent.cpp + core/runtime/amd_aie_aql_queue.cpp core/runtime/amd_blit_kernel.cpp core/runtime/amd_blit_sdma.cpp core/runtime/amd_cpu_agent.cpp diff --git a/runtime/hsa-runtime/core/inc/agent.h b/runtime/hsa-runtime/core/inc/agent.h index d0b675aded..f7396a8844 100644 --- a/runtime/hsa-runtime/core/inc/agent.h +++ b/runtime/hsa-runtime/core/inc/agent.h @@ -107,7 +107,12 @@ class Agent : public Checked<0xF6BC25EB17E6F917> { } // Lightweight RTTI for vendor specific implementations. - enum DeviceType { kAmdGpuDevice = 0, kAmdCpuDevice = 1, kUnknownDevice = 2 }; + enum DeviceType { + kAmdGpuDevice = 0, + kAmdCpuDevice = 1, + kAmdAieDevice = 2, + kUnknownDevice = 3 + }; // @brief Agent class contructor. // diff --git a/runtime/hsa-runtime/core/inc/amd_aie_agent.h b/runtime/hsa-runtime/core/inc/amd_aie_agent.h new file mode 100644 index 0000000000..c3add8a761 --- /dev/null +++ b/runtime/hsa-runtime/core/inc/amd_aie_agent.h @@ -0,0 +1,106 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2022-2023, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. 
+//
+// www.amd.com
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal with the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimers.
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimers in
+// the documentation and/or other materials provided with the distribution.
+// - Neither the names of Advanced Micro Devices, Inc,
+// nor the names of its contributors may be used to endorse or promote
+// products derived from this Software without specific prior written
+// permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS WITH THE SOFTWARE.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+// AMD specific HSA backend.
+
+#ifndef HSA_RUNTIME_CORE_INC_AMD_AIE_AGENT_H_
+#define HSA_RUNTIME_CORE_INC_AMD_AIE_AGENT_H_
+
+#include "core/inc/agent.h"
+#include "core/inc/runtime.h"
+
+namespace rocr {
+namespace AMD {
+
+/// @brief core::Agent implementation for an AMD AI Engine (AIE) device.
+///
+/// The agent constructs its base class with
+/// core::Agent::DeviceType::kAmdAieDevice and creates AieAqlQueue objects from
+/// QueueCreate().
+class AieAgent : public core::Agent {
+public:
+  /// @brief AIE agent constructor.
+  /// @param [in] node Node id.
+  AieAgent(uint32_t node);
+
+  /// @brief AIE agent destructor. Deletes the objects held in regions_.
+  ~AieAgent();
+
+  /// @brief Walk regions_ and hand each region handle to @p callback.
+  /// @param [in] include_peer Not read by the implementation in this change.
+  /// @param [in] callback Function called with a region handle and @p data.
+  /// @param [in] data Opaque pointer forwarded to @p callback.
+  hsa_status_t VisitRegion(bool include_peer,
+                           hsa_status_t (*callback)(hsa_region_t region,
+                                                    void *data),
+                           void *data) const;
+  /// @brief Forwards to VisitRegion() with include_peer set to false.
+  hsa_status_t IterateRegion(hsa_status_t (*callback)(hsa_region_t region,
+                                                      void *data),
+                             void *data) const override;
+
+  /// @brief Cache iteration entry point. The implementation in this change
+  /// calls nothing and returns HSA_STATUS_SUCCESS.
+  // NOTE(review): the last parameter is named `value` here but `data` in the
+  // definition.
+  hsa_status_t IterateCache(hsa_status_t (*callback)(hsa_cache_t cache,
+                                                     void *data),
+                            void *value) const override;
+
+  /// @brief Write the value of @p attribute into the buffer at @p value.
+  /// @return HSA_STATUS_SUCCESS for handled attributes,
+  /// HSA_STATUS_ERROR_INVALID_ARGUMENT otherwise.
+  hsa_status_t GetInfo(hsa_agent_info_t attribute, void *value) const override;
+
+  /// @brief Create an AieAqlQueue for this agent and return it in @p queue.
+  ///
+  /// @p size must be a power of two within [minAqlSize_, maxAqlSize_],
+  /// otherwise HSA_STATUS_ERROR_INVALID_ARGUMENT is returned. The remaining
+  /// input parameters are not read by the implementation in this change.
+  hsa_status_t QueueCreate(size_t size, hsa_queue_type32_t queue_type,
+                           core::HsaEventCallback event_callback, void *data,
+                           uint32_t private_segment_size,
+                           uint32_t group_segment_size,
+                           core::Queue **queue) override;
+
+  /// @brief Always returns nullptr for this agent type.
+  const core::Isa *isa() const override { return nullptr; }
+
+  /// @brief Returns regions_.
+  // NOTE(review): this declaration looks damaged in this copy -- the vector's
+  // element type is missing and "®ions" is presumably "&regions". Restore it
+  // from the original commit.
+  const std::vector ®ions() const override {
+    return regions_;
+  }
+
+private:
+  /// @brief Query the driver to get the region list owned by this agent.
+  /// The definition in this change is an empty stub.
+  void InitRegionList();
+
+  // Regions owned by this agent; deleted in the destructor.
+  // NOTE(review): the element type is missing in this copy.
+  std::vector regions_;
+
+  // Value reported for HSA_AGENT_INFO_PROFILE.
+  const hsa_profile_t profile_ = HSA_PROFILE_BASE;
+  // Value reported for HSA_AGENT_INFO_QUEUES_MAX.
+  static const uint32_t maxQueues_ = 8;
+  // Bounds enforced by QueueCreate() and reported for
+  // HSA_AGENT_INFO_QUEUE_MIN_SIZE / HSA_AGENT_INFO_QUEUE_MAX_SIZE.
+  static const uint32_t minAqlSize_ = 0x40;
+  static const uint32_t maxAqlSize_ = 0x40;
+  // Initialised from the runtime flag max_queues() in the constructor.
+  // NOTE(review): distinct from maxQueues_ above, which is what GetInfo()
+  // reports -- confirm which of the two is meant to be authoritative.
+  uint32_t max_queues_;
+  uintptr_t device_heap_vaddr_ = 0;
+};
+
+} // namespace AMD
+} // namespace rocr
+
+#endif // header guard
diff --git a/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h b/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h
new file mode 100644
index 0000000000..7e59112d5e
--- /dev/null
+++ b/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h
@@ -0,0 +1,138 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// The University of Illinois/NCSA
+// Open Source License (NCSA)
+//
+// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
+//
+// Developed by:
+//
+// AMD Research and AMD HSA Software Development
+//
+// Advanced Micro Devices, Inc.
+// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. 
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef HSA_RUNTIME_CORE_INC_AMD_HW_AQL_AIE_COMMAND_PROCESSOR_H_
+#define HSA_RUNTIME_CORE_INC_AMD_HW_AQL_AIE_COMMAND_PROCESSOR_H_
+
+#include "core/inc/amd_aie_agent.h"
+#include "core/inc/queue.h"
+#include "core/inc/runtime.h"
+#include "core/inc/signal.h"
+#include "core/util/locks.h"
+
+namespace rocr {
+namespace AMD {
+
+/// @brief Encapsulates HW AIE AQL Command Processor functionality. It
+/// provides the interface for things such as doorbells, queue read and
+/// write pointers, and a buffer.
+class AieAqlQueue : public core::Queue, public core::DoorbellSignal {
+public:
+  /// @brief True when @p signal reports this class's RTTI id.
+  static __forceinline bool IsType(core::Signal *signal) {
+    return signal->IsType(&rtti_id_);
+  }
+
+  /// @brief True when @p queue reports this class's RTTI id.
+  static __forceinline bool IsType(core::Queue *queue) {
+    return queue->IsType(&rtti_id_);
+  }
+
+  AieAqlQueue() = delete;
+  /// @brief Construct a queue for @p agent.
+  /// @param [in] agent Agent that owns the queue.
+  /// @param [in] req_size_pkts Requested capacity in AQL packets.
+  /// @param [in] node_id Node id of the AIE device.
+  AieAqlQueue(AieAgent *agent, size_t req_size_pkts, uint32_t node_id);
+  /// @brief Destructor; calls Inactivate().
+  ~AieAqlQueue();
+
+  /// @brief Clear the active flag; always returns HSA_STATUS_SUCCESS.
+  hsa_status_t Inactivate() override;
+  /// @brief Accepted but ignored; always returns HSA_STATUS_SUCCESS.
+  hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) override;
+  /// @brief Deletes this object.
+  void Destroy() override;
+
+  // Read/write index accessors. The definitions operate on
+  // amd_queue_.read_dispatch_id and amd_queue_.write_dispatch_id with the
+  // memory order named in the method.
+  uint64_t LoadReadIndexRelaxed() override;
+  uint64_t LoadReadIndexAcquire() override;
+  uint64_t LoadWriteIndexRelaxed() override;
+  uint64_t LoadWriteIndexAcquire() override;
+  // Storing the read index is not supported on this queue type: both
+  // variants only trip an assertion.
+  void StoreReadIndexRelaxed(uint64_t value) override { assert(false); }
+  void StoreReadIndexRelease(uint64_t value) override { assert(false); }
+  void StoreWriteIndexRelaxed(uint64_t value) override;
+  void StoreWriteIndexRelease(uint64_t value) override;
+  uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) override;
+  uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) override;
+  uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) override;
+  uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) override;
+  uint64_t AddWriteIndexRelaxed(uint64_t value) override;
+  uint64_t AddWriteIndexAcquire(uint64_t value) override;
+  uint64_t AddWriteIndexRelease(uint64_t value) override;
+  uint64_t AddWriteIndexAcqRel(uint64_t value) override;
+
+  // Doorbell signal stores; the definitions write @p value to
+  // signal_.hardware_doorbell_ptr.
+  void StoreRelaxed(hsa_signal_value_t value) override;
+  void StoreRelease(hsa_signal_value_t value) override;
+
+  /// @brief Provide information about the queue.
+  /// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for attributes other than
+  /// HSA_AMD_QUEUE_INFO_AGENT and HSA_AMD_QUEUE_INFO_DOORBELL_ID.
+  hsa_status_t GetInfo(hsa_queue_info_attribute_t attribute,
+                       void *value) override;
+
+  // GPU-specific queue functions are unsupported. The definitions assert and,
+  // where a status is returned, return HSA_STATUS_ERROR.
+  hsa_status_t GetCUMasking(uint32_t num_cu_mask_count,
+                            uint32_t *cu_mask) override;
+  hsa_status_t SetCUMasking(uint32_t num_cu_mask_count,
+                            const uint32_t *cu_mask) override;
+  void ExecutePM4(uint32_t *cmd_data, size_t cmd_size_b,
+                  hsa_fence_scope_t acquireFence = HSA_FENCE_SCOPE_NONE,
+                  hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE,
+                  hsa_signal_t *signal = NULL) override;
+
+  core::SharedQueue *shared_queue_;
+  core::SharedSignal *shared_signal_;
+  /// ID of the queue used in communication with the AMD AIR driver.
+  uint32_t queue_id_;
+  /// ID of the doorbell used in communication with the AMD AIR driver.
+  uint32_t doorbell_id_;
+  /// Pointer to the hardware doorbell for this queue.
+  uint64_t *hardware_doorbell_ptr_;
+  /// ID of AIE device on which this queue has been mapped.
+  uint32_t node_id_;
+  /// Queue size in bytes.
+  uint32_t queue_size_bytes_;
+
+protected:
+  /// @brief RTTI hook: true when @p id is this class's RTTI id.
+  bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id_; }
+
+private:
+  /// @brief Records the queue size in bytes and the node id; throws
+  /// AMD::hsa_exception when the byte size is not a power of two. The
+  /// definition in this change returns nullptr.
+  core::SharedQueue *CreateSharedQueue(AieAgent *agent, size_t req_size_pkts,
+                                       uint32_t node_id);
+  /// @brief The definition in this change returns nullptr.
+  core::SharedSignal *CreateSharedSignal(AieAgent *agent);
+
+  AieAgent *agent_;
+  /// Indicates if queue is active.
+  // NOTE(review): the template argument of std::atomic is missing in this
+  // copy (presumably bool) -- restore it from the original commit.
+  std::atomic active_;
+  // Its address serves as the RTTI id compared in IsType() and _IsA().
+  static int rtti_id_;
+};
+
+} // namespace AMD
+} // namespace rocr
+
+#endif // header guard
diff --git a/runtime/hsa-runtime/core/inc/runtime.h b/runtime/hsa-runtime/core/inc/runtime.h
index c1bc7cdb29..e4fb8f4dee 100644
--- a/runtime/hsa-runtime/core/inc/runtime.h
+++ b/runtime/hsa-runtime/core/inc/runtime.h
@@ -400,6 +400,8 @@ class Runtime {
 
   const std::vector& gpu_agents() { return gpu_agents_; }
 
+  const std::vector &aie_agents() { return aie_agents_; }
+
   const std::vector& disabled_gpu_agents() { return disabled_gpu_agents_; }
 
   const std::vector& gpu_ids() { return gpu_ids_; }
@@ -624,6 +626,9 @@ class Runtime {
   // Agent list containing all compatible GPU agents in the platform.
   std::vector gpu_agents_;
 
+  // Agent list containing all compatible AIE agents in the platform.
+  std::vector aie_agents_;
+
   // Agent list containing incompletely initialized GPU agents not to be used by the process.
   std::vector disabled_gpu_agents_;
 
diff --git a/runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp
new file mode 100644
index 0000000000..6e92211a7e
--- /dev/null
+++ b/runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp
@@ -0,0 +1,189 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// The University of Illinois/NCSA
+// Open Source License (NCSA)
+//
+// Copyright (c) 2022-2023, Advanced Micro Devices, Inc. All rights reserved.
+//
+// Developed by:
+//
+// AMD Research and AMD HSA Software Development
+//
+// Advanced Micro Devices, Inc.
+// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. 
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "core/inc/amd_aie_agent.h"
+
+#include <algorithm>
+#include <cstring>
+#include <string>
+
+#include "core/inc/amd_aie_aql_queue.h"
+
+namespace rocr {
+namespace AMD {
+
+/// @brief Construct the agent for @p node as an AIE device, read the queue
+/// limit from the runtime flags and build the region list.
+AieAgent::AieAgent(uint32_t node)
+    : core::Agent(node, core::Agent::DeviceType::kAmdAieDevice),
+      max_queues_(core::Runtime::runtime_singleton_->flag().max_queues()) {
+  InitRegionList();
+}
+
+/// @brief Delete every region object held in regions_.
+AieAgent::~AieAgent() {
+  std::for_each(regions_.begin(), regions_.end(), DeleteObject());
+  regions_.clear();
+}
+
+/// @brief Call @p callback once per region in regions_.
+///
+/// Iteration stops at the first callback invocation that does not return
+/// HSA_STATUS_SUCCESS, and that status is returned to the caller. This is the
+/// traversal contract of hsa_agent_iterate_regions().
+///
+/// @param [in] include_peer Not read.
+/// @param [in] callback Function called with each region handle and @p data.
+/// @param [in] data Opaque pointer forwarded to @p callback.
+/// @return The first non-success status returned by @p callback, otherwise
+/// HSA_STATUS_SUCCESS.
+hsa_status_t AieAgent::VisitRegion(bool include_peer,
+                                   hsa_status_t (*callback)(hsa_region_t region,
+                                                            void *data),
+                                   void *data) const {
+  // NOTE(review): the template argument of callback_t is missing in this copy.
+  AMD::callback_t call(callback);
+  for (const auto r : regions_) {
+    const hsa_region_t region_handle(core::MemoryRegion::Convert(r));
+    const hsa_status_t status = call(region_handle, data);
+    if (status != HSA_STATUS_SUCCESS) {
+      return status;
+    }
+  }
+  return HSA_STATUS_SUCCESS;
+}
+
+/// @brief Iterate this agent's regions; forwards to VisitRegion() without
+/// peer regions.
+hsa_status_t AieAgent::IterateRegion(
+    hsa_status_t (*callback)(hsa_region_t region, void *data),
+    void *data) const {
+  return VisitRegion(false, callback, data);
+}
+
+/// @brief Cache iteration. No cache is reported: @p callback is never called
+/// and HSA_STATUS_SUCCESS is returned.
+hsa_status_t AieAgent::IterateCache(hsa_status_t (*callback)(hsa_cache_t cache,
+                                                             void *data),
+                                    void *data) const {
+  return HSA_STATUS_SUCCESS;
+}
+
+/// @brief Write the value of @p attribute into the buffer at @p value.
+/// @param [in] attribute Agent attribute being queried.
+/// @param [out] value Caller-provided buffer; it is not checked for null here.
+/// @return HSA_STATUS_SUCCESS for the attributes listed below,
+/// HSA_STATUS_ERROR_INVALID_ARGUMENT for any other attribute.
+//
+// NOTE(review): the target types of the static_cast / reinterpret_cast
+// expressions below are missing in this copy, so the width of each store
+// cannot be checked here. When restoring them, confirm them against the HSA
+// attribute types; HSA_AGENT_INFO_WORKGROUP_MAX_DIM, HSA_AGENT_INFO_GRID_MAX_DIM
+// and HSA_AGENT_INFO_CACHE_SIZE are multi-element values but share a single
+// scalar store with other attributes.
+hsa_status_t AieAgent::GetInfo(hsa_agent_info_t attribute, void *value) const {
+  const size_t attribute_ = static_cast(attribute);
+
+  switch (attribute_) {
+  case HSA_AGENT_INFO_NAME: {
+    std::string name_info_("aie2");
+    std::strcpy(reinterpret_cast(value), name_info_.c_str());
+    break;
+  }
+  case HSA_AGENT_INFO_VENDOR_NAME: {
+    std::string vendor_name_info_("AMD");
+    std::strcpy(reinterpret_cast(value), vendor_name_info_.c_str());
+    break;
+  }
+  case HSA_AGENT_INFO_FEATURE:
+    *((hsa_agent_feature_t *)value) = HSA_AGENT_FEATURE_AGENT_DISPATCH;
+    break;
+  case HSA_AGENT_INFO_MACHINE_MODEL:
+    *reinterpret_cast(value) = HSA_MACHINE_MODEL_LARGE;
+    break;
+  case HSA_AGENT_INFO_PROFILE:
+    *reinterpret_cast(value) = profile_;
+    break;
+  // Kernel-dispatch geometry attributes are all reported as 0.
+  case HSA_AGENT_INFO_WAVEFRONT_SIZE:
+  case HSA_AGENT_INFO_WORKGROUP_MAX_DIM:
+  case HSA_AGENT_INFO_WORKGROUP_MAX_SIZE:
+  case HSA_AGENT_INFO_GRID_MAX_DIM:
+  case HSA_AGENT_INFO_GRID_MAX_SIZE:
+  case HSA_AGENT_INFO_FBARRIER_MAX_SIZE:
+    *reinterpret_cast(value) = 0;
+    break;
+  case HSA_AGENT_INFO_QUEUES_MAX:
+    *reinterpret_cast(value) = maxQueues_;
+    break;
+  case HSA_AGENT_INFO_QUEUE_MIN_SIZE:
+    *reinterpret_cast(value) = minAqlSize_;
+    break;
+  case HSA_AGENT_INFO_QUEUE_MAX_SIZE:
+    *reinterpret_cast(value) = maxAqlSize_;
+    break;
+  case HSA_AGENT_INFO_QUEUE_TYPE:
+    *reinterpret_cast(value) = HSA_QUEUE_TYPE_SINGLE;
+    break;
+  case HSA_AGENT_INFO_NODE:
+    *reinterpret_cast(value) = node_id();
+    break;
+  case HSA_AGENT_INFO_DEVICE:
+    *reinterpret_cast(value) = HSA_DEVICE_TYPE_AIE;
+    break;
+  case HSA_AGENT_INFO_CACHE_SIZE:
+    *reinterpret_cast(value) = 0;
+    break;
+  case HSA_AGENT_INFO_VERSION_MAJOR:
+    *reinterpret_cast(value) = 1;
+    break;
+  case HSA_AGENT_INFO_VERSION_MINOR:
+    *reinterpret_cast(value) = 0;
+    break;
+  // Image extension limits are all reported as 0.
+  case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS:
+  case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS:
+  case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS:
+  case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS:
+  case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS:
+  case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS:
+  case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS:
+  case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS:
+  case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS:
+    *reinterpret_cast(value) = 0;
+    break;
+  case HSA_AMD_AGENT_INFO_PRODUCT_NAME: {
+    std::string product_name_info_("AIE-ML");
+    std::strcpy(reinterpret_cast(value), product_name_info_.c_str());
+    break;
+  }
+  default:
+    // Unhandled attribute: zero the output and report the error.
+    *reinterpret_cast(value) = 0;
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  return HSA_STATUS_SUCCESS;
+}
+
+/// @brief Create an AieAqlQueue on this agent.
+/// @param [in] size Requested capacity in packets; must be a power of two
+/// within [minAqlSize_, maxAqlSize_].
+/// @param [out] queue Receives the new queue on success.
+/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT when @p size is rejected,
+/// otherwise HSA_STATUS_SUCCESS.
+///
+/// queue_type, event_callback, data, private_segment_size and
+/// group_segment_size are not read.
+hsa_status_t AieAgent::QueueCreate(size_t size, hsa_queue_type32_t queue_type,
+                                   core::HsaEventCallback event_callback,
+                                   void *data, uint32_t private_segment_size,
+                                   uint32_t group_segment_size,
+                                   core::Queue **queue) {
+  if (!IsPowerOfTwo(size)) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  if (size < minAqlSize_ || size > maxAqlSize_) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  auto aql_queue(new AieAqlQueue(this, size, node_id()));
+  *queue = aql_queue;
+
+  return HSA_STATUS_SUCCESS;
+}
+
+/// @brief Region discovery; empty stub in this change, so regions_ stays
+/// empty.
+void AieAgent::InitRegionList() {}
+
+} // namespace AMD
+} // namespace rocr
diff --git a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp
new file mode 100644
index 0000000000..166415d072
--- /dev/null
+++ b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp
@@ -0,0 +1,232 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// The University of Illinois/NCSA
+// Open Source License (NCSA)
+//
+// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
+//
+// Developed by:
+//
+// AMD Research and AMD HSA Software Development
+//
+// Advanced Micro Devices, Inc.
+//
+// www.amd.com
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal with the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimers.
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimers in
+// the documentation and/or other materials provided with the distribution.
+// - Neither the names of Advanced Micro Devices, Inc,
+// nor the names of its contributors may be used to endorse or promote
+// products derived from this Software without specific prior written
+// permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS WITH THE SOFTWARE.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "core/inc/amd_aie_aql_queue.h"
+
+// NOTE(review): the header names of the bare #include directives below are
+// missing in this copy; restore them from the original commit.
+#ifdef __linux__
+#include
+#include
+#include
+#include
+#include
+#endif
+
+#ifdef _WIN32
+#include
+#endif
+
+#include
+#include
+#include
+
+#include "core/inc/queue.h"
+#include "core/inc/runtime.h"
+#include "core/inc/signal.h"
+#include "core/util/utils.h"
+
+namespace rocr {
+namespace AMD {
+
+// The address of this variable is the RTTI id of AieAqlQueue.
+int AieAqlQueue::rtti_id_ = 0;
+
+/// @brief Construct an AQL queue bound to an AIE agent.
+///
+/// Every data member receives a defined value in the initializer list: the
+/// body reads hardware_doorbell_ptr_, and the other members are public. The
+/// node id and byte size are recorded the same way CreateSharedQueue()
+/// computes them. No driver queue or doorbell is created here, so the queue
+/// and doorbell ids are 0 and the pointers are null.
+///
+/// @param [in] agent Agent that owns the queue.
+/// @param [in] req_size_pkts Requested capacity in AQL packets.
+/// @param [in] node_id Node id of the AIE device the queue is mapped to.
+//
+// NOTE(review): CreateSharedSignal() returns nullptr and that value is handed
+// to the DoorbellSignal base -- confirm the base class tolerates a null
+// shared signal.
+AieAqlQueue::AieAqlQueue(AieAgent *agent, size_t req_size_pkts,
+                         uint32_t node_id)
+    : Queue(0, 0), DoorbellSignal(CreateSharedSignal(agent)),
+      shared_queue_(nullptr), shared_signal_(nullptr), queue_id_(0),
+      doorbell_id_(0), hardware_doorbell_ptr_(nullptr), node_id_(node_id),
+      queue_size_bytes_(
+          static_cast<uint32_t>(req_size_pkts * sizeof(core::AqlPacket))),
+      agent_(agent), active_(false) {
+  amd_queue_.hsa_queue.doorbell_signal = Signal::Convert(this);
+  // Fixed capacity of 0x40 packets.
+  amd_queue_.hsa_queue.size = 0x40;
+
+  // No hardware doorbell has been mapped yet, so this publishes a null
+  // doorbell pointer.
+  // NOTE(review): the cast's target type is missing in this copy.
+  signal_.hardware_doorbell_ptr =
+      reinterpret_cast(hardware_doorbell_ptr_);
+  signal_.kind = AMD_SIGNAL_KIND_DOORBELL;
+  signal_.queue_ptr = &amd_queue_;
+  active_ = true;
+}
+
+/// @brief Destructor; marks the queue inactive.
+AieAqlQueue::~AieAqlQueue() { Inactivate(); }
+
+/// @brief Clear the active flag.
+/// @return Always HSA_STATUS_SUCCESS.
+hsa_status_t AieAqlQueue::Inactivate() {
+  // The previous value of the flag is not needed.
+  (void)active_.exchange(false, std::memory_order_relaxed);
+  return HSA_STATUS_SUCCESS;
+}
+
+/// @brief Accepts any priority without acting on it.
+/// @return Always HSA_STATUS_SUCCESS.
+hsa_status_t AieAqlQueue::SetPriority(HSA_QUEUE_PRIORITY priority) {
+  return HSA_STATUS_SUCCESS;
+}
+
+/// @brief Deletes this queue object.
+void AieAqlQueue::Destroy() { delete this; }
+
+// Atomic Reads/Writes
+//
+// The accessors below operate on amd_queue_.read_dispatch_id and
+// amd_queue_.write_dispatch_id with the memory order named in each method.
+uint64_t AieAqlQueue::LoadReadIndexRelaxed() {
+  return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_relaxed);
+}
+
+uint64_t AieAqlQueue::LoadReadIndexAcquire() {
+  return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_acquire);
+}
+
+uint64_t AieAqlQueue::LoadWriteIndexRelaxed() {
+  return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_relaxed);
+}
+
+uint64_t AieAqlQueue::LoadWriteIndexAcquire() {
+  return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_acquire);
+}
+
+void AieAqlQueue::StoreWriteIndexRelaxed(uint64_t value) {
+  atomic::Store(&amd_queue_.write_dispatch_id, value,
+                std::memory_order_relaxed);
+}
+
+void AieAqlQueue::StoreWriteIndexRelease(uint64_t value) {
+  atomic::Store(&amd_queue_.write_dispatch_id, value,
+                std::memory_order_release);
+}
+
+uint64_t AieAqlQueue::CasWriteIndexRelaxed(uint64_t expected, uint64_t value) {
+  return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
+                     std::memory_order_relaxed);
+}
+
+uint64_t AieAqlQueue::CasWriteIndexAcquire(uint64_t expected, uint64_t value) {
+  return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
+                     std::memory_order_acquire);
+}
+
+uint64_t AieAqlQueue::CasWriteIndexRelease(uint64_t expected, uint64_t value) {
+  return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
+                     std::memory_order_release);
+}
+
+uint64_t AieAqlQueue::CasWriteIndexAcqRel(uint64_t expected, uint64_t value) {
+  return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
+                     std::memory_order_acq_rel);
+}
+
+uint64_t AieAqlQueue::AddWriteIndexRelaxed(uint64_t value) {
+  return atomic::Add(&amd_queue_.write_dispatch_id, value,
+                     std::memory_order_relaxed);
+}
+
+uint64_t AieAqlQueue::AddWriteIndexAcquire(uint64_t value) {
+  return atomic::Add(&amd_queue_.write_dispatch_id, value,
+                     std::memory_order_acquire);
+}
+
+uint64_t AieAqlQueue::AddWriteIndexRelease(uint64_t value) {
+  return atomic::Add(&amd_queue_.write_dispatch_id, value,
+                     std::memory_order_release);
+}
+
+uint64_t AieAqlQueue::AddWriteIndexAcqRel(uint64_t value) {
+  return atomic::Add(&amd_queue_.write_dispatch_id, value,
+                     std::memory_order_acq_rel);
+}
+
+/// @brief Write @p value to the location in signal_.hardware_doorbell_ptr.
+/// The store itself uses release ordering even in this "relaxed" variant.
+void AieAqlQueue::StoreRelaxed(hsa_signal_value_t value) {
+  atomic::Store(signal_.hardware_doorbell_ptr, uint64_t(value),
+                std::memory_order_release);
+}
+
+/// @brief Issue a release fence, then perform the doorbell store.
+void AieAqlQueue::StoreRelease(hsa_signal_value_t value) {
+  std::atomic_thread_fence(std::memory_order_release);
+  StoreRelaxed(value);
+}
+
+/// @brief Provide information about the queue.
+/// @param [in] attribute HSA_AMD_QUEUE_INFO_AGENT or
+/// HSA_AMD_QUEUE_INFO_DOORBELL_ID.
+/// @param [out] value Caller-provided buffer for the result.
+/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for any other attribute.
+// NOTE(review): the target types of the casts below are missing in this copy.
+hsa_status_t AieAqlQueue::GetInfo(hsa_queue_info_attribute_t attribute,
+                                  void *value) {
+  switch (attribute) {
+  case HSA_AMD_QUEUE_INFO_AGENT:
+    *(reinterpret_cast(value)) = agent_->public_handle();
+    break;
+  case HSA_AMD_QUEUE_INFO_DOORBELL_ID:
+    // Hardware doorbell supports AQL semantics.
+    *(reinterpret_cast(value)) =
+        reinterpret_cast(signal_.hardware_doorbell_ptr);
+    break;
+  default:
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+  return HSA_STATUS_SUCCESS;
+}
+
+/// @brief Record the queue's byte size and node id.
+/// @throws AMD::hsa_exception (HSA_STATUS_ERROR_INVALID_QUEUE_CREATION) when
+/// the byte size is not a power of two.
+/// @return nullptr; no shared queue object is allocated in this change.
+core::SharedQueue *AieAqlQueue::CreateSharedQueue(AieAgent *agent,
+                                                  size_t req_size_pkts,
+                                                  uint32_t node_id) {
+  queue_size_bytes_ = req_size_pkts * sizeof(core::AqlPacket);
+
+  if (!IsPowerOfTwo(queue_size_bytes_)) {
+    throw AMD::hsa_exception(
+        HSA_STATUS_ERROR_INVALID_QUEUE_CREATION,
+        "Requested queue with non-power of two packet capacity.\n");
+  }
+
+  node_id_ = node_id;
+
+  return nullptr;
+}
+
+/// @brief Placeholder; returns nullptr without allocating a shared signal.
+core::SharedSignal *AieAqlQueue::CreateSharedSignal(AieAgent *agent) {
+  return nullptr;
+}
+
+/// @brief Unsupported on AIE queues; asserts and returns HSA_STATUS_ERROR.
+hsa_status_t AieAqlQueue::GetCUMasking(uint32_t num_cu_mask_count,
+                                       uint32_t *cu_mask) {
+  assert(false && "AIE AQL queue does not support CU masking.");
+  return HSA_STATUS_ERROR;
+}
+
+/// @brief Unsupported on AIE queues; asserts and returns HSA_STATUS_ERROR.
+hsa_status_t AieAqlQueue::SetCUMasking(uint32_t num_cu_mask_count,
+                                       const uint32_t *cu_mask) {
+  assert(false && "AIE AQL queue does not support CU masking.");
+  return HSA_STATUS_ERROR;
+}
+
+/// @brief Unsupported on AIE queues; asserts and does nothing else.
+void AieAqlQueue::ExecutePM4(uint32_t *cmd_data, size_t cmd_size_b,
+                             hsa_fence_scope_t acquireFence,
+                             hsa_fence_scope_t releaseFence,
+                             hsa_signal_t *signal) {
+  assert(false && "AIE AQL queue does not support PM4 packets.");
+}
+
+} // namespace AMD
+} // namespace rocr
diff --git a/runtime/hsa-runtime/core/runtime/amd_topology.cpp b/runtime/hsa-runtime/core/runtime/amd_topology.cpp
index dfe15a936e..ee3e461a8e 100644
--- a/runtime/hsa-runtime/core/runtime/amd_topology.cpp
+++ b/runtime/hsa-runtime/core/runtime/amd_topology.cpp
@@ -57,10 +57,11 @@
 #include "hsakmt/hsakmt.h"
 
-#include "core/inc/runtime.h"
+#include "core/inc/amd_aie_agent.h"
 #include "core/inc/amd_cpu_agent.h"
 #include "core/inc/amd_gpu_agent.h"
 #include "core/inc/amd_memory_region.h"
+#include "core/inc/runtime.h"
 #include "core/util/utils.h"
 
 extern r_debug _amdgpu_r_debug;
@@ -170,6 +171,12 @@ GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnac
   return gpu;
 }
 
+// Creates the AIE agent for node 0 and registers it, enabled, with the
+// runtime.
+AieAgent *DiscoverAie() {
+  AieAgent *aie = new AieAgent(0);
+  core::Runtime::runtime_singleton_->RegisterAgent(aie, true);
+  return aie;
+}
+
 void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) {
   // Register connectivity links for this agent to the runtime.
   if (num_link == 0) {
@@ -412,31 +419,41 @@ void BuildTopology() {
 }
 
 bool Load() {
+  bool gpu_found = true;
+  // NOTE(review): nothing in this change sets aie_found to true, so
+  // DiscoverAie() below is not reached yet.
+  bool aie_found = false;
+
   // Open connection to kernel driver.
   if (hsaKmtOpenKFD() != HSAKMT_STATUS_SUCCESS) {
+    gpu_found = false;
+  }
+
+  if (!(gpu_found || aie_found)) {
     return false;
   }
-  MAKE_NAMED_SCOPE_GUARD(kfd, [&]() { hsaKmtCloseKFD(); });
 
-  // Build topology table.
-  BuildTopology();
+  if (gpu_found) {
+    MAKE_NAMED_SCOPE_GUARD(kfd, [&]() { hsaKmtCloseKFD(); });
 
-  // Register runtime and optionally enable the debugger
-  // BuildTopology calls hsaKmtAcquireSystemProperties() causes libhsakmt to cache topology
-  // information. So we need to call hsaKmtRuntimeEnable() after calling BuildTopology() so that
-  // Thunk can re-use it's cached copy instead of re-parsing whole system topology. Otherwise
-  // BuildTopology will cause libhsakmt to destroyed cached copy because it calls
-  // hsaKmtReleaseSystemProperties() at the beginning.
+    // Build topology table.
+    BuildTopology();
 
-  HSAKMT_STATUS err =
-      hsaKmtRuntimeEnable(&_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug());
-  if ((err != HSAKMT_STATUS_SUCCESS) && (err != HSAKMT_STATUS_NOT_SUPPORTED)) return false;
-  HSAuint32 caps_mask;
-  hsaKmtGetRuntimeCapabilities(&caps_mask);
-  core::Runtime::runtime_singleton_->KfdVersion(err != HSAKMT_STATUS_NOT_SUPPORTED,
-      !!(caps_mask & HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK));
+    // Register runtime and optionally enable the debugger.
+    // BuildTopology calls hsaKmtAcquireSystemProperties(), which causes
+    // libhsakmt to cache topology information. hsaKmtRuntimeEnable() must
+    // therefore be called after BuildTopology() so that Thunk can reuse its
+    // cached copy instead of re-parsing the whole system topology. Otherwise
+    // BuildTopology would cause libhsakmt to destroy the cached copy, because
+    // it calls hsaKmtReleaseSystemProperties() at the beginning.
+    HSAKMT_STATUS err = hsaKmtRuntimeEnable(
+        &_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug());
+    if ((err != HSAKMT_STATUS_SUCCESS) && (err != HSAKMT_STATUS_NOT_SUPPORTED))
+      return false;
+    HSAuint32 caps_mask;
+    hsaKmtGetRuntimeCapabilities(&caps_mask);
+    core::Runtime::runtime_singleton_->KfdVersion(
+        err != HSAKMT_STATUS_NOT_SUPPORTED,
+        !!(caps_mask & HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK));
+
+    kfd.Dismiss();
+  }
+
+  if (aie_found) {
+    DiscoverAie();
+  }
 
-  kfd.Dismiss();
   return true;
 }
 
diff --git a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp
index b48c9e4567..cdc0460959 100644
--- a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp
+++ b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp
@@ -49,17 +49,18 @@
 #include
 #include
 
-#include "core/inc/runtime.h"
 #include "core/inc/agent.h"
+#include "core/inc/amd_aie_agent.h"
 #include "core/inc/amd_cpu_agent.h"
 #include "core/inc/amd_gpu_agent.h"
 #include "core/inc/amd_memory_region.h"
-#include "core/inc/signal.h"
 #include "core/inc/default_signal.h"
+#include "core/inc/exceptions.h"
+#include "core/inc/intercept_queue.h"
 #include "core/inc/interrupt_signal.h"
 #include "core/inc/ipc_signal.h"
-#include "core/inc/intercept_queue.h"
-#include "core/inc/exceptions.h"
+#include "core/inc/runtime.h"
+#include "core/inc/signal.h"
namespace rocr { @@ -741,18 +742,29 @@ hsa_status_t hsa_amd_agent_iterate_memory_pools( const core::Agent* agent = core::Agent::Convert(agent_handle); IS_VALID(agent); - if (agent->device_type() == core::Agent::kAmdCpuDevice) { - return reinterpret_cast(agent)->VisitRegion( - false, reinterpret_cast(callback), + switch (agent->device_type()) { + case core::Agent::kAmdCpuDevice: + return reinterpret_cast(agent)->VisitRegion( + false, + reinterpret_cast(callback), data); + case core::Agent::kAmdAieDevice: + return reinterpret_cast(agent)->VisitRegion( + false, + reinterpret_cast(callback), + data); + case core::Agent::kAmdGpuDevice: + return reinterpret_cast(agent)->VisitRegion( + false, + reinterpret_cast(callback), + data); + default: + return HSA_STATUS_ERROR_INVALID_AGENT; } - return reinterpret_cast(agent)->VisitRegion( - false, - reinterpret_cast( - callback), - data); CATCH; } diff --git a/runtime/hsa-runtime/core/runtime/runtime.cpp b/runtime/hsa-runtime/core/runtime/runtime.cpp index 95dc6ed5d8..fef56b2197 100644 --- a/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -182,7 +182,7 @@ void Runtime::RegisterAgent(Agent* agent, bool Enabled) { // Record the agent in the node-to-agent reverse lookup table. agents_by_node_[agent->node_id()].push_back(agent); - // Process agent as a cpu or gpu device. + // Process agent as a CPU, GPU, or AIE device. 
if (agent->device_type() == Agent::DeviceType::kAmdCpuDevice) { cpu_agents_.push_back(agent); @@ -239,6 +239,8 @@ void Runtime::RegisterAgent(Agent* agent, bool Enabled) { } else { disabled_gpu_agents_.push_back(agent); } + } else if (agent->device_type() == Agent::DeviceType::kAmdAieDevice) { + aie_agents_.push_back(agent); } } @@ -256,6 +258,9 @@ void Runtime::DestroyAgents() { std::for_each(cpu_agents_.begin(), cpu_agents_.end(), DeleteObject()); cpu_agents_.clear(); + std::for_each(aie_agents_.begin(), aie_agents_.end(), DeleteObject()); + aie_agents_.clear(); + region_gpu_ = NULL; system_regions_fine_.clear(); @@ -296,7 +301,8 @@ hsa_status_t Runtime::IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent, void* data) { AMD::callback_t call(callback); - std::vector* agent_lists[2] = {&cpu_agents_, &gpu_agents_}; + std::vector *agent_lists[3] = {&cpu_agents_, &gpu_agents_, + &aie_agents_}; for (std::vector* agent_list : agent_lists) { for (size_t i = 0; i < agent_list->size(); ++i) { hsa_agent_t agent = Agent::Convert(agent_list->at(i)); diff --git a/runtime/hsa-runtime/inc/hsa.h b/runtime/hsa-runtime/inc/hsa.h index f44327d7a3..73130e3ddb 100644 --- a/runtime/hsa-runtime/inc/hsa.h +++ b/runtime/hsa-runtime/inc/hsa.h @@ -802,18 +802,22 @@ typedef enum { * @brief Hardware device type. */ typedef enum { - /** - * CPU device. - */ - HSA_DEVICE_TYPE_CPU = 0, - /** - * GPU device. - */ - HSA_DEVICE_TYPE_GPU = 1, - /** - * DSP device. - */ - HSA_DEVICE_TYPE_DSP = 2 + /** + * CPU device. + */ + HSA_DEVICE_TYPE_CPU = 0, + /** + * GPU device. + */ + HSA_DEVICE_TYPE_GPU = 1, + /** + * DSP device. + */ + HSA_DEVICE_TYPE_DSP = 2, + /** + * AI Engine (AIE) device. + */ + HSA_DEVICE_TYPE_AIE = 3 } hsa_device_type_t; /**