rocr/aie: Add initial support for AIE agents

This change adds the initial classes for the AIE agent and AIE AQL
queue.

An AIE agent list is added to the core runtime object.

Change-Id: I84b02f52171b80726dfb2c8431582a3ea2986eb3
This commit is contained in:
Tony Gutierrez
2024-08-19 15:40:30 +00:00
parent cb672ebcd1
commit 8ea62f1cea
11 changed files with 762 additions and 46 deletions
+2
View File
@@ -154,6 +154,8 @@ set ( SRCS core/driver/driver.cpp
core/util/small_heap.cpp
core/util/timer.cpp
core/util/flag.cpp
core/runtime/amd_aie_agent.cpp
core/runtime/amd_aie_aql_queue.cpp
core/runtime/amd_blit_kernel.cpp
core/runtime/amd_blit_sdma.cpp
core/runtime/amd_cpu_agent.cpp
+6 -1
View File
@@ -107,7 +107,12 @@ class Agent : public Checked<0xF6BC25EB17E6F917> {
}
// Lightweight RTTI for vendor specific implementations.
enum DeviceType { kAmdGpuDevice = 0, kAmdCpuDevice = 1, kUnknownDevice = 2 };
enum DeviceType {
kAmdGpuDevice = 0,
kAmdCpuDevice = 1,
kAmdAieDevice = 2,
kUnknownDevice = 3
};
// @brief Agent class constructor.
//
@@ -0,0 +1,106 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022-2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
// AMD specific HSA backend.
#ifndef HSA_RUNTIME_CORE_INC_AMD_AIE_AGENT_H_
#define HSA_RUNTIME_CORE_INC_AMD_AIE_AGENT_H_
#include "core/inc/agent.h"
#include "core/inc/runtime.h"
namespace rocr {
namespace AMD {
/// @brief core::Agent implementation for an AMD AI Engine (AIE) device.
///
/// The agent owns the memory regions stored in its region list and creates
/// AIE AQL queues on request.
class AieAgent : public core::Agent {
 public:
  /// @brief AIE agent constructor.
  /// @param [in] node Node id.
  explicit AieAgent(uint32_t node);

  /// @brief AIE agent destructor.
  ~AieAgent();

  /// @brief Invoke @p callback on the memory regions owned by this agent.
  /// @param [in] include_peer Whether regions of peer agents should be
  /// visited as well.
  /// @param [in] callback Function called with each region handle.
  /// @param [in] data Opaque pointer forwarded to @p callback.
  hsa_status_t VisitRegion(bool include_peer,
                           hsa_status_t (*callback)(hsa_region_t region,
                                                    void *data),
                           void *data) const;

  /// @brief Iterate over the regions owned by this agent.
  /// @param [in] callback Function called with each region handle.
  /// @param [in] data Opaque pointer forwarded to @p callback.
  hsa_status_t IterateRegion(hsa_status_t (*callback)(hsa_region_t region,
                                                      void *data),
                             void *data) const override;

  /// @brief Iterate over the caches of this agent.
  /// @param [in] callback Function called with each cache handle.
  /// @param [in] data Opaque pointer forwarded to @p callback.
  hsa_status_t IterateCache(hsa_status_t (*callback)(hsa_cache_t cache,
                                                     void *data),
                            void *data) const override;

  /// @brief Query an attribute of this agent.
  /// @param [in] attribute Attribute to query.
  /// @param [out] value Caller-provided buffer receiving the attribute.
  hsa_status_t GetInfo(hsa_agent_info_t attribute, void *value) const override;

  /// @brief Create an AQL queue on this agent.
  /// @param [in] size Requested queue size in packets.
  /// @param [out] queue Receives the newly created queue.
  hsa_status_t QueueCreate(size_t size, hsa_queue_type32_t queue_type,
                           core::HsaEventCallback event_callback, void *data,
                           uint32_t private_segment_size,
                           uint32_t group_segment_size,
                           core::Queue **queue) override;

  /// @brief The AIE agent does not expose an ISA object.
  const core::Isa *isa() const override { return nullptr; }

  /// @brief Memory regions owned by this agent.
  const std::vector<const core::MemoryRegion *> &regions() const override {
    return regions_;
  }

 private:
  /// @brief Query the driver to get the region list owned by this agent.
  void InitRegionList();

  /// Regions owned (and eventually deleted) by this agent.
  std::vector<const core::MemoryRegion *> regions_;

  /// Profile reported for HSA_AGENT_INFO_PROFILE.
  const hsa_profile_t profile_ = HSA_PROFILE_BASE;

  /// Value reported for HSA_AGENT_INFO_QUEUES_MAX.
  static const uint32_t maxQueues_ = 8;
  /// Smallest supported AQL queue size, in packets.
  static const uint32_t minAqlSize_ = 0x40;
  /// Largest supported AQL queue size, in packets.
  static const uint32_t maxAqlSize_ = 0x40;

  /// Queue limit taken from the runtime flags at construction time.
  /// NOTE(review): distinct from maxQueues_ above; confirm which of the two
  /// is meant to bound queue creation.
  uint32_t max_queues_;

  /// NOTE(review): presumably the virtual address of the device heap; it is
  /// only zero-initialized here - confirm intended use.
  uintptr_t device_heap_vaddr_ = 0;
};
} // namespace AMD
} // namespace rocr
#endif // header guard
@@ -0,0 +1,138 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTIME_CORE_INC_AMD_HW_AQL_AIE_COMMAND_PROCESSOR_H_
#define HSA_RUNTIME_CORE_INC_AMD_HW_AQL_AIE_COMMAND_PROCESSOR_H_
#include "core/inc/amd_aie_agent.h"
#include "core/inc/queue.h"
#include "core/inc/runtime.h"
#include "core/inc/signal.h"
#include "core/util/locks.h"
namespace rocr {
namespace AMD {
/// @brief Encapsulates HW AIE AQL Command Processor functionality. It
/// provides the interface for things such as doorbells, queue read and
/// write pointers, and a buffer.
class AieAqlQueue : public core::Queue, public core::DoorbellSignal {
 public:
  /// @brief Returns true if @p signal is an AieAqlQueue.
  static __forceinline bool IsType(core::Signal *signal) {
    return signal->IsType(&rtti_id_);
  }

  /// @brief Returns true if @p queue is an AieAqlQueue.
  static __forceinline bool IsType(core::Queue *queue) {
    return queue->IsType(&rtti_id_);
  }

  AieAqlQueue() = delete;

  /// @brief Construct a queue for @p agent.
  /// @param [in] agent Agent the queue is created on.
  /// @param [in] req_size_pkts Requested queue size in AQL packets.
  /// @param [in] node_id Node id of the AIE device.
  AieAqlQueue(AieAgent *agent, size_t req_size_pkts, uint32_t node_id);
  ~AieAqlQueue();

  hsa_status_t Inactivate() override;
  hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) override;
  void Destroy() override;

  // Read/write index accessors.
  uint64_t LoadReadIndexRelaxed() override;
  uint64_t LoadReadIndexAcquire() override;
  uint64_t LoadWriteIndexRelaxed() override;
  uint64_t LoadWriteIndexAcquire() override;
  // Storing the read index is not supported on this queue type.
  void StoreReadIndexRelaxed(uint64_t value) override { assert(false); }
  void StoreReadIndexRelease(uint64_t value) override { assert(false); }
  void StoreWriteIndexRelaxed(uint64_t value) override;
  void StoreWriteIndexRelease(uint64_t value) override;
  uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) override;
  uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) override;
  uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) override;
  uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) override;
  uint64_t AddWriteIndexRelaxed(uint64_t value) override;
  uint64_t AddWriteIndexAcquire(uint64_t value) override;
  uint64_t AddWriteIndexRelease(uint64_t value) override;
  uint64_t AddWriteIndexAcqRel(uint64_t value) override;

  // Doorbell signal stores.
  void StoreRelaxed(hsa_signal_value_t value) override;
  void StoreRelease(hsa_signal_value_t value) override;

  /// @brief Provide information about the queue.
  hsa_status_t GetInfo(hsa_queue_info_attribute_t attribute,
                       void *value) override;

  // GPU-specific queue functions are unsupported.
  hsa_status_t GetCUMasking(uint32_t num_cu_mask_count,
                            uint32_t *cu_mask) override;
  hsa_status_t SetCUMasking(uint32_t num_cu_mask_count,
                            const uint32_t *cu_mask) override;
  void ExecutePM4(uint32_t *cmd_data, size_t cmd_size_b,
                  hsa_fence_scope_t acquireFence = HSA_FENCE_SCOPE_NONE,
                  hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE,
                  hsa_signal_t *signal = NULL) override;

  // Every data member below carries a default initializer so that none of
  // them can be read while indeterminate, whichever constructor path runs.
  core::SharedQueue *shared_queue_ = nullptr;
  core::SharedSignal *shared_signal_ = nullptr;
  /// ID of the queue used in communication with the AMD AIR driver.
  uint32_t queue_id_ = 0;
  /// ID of the doorbell used in communication with the AMD AIR driver.
  uint32_t doorbell_id_ = 0;
  /// Pointer to the hardware doorbell for this queue.
  uint64_t *hardware_doorbell_ptr_ = nullptr;
  /// ID of AIE device on which this queue has been mapped.
  uint32_t node_id_ = 0;
  /// Queue size in bytes.
  uint32_t queue_size_bytes_ = 0;

 protected:
  bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id_; }

 private:
  core::SharedQueue *CreateSharedQueue(AieAgent *agent, size_t req_size_pkts,
                                       uint32_t node_id);
  core::SharedSignal *CreateSharedSignal(AieAgent *agent);

  /// Agent this queue was created on.
  AieAgent *agent_ = nullptr;

  /// Indicates if queue is active.
  std::atomic<bool> active_;

  /// Its address serves as the type tag compared by IsType() and _IsA().
  static int rtti_id_;
};
} // namespace AMD
} // namespace rocr
#endif // header guard
+5
View File
@@ -400,6 +400,8 @@ class Runtime {
const std::vector<Agent*>& gpu_agents() { return gpu_agents_; }
const std::vector<Agent *> &aie_agents() { return aie_agents_; }
const std::vector<Agent*>& disabled_gpu_agents() { return disabled_gpu_agents_; }
const std::vector<uint32_t>& gpu_ids() { return gpu_ids_; }
@@ -624,6 +626,9 @@ class Runtime {
// Agent list containing all compatible GPU agents in the platform.
std::vector<Agent*> gpu_agents_;
// Agent list containing all compatible AIE agents in the platform.
std::vector<Agent *> aie_agents_;
// Agent list containing incompletely initialized GPU agents not to be used by the process.
std::vector<Agent*> disabled_gpu_agents_;
@@ -0,0 +1,189 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022-2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "core/inc/amd_aie_agent.h"
#include "core/inc/amd_aie_aql_queue.h"
namespace rocr {
namespace AMD {
/// @brief Build an AIE agent for @p node.
///
/// Registers the agent as an AIE device with the base class, snapshots the
/// queue limit from the runtime flags and populates the region list.
AieAgent::AieAgent(uint32_t node)
    : core::Agent(node, core::Agent::DeviceType::kAmdAieDevice),
      max_queues_(core::Runtime::runtime_singleton_->flag().max_queues())
{
  InitRegionList();
}
/// @brief Release every region held in the region list, then empty the list.
AieAgent::~AieAgent()
{
  DeleteObject region_deleter;
  std::for_each(regions_.begin(), regions_.end(), region_deleter);
  regions_.clear();
}
/// @brief Invoke @p callback once for each region owned by this agent.
///
/// Iteration stops at the first callback invocation that returns a status
/// other than HSA_STATUS_SUCCESS, and that status is returned to the caller.
///
/// @param [in] include_peer Not consulted by this implementation; only the
/// agent's own regions are visited.
/// @param [in] callback Function called with each region handle.
/// @param [in] data Opaque pointer forwarded to @p callback.
/// @return HSA_STATUS_SUCCESS if every callback succeeded (or there were no
/// regions); otherwise the first non-success status returned by @p callback.
hsa_status_t AieAgent::VisitRegion(bool include_peer,
                                   hsa_status_t (*callback)(hsa_region_t region,
                                                            void *data),
                                   void *data) const {
  AMD::callback_t<decltype(callback)> call(callback);

  for (const auto r : regions_) {
    hsa_region_t region_handle(core::MemoryRegion::Convert(r));

    // Propagate the callback's verdict so it can stop the traversal early or
    // report an error instead of having its status silently dropped.
    const hsa_status_t status = call(region_handle, data);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }

  return HSA_STATUS_SUCCESS;
}
/// @brief Iterate over this agent's regions, excluding peers.
///
/// Thin forwarder to VisitRegion() with include_peer set to false.
hsa_status_t AieAgent::IterateRegion(
    hsa_status_t (*callback)(hsa_region_t region, void *data),
    void *data) const
{
  const bool include_peer = false;
  return VisitRegion(include_peer, callback, data);
}
/// @brief Cache iteration for the AIE agent.
///
/// No cache is reported: @p callback is never invoked and the call always
/// succeeds.
hsa_status_t AieAgent::IterateCache(
    hsa_status_t (*callback)(hsa_cache_t cache, void *data),
    void *data) const
{
  const hsa_status_t status = HSA_STATUS_SUCCESS;
  return status;
}
/// @brief Report an attribute of this AIE agent.
///
/// @param [in] attribute Attribute to query; core, image-extension and AMD
/// vendor attribute values are all accepted as case labels below.
/// @param [out] value Caller-provided buffer that receives the attribute.
/// It must be large enough for the type the attribute is defined to have.
/// @return HSA_STATUS_SUCCESS if the attribute is handled;
/// HSA_STATUS_ERROR_INVALID_ARGUMENT otherwise (the first 32 bits of
/// @p value are zeroed in that case).
hsa_status_t AieAgent::GetInfo(hsa_agent_info_t attribute, void *value) const {
  // Switch on an integer rather than the enum so that attribute values from
  // the extension enums can appear as case labels.
  const size_t attribute_ = static_cast<size_t>(attribute);

  switch (attribute_) {
  case HSA_AGENT_INFO_NAME:
    std::strcpy(reinterpret_cast<char *>(value), "aie2");
    break;
  case HSA_AGENT_INFO_VENDOR_NAME:
    std::strcpy(reinterpret_cast<char *>(value), "AMD");
    break;
  case HSA_AGENT_INFO_FEATURE:
    *reinterpret_cast<hsa_agent_feature_t *>(value) =
        HSA_AGENT_FEATURE_AGENT_DISPATCH;
    break;
  case HSA_AGENT_INFO_MACHINE_MODEL:
    *reinterpret_cast<hsa_machine_model_t *>(value) = HSA_MACHINE_MODEL_LARGE;
    break;
  case HSA_AGENT_INFO_PROFILE:
    *reinterpret_cast<hsa_profile_t *>(value) = profile_;
    break;
  case HSA_AGENT_INFO_WORKGROUP_MAX_DIM:
    // Defined as uint16_t[3]; clear all three components, not just the first
    // four bytes.
    std::memset(value, 0, sizeof(uint16_t) * 3);
    break;
  case HSA_AGENT_INFO_GRID_MAX_DIM:
    // Defined as hsa_dim3_t; clear the whole structure.
    std::memset(value, 0, sizeof(hsa_dim3_t));
    break;
  case HSA_AGENT_INFO_WAVEFRONT_SIZE:
  case HSA_AGENT_INFO_WORKGROUP_MAX_SIZE:
  case HSA_AGENT_INFO_GRID_MAX_SIZE:
  case HSA_AGENT_INFO_FBARRIER_MAX_SIZE:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AGENT_INFO_QUEUES_MAX:
    *reinterpret_cast<uint32_t *>(value) = maxQueues_;
    break;
  case HSA_AGENT_INFO_QUEUE_MIN_SIZE:
    *reinterpret_cast<uint32_t *>(value) = minAqlSize_;
    break;
  case HSA_AGENT_INFO_QUEUE_MAX_SIZE:
    *reinterpret_cast<uint32_t *>(value) = maxAqlSize_;
    break;
  case HSA_AGENT_INFO_QUEUE_TYPE:
    *reinterpret_cast<hsa_queue_type32_t *>(value) = HSA_QUEUE_TYPE_SINGLE;
    break;
  case HSA_AGENT_INFO_NODE:
    *reinterpret_cast<uint32_t *>(value) = node_id();
    break;
  case HSA_AGENT_INFO_DEVICE:
    *reinterpret_cast<hsa_device_type_t *>(value) = HSA_DEVICE_TYPE_AIE;
    break;
  case HSA_AGENT_INFO_CACHE_SIZE:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AGENT_INFO_VERSION_MAJOR:
    *reinterpret_cast<uint32_t *>(value) = 1;
    break;
  case HSA_AGENT_INFO_VERSION_MINOR:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  // Images are not supported, so every image limit is reported as zero.
  // NOTE(review): the image extension appears to define these attributes with
  // size_t-based types; confirm that a single 32-bit store is sufficient.
  case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_PRODUCT_NAME:
    std::strcpy(reinterpret_cast<char *>(value), "AIE-ML");
    break;
  default:
    *reinterpret_cast<uint32_t *>(value) = 0;
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return HSA_STATUS_SUCCESS;
}
/// @brief Create an AIE AQL queue on this agent.
///
/// @param [in] size Requested queue size in packets; must be a power of two
/// within [minAqlSize_, maxAqlSize_].
/// @param [out] queue Receives the newly allocated queue on success.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if @p size is rejected,
/// HSA_STATUS_SUCCESS otherwise. The remaining parameters are not consulted.
hsa_status_t AieAgent::QueueCreate(size_t size, hsa_queue_type32_t queue_type,
                                   core::HsaEventCallback event_callback,
                                   void *data, uint32_t private_segment_size,
                                   uint32_t group_segment_size,
                                   core::Queue **queue)
{
  const bool size_accepted =
      IsPowerOfTwo(size) && size >= minAqlSize_ && size <= maxAqlSize_;
  if (!size_accepted) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  *queue = new AieAqlQueue(this, size, node_id());
  return HSA_STATUS_SUCCESS;
}
/// @brief Populate the region list owned by this agent.
///
/// Currently a placeholder: no regions are registered.
void AieAgent::InitRegionList()
{
}
} // namespace AMD
} // namespace rocr
@@ -0,0 +1,232 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "core/inc/amd_aie_aql_queue.h"
#ifdef __linux__
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>
#endif
#ifdef _WIN32
#include <Windows.h>
#endif
#include <stdio.h>
#include <string.h>
#include <thread>
#include "core/inc/queue.h"
#include "core/inc/runtime.h"
#include "core/inc/signal.h"
#include "core/util/utils.h"
namespace rocr {
namespace AMD {
// Definition of the static type tag; IsType() and _IsA() compare against the
// address of this variable, so its value is irrelevant.
int AieAqlQueue::rtti_id_ = 0;
/// @brief Construct an AIE AQL queue bound to @p agent.
///
/// Every data member is given a defined value before it is read. In
/// particular the hardware doorbell pointer is null until a doorbell is
/// mapped, so the doorbell signal never carries an indeterminate address.
///
/// @param [in] agent Agent the queue is created on.
/// @param [in] req_size_pkts Requested size in packets (the queue size is
/// currently fixed at 0x40 packets regardless of this value).
/// @param [in] node_id Node id of the AIE device the queue is mapped on.
AieAqlQueue::AieAqlQueue(AieAgent *agent, size_t req_size_pkts,
                         uint32_t node_id)
    // NOTE(review): CreateSharedSignal() is a member function invoked while
    // the base classes are still being initialized; consider making it static.
    : Queue(0, 0), DoorbellSignal(CreateSharedSignal(agent)),
      shared_queue_(nullptr), shared_signal_(nullptr), queue_id_(0),
      doorbell_id_(0), hardware_doorbell_ptr_(nullptr), node_id_(node_id),
      queue_size_bytes_(0), agent_(agent), active_(false) {
  amd_queue_.hsa_queue.doorbell_signal = Signal::Convert(this);
  amd_queue_.hsa_queue.size = 0x40;

  signal_.hardware_doorbell_ptr =
      reinterpret_cast<volatile uint64_t *>(hardware_doorbell_ptr_);
  signal_.kind = AMD_SIGNAL_KIND_DOORBELL;
  signal_.queue_ptr = &amd_queue_;

  // The queue is only advertised as active once it is fully set up.
  active_ = true;
}
/// @brief Mark the queue inactive on destruction.
AieAqlQueue::~AieAqlQueue()
{
  Inactivate();
}
/// @brief Mark the queue as inactive.
///
/// Calling this on an already inactive queue leaves the flag cleared.
///
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t AieAqlQueue::Inactivate() {
  // The previous state is not needed, so the exchanged value is dropped
  // rather than stored in an unused local.
  active_.exchange(false, std::memory_order_relaxed);
  return HSA_STATUS_SUCCESS;
}
/// @brief Accept a priority request without acting on it.
/// @return Always HSA_STATUS_SUCCESS; @p priority is not consulted.
hsa_status_t AieAqlQueue::SetPriority(HSA_QUEUE_PRIORITY priority)
{
  const hsa_status_t status = HSA_STATUS_SUCCESS;
  return status;
}
/// @brief Destroy the queue; the object deletes itself and must not be used
/// afterwards.
void AieAqlQueue::Destroy()
{
  delete this;
}
// Atomic Reads/Writes
/// @brief Read the queue's read index with relaxed ordering.
uint64_t AieAqlQueue::LoadReadIndexRelaxed()
{
  const std::memory_order order = std::memory_order_relaxed;
  return atomic::Load(&amd_queue_.read_dispatch_id, order);
}

/// @brief Read the queue's read index with acquire ordering.
uint64_t AieAqlQueue::LoadReadIndexAcquire()
{
  const std::memory_order order = std::memory_order_acquire;
  return atomic::Load(&amd_queue_.read_dispatch_id, order);
}

/// @brief Read the queue's write index with relaxed ordering.
uint64_t AieAqlQueue::LoadWriteIndexRelaxed()
{
  const std::memory_order order = std::memory_order_relaxed;
  return atomic::Load(&amd_queue_.write_dispatch_id, order);
}

/// @brief Read the queue's write index with acquire ordering.
uint64_t AieAqlQueue::LoadWriteIndexAcquire()
{
  const std::memory_order order = std::memory_order_acquire;
  return atomic::Load(&amd_queue_.write_dispatch_id, order);
}
/// @brief Overwrite the queue's write index with relaxed ordering.
void AieAqlQueue::StoreWriteIndexRelaxed(uint64_t value)
{
  const std::memory_order order = std::memory_order_relaxed;
  atomic::Store(&amd_queue_.write_dispatch_id, value, order);
}

/// @brief Overwrite the queue's write index with release ordering.
void AieAqlQueue::StoreWriteIndexRelease(uint64_t value)
{
  const std::memory_order order = std::memory_order_release;
  atomic::Store(&amd_queue_.write_dispatch_id, value, order);
}
/// @brief Compare-and-swap on the write index with relaxed ordering.
uint64_t AieAqlQueue::CasWriteIndexRelaxed(uint64_t expected, uint64_t value)
{
  const std::memory_order order = std::memory_order_relaxed;
  return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, order);
}

/// @brief Compare-and-swap on the write index with acquire ordering.
uint64_t AieAqlQueue::CasWriteIndexAcquire(uint64_t expected, uint64_t value)
{
  const std::memory_order order = std::memory_order_acquire;
  return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, order);
}

/// @brief Compare-and-swap on the write index with release ordering.
uint64_t AieAqlQueue::CasWriteIndexRelease(uint64_t expected, uint64_t value)
{
  const std::memory_order order = std::memory_order_release;
  return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, order);
}

/// @brief Compare-and-swap on the write index with acquire-release ordering.
uint64_t AieAqlQueue::CasWriteIndexAcqRel(uint64_t expected, uint64_t value)
{
  const std::memory_order order = std::memory_order_acq_rel;
  return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, order);
}
/// @brief Atomically add @p value to the write index, relaxed ordering.
uint64_t AieAqlQueue::AddWriteIndexRelaxed(uint64_t value)
{
  const std::memory_order order = std::memory_order_relaxed;
  return atomic::Add(&amd_queue_.write_dispatch_id, value, order);
}

/// @brief Atomically add @p value to the write index, acquire ordering.
uint64_t AieAqlQueue::AddWriteIndexAcquire(uint64_t value)
{
  const std::memory_order order = std::memory_order_acquire;
  return atomic::Add(&amd_queue_.write_dispatch_id, value, order);
}

/// @brief Atomically add @p value to the write index, release ordering.
uint64_t AieAqlQueue::AddWriteIndexRelease(uint64_t value)
{
  const std::memory_order order = std::memory_order_release;
  return atomic::Add(&amd_queue_.write_dispatch_id, value, order);
}

/// @brief Atomically add @p value to the write index, acquire-release
/// ordering.
uint64_t AieAqlQueue::AddWriteIndexAcqRel(uint64_t value)
{
  const std::memory_order order = std::memory_order_acq_rel;
  return atomic::Add(&amd_queue_.write_dispatch_id, value, order);
}
// Writes `value` to the location referenced by signal_.hardware_doorbell_ptr.
// NOTE(review): despite the "Relaxed" name, the store below is issued with
// std::memory_order_release - confirm that this is intentional.
// NOTE(review): the constructor sets signal_.hardware_doorbell_ptr from
// hardware_doorbell_ptr_, which nothing in this file assigns - confirm the
// doorbell is mapped before this is called.
void AieAqlQueue::StoreRelaxed(hsa_signal_value_t value) {
atomic::Store(signal_.hardware_doorbell_ptr, uint64_t(value),
std::memory_order_release);
}
// Issues a release fence and then performs the doorbell store through
// StoreRelaxed().
void AieAqlQueue::StoreRelease(hsa_signal_value_t value) {
std::atomic_thread_fence(std::memory_order_release);
StoreRelaxed(value);
}
/// @brief Provide information about the queue.
///
/// @param [in] attribute Attribute to query.
/// @param [out] value Receives the owning agent handle for
/// HSA_AMD_QUEUE_INFO_AGENT, or the doorbell pointer value for
/// HSA_AMD_QUEUE_INFO_DOORBELL_ID.
/// @return HSA_STATUS_SUCCESS for the two attributes above,
/// HSA_STATUS_ERROR_INVALID_ARGUMENT for anything else.
hsa_status_t AieAqlQueue::GetInfo(hsa_queue_info_attribute_t attribute,
                                  void *value)
{
  if (attribute == HSA_AMD_QUEUE_INFO_AGENT) {
    hsa_agent_t *agent_out = reinterpret_cast<hsa_agent_t *>(value);
    *agent_out = agent_->public_handle();
    return HSA_STATUS_SUCCESS;
  }

  if (attribute == HSA_AMD_QUEUE_INFO_DOORBELL_ID) {
    // Hardware doorbell supports AQL semantics.
    uint64_t *doorbell_out = reinterpret_cast<uint64_t *>(value);
    *doorbell_out = reinterpret_cast<uint64_t>(signal_.hardware_doorbell_ptr);
    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
/// @brief Record the queue geometry for a shared queue.
///
/// Stores the queue size in bytes and, when that size is a power of two, the
/// node id. No shared queue object is allocated yet, so nullptr is returned.
///
/// @param [in] agent Not consulted.
/// @param [in] req_size_pkts Requested queue size in AQL packets.
/// @param [in] node_id Node id to record for this queue.
/// @throws AMD::hsa_exception (HSA_STATUS_ERROR_INVALID_QUEUE_CREATION) when
/// the byte size is not a power of two.
core::SharedQueue *AieAqlQueue::CreateSharedQueue(AieAgent *agent,
                                                  size_t req_size_pkts,
                                                  uint32_t node_id)
{
  queue_size_bytes_ = req_size_pkts * sizeof(core::AqlPacket);

  const bool size_is_pow2 = IsPowerOfTwo(queue_size_bytes_);
  if (size_is_pow2) {
    node_id_ = node_id;
    return nullptr;
  }

  throw AMD::hsa_exception(
      HSA_STATUS_ERROR_INVALID_QUEUE_CREATION,
      "Requested queue with non-power of two packet capacity.\n");
}
/// @brief Provide the shared signal backing the doorbell.
///
/// No shared signal is allocated yet: the result is always nullptr and
/// @p agent is not consulted.
core::SharedSignal *AieAqlQueue::CreateSharedSignal(AieAgent *agent)
{
  core::SharedSignal *no_signal = nullptr;
  return no_signal;
}
/// @brief CU masking query; unsupported on AIE queues.
/// @return Always HSA_STATUS_ERROR (and asserts in debug builds).
hsa_status_t AieAqlQueue::GetCUMasking(uint32_t num_cu_mask_count,
                                       uint32_t *cu_mask)
{
  assert(false && "AIE AQL queue does not support CU masking.");
  const hsa_status_t unsupported = HSA_STATUS_ERROR;
  return unsupported;
}

/// @brief CU masking update; unsupported on AIE queues.
/// @return Always HSA_STATUS_ERROR (and asserts in debug builds).
hsa_status_t AieAqlQueue::SetCUMasking(uint32_t num_cu_mask_count,
                                       const uint32_t *cu_mask)
{
  assert(false && "AIE AQL queue does not support CU masking.");
  const hsa_status_t unsupported = HSA_STATUS_ERROR;
  return unsupported;
}
/// @brief PM4 submission; unsupported on AIE queues.
///
/// Asserts in debug builds and otherwise does nothing; none of the arguments
/// are consulted.
void AieAqlQueue::ExecutePM4(uint32_t *cmd_data, size_t cmd_size_b,
                             hsa_fence_scope_t acquireFence,
                             hsa_fence_scope_t releaseFence,
                             hsa_signal_t *signal)
{
  assert(false && "AIE AQL queue does not support PM4 packets.");
}
} // namespace AMD
} // namespace rocr
@@ -57,10 +57,11 @@
#include "hsakmt/hsakmt.h"
#include "core/inc/runtime.h"
#include "core/inc/amd_aie_agent.h"
#include "core/inc/amd_cpu_agent.h"
#include "core/inc/amd_gpu_agent.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/runtime.h"
#include "core/util/utils.h"
extern r_debug _amdgpu_r_debug;
@@ -170,6 +171,12 @@ GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnac
return gpu;
}
// Creates the AIE agent for node 0 and registers it, enabled, with the
// runtime. The returned pointer refers to the agent that was registered.
AieAgent *DiscoverAie()
{
  auto *agent = new AieAgent(0);
  const bool enabled = true;
  core::Runtime::runtime_singleton_->RegisterAgent(agent, enabled);
  return agent;
}
void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) {
// Register connectivity links for this agent to the runtime.
if (num_link == 0) {
@@ -412,31 +419,41 @@ void BuildTopology() {
}
bool Load() {
bool gpu_found = true;
bool aie_found = false;
// Open connection to kernel driver.
if (hsaKmtOpenKFD() != HSAKMT_STATUS_SUCCESS) {
gpu_found = false;
}
if (!(gpu_found || aie_found)) {
return false;
}
MAKE_NAMED_SCOPE_GUARD(kfd, [&]() { hsaKmtCloseKFD(); });
// Build topology table.
BuildTopology();
if (gpu_found) {
MAKE_NAMED_SCOPE_GUARD(kfd, [&]() { hsaKmtCloseKFD(); });
// Register runtime and optionally enable the debugger
// BuildTopology() calls hsaKmtAcquireSystemProperties(), which causes libhsakmt to cache topology
// information. So we need to call hsaKmtRuntimeEnable() after calling BuildTopology() so that
// Thunk can re-use its cached copy instead of re-parsing the whole system topology. Otherwise
// BuildTopology() will cause libhsakmt to destroy the cached copy because it calls
// hsaKmtReleaseSystemProperties() at the beginning.
// Build topology table.
BuildTopology();
HSAKMT_STATUS err =
hsaKmtRuntimeEnable(&_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug());
if ((err != HSAKMT_STATUS_SUCCESS) && (err != HSAKMT_STATUS_NOT_SUPPORTED)) return false;
HSAuint32 caps_mask;
hsaKmtGetRuntimeCapabilities(&caps_mask);
core::Runtime::runtime_singleton_->KfdVersion(err != HSAKMT_STATUS_NOT_SUPPORTED,
!!(caps_mask & HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK));
HSAKMT_STATUS err = hsaKmtRuntimeEnable(
&_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug());
if ((err != HSAKMT_STATUS_SUCCESS) && (err != HSAKMT_STATUS_NOT_SUPPORTED))
return false;
HSAuint32 caps_mask;
hsaKmtGetRuntimeCapabilities(&caps_mask);
core::Runtime::runtime_singleton_->KfdVersion(
err != HSAKMT_STATUS_NOT_SUPPORTED,
!!(caps_mask & HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK));
kfd.Dismiss();
}
if (aie_found) {
DiscoverAie();
}
kfd.Dismiss();
return true;
}
@@ -49,17 +49,18 @@
#include <map>
#include <vector>
#include "core/inc/runtime.h"
#include "core/inc/agent.h"
#include "core/inc/amd_aie_agent.h"
#include "core/inc/amd_cpu_agent.h"
#include "core/inc/amd_gpu_agent.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/signal.h"
#include "core/inc/default_signal.h"
#include "core/inc/exceptions.h"
#include "core/inc/intercept_queue.h"
#include "core/inc/interrupt_signal.h"
#include "core/inc/ipc_signal.h"
#include "core/inc/intercept_queue.h"
#include "core/inc/exceptions.h"
#include "core/inc/runtime.h"
#include "core/inc/signal.h"
namespace rocr {
@@ -741,18 +742,29 @@ hsa_status_t hsa_amd_agent_iterate_memory_pools(
const core::Agent* agent = core::Agent::Convert(agent_handle);
IS_VALID(agent);
if (agent->device_type() == core::Agent::kAmdCpuDevice) {
return reinterpret_cast<const AMD::CpuAgent*>(agent)->VisitRegion(
false, reinterpret_cast<hsa_status_t (*)(hsa_region_t memory_pool,
void* data)>(callback),
switch (agent->device_type()) {
case core::Agent::kAmdCpuDevice:
return reinterpret_cast<const AMD::CpuAgent *>(agent)->VisitRegion(
false,
reinterpret_cast<hsa_status_t (*)(hsa_region_t memory_pool,
void *data)>(callback),
data);
case core::Agent::kAmdAieDevice:
return reinterpret_cast<const AMD::AieAgent *>(agent)->VisitRegion(
false,
reinterpret_cast<hsa_status_t (*)(hsa_region_t memory_pool,
void *data)>(callback),
data);
case core::Agent::kAmdGpuDevice:
return reinterpret_cast<const AMD::GpuAgentInt *>(agent)->VisitRegion(
false,
reinterpret_cast<hsa_status_t (*)(hsa_region_t memory_pool,
void *data)>(callback),
data);
default:
return HSA_STATUS_ERROR_INVALID_AGENT;
}
return reinterpret_cast<const AMD::GpuAgentInt*>(agent)->VisitRegion(
false,
reinterpret_cast<hsa_status_t (*)(hsa_region_t memory_pool, void* data)>(
callback),
data);
CATCH;
}
+8 -2
View File
@@ -182,7 +182,7 @@ void Runtime::RegisterAgent(Agent* agent, bool Enabled) {
// Record the agent in the node-to-agent reverse lookup table.
agents_by_node_[agent->node_id()].push_back(agent);
// Process agent as a cpu or gpu device.
// Process agent as a CPU, GPU, or AIE device.
if (agent->device_type() == Agent::DeviceType::kAmdCpuDevice) {
cpu_agents_.push_back(agent);
@@ -239,6 +239,8 @@ void Runtime::RegisterAgent(Agent* agent, bool Enabled) {
} else {
disabled_gpu_agents_.push_back(agent);
}
} else if (agent->device_type() == Agent::DeviceType::kAmdAieDevice) {
aie_agents_.push_back(agent);
}
}
@@ -256,6 +258,9 @@ void Runtime::DestroyAgents() {
std::for_each(cpu_agents_.begin(), cpu_agents_.end(), DeleteObject());
cpu_agents_.clear();
std::for_each(aie_agents_.begin(), aie_agents_.end(), DeleteObject());
aie_agents_.clear();
region_gpu_ = NULL;
system_regions_fine_.clear();
@@ -296,7 +301,8 @@ hsa_status_t Runtime::IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent,
void* data) {
AMD::callback_t<decltype(callback)> call(callback);
std::vector<core::Agent*>* agent_lists[2] = {&cpu_agents_, &gpu_agents_};
std::vector<core::Agent *> *agent_lists[3] = {&cpu_agents_, &gpu_agents_,
&aie_agents_};
for (std::vector<core::Agent*>* agent_list : agent_lists) {
for (size_t i = 0; i < agent_list->size(); ++i) {
hsa_agent_t agent = Agent::Convert(agent_list->at(i));
+16 -12
View File
@@ -802,18 +802,22 @@ typedef enum {
* @brief Hardware device type.
*/
typedef enum {
/**
* CPU device.
*/
HSA_DEVICE_TYPE_CPU = 0,
/**
* GPU device.
*/
HSA_DEVICE_TYPE_GPU = 1,
/**
* DSP device.
*/
HSA_DEVICE_TYPE_DSP = 2
/**
* CPU device.
*/
HSA_DEVICE_TYPE_CPU = 0,
/**
* GPU device.
*/
HSA_DEVICE_TYPE_GPU = 1,
/**
* DSP device.
*/
HSA_DEVICE_TYPE_DSP = 2,
/**
* AI Engine (AIE) device.
*/
HSA_DEVICE_TYPE_AIE = 3
} hsa_device_type_t;
/**