rocr/aie: Bundling XDNA BOs and addresses, adding cleanup guard in case of error
This commit is contained in:
committed by
Papadopoulos, Yiannis
parent
f4e1c9b0ba
commit
e55503e7f8
@@ -64,8 +64,11 @@ static_assert((sizeof(core::ShareableHandle::handle) >= sizeof(uint32_t)) &&
|
||||
(alignof(core::ShareableHandle::handle) >= alignof(uint32_t)),
|
||||
"ShareableHandle cannot store a XDNA handle");
|
||||
|
||||
// Index where the operand addresses start in a command.
|
||||
constexpr uint32_t operand_starting_index = 5;
|
||||
|
||||
XdnaDriver::XdnaDriver(std::string devnode_name)
|
||||
: core::Driver(core::DriverType::XDNA, devnode_name) {}
|
||||
: core::Driver(core::DriverType::XDNA, std::move(devnode_name)) {}
|
||||
|
||||
hsa_status_t XdnaDriver::DiscoverDriver(std::unique_ptr<core::Driver>& driver) {
|
||||
const int max_minor_num(64);
|
||||
@@ -108,7 +111,6 @@ hsa_status_t XdnaDriver::QueryKernelModeDriver(core::DriverQuery query) {
|
||||
default:
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t XdnaDriver::Open() {
|
||||
@@ -155,13 +157,13 @@ hsa_status_t XdnaDriver::GetAgentProperties(core::Agent &agent) const {
|
||||
return HSA_STATUS_ERROR_INVALID_AGENT;
|
||||
}
|
||||
|
||||
auto &aie_agent(static_cast<AieAgent &>(agent));
|
||||
auto& aie_agent = static_cast<AieAgent&>(agent);
|
||||
|
||||
amdxdna_drm_query_aie_metadata aie_metadata{0};
|
||||
amdxdna_drm_get_info get_info_args{
|
||||
.param = DRM_AMDXDNA_QUERY_AIE_METADATA,
|
||||
.buffer_size = sizeof(aie_metadata),
|
||||
.buffer = reinterpret_cast<uintptr_t>(&aie_metadata)};
|
||||
amdxdna_drm_query_aie_metadata aie_metadata = {};
|
||||
amdxdna_drm_get_info get_info_args = {};
|
||||
get_info_args.param = DRM_AMDXDNA_QUERY_AIE_METADATA;
|
||||
get_info_args.buffer_size = sizeof(aie_metadata);
|
||||
get_info_args.buffer = reinterpret_cast<uintptr_t>(&aie_metadata);
|
||||
|
||||
if (ioctl(fd_, DRM_IOCTL_AMDXDNA_GET_INFO, &get_info_args) < 0) {
|
||||
return HSA_STATUS_ERROR;
|
||||
@@ -190,19 +192,14 @@ hsa_status_t
|
||||
XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
|
||||
core::MemoryRegion::AllocateFlags alloc_flags,
|
||||
void **mem, size_t size, uint32_t node_id) {
|
||||
const MemoryRegion &m_region(static_cast<const MemoryRegion &>(mem_region));
|
||||
|
||||
amdxdna_drm_create_bo create_bo_args{0};
|
||||
create_bo_args.size = size;
|
||||
|
||||
amdxdna_drm_get_bo_info get_bo_info_args{0};
|
||||
drm_gem_close close_bo_args{0};
|
||||
void *mapped_mem(nullptr);
|
||||
const MemoryRegion& m_region = static_cast<const MemoryRegion&>(mem_region);
|
||||
|
||||
if (!m_region.IsSystem()) {
|
||||
return HSA_STATUS_ERROR_INVALID_REGION;
|
||||
}
|
||||
|
||||
amdxdna_drm_create_bo create_bo_args = {};
|
||||
create_bo_args.size = size;
|
||||
const bool use_bo_shmem = !m_region.IsDeviceSVM();
|
||||
if (use_bo_shmem) {
|
||||
create_bo_args.type = AMDXDNA_BO_SHMEM;
|
||||
@@ -214,14 +211,21 @@ XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
get_bo_info_args.handle = create_bo_args.handle;
|
||||
// In case we need to close this BO to avoid leaks due to some error after
|
||||
// creation.
|
||||
close_bo_args.handle = create_bo_args.handle;
|
||||
BOHandle bo_handle;
|
||||
bo_handle.handle = create_bo_args.handle;
|
||||
bo_handle.size = size;
|
||||
|
||||
if (ioctl(fd_, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &get_bo_info_args) < 0) {
|
||||
// Close the BO in the case we can't get info about it.
|
||||
// Close the BO in case of error.
|
||||
MAKE_NAMED_SCOPE_GUARD(bo_guard, [&] {
|
||||
munmap(bo_handle.vaddr, bo_handle.size);
|
||||
drm_gem_close close_bo_args = {};
|
||||
close_bo_args.handle = bo_handle.handle;
|
||||
ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);
|
||||
});
|
||||
|
||||
amdxdna_drm_get_bo_info get_bo_info_args = {};
|
||||
get_bo_info_args.handle = create_bo_args.handle;
|
||||
if (ioctl(fd_, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &get_bo_info_args) < 0) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -229,25 +233,25 @@ XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
|
||||
/// to VA memory addresses. Once we can support the separate VMEM call to
|
||||
/// map handles we can fix this.
|
||||
if (use_bo_shmem) {
|
||||
mapped_mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_,
|
||||
get_bo_info_args.map_offset);
|
||||
if (mapped_mem == MAP_FAILED) {
|
||||
// Close the BO in the case when a mapping fails and we got a BO handle.
|
||||
ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);
|
||||
bo_handle.vaddr =
|
||||
mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, get_bo_info_args.map_offset);
|
||||
if (bo_handle.vaddr == MAP_FAILED) {
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
} else {
|
||||
mapped_mem = reinterpret_cast<void *>(get_bo_info_args.vaddr);
|
||||
bo_handle.vaddr = reinterpret_cast<void*>(get_bo_info_args.vaddr);
|
||||
}
|
||||
|
||||
if (alloc_flags & core::MemoryRegion::AllocateMemoryOnly) {
|
||||
*mem = reinterpret_cast<void *>(create_bo_args.handle);
|
||||
} else {
|
||||
*mem = mapped_mem;
|
||||
*mem = bo_handle.vaddr;
|
||||
}
|
||||
|
||||
vmem_handle_mappings.emplace(create_bo_args.handle, mapped_mem);
|
||||
vmem_addr_mappings.emplace(mapped_mem, create_bo_args.handle);
|
||||
vmem_handle_mappings.emplace(bo_handle.handle, bo_handle.vaddr);
|
||||
vmem_addr_mappings.emplace(bo_handle.vaddr, bo_handle);
|
||||
|
||||
bo_guard.Dismiss();
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -256,7 +260,7 @@ hsa_status_t XdnaDriver::FreeMemory(void *mem, size_t size) {
|
||||
auto it = vmem_addr_mappings.find(mem);
|
||||
if (it == vmem_addr_mappings.end()) return HSA_STATUS_ERROR_INVALID_ALLOCATION;
|
||||
|
||||
auto handle = it->second;
|
||||
auto handle = it->second.handle;
|
||||
|
||||
drm_gem_close close_args = {};
|
||||
close_args.handle = handle;
|
||||
@@ -299,12 +303,13 @@ hsa_status_t XdnaDriver::DestroyQueue(core::Queue &queue) const {
|
||||
return HSA_STATUS_ERROR_INVALID_QUEUE;
|
||||
}
|
||||
|
||||
auto &aie_queue(static_cast<AieAqlQueue &>(queue));
|
||||
amdxdna_drm_destroy_hwctx destroy_hwctx_args{.handle =
|
||||
aie_queue.GetHwCtxHandle()};
|
||||
|
||||
if (ioctl(fd_, DRM_IOCTL_AMDXDNA_DESTROY_HWCTX, &destroy_hwctx_args) < 0) {
|
||||
return HSA_STATUS_ERROR;
|
||||
auto& aie_queue = static_cast<AieAqlQueue&>(queue);
|
||||
if (aie_queue.GetHwCtxHandle() != AMDXDNA_INVALID_BO_HANDLE) {
|
||||
amdxdna_drm_destroy_hwctx destroy_hwctx_args = {};
|
||||
destroy_hwctx_args.handle = aie_queue.GetHwCtxHandle();
|
||||
if (ioctl(fd_, DRM_IOCTL_AMDXDNA_DESTROY_HWCTX, &destroy_hwctx_args) < 0) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
@@ -385,72 +390,71 @@ hsa_status_t XdnaDriver::InitDeviceHeap() {
|
||||
amdxdna_drm_create_bo create_bo_args = {};
|
||||
create_bo_args.size = dev_heap_size;
|
||||
create_bo_args.type = AMDXDNA_BO_DEV_HEAP;
|
||||
|
||||
amdxdna_drm_get_bo_info get_bo_info_args{0};
|
||||
drm_gem_close close_bo_args{0};
|
||||
|
||||
if (ioctl(fd_, DRM_IOCTL_AMDXDNA_CREATE_BO, &create_bo_args) < 0) {
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
get_bo_info_args.handle = create_bo_args.handle;
|
||||
// In case we need to close this BO to avoid leaks due to some error after
|
||||
// creation.
|
||||
close_bo_args.handle = create_bo_args.handle;
|
||||
dev_heap_handle.handle = create_bo_args.handle;
|
||||
|
||||
if (ioctl(fd_, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &get_bo_info_args) < 0) {
|
||||
// Close the BO in the case we can't get info about it.
|
||||
// Unmap memory and close the BO in case of error.
|
||||
MAKE_NAMED_SCOPE_GUARD(dev_heap_handle_guard, [&] {
|
||||
munmap(dev_heap_handle.vaddr, dev_heap_handle.size);
|
||||
drm_gem_close close_bo_args = {};
|
||||
close_bo_args.handle = dev_heap_handle.handle;
|
||||
ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);
|
||||
dev_heap_handle = BOHandle{};
|
||||
});
|
||||
|
||||
amdxdna_drm_get_bo_info get_bo_info_args = {};
|
||||
get_bo_info_args.handle = dev_heap_handle.handle;
|
||||
if (ioctl(fd_, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &get_bo_info_args) < 0) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
dev_heap_parent = mmap(0, dev_heap_align * 2 - 1, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
|
||||
if (dev_heap_parent == MAP_FAILED) {
|
||||
// Close the BO in the case when a mapping fails and we got a BO handle.
|
||||
ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);
|
||||
dev_heap_parent = nullptr;
|
||||
const size_t size = dev_heap_align * 2 - 1;
|
||||
dev_heap_handle.vaddr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (dev_heap_handle.vaddr == MAP_FAILED) {
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
dev_heap_handle.size = size;
|
||||
|
||||
void *addr_aligned(reinterpret_cast<void *>(
|
||||
AlignUp(reinterpret_cast<uintptr_t>(dev_heap_parent), dev_heap_align)));
|
||||
void* addr_aligned = reinterpret_cast<void*>(
|
||||
AlignUp(reinterpret_cast<uintptr_t>(dev_heap_handle.vaddr), dev_heap_align));
|
||||
|
||||
dev_heap_aligned =
|
||||
mmap(addr_aligned, dev_heap_size, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_FIXED, fd_, get_bo_info_args.map_offset);
|
||||
|
||||
if (dev_heap_aligned == MAP_FAILED) {
|
||||
// Close the BO in the case when a mapping fails and we got a BO handle.
|
||||
ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);
|
||||
// Unmap the dev_heap_parent.
|
||||
dev_heap_aligned = nullptr;
|
||||
FreeDeviceHeap();
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
dev_heap_handle_guard.Dismiss();
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
std::unordered_map<uint32_t, void*>& XdnaDriver::GetHandleMappings() {
|
||||
return vmem_handle_mappings;
|
||||
}
|
||||
|
||||
std::unordered_map<void*, uint32_t>& XdnaDriver::GetAddrMappings() { return vmem_addr_mappings; }
|
||||
|
||||
hsa_status_t XdnaDriver::FreeDeviceHeap() {
|
||||
if (dev_heap_parent) {
|
||||
if (munmap(dev_heap_parent, dev_heap_align * 2 - 1) != 0) return HSA_STATUS_ERROR;
|
||||
dev_heap_parent = nullptr;
|
||||
}
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
if (dev_heap_aligned) {
|
||||
if (munmap(dev_heap_aligned, dev_heap_size) != 0) return HSA_STATUS_ERROR;
|
||||
if (munmap(dev_heap_aligned, dev_heap_size) != 0) {
|
||||
status = HSA_STATUS_ERROR;
|
||||
}
|
||||
dev_heap_aligned = nullptr;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
if (dev_heap_handle.IsValid()) {
|
||||
if (munmap(dev_heap_handle.vaddr, dev_heap_handle.size) != 0) {
|
||||
status = HSA_STATUS_ERROR;
|
||||
}
|
||||
drm_gem_close close_bo_args = {};
|
||||
close_bo_args.handle = dev_heap_handle.handle;
|
||||
ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);
|
||||
dev_heap_handle = BOHandle{};
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
hsa_status_t XdnaDriver::SyncBos(const std::vector<uint64_t>& bo_addrs,
|
||||
@@ -479,26 +483,21 @@ hsa_status_t XdnaDriver::ExecCmdAndWait(amdxdna_drm_exec_cmd* exec_cmd, uint32_t
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t XdnaDriver::RegisterCmdBOs(
|
||||
uint32_t count, std::vector<uint32_t>& bo_args, std::vector<uint32_t>& bo_sizes,
|
||||
std::vector<uint64_t>& bo_addrs, hsa_amd_aie_ert_start_kernel_data_t* cmd_pkt_payload,
|
||||
const std::unordered_map<void*, uint32_t>& vmem_addr_mappings) {
|
||||
// This is the index where the operand addresses start in a command
|
||||
const int operand_starting_index = 5;
|
||||
|
||||
// Counting the number of operands in the command payload.
|
||||
uint32_t num_operands = GetOperandCount(count);
|
||||
|
||||
uint64_t instr_addr = Concat<uint64_t, uint32_t>(
|
||||
cmd_pkt_payload->data[CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_IDX + 1],
|
||||
cmd_pkt_payload->data[CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_IDX]);
|
||||
auto instr_handle = vmem_addr_mappings.find(reinterpret_cast<void*>(instr_addr));
|
||||
|
||||
if (instr_handle == vmem_addr_mappings.end()) return HSA_STATUS_ERROR;
|
||||
hsa_status_t XdnaDriver::RegisterCmdBOs(uint32_t count, std::vector<uint32_t>& bo_args,
|
||||
std::vector<uint32_t>& bo_sizes,
|
||||
std::vector<uint64_t>& bo_addrs,
|
||||
hsa_amd_aie_ert_start_kernel_data_t* cmd_pkt_payload) {
|
||||
const uint64_t instr_addr =
|
||||
Concat<uint64_t>(cmd_pkt_payload->data[CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_IDX + 1],
|
||||
cmd_pkt_payload->data[CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_IDX]);
|
||||
auto instr_bo_handle = FindBOHandle(reinterpret_cast<void*>(instr_addr));
|
||||
if (!instr_bo_handle.IsValid()) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// Keep track of the handles and addresses before we submit the packet
|
||||
bo_args.push_back(instr_handle->second);
|
||||
bo_addrs.push_back(instr_addr);
|
||||
bo_args.push_back(instr_bo_handle.handle);
|
||||
bo_addrs.push_back(reinterpret_cast<uint64_t>(instr_bo_handle.vaddr));
|
||||
|
||||
// Adding the instruction sequence size. The packet contains the number of
|
||||
// instructions.
|
||||
@@ -510,14 +509,17 @@ hsa_status_t XdnaDriver::RegisterCmdBOs(
|
||||
// addresses and turning the addresses into handles. The starting index of
|
||||
// the operands in a command is `operand_starting_index` and the fields
|
||||
// are 32 bits wide, so each 64-bit operand address spans two consecutive fields.
|
||||
for (int operand_iter = 0; operand_iter < num_operands; operand_iter++) {
|
||||
uint32_t operand_index = operand_starting_index + 2 * operand_iter;
|
||||
uint64_t operand_addr = Concat<uint64_t, uint32_t>(cmd_pkt_payload->data[operand_index + 1],
|
||||
cmd_pkt_payload->data[operand_index]);
|
||||
auto operand_handle = vmem_addr_mappings.find(reinterpret_cast<void*>(operand_addr));
|
||||
if (operand_handle == vmem_addr_mappings.end()) return HSA_STATUS_ERROR;
|
||||
bo_args.push_back(operand_handle->second);
|
||||
bo_addrs.push_back(operand_addr);
|
||||
const uint32_t num_operands = GetOperandCount(count);
|
||||
for (uint32_t operand_iter = 0; operand_iter < num_operands; operand_iter++) {
|
||||
const uint32_t operand_index = operand_starting_index + 2 * operand_iter;
|
||||
const uint64_t operand_addr = Concat<uint64_t>(cmd_pkt_payload->data[operand_index + 1],
|
||||
cmd_pkt_payload->data[operand_index]);
|
||||
auto operand_bo_handle = FindBOHandle(reinterpret_cast<void*>(operand_addr));
|
||||
if (!operand_bo_handle.IsValid()) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
bo_args.push_back(operand_bo_handle.handle);
|
||||
bo_addrs.push_back(reinterpret_cast<uint64_t>(operand_bo_handle.vaddr));
|
||||
}
|
||||
|
||||
// Going through all of the operands in the command, keeping track of
|
||||
@@ -575,7 +577,7 @@ hsa_status_t XdnaDriver::SubmitCmdChain(hsa_amd_aie_ert_packet_t* first_pkt, uin
|
||||
cmds.reserve(num_pkts);
|
||||
|
||||
// Iterating over all the contiguous HSA_AMD_AIE_ERT_CMD_CHAIN packets
|
||||
for (int pkt_iter = 0; pkt_iter < num_pkts; pkt_iter++) {
|
||||
for (uint32_t pkt_iter = 0; pkt_iter < num_pkts; pkt_iter++) {
|
||||
// Getting the current command packet
|
||||
hsa_amd_aie_ert_packet_t* pkt = first_pkt + pkt_iter;
|
||||
hsa_amd_aie_ert_start_kernel_data_t* cmd_pkt_payload =
|
||||
@@ -583,8 +585,8 @@ hsa_status_t XdnaDriver::SubmitCmdChain(hsa_amd_aie_ert_packet_t* first_pkt, uin
|
||||
|
||||
// Add the handles for all of the BOs to bo_args as well as rewrite
|
||||
// the command payload handles to contain the actual virtual addresses
|
||||
if (RegisterCmdBOs(pkt->count, bo_args, bo_sizes, bo_addrs, cmd_pkt_payload,
|
||||
vmem_addr_mappings) != HSA_STATUS_SUCCESS)
|
||||
if (RegisterCmdBOs(pkt->count, bo_args, bo_sizes, bo_addrs, cmd_pkt_payload) !=
|
||||
HSA_STATUS_SUCCESS)
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
// Creating a packet that contains the command to execute the kernel
|
||||
@@ -694,5 +696,36 @@ hsa_status_t XdnaDriver::IsModelEnabled(bool* enable) const {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/// @brief Finds the BO whose registered mapping contains the given address.
///
/// @param mem Address to look up. It may be the start address of a registered
/// mapping or any address inside one.
/// @return The BOHandle stored in vmem_addr_mappings that covers @p mem, or a
/// default-constructed BOHandle (for which IsValid() is false) when no
/// registered mapping covers it.
XdnaDriver::BOHandle XdnaDriver::FindBOHandle(void* mem) const {
  // upper_bound() yields the first mapping that starts strictly after mem, so
  // the only mapping that can contain mem is the one just before it. This also
  // handles addresses inside the highest-addressed mapping, where the search
  // returns end(); that case must not be treated as "not found".
  auto it = vmem_addr_mappings.upper_bound(mem);
  if (it == vmem_addr_mappings.cbegin()) {
    // Map is empty, or mem is below the lowest registered start address.
    return BOHandle{};
  }
  --it;

  // Compare as integers: mem may not point into the candidate mapping, and
  // relational comparison of unrelated pointers is unspecified.
  const auto base = reinterpret_cast<uintptr_t>(it->first);
  const auto addr = reinterpret_cast<uintptr_t>(mem);

  // An exact start-address match is always accepted, regardless of the
  // recorded size. Any other address must fall inside [base, base + size).
  if (addr != base && (addr - base) >= it->second.size) {
    // Address is past the end of this allocation.
    return BOHandle{};
  }

  return it->second;
}
|
||||
|
||||
|
||||
} // namespace AMD
|
||||
} // namespace rocr
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_XDNA_DRIVER_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_XDNA_DRIVER_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
@@ -127,9 +128,23 @@ inline uint32_t GetOperandCount(uint32_t arg_count) {
|
||||
}
|
||||
|
||||
class XdnaDriver final : public core::Driver {
|
||||
/// @brief BO handle information.
|
||||
struct BOHandle {
|
||||
/// Mapped address.
|
||||
void* vaddr = nullptr;
|
||||
/// Handle returned by xdna.
|
||||
uint32_t handle = AMDXDNA_INVALID_BO_HANDLE;
|
||||
/// Size in bytes.
|
||||
size_t size = 0;
|
||||
|
||||
constexpr BOHandle() = default;
|
||||
constexpr BOHandle(void* vaddr, uint32_t handle, size_t size)
|
||||
: vaddr{vaddr}, handle{handle}, size{size} {}
|
||||
constexpr bool IsValid() const { return handle != AMDXDNA_INVALID_BO_HANDLE; }
|
||||
};
|
||||
|
||||
public:
|
||||
XdnaDriver(std::string devnode_name);
|
||||
~XdnaDriver() = default;
|
||||
|
||||
static hsa_status_t DiscoverDriver(std::unique_ptr<core::Driver>& driver);
|
||||
|
||||
@@ -143,9 +158,6 @@ public:
|
||||
hsa_status_t ShutDown() override;
|
||||
hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override;
|
||||
|
||||
std::unordered_map<uint32_t, void*>& GetHandleMappings();
|
||||
std::unordered_map<void*, uint32_t>& GetAddrMappings();
|
||||
|
||||
hsa_status_t Open() override;
|
||||
hsa_status_t Close() override;
|
||||
hsa_status_t GetSystemProperties(HsaSystemProperties& sys_props) const override;
|
||||
@@ -174,7 +186,7 @@ public:
|
||||
size_t size) override;
|
||||
hsa_status_t ReleaseShareableHandle(core::ShareableHandle &handle) override;
|
||||
|
||||
/// @brief Submits num_pkts packets in a command chain to the XDNA driver
|
||||
/// @brief Submits @p num_pkts packets in a command chain.
|
||||
hsa_status_t SubmitCmdChain(hsa_amd_aie_ert_packet_t* first_pkt, uint32_t num_pkts,
|
||||
uint32_t num_operands, uint32_t hw_ctx_handle);
|
||||
|
||||
@@ -187,7 +199,11 @@ public:
|
||||
hsa_status_t IsModelEnabled(bool* enable) const override;
|
||||
|
||||
private:
|
||||
/// @brief Finds the BO associated with the address.
|
||||
BOHandle FindBOHandle(void* mem) const;
|
||||
|
||||
hsa_status_t QueryDriverVersion();
|
||||
|
||||
/// @brief Allocate device accessible heap space.
|
||||
///
|
||||
/// Allocate and map a buffer object (BO) that the AIE device can access.
|
||||
@@ -210,8 +226,7 @@ public:
|
||||
/// @param cmd_pkt_payload A pointer to the payload of the command
|
||||
hsa_status_t RegisterCmdBOs(uint32_t count, std::vector<uint32_t>& bo_args,
|
||||
std::vector<uint32_t>& bo_sizes, std::vector<uint64_t>& bo_addrs,
|
||||
hsa_amd_aie_ert_start_kernel_data_t* cmd_pkt_payload,
|
||||
const std::unordered_map<void*, uint32_t>& vmem_addr_mappings);
|
||||
hsa_amd_aie_ert_start_kernel_data_t* cmd_pkt_payload);
|
||||
|
||||
/// @brief Syncs all BOs referenced in bo_args
|
||||
///
|
||||
@@ -230,17 +245,18 @@ public:
|
||||
/// driver handles requires a bit more refactoring. So rely on the XDNA driver
|
||||
/// to manage some of this for now.
|
||||
std::unordered_map<uint32_t, void *> vmem_handle_mappings;
|
||||
std::unordered_map<void*, uint32_t> vmem_addr_mappings;
|
||||
std::map<void*, BOHandle> vmem_addr_mappings;
|
||||
|
||||
/// @brief Virtual address range allocated for the device heap.
|
||||
///
|
||||
/// Allocate a large enough space so we can carve out the device heap in
|
||||
/// this range and ensure it is aligned to 64MB. Currently, npu1 supports
|
||||
/// 64MB device heap and it must be aligned to 64MB.
|
||||
void *dev_heap_parent = nullptr;
|
||||
BOHandle dev_heap_handle;
|
||||
|
||||
/// @brief The aligned device heap.
|
||||
void *dev_heap_aligned = nullptr;
|
||||
|
||||
static constexpr size_t dev_heap_size = 64 * 1024 * 1024;
|
||||
static constexpr size_t dev_heap_align = 64 * 1024 * 1024;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user