b3c48cc68c
Extend hsa_amd_vmem_address_reserve/hsa_amd_vmem_address_reserve_align to support HSA_AMD_VMEM_ADDRESS_NO_REGISTER flag. This allocation can be used to reserve virtual address ranges that can later be used by hsa_amd_svm_attributes_set for SVM based memory allocations.
907 lines
33 KiB
C++
907 lines
33 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// The University of Illinois/NCSA
|
|
// Open Source License (NCSA)
|
|
//
|
|
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
// Developed by:
|
|
//
|
|
// AMD Research and AMD HSA Software Development
|
|
//
|
|
// Advanced Micro Devices, Inc.
|
|
//
|
|
// www.amd.com
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to
|
|
// deal with the Software without restriction, including without limitation
|
|
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
// and/or sell copies of the Software, and to permit persons to whom the
|
|
// Software is furnished to do so, subject to the following conditions:
|
|
//
|
|
// - Redistributions of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimers.
|
|
// - Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimers in
|
|
// the documentation and/or other materials provided with the distribution.
|
|
// - Neither the names of Advanced Micro Devices, Inc,
|
|
// nor the names of its contributors may be used to endorse or promote
|
|
// products derived from this Software without specific prior written
|
|
// permission.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
// DEALINGS WITH THE SOFTWARE.
|
|
//
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// HSA runtime C++ interface file.
|
|
|
|
#ifndef HSA_RUNTME_CORE_INC_RUNTIME_H_
|
|
#define HSA_RUNTME_CORE_INC_RUNTIME_H_
|
|
|
|
#include <vector>
|
|
#include <map>
|
|
#include <memory>
|
|
#include <tuple>
|
|
#include <utility>
|
|
#include <thread>
|
|
#include <sys/un.h>
|
|
|
|
#if defined(__linux__)
|
|
#include <xf86drm.h>
|
|
#include <amdgpu.h>
|
|
#endif
|
|
|
|
#include "core/inc/hsa_ext_interface.h"
|
|
#include "core/inc/hsa_internal.h"
|
|
#include "core/inc/hsa_ext_amd_impl.h"
|
|
|
|
#include "core/inc/agent.h"
|
|
#include "core/inc/driver.h"
|
|
#include "core/inc/exceptions.h"
|
|
#include "core/inc/interrupt_signal.h"
|
|
#include "core/inc/memory_region.h"
|
|
#include "core/inc/signal.h"
|
|
#include "core/inc/svm_profiler.h"
|
|
#include "core/inc/thunk_loader.h"
|
|
#include "core/util/flag.h"
|
|
#include "core/util/locks.h"
|
|
#include "core/util/os.h"
|
|
#include "core/util/utils.h"
|
|
|
|
#include "core/inc/amd_loader_context.hpp"
|
|
#include "core/inc/amd_hsa_code.hpp"
|
|
|
|
#if defined(__clang__)
|
|
#if __has_feature(address_sanitizer)
|
|
#define SANITIZER_AMDGPU 1
|
|
#endif
|
|
#endif
|
|
|
|
//---------------------------------------------------------------------------//
|
|
// Constants //
|
|
//---------------------------------------------------------------------------//
|
|
|
|
#define HSA_ARGUMENT_ALIGN_BYTES 16
|
|
#define HSA_QUEUE_ALIGN_BYTES 64
|
|
#define HSA_PACKET_ALIGN_BYTES 64
|
|
#define HSA_MAX_DEP_SIGNALS 5
|
|
|
|
// Forward declaration; avoids an include.
|
|
namespace rocr {
|
|
namespace AMD {
|
|
class MemoryRegion;
|
|
} // namespace AMD
|
|
|
|
namespace core {
|
|
extern bool g_use_interrupt_wait;
|
|
extern bool g_use_mwaitx;
|
|
|
|
/// @brief Runtime class provides the following functions:
|
|
/// - open and close connection to kernel driver.
|
|
/// - load supported extension library (image and finalizer).
|
|
/// - load tools library.
|
|
/// - expose supported agents.
|
|
/// - allocate and free memory.
|
|
/// - memory copy and fill.
|
|
/// - grant access to memory (dgpu memory pool extension).
|
|
/// - maintain loader state.
|
|
/// - monitor asynchronous event from agent.
|
|
class Runtime {
|
|
friend class AMD::MemoryRegion;
|
|
public:
|
|
/// @brief Structure to describe connectivity between agents.
|
|
struct LinkInfo {
|
|
LinkInfo() : num_hop(0), rec_sdma_eng_id_mask(0), info{0} {}
|
|
|
|
uint32_t num_hop;
|
|
uint32_t rec_sdma_eng_id_mask;
|
|
hsa_amd_memory_pool_link_info_t info;
|
|
};
|
|
|
|
struct KfdVersion_t {
|
|
HsaVersionInfo version;
|
|
bool supports_exception_debugging;
|
|
bool supports_event_age;
|
|
bool supports_core_dump;
|
|
};
|
|
|
|
/// @brief Open connection to kernel driver and increment reference count.
|
|
static hsa_status_t Acquire();
|
|
|
|
/// @brief Decrement reference count and close connection to kernel driver.
|
|
static hsa_status_t Release();
|
|
|
|
/// @brief Checks if connection to kernel driver is opened.
|
|
/// @retval True if the connection to kernel driver is opened.
|
|
static bool IsOpen();
|
|
|
|
// @brief Callback handler for HW Exceptions.
|
|
static bool HwExceptionHandler(hsa_signal_value_t val, void* arg);
|
|
|
|
// @brief Callback handler for VM fault access.
|
|
static bool VMFaultHandler(hsa_signal_value_t val, void* arg);
|
|
|
|
// @brief Print known allocations near ptr.
|
|
static void PrintMemoryMapNear(void* ptr);
|
|
|
|
/// @brief Singleton object of the runtime.
|
|
static Runtime* runtime_singleton_;
|
|
|
|
/// @brief Insert agent into agent list ::agents_.
|
|
/// @param [in] agent Pointer to the agent object.
|
|
void RegisterAgent(Agent* agent, bool Enabled);
|
|
|
|
/// @brief Insert agent into the driver list.
|
|
/// @param [in] driver Unique pointer to the driver object.
|
|
void RegisterDriver(std::unique_ptr<Driver> driver);
|
|
|
|
/// @brief Delete all agent objects from ::agents_.
|
|
void DestroyAgents();
|
|
|
|
/// @brief Close and delete all agent driver objects from ::agent_drivers_.
|
|
void DestroyDrivers();
|
|
|
|
/// @brief Set the number of links connecting the agents in the platform.
|
|
void SetLinkCount(size_t num_link);
|
|
|
|
/// @brief Register link information connecting @p node_id_from and @p
|
|
/// node_id_to.
|
|
/// @param [in] node_id_from Node id of the source node.
|
|
/// @param [in] node_id_to Node id of the destination node.
|
|
/// @param [in] link_info The link information between source and destination
|
|
/// nodes.
|
|
void RegisterLinkInfo(uint32_t node_id_from, uint32_t node_id_to,
|
|
uint32_t num_hop, uint32_t rec_sdma_eng_id_mask,
|
|
hsa_amd_memory_pool_link_info_t& link_info);
|
|
|
|
/// @brief Query link information between two nodes.
|
|
/// @param [in] node_id_from Node id of the source node.
|
|
/// @param [in] node_id_to Node id of the destination node.
|
|
/// @retval The link information between source and destination nodes.
|
|
const LinkInfo GetLinkInfo(uint32_t node_id_from, uint32_t node_id_to);
|
|
|
|
/// @brief Invoke the user provided call back for each agent in the agent
|
|
/// list.
|
|
///
|
|
/// @param [in] callback User provided callback function.
|
|
/// @param [in] data User provided pointer as input for @p callback.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
|
|
/// agent returns ::HSA_STATUS_SUCCESS.
|
|
hsa_status_t IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent,
|
|
void* data),
|
|
void* data);
|
|
|
|
/// @brief Allocate memory on a particular region.
|
|
///
|
|
/// @param [in] region Pointer to region object.
|
|
/// @param [in] size Allocation size in bytes.
|
|
/// @param [in] alloc_flags Modifiers to pass to MemoryRegion allocator.
|
|
/// @param [out] address Pointer to store the allocation result.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS If allocation is successful.
|
|
hsa_status_t AllocateMemory(const MemoryRegion* region, size_t size,
|
|
MemoryRegion::AllocateFlags alloc_flags,
|
|
void** address, int agent_node_id = 0);
|
|
|
|
/// @brief Free memory previously allocated with AllocateMemory.
|
|
///
|
|
/// @param [in] ptr Address of the memory to be freed.
|
|
///
|
|
/// @retval ::HSA_STATUS_ERROR If @p ptr is not the address of previous
|
|
/// allocation via ::core::Runtime::AllocateMemory
|
|
/// @retval ::HSA_STATUS_SUCCESS if @p ptr is successfully released.
|
|
hsa_status_t FreeMemory(void* ptr);
|
|
|
|
hsa_status_t RegisterReleaseNotifier(void* ptr, hsa_amd_deallocation_callback_t callback,
|
|
void* user_data);
|
|
|
|
hsa_status_t DeregisterReleaseNotifier(void* ptr, hsa_amd_deallocation_callback_t callback);
|
|
|
|
/// @brief Blocking memory copy from src to dst.
|
|
///
|
|
/// @param [in] dst Memory address of the destination.
|
|
/// @param [in] src Memory address of the source.
|
|
/// @param [in] size Copy size in bytes.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS if memory copy is successful and completed.
|
|
hsa_status_t CopyMemory(void* dst, const void* src, size_t size);
|
|
|
|
/// @brief Non-blocking memory copy from src to dst.
|
|
///
|
|
/// @details The memory copy will be performed after all signals in
|
|
/// @p dep_signals have value of 0. On completion @p completion_signal
|
|
/// will be decremented.
|
|
///
|
|
/// @param [in] dst Memory address of the destination.
|
|
/// @param [in] dst_agent Agent object associated with the destination. This
|
|
/// agent should be able to access the destination and source.
|
|
/// @param [in] src Memory address of the source.
|
|
/// @param [in] src_agent Agent object associated with the source. This
|
|
/// agent should be able to access the destination and source.
|
|
/// @param [in] size Copy size in bytes.
|
|
/// @param [in] dep_signals Array of signal dependency.
|
|
/// @param [in] completion_signal Completion signal object.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS if copy command has been submitted
|
|
/// successfully to the agent DMA queue.
|
|
hsa_status_t CopyMemory(void* dst, core::Agent* dst_agent, const void* src,
|
|
core::Agent* src_agent, size_t size,
|
|
std::vector<core::Signal*>& dep_signals, core::Signal& completion_signal);
|
|
|
|
/// @brief Non-blocking memory copy from src to dst on engine_id.
|
|
///
|
|
/// @details All semantics and params are identical to CopyMemory
|
|
/// with the exception of engine_id.
|
|
///
|
|
/// @param [in] engine_id Target engine to copy on.
|
|
///
|
|
/// @param [in] force_copy_on_sdma By default, a blit kernel copy is used
|
|
/// when dst_agent == src_agent. Setting this to true will force the copy
|
|
/// over SDMA1.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS if copy command has been submitted
|
|
/// successfully to the agent DMA queue.
|
|
hsa_status_t CopyMemoryOnEngine(void* dst, core::Agent* dst_agent, const void* src,
|
|
core::Agent* src_agent, size_t size,
|
|
std::vector<core::Signal*>& dep_signals, core::Signal& completion_signal,
|
|
hsa_amd_sdma_engine_id_t engine_id, bool force_copy_on_sdma);
|
|
|
|
/// @brief Return SDMA availability status for copy direction
|
|
///
|
|
/// @param [in] dst_agent Destination agent.
|
|
/// @param [in] src_agent Source agent.
|
|
/// @param [out] engine_ids_mask Mask of engine_ids.
|
|
///
|
|
/// @retval HSA_STATUS_SUCCESS DMA engines are available
|
|
/// @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES DMA engines are not available
|
|
hsa_status_t CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_agent,
|
|
uint32_t *engine_ids_mask);
|
|
|
|
/// @brief Get preferred SDMA engine for the copy direction
|
|
///
|
|
/// @param [in] dst_agent Destination agent.
|
|
/// @param [in] src_agent Source agent.
|
|
/// @param [out] recommended_ids_mask Mask of recommended_ids.
|
|
///
|
|
/// @retval HSA_STATUS_SUCCESS For mask returned
|
|
hsa_status_t GetPreferredEngine(core::Agent* dst_agent, core::Agent* src_agent,
|
|
uint32_t* recommended_ids_mask);
|
|
|
|
/// @brief Fill the first @p count of uint32_t in ptr with value.
|
|
///
|
|
/// @param [in] ptr Memory address to be filled.
|
|
/// @param [in] value The value/pattern that will be used to set @p ptr.
|
|
/// @param [in] count Number of uint32_t element to be set.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS if memory fill is successful and completed.
|
|
hsa_status_t FillMemory(void* ptr, uint32_t value, size_t count);
|
|
|
|
/// @brief Set agents as the whitelist to access ptr.
|
|
///
|
|
/// @param [in] num_agents The number of agent handles in @p agents array.
|
|
/// @param [in] agents Agent handle array.
|
|
/// @param [in] ptr Pointer of memory previously allocated via
|
|
/// core::Runtime::AllocateMemory.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS The whitelist has been configured
|
|
/// successfully and all agents in the @p agents could start accessing @p ptr.
|
|
hsa_status_t AllowAccess(uint32_t num_agents, const hsa_agent_t* agents,
|
|
const void* ptr);
|
|
|
|
/// @brief Query system information.
|
|
///
|
|
/// @param [in] attribute System info attribute to query.
|
|
/// @param [out] value Pointer to store the attribute value.
|
|
///
|
|
/// @retval HSA_STATUS_SUCCESS The attribute is valid and the @p value is
|
|
/// set.
|
|
hsa_status_t GetSystemInfo(hsa_system_info_t attribute, void* value);
|
|
|
|
/// @brief Register a callback function @p handler that is associated with
|
|
/// @p signal to asynchronous event monitor thread.
|
|
///
|
|
/// @param [in] signal Signal handle associated with @p handler.
|
|
/// @param [in] cond The condition to execute the @p handler.
|
|
/// @param [in] value The value to compare with @p signal value. If the
|
|
/// comparison satisfy @p cond, the @p handler will be called.
|
|
/// @param [in] arg Pointer to the argument that will be provided to @p
|
|
/// handler.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS Registration is successful.
|
|
hsa_status_t SetAsyncSignalHandler(hsa_signal_t signal,
|
|
hsa_signal_condition_t cond,
|
|
hsa_signal_value_t value,
|
|
hsa_amd_signal_handler handler, void* arg);
|
|
|
|
hsa_status_t InteropMap(uint32_t num_agents, Agent** agents,
|
|
int interop_handle, uint32_t flags, size_t* size,
|
|
void** ptr, size_t* metadata_size,
|
|
const void** metadata);
|
|
|
|
hsa_status_t InteropUnmap(void* ptr);
|
|
|
|
struct PtrInfoBlockData {
|
|
void* base;
|
|
size_t length;
|
|
core::Agent* agentOwner;
|
|
};
|
|
|
|
hsa_status_t PtrInfo(const void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
|
|
uint32_t* num_agents_accessible, hsa_agent_t** accessible,
|
|
PtrInfoBlockData* block_info = nullptr);
|
|
|
|
hsa_status_t SetPtrInfoData(const void* ptr, void* userptr);
|
|
|
|
hsa_status_t IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* handle);
|
|
|
|
hsa_status_t IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, uint32_t num_agents,
|
|
Agent** mapping_agents, void** mapped_ptr);
|
|
|
|
hsa_status_t IPCDetach(void* ptr);
|
|
|
|
hsa_status_t SetSvmAttrib(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
|
|
size_t attribute_count);
|
|
|
|
hsa_status_t GetSvmAttrib(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
|
|
size_t attribute_count);
|
|
|
|
hsa_status_t SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent, uint32_t num_dep_signals,
|
|
const hsa_signal_t* dep_signals, hsa_signal_t completion_signal);
|
|
|
|
hsa_status_t DmaBufExport(const void* ptr, size_t size, int* dmabuf,
|
|
uint64_t* offset, uint64_t flags);
|
|
|
|
hsa_status_t DmaBufClose(int dmabuf);
|
|
|
|
hsa_status_t VMemoryAddressReserve(void** ptr, size_t size, uint64_t address, uint64_t alignment, uint64_t flags);
|
|
|
|
hsa_status_t VMemoryAddressFree(void* ptr, size_t size);
|
|
|
|
hsa_status_t VMemoryHandleCreate(const MemoryRegion* region, size_t size,
|
|
MemoryRegion::AllocateFlags alloc_flags,
|
|
uint64_t flags, hsa_amd_vmem_alloc_handle_t* memoryHandle);
|
|
|
|
hsa_status_t VMemoryHandleRelease(hsa_amd_vmem_alloc_handle_t memoryHandle);
|
|
|
|
hsa_status_t VMemoryHandleMap(void* va, size_t size, size_t in_offset,
|
|
hsa_amd_vmem_alloc_handle_t memoryHandle, uint64_t flags);
|
|
|
|
hsa_status_t VMemoryHandleUnmap(void* va, size_t size);
|
|
|
|
hsa_status_t VMemorySetAccess(void* va, size_t size, const hsa_amd_memory_access_desc_t* desc,
|
|
size_t desc_cnt);
|
|
|
|
hsa_status_t VMemoryGetAccess(const void* va, hsa_access_permission_t* perms,
|
|
hsa_agent_t agent_handle);
|
|
|
|
hsa_status_t VMemoryExportShareableHandle(int* dmabuf_fd,
|
|
const hsa_amd_vmem_alloc_handle_t handle,
|
|
const uint64_t flags);
|
|
|
|
hsa_status_t VMemoryImportShareableHandle(const int dmabuf_fd,
|
|
hsa_amd_vmem_alloc_handle_t* handle);
|
|
|
|
hsa_status_t VMemoryRetainAllocHandle(hsa_amd_vmem_alloc_handle_t* memoryHandle, void* addr);
|
|
|
|
hsa_status_t VMemoryGetAllocPropertiesFromHandle(const hsa_amd_vmem_alloc_handle_t memoryHandle,
|
|
const core::MemoryRegion** mem_region,
|
|
hsa_amd_memory_type_t* type);
|
|
|
|
hsa_status_t EnableLogging(uint8_t* flags, void* file);
|
|
|
|
/// @brief Read-only access to ::cpu_agents_, the list of all CPU agents in the platform.
const std::vector<Agent*>& cpu_agents() { return cpu_agents_; }
|
|
|
|
/// @brief Read-only access to ::gpu_agents_, the list of all compatible GPU agents in the platform.
const std::vector<Agent*>& gpu_agents() { return gpu_agents_; }
|
|
|
|
/// @brief Read-only access to ::aie_agents_, the list of all compatible AIE agents in the platform.
const std::vector<Agent *> &aie_agents() { return aie_agents_; }
|
|
|
|
/// @brief Read-only access to ::disabled_gpu_agents_, the incompletely initialized GPU agents that
/// are not to be used by the process.
const std::vector<Agent*>& disabled_gpu_agents() { return disabled_gpu_agents_; }
|
|
|
|
/// @brief Read-only access to ::gpu_ids_, the ids of all compatible GPU agents in the platform.
const std::vector<uint32_t>& gpu_ids() { return gpu_ids_; }
|
|
|
|
/// @brief Look up the agent registered under a KFD gpuid.
/// @param [in] gpuid KFD gpuid of the wanted agent.
/// @retval The agent stored in ::agents_by_gpuid_ for @p gpuid, or nullptr
/// when the map has no entry for it.
Agent* agent_by_gpuid(uint32_t gpuid) {
  // Use find() rather than operator[]: operator[] default-inserts a null
  // entry for every unknown gpuid, so a plain lookup would mutate (and grow)
  // the map. Callers still receive nullptr for unknown ids.
  const auto entry = agents_by_gpuid_.find(gpuid);
  return (entry == agents_by_gpuid_.end()) ? nullptr : entry->second;
}
|
|
|
|
/// @brief Returns ::region_gpu_, the GPU used by the deprecated HSA Region API (legacy APU support only).
Agent* region_gpu() { return region_gpu_; }
|
|
|
|
const std::vector<const MemoryRegion*>& system_regions_fine() const {
|
|
return system_regions_fine_;
|
|
}
|
|
|
|
const std::vector<const MemoryRegion*>& system_regions_coarse() const {
|
|
return system_regions_coarse_;
|
|
}
|
|
|
|
/// @brief Returns the loader instance pointer held in ::loader_.
amd::hsa::loader::Loader* loader() { return loader_; }
|
|
|
|
/// @brief Returns the address of the loader context member ::loader_context_.
amd::LoaderContext* loader_context() { return &loader_context_; }
|
|
|
|
/// @brief Returns the address of the code object manager member ::code_manager_.
amd::hsa::code::AmdHsaCodeManager* code_manager() { return &code_manager_; }
|
|
|
|
std::function<void*(size_t size, size_t align, MemoryRegion::AllocateFlags flags, int agent_node_id)>&
|
|
system_allocator() {
|
|
return system_allocator_;
|
|
}
|
|
|
|
std::function<void(void*)>& system_deallocator() {
|
|
return system_deallocator_;
|
|
}
|
|
|
|
/// @brief Read-only access to ::flag_, which tracks environment variables.
const Flag& flag() const { return flag_; }
|
|
|
|
/// @brief Returns the ThunkLoader pointer held in ::thunkLoader_.
const ThunkLoader* thunkLoader() const { return thunkLoader_; }
|
|
|
|
ExtensionEntryPoints extensions_;
|
|
|
|
hsa_status_t SetCustomSystemEventHandler(hsa_amd_system_event_callback_t callback,
|
|
void* data);
|
|
|
|
hsa_status_t SetInternalQueueCreateNotifier(hsa_amd_runtime_queue_notifier callback,
|
|
void* user_data);
|
|
|
|
void InternalQueueCreateNotify(const hsa_queue_t* queue, hsa_agent_t agent);
|
|
|
|
/// @brief Returns the address of the pool that provides memory for SharedSignal (Signal ABI blocks).
SharedSignalPool_t* GetSharedSignalPool() { return &SharedSignalPool; }
|
|
|
|
/// @brief Returns the address of the pool of KFD events used by InterruptSignal.
InterruptSignal::EventPool* GetEventPool() { return &EventPool; }
|
|
|
|
/// @brief Returns the system clock frequency stored in ::sys_clock_freq_.
uint64_t sys_clock_freq() const { return sys_clock_freq_; }
|
|
|
|
void KfdVersion(const HsaVersionInfo& version) {
|
|
kfd_version.version = version;
|
|
if (version.KernelInterfaceMajorVersion == 1 &&
|
|
version.KernelInterfaceMinorVersion >= 14)
|
|
kfd_version.supports_event_age = true;
|
|
}
|
|
|
|
void KfdVersion(bool exception_debugging, bool core_dump) {
|
|
kfd_version.supports_exception_debugging = exception_debugging;
|
|
kfd_version.supports_core_dump = core_dump;
|
|
}
|
|
|
|
/// @brief Returns a copy of the recorded kernel driver version and capability flags.
KfdVersion_t KfdVersion() const { return kfd_version; }
|
|
|
|
/// @brief Returns the cached ::virtual_mem_api_supported_ flag.
bool VirtualMemApiSupported() const { return virtual_mem_api_supported_; }
|
|
/// @brief Returns the current value of ::xnack_enabled_.
bool XnackEnabled() const { return xnack_enabled_; }
|
|
/// @brief Stores @p enable in ::xnack_enabled_.
void XnackEnabled(bool enable) { xnack_enabled_ = enable; }
|
|
|
|
/// @brief Find the registered driver of a given kernel driver type.
/// @param [in] drv_type Driver type to search ::agent_drivers_ for.
/// @retval Reference to the first driver whose kernel_driver_type_ equals
/// @p drv_type.
/// @throws AMD::hsa_exception with HSA_STATUS_ERROR_INVALID_ARGUMENT when no
/// registered driver has that type.
Driver &AgentDriver(DriverType drv_type) {
  for (const std::unique_ptr<Driver> &candidate : agent_drivers_) {
    if (candidate->kernel_driver_type_ == drv_type) {
      return *candidate;
    }
  }

  throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                           "Invalid agent device type, no driver found.");
}
|
|
|
|
/// @brief Mutable access to ::agent_drivers_, the registered agent kernel-mode driver interfaces.
std::vector<std::unique_ptr<Driver>>& AgentDrivers() { return agent_drivers_; }
|
|
|
|
protected:
|
|
static void AsyncEventsLoop(void*);
|
|
static void AsyncIPCSockServerConnLoop(void*);
|
|
|
|
struct AllocationRegion {
|
|
AllocationRegion()
|
|
: region(NULL),
|
|
size(0),
|
|
size_requested(0),
|
|
alloc_flags(core::MemoryRegion::AllocateNoFlags),
|
|
user_ptr(nullptr),
|
|
ldrm_bo(NULL) {}
|
|
AllocationRegion(const MemoryRegion* region_arg, size_t size_arg, size_t size_requested,
|
|
MemoryRegion::AllocateFlags alloc_flags)
|
|
: region(region_arg),
|
|
size(size_arg),
|
|
size_requested(size_requested),
|
|
alloc_flags(alloc_flags),
|
|
user_ptr(nullptr),
|
|
ldrm_bo(NULL) {}
|
|
|
|
struct notifier_t {
|
|
void* ptr;
|
|
AMD::callback_t<hsa_amd_deallocation_callback_t> callback;
|
|
void* user_data;
|
|
};
|
|
|
|
const MemoryRegion* region;
|
|
size_t size; /* actual size = align_up(size_requested, granularity) */
|
|
size_t size_requested; /* size requested by user */
|
|
MemoryRegion::AllocateFlags alloc_flags;
|
|
void* user_ptr;
|
|
std::unique_ptr<std::vector<notifier_t>> notifiers;
|
|
amdgpu_bo_handle ldrm_bo;
|
|
};
|
|
|
|
struct AsyncEventsControl {
|
|
AsyncEventsControl() : async_events_thread_(NULL) {}
|
|
void Shutdown();
|
|
|
|
hsa_signal_t wake;
|
|
os::Thread async_events_thread_;
|
|
HybridMutex lock;
|
|
bool exit;
|
|
};
|
|
|
|
struct AsyncEvents {
|
|
void PushBack(hsa_signal_t signal, hsa_signal_condition_t cond,
|
|
hsa_signal_value_t value, hsa_amd_signal_handler handler,
|
|
void* arg);
|
|
|
|
void CopyIndex(size_t dst, size_t src);
|
|
|
|
size_t Size();
|
|
|
|
void PopBack();
|
|
|
|
void Clear();
|
|
|
|
std::vector<hsa_signal_t> signal_;
|
|
std::vector<hsa_signal_condition_t> cond_;
|
|
std::vector<hsa_signal_value_t> value_;
|
|
std::vector<hsa_amd_signal_handler> handler_;
|
|
std::vector<HsaEvent*> hsa_events_; //!< A list of HSA events for KFD wait
|
|
std::vector<uint64_t> age_; //!< The age list for KFD wait
|
|
std::vector<void*> arg_;
|
|
};
|
|
|
|
struct PrefetchRange;
|
|
typedef std::map<uintptr_t, PrefetchRange> prefetch_map_t;
|
|
|
|
struct PrefetchOp {
|
|
void* base;
|
|
size_t size;
|
|
uint32_t node_id;
|
|
int remaining_deps;
|
|
hsa_signal_t completion;
|
|
std::vector<hsa_signal_t> dep_signals;
|
|
prefetch_map_t::iterator prefetch_map_entry;
|
|
};
|
|
|
|
struct PrefetchRange {
|
|
PrefetchRange() {}
|
|
PrefetchRange(size_t Bytes, PrefetchOp* Op) : bytes(Bytes), op(Op) {}
|
|
size_t bytes;
|
|
PrefetchOp* op;
|
|
prefetch_map_t::iterator prev;
|
|
prefetch_map_t::iterator next;
|
|
};
|
|
|
|
// Will be created before any user could call hsa_init but also could be
|
|
// destroyed before incorrectly written programs call hsa_shutdown.
|
|
static __forceinline KernelMutex& bootstrap_lock() {
|
|
// This allocation is meant to last until the last thread has exited.
|
|
// It is intentionally not freed.
|
|
static KernelMutex* bootstrap_lock_ = new KernelMutex;
|
|
return *bootstrap_lock_;
|
|
}
|
|
Runtime();
|
|
|
|
Runtime(const Runtime&);
|
|
|
|
Runtime& operator=(const Runtime&);
|
|
|
|
/// @brief Empty destructor, declared in the protected section of the class.
~Runtime() {}
|
|
|
|
/// @brief Open connection to kernel driver.
|
|
hsa_status_t Load();
|
|
|
|
/// @brief Close connection to kernel driver and cleanup resources.
|
|
void Unload();
|
|
|
|
/// @brief Dynamically load extension libraries (images, finalizer) and
|
|
/// call OnLoad method on each loaded library.
|
|
void LoadExtensions();
|
|
|
|
/// @brief Call OnUnload method on each extension library then close it.
|
|
void UnloadExtensions();
|
|
|
|
/// @brief Dynamically load tool libraries and call OnLoad method on each
|
|
/// loaded library.
|
|
void LoadTools();
|
|
|
|
/// @brief Call OnUnload method of each tool library.
|
|
void UnloadTools();
|
|
|
|
/// @brief Close tool libraries.
|
|
void CloseTools();
|
|
|
|
// @brief Binds Error handlers to this node.
|
|
void BindErrorHandlers();
|
|
|
|
// @brief Acquire snapshot of system event handlers.
|
|
// Returns a copy to avoid holding a lock during callbacks.
|
|
std::vector<std::pair<AMD::callback_t<hsa_amd_system_event_callback_t>, void*>>
|
|
GetSystemEventHandlers();
|
|
|
|
/// @brief Get the index of ::link_matrix_.
|
|
/// @param [in] node_id_from Node id of the source node.
|
|
/// @param [in] node_id_to Node id of the destination node.
|
|
/// @retval Index in ::link_matrix_.
|
|
uint32_t GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to);
|
|
|
|
/// @brief Get most recently issued SVM prefetch agent for the range in question.
|
|
Agent* GetSVMPrefetchAgent(void* ptr, size_t size);
|
|
|
|
/// @brief Get the highest used node id.
|
|
/// @brief Get the highest used node id.
/// @retval Largest key of ::agents_by_node_, or 0 when no agent has been
/// registered yet.
uint32_t max_node_id() const {
  // rbegin() of an empty map equals rend(); dereferencing it is undefined
  // behavior, so the empty case is answered explicitly.
  return agents_by_node_.empty() ? 0 : agents_by_node_.rbegin()->first;
}
|
|
|
|
// Mutex object to protect multithreaded access to ::allocation_map_.
|
|
// Also ensures atomicity of pointer info queries by interlocking
|
|
// KFD map/unmap, register/unregister, and access to hsaKmtQueryPointerInfo
|
|
// registered & mapped arrays.
|
|
KernelSharedMutex memory_lock_;
|
|
|
|
// Array containing driver interfaces for compatible agent kernel-mode
|
|
// drivers. Currently supports AIE agents.
|
|
std::vector<std::unique_ptr<Driver>> agent_drivers_;
|
|
|
|
// Array containing tools library handles.
|
|
std::vector<os::LibHandle> tool_libs_;
|
|
|
|
// Agent list containing all CPU agents in the platform.
|
|
std::vector<Agent*> cpu_agents_;
|
|
|
|
// Agent list containing all compatible GPU agents in the platform.
|
|
std::vector<Agent*> gpu_agents_;
|
|
|
|
// Agent list containing all compatible AIE agents in the platform.
|
|
std::vector<Agent *> aie_agents_;
|
|
|
|
// Agent list containing incompletely initialized GPU agents not to be used by the process.
|
|
std::vector<Agent*> disabled_gpu_agents_;
|
|
|
|
// Agent map containing all agents indexed by their KFD node IDs.
|
|
std::map<uint32_t, std::vector<Agent*> > agents_by_node_;
|
|
|
|
// Agent map containing all agents indexed by their KFD gpuid.
|
|
std::map<uint32_t, Agent*> agents_by_gpuid_;
|
|
|
|
// Agent list containing all compatible gpu agent ids in the platform.
|
|
std::vector<uint32_t> gpu_ids_;
|
|
|
|
// List of all fine grain system memory region in the platform.
|
|
std::vector<const MemoryRegion*> system_regions_fine_;
|
|
|
|
// List of all coarse grain system memory region in the platform.
|
|
std::vector<const MemoryRegion*> system_regions_coarse_;
|
|
|
|
// Matrix of IO link.
|
|
std::vector<LinkInfo> link_matrix_;
|
|
|
|
// Loader instance.
|
|
amd::hsa::loader::Loader* loader_;
|
|
|
|
// Loader context.
|
|
amd::LoaderContext loader_context_;
|
|
|
|
// Code object manager.
|
|
amd::hsa::code::AmdHsaCodeManager code_manager_;
|
|
|
|
// Contains the region, address, and size of previously allocated memory.
|
|
std::map<const void*, AllocationRegion> allocation_map_;
|
|
|
|
// Pending prefetch containers.
|
|
KernelMutex prefetch_lock_;
|
|
prefetch_map_t prefetch_map_;
|
|
|
|
// Allocator using ::system_region_
|
|
std::function<void*(size_t size, size_t align, MemoryRegion::AllocateFlags flags, int agent_node_id)> system_allocator_;
|
|
|
|
// Deallocator using ::system_region_
|
|
std::function<void(void*)> system_deallocator_;
|
|
|
|
// Deprecated HSA Region API GPU (for legacy APU support only)
|
|
Agent* region_gpu_;
|
|
|
|
struct AsyncEventsInfo {
|
|
AsyncEventsControl control;
|
|
AsyncEvents events;
|
|
AsyncEvents new_events;
|
|
bool monitor_exceptions;
|
|
};
|
|
|
|
struct AsyncEventsInfo asyncSignals_;
|
|
struct AsyncEventsInfo asyncExceptions_;
|
|
|
|
// System clock frequency.
|
|
uint64_t sys_clock_freq_;
|
|
|
|
// Number of Numa Nodes
|
|
size_t num_nodes_;
|
|
|
|
// @brief AMD HSA event to monitor for virtual memory access fault.
|
|
HsaEvent* vm_fault_event_;
|
|
|
|
// @brief HSA signal to contain the VM fault event.
|
|
Signal* vm_fault_signal_;
|
|
|
|
// @brief AMD HSA event to monitor for HW exceptions.
|
|
HsaEvent* hw_exception_event_;
|
|
|
|
// @brief HSA signal to contain the HW exception event.
|
|
Signal* hw_exception_signal_;
|
|
|
|
// Custom system event handlers.
|
|
std::vector<std::pair<AMD::callback_t<hsa_amd_system_event_callback_t>, void*>>
|
|
system_event_handlers_;
|
|
|
|
// System event handler lock
|
|
KernelMutex system_event_lock_;
|
|
|
|
// Internal queue creation notifier
|
|
AMD::callback_t<hsa_amd_runtime_queue_notifier> internal_queue_create_notifier_;
|
|
|
|
void* internal_queue_create_notifier_user_data_;
|
|
|
|
// Holds reference count to runtime object.
|
|
std::atomic<uint32_t> ref_count_;
|
|
|
|
// Track environment variables.
|
|
Flag flag_;
|
|
|
|
ThunkLoader* thunkLoader_;
|
|
|
|
// Pools memory for SharedSignal (Signal ABI blocks)
|
|
SharedSignalPool_t SharedSignalPool;
|
|
|
|
// Pools KFD Events for InterruptSignal
|
|
InterruptSignal::EventPool EventPool;
|
|
|
|
// Kfd version
|
|
KfdVersion_t kfd_version;
|
|
|
|
std::unique_ptr<AMD::SvmProfileControl> svm_profile_;
|
|
|
|
// IPC DMA buf unix domain socket server dmabuf FD passing
|
|
int ipc_sock_server_fd_;
|
|
std::map<uint64_t, int> ipc_sock_server_conns_;
|
|
KernelMutex ipc_sock_server_lock_;
|
|
|
|
private:
|
|
void CheckVirtualMemApiSupport();
|
|
int GetAmdgpuDeviceArgs(Agent *agent, ShareableHandle handle, int *drm_fd,
|
|
uint64_t *cpu_addr);
|
|
|
|
bool virtual_mem_api_supported_;
|
|
bool xnack_enabled_;
|
|
|
|
typedef void* ThunkHandle;
|
|
|
|
/// @brief Record of one reserved virtual address range; the value type of
/// the reserved-address map, which is indexed by VA.
struct AddressHandle {
  /// @brief Empty record: null address, zero size, not registered.
  AddressHandle() : AddressHandle(nullptr, 0, false) {}

  /// @param [in] addr Address returned by the OS for the reservation.
  /// @param [in] _size Size of the reservation.
  /// @param [in] _registered Initial value of the registered flag.
  AddressHandle(void* addr, size_t _size, bool _registered)
      : os_addr(addr), size(_size), use_count(0), registered(_registered) {}

  // Address returned by OS. May be different from user address when adjusted for alignment
  void* os_addr;
  size_t size;
  // Starts at zero in both constructors.
  int use_count;
  bool registered;
};
|
|
std::map<const void*, AddressHandle> reserved_address_map_; // Indexed by VA
|
|
|
|
struct MemoryHandle {
|
|
MemoryHandle() : region(NULL), size(0), ref_count(0), thunk_handle(NULL), alloc_flag(0) {}
|
|
MemoryHandle(const MemoryRegion* region, size_t size, uint64_t flags_unused,
|
|
ThunkHandle thunk_handle, MemoryRegion::AllocateFlags alloc_flag)
|
|
: region(region),
|
|
size(size),
|
|
ref_count(1),
|
|
use_count(0),
|
|
thunk_handle(thunk_handle),
|
|
alloc_flag(alloc_flag) {}
|
|
|
|
static __forceinline hsa_amd_vmem_alloc_handle_t Convert(void* handle) {
|
|
hsa_amd_vmem_alloc_handle_t ret_handle = {
|
|
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(handle))};
|
|
return ret_handle;
|
|
}
|
|
|
|
__forceinline core::Agent* agentOwner() const { return region->owner(); }
|
|
|
|
const MemoryRegion* region;
|
|
size_t size;
|
|
int ref_count;
|
|
int use_count;
|
|
ThunkHandle thunk_handle; // handle returned by hsaKmtAllocMemory(NoAddress = 1)
|
|
MemoryRegion::AllocateFlags alloc_flag;
|
|
};
|
|
std::map<ThunkHandle, MemoryHandle> memory_handle_map_;
|
|
|
|
struct MappedHandle;
|
|
struct MappedHandleAllowedAgent {
|
|
MappedHandleAllowedAgent(MappedHandle* _mappedHandle, Agent* targetAgent, void* va, size_t size,
|
|
hsa_access_permission_t perms);
|
|
~MappedHandleAllowedAgent();
|
|
|
|
hsa_status_t RemoveAccess();
|
|
hsa_status_t EnableAccess(hsa_access_permission_t perms);
|
|
|
|
void* va;
|
|
size_t size;
|
|
Agent* targetAgent;
|
|
hsa_access_permission_t permissions;
|
|
MappedHandle* mappedHandle;
|
|
ShareableHandle shareable_handle;
|
|
};
|
|
|
|
struct MappedHandle {
|
|
MappedHandle(MemoryHandle *mem_handle, AddressHandle *address_handle,
|
|
uint64_t offset, size_t size, int drm_fd, void *drm_cpu_addr,
|
|
hsa_access_permission_t perm, ShareableHandle shareable_handle)
|
|
: mem_handle(mem_handle), address_handle(address_handle),
|
|
offset(offset), size(size), drm_fd(drm_fd),
|
|
drm_cpu_addr(drm_cpu_addr), shareable_handle(shareable_handle) {}
|
|
|
|
__forceinline core::Agent* agentOwner() const { return mem_handle->region->owner(); }
|
|
|
|
MemoryHandle* mem_handle;
|
|
AddressHandle* address_handle;
|
|
uint64_t offset;
|
|
size_t size;
|
|
int drm_fd;
|
|
void* drm_cpu_addr; // CPU Buffer address
|
|
ShareableHandle shareable_handle;
|
|
std::map<Agent*, MappedHandleAllowedAgent> allowed_agents;
|
|
};
|
|
std::map<const void*, MappedHandle> mapped_handle_map_; // Indexed by VA
|
|
|
|
hsa_status_t VMemoryMapAllowAccess(const void *va,
|
|
hsa_access_permission_t perm,
|
|
const hsa_agent_t *agents,
|
|
size_t num_agents);
|
|
hsa_status_t
|
|
VMemorySetAccessPerHandle(void *va, MappedHandle &MappedHandle,
|
|
const hsa_amd_memory_access_desc_t *desc,
|
|
const size_t desc_cnt);
|
|
|
|
// Frees runtime memory when the runtime library is unloaded if safe to do so.
|
|
// Failure to release the runtime indicates an incorrect application but is
|
|
// common (example: calls library routines at process exit).
|
|
friend class RuntimeCleanup;
|
|
|
|
void InitIPCDmaBufSupport();
|
|
bool ipc_dmabuf_supported_;
|
|
int IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
|
|
amdgpu_bo_import_result *res,
|
|
unsigned int numNodes, HSAuint32 *nodes,
|
|
void **importAddress, HSAuint64 *importSize);
|
|
};
|
|
|
|
} // namespace core
|
|
} // namespace rocr
|
|
#endif // header guard
|