34ac62274a
Legacy p2p copy path incorrectly transfered in whole pages rather than the requested size. Change-Id: I9aa7337754f9e32f587a0cc5305f8ffeb6196f10
544 rivejä
19 KiB
C++
544 rivejä
19 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// The University of Illinois/NCSA
|
|
// Open Source License (NCSA)
|
|
//
|
|
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
// Developed by:
|
|
//
|
|
// AMD Research and AMD HSA Software Development
|
|
//
|
|
// Advanced Micro Devices, Inc.
|
|
//
|
|
// www.amd.com
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to
|
|
// deal with the Software without restriction, including without limitation
|
|
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
// and/or sell copies of the Software, and to permit persons to whom the
|
|
// Software is furnished to do so, subject to the following conditions:
|
|
//
|
|
// - Redistributions of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimers.
|
|
// - Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimers in
|
|
// the documentation and/or other materials provided with the distribution.
|
|
// - Neither the names of Advanced Micro Devices, Inc,
|
|
// nor the names of its contributors may be used to endorse or promote
|
|
// products derived from this Software without specific prior written
|
|
// permission.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
// DEALINGS WITH THE SOFTWARE.
|
|
//
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// HSA runtime C++ interface file.
|
|
|
|
#ifndef HSA_RUNTME_CORE_INC_RUNTIME_H_
|
|
#define HSA_RUNTME_CORE_INC_RUNTIME_H_
|
|
|
|
#include <vector>
|
|
#include <map>
|
|
#include <memory>
|
|
#include <tuple>
|
|
#include <utility>
|
|
|
|
#include "core/inc/hsa_ext_interface.h"
|
|
#include "core/inc/hsa_internal.h"
|
|
#include "core/inc/hsa_ext_amd_impl.h"
|
|
|
|
#include "core/inc/agent.h"
|
|
#include "core/inc/exceptions.h"
|
|
#include "core/inc/memory_region.h"
|
|
#include "core/inc/signal.h"
|
|
#include "core/inc/interrupt_signal.h"
|
|
#include "core/util/flag.h"
|
|
#include "core/util/locks.h"
|
|
#include "core/util/os.h"
|
|
#include "core/util/utils.h"
|
|
|
|
#include "core/inc/amd_loader_context.hpp"
|
|
#include "core/inc/amd_hsa_code.hpp"
|
|
|
|
//---------------------------------------------------------------------------//
|
|
// Constants //
|
|
//---------------------------------------------------------------------------//
|
|
|
|
#define HSA_ARGUMENT_ALIGN_BYTES 16
|
|
#define HSA_QUEUE_ALIGN_BYTES 64
|
|
#define HSA_PACKET_ALIGN_BYTES 64
|
|
|
|
//Avoids include
|
|
namespace rocr {
|
|
namespace AMD {
|
|
class MemoryRegion;
|
|
} // namespace amd
|
|
|
|
namespace core {
|
|
extern bool g_use_interrupt_wait;
|
|
|
|
/// @brief Runtime class provides the following functions:
|
|
/// - open and close connection to kernel driver.
|
|
/// - load supported extension library (image and finalizer).
|
|
/// - load tools library.
|
|
/// - expose supported agents.
|
|
/// - allocate and free memory.
|
|
/// - memory copy and fill.
|
|
/// - grant access to memory (dgpu memory pool extension).
|
|
/// - maintain loader state.
|
|
/// - monitor asynchronous event from agent.
|
|
class Runtime {
|
|
friend class AMD::MemoryRegion;
|
|
public:
|
|
/// @brief Structure to describe connectivity between agents.
|
|
struct LinkInfo {
|
|
LinkInfo() : num_hop(0), info{0} {}
|
|
|
|
uint32_t num_hop;
|
|
hsa_amd_memory_pool_link_info_t info;
|
|
};
|
|
|
|
/// @brief Open connection to kernel driver and increment reference count.
|
|
static hsa_status_t Acquire();
|
|
|
|
/// @brief Decrement reference count and close connection to kernel driver.
|
|
static hsa_status_t Release();
|
|
|
|
/// @brief Checks if connection to kernel driver is opened.
|
|
/// @retval True if the connection to kernel driver is opened.
|
|
static bool IsOpen();
|
|
|
|
// @brief Callback handler for VM fault access.
|
|
static bool VMFaultHandler(hsa_signal_value_t val, void* arg);
|
|
|
|
// @brief Print known allocations near ptr.
|
|
static void PrintMemoryMapNear(void* ptr);
|
|
|
|
/// @brief Singleton object of the runtime.
|
|
static Runtime* runtime_singleton_;
|
|
|
|
/// @brief Insert agent into agent list ::agents_.
|
|
/// @param [in] agent Pointer to the agent object.
|
|
void RegisterAgent(Agent* agent);
|
|
|
|
/// @brief Delete all agent objects from ::agents_.
|
|
void DestroyAgents();
|
|
|
|
/// @brief Set the number of links connecting the agents in the platform.
|
|
void SetLinkCount(size_t num_link);
|
|
|
|
/// @brief Register link information connecting @p node_id_from and @p
|
|
/// node_id_to.
|
|
/// @param [in] node_id_from Node id of the source node.
|
|
/// @param [in] node_id_to Node id of the destination node.
|
|
/// @param [in] link_info The link information between source and destination
|
|
/// nodes.
|
|
void RegisterLinkInfo(uint32_t node_id_from, uint32_t node_id_to,
|
|
uint32_t num_hop,
|
|
hsa_amd_memory_pool_link_info_t& link_info);
|
|
|
|
/// @brief Query link information between two nodes.
|
|
/// @param [in] node_id_from Node id of the source node.
|
|
/// @param [in] node_id_to Node id of the destination node.
|
|
/// @retval The link information between source and destination nodes.
|
|
const LinkInfo GetLinkInfo(uint32_t node_id_from, uint32_t node_id_to);
|
|
|
|
/// @brief Invoke the user provided call back for each agent in the agent
|
|
/// list.
|
|
///
|
|
/// @param [in] callback User provided callback function.
|
|
/// @param [in] data User provided pointer as input for @p callback.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
|
|
/// agent returns ::HSA_STATUS_SUCCESS.
|
|
hsa_status_t IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent,
|
|
void* data),
|
|
void* data);
|
|
|
|
/// @brief Allocate memory on a particular region.
|
|
///
|
|
/// @param [in] region Pointer to region object.
|
|
/// @param [in] size Allocation size in bytes.
|
|
/// @param [in] alloc_flags Modifiers to pass to MemoryRegion allocator.
|
|
/// @param [out] address Pointer to store the allocation result.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS If allocation is successful.
|
|
hsa_status_t AllocateMemory(const MemoryRegion* region, size_t size,
|
|
MemoryRegion::AllocateFlags alloc_flags,
|
|
void** address);
|
|
|
|
/// @brief Free memory previously allocated with AllocateMemory.
|
|
///
|
|
/// @param [in] ptr Address of the memory to be freed.
|
|
///
|
|
/// @retval ::HSA_STATUS_ERROR If @p ptr is not the address of previous
|
|
/// allocation via ::core::Runtime::AllocateMemory
|
|
/// @retval ::HSA_STATUS_SUCCESS if @p ptr is successfully released.
|
|
hsa_status_t FreeMemory(void* ptr);
|
|
|
|
hsa_status_t RegisterReleaseNotifier(void* ptr, hsa_amd_deallocation_callback_t callback,
|
|
void* user_data);
|
|
|
|
hsa_status_t DeregisterReleaseNotifier(void* ptr, hsa_amd_deallocation_callback_t callback);
|
|
|
|
/// @brief Blocking memory copy from src to dst.
|
|
///
|
|
/// @param [in] dst Memory address of the destination.
|
|
/// @param [in] src Memory address of the source.
|
|
/// @param [in] size Copy size in bytes.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS if memory copy is successful and completed.
|
|
hsa_status_t CopyMemory(void* dst, const void* src, size_t size);
|
|
|
|
/// @brief Non-blocking memory copy from src to dst.
|
|
///
|
|
/// @details The memory copy will be performed after all signals in
|
|
/// @p dep_signals have value of 0. On completion @p completion_signal
|
|
/// will be decremented.
|
|
///
|
|
/// @param [in] dst Memory address of the destination.
|
|
/// @param [in] dst_agent Agent object associated with the destination. This
|
|
/// agent should be able to access the destination and source.
|
|
/// @param [in] src Memory address of the source.
|
|
/// @param [in] src_agent Agent object associated with the source. This
|
|
/// agent should be able to access the destination and source.
|
|
/// @param [in] size Copy size in bytes.
|
|
/// @param [in] dep_signals Array of signal dependency.
|
|
/// @param [in] completion_signal Completion signal object.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS if copy command has been submitted
|
|
/// successfully to the agent DMA queue.
|
|
hsa_status_t CopyMemory(void* dst, core::Agent& dst_agent, const void* src,
|
|
core::Agent& src_agent, size_t size,
|
|
std::vector<core::Signal*>& dep_signals,
|
|
core::Signal& completion_signal);
|
|
|
|
/// @brief Fill the first @p count of uint32_t in ptr with value.
|
|
///
|
|
/// @param [in] ptr Memory address to be filled.
|
|
/// @param [in] value The value/pattern that will be used to set @p ptr.
|
|
/// @param [in] count Number of uint32_t element to be set.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS if memory fill is successful and completed.
|
|
hsa_status_t FillMemory(void* ptr, uint32_t value, size_t count);
|
|
|
|
/// @brief Set agents as the whitelist to access ptr.
|
|
///
|
|
/// @param [in] num_agents The number of agent handles in @p agents array.
|
|
/// @param [in] agents Agent handle array.
|
|
/// @param [in] ptr Pointer of memory previously allocated via
|
|
/// core::Runtime::AllocateMemory.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS The whitelist has been configured
|
|
/// successfully and all agents in the @p agents could start accessing @p ptr.
|
|
hsa_status_t AllowAccess(uint32_t num_agents, const hsa_agent_t* agents,
|
|
const void* ptr);
|
|
|
|
/// @brief Query system information.
|
|
///
|
|
/// @param [in] attribute System info attribute to query.
|
|
/// @param [out] value Pointer to store the attribute value.
|
|
///
|
|
/// @retval HSA_STATUS_SUCCESS The attribute is valid and the @p value is
|
|
/// set.
|
|
hsa_status_t GetSystemInfo(hsa_system_info_t attribute, void* value);
|
|
|
|
/// @brief Register a callback function @p handler that is associated with
|
|
/// @p signal to asynchronous event monitor thread.
|
|
///
|
|
/// @param [in] signal Signal handle associated with @p handler.
|
|
/// @param [in] cond The condition to execute the @p handler.
|
|
/// @param [in] value The value to compare with @p signal value. If the
|
|
/// comparison satisfy @p cond, the @p handler will be called.
|
|
/// @param [in] arg Pointer to the argument that will be provided to @p
|
|
/// handler.
|
|
///
|
|
/// @retval ::HSA_STATUS_SUCCESS Registration is successful.
|
|
hsa_status_t SetAsyncSignalHandler(hsa_signal_t signal,
|
|
hsa_signal_condition_t cond,
|
|
hsa_signal_value_t value,
|
|
hsa_amd_signal_handler handler, void* arg);
|
|
|
|
hsa_status_t InteropMap(uint32_t num_agents, Agent** agents,
|
|
int interop_handle, uint32_t flags, size_t* size,
|
|
void** ptr, size_t* metadata_size,
|
|
const void** metadata);
|
|
|
|
hsa_status_t InteropUnmap(void* ptr);
|
|
|
|
struct PtrInfoBlockData {
|
|
void* base;
|
|
size_t length;
|
|
};
|
|
|
|
hsa_status_t PtrInfo(void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
|
|
uint32_t* num_agents_accessible, hsa_agent_t** accessible,
|
|
PtrInfoBlockData* block_info = nullptr);
|
|
|
|
hsa_status_t SetPtrInfoData(void* ptr, void* userptr);
|
|
|
|
hsa_status_t IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* handle);
|
|
|
|
hsa_status_t IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, uint32_t num_agents,
|
|
Agent** mapping_agents, void** mapped_ptr);
|
|
|
|
hsa_status_t IPCDetach(void* ptr);
|
|
|
|
const std::vector<Agent*>& cpu_agents() { return cpu_agents_; }
|
|
|
|
const std::vector<Agent*>& gpu_agents() { return gpu_agents_; }
|
|
|
|
const std::vector<uint32_t>& gpu_ids() { return gpu_ids_; }
|
|
|
|
Agent* region_gpu() { return region_gpu_; }
|
|
|
|
const std::vector<const MemoryRegion*>& system_regions_fine() const {
|
|
return system_regions_fine_;
|
|
}
|
|
|
|
const std::vector<const MemoryRegion*>& system_regions_coarse() const {
|
|
return system_regions_coarse_;
|
|
}
|
|
|
|
amd::hsa::loader::Loader* loader() { return loader_; }
|
|
|
|
amd::LoaderContext* loader_context() { return &loader_context_; }
|
|
|
|
amd::hsa::code::AmdHsaCodeManager* code_manager() { return &code_manager_; }
|
|
|
|
std::function<void*(size_t size, size_t align, MemoryRegion::AllocateFlags flags)>&
|
|
system_allocator() {
|
|
return system_allocator_;
|
|
}
|
|
|
|
std::function<void(void*)>& system_deallocator() {
|
|
return system_deallocator_;
|
|
}
|
|
|
|
const Flag& flag() const { return flag_; }
|
|
|
|
ExtensionEntryPoints extensions_;
|
|
|
|
hsa_status_t SetCustomSystemEventHandler(hsa_amd_system_event_callback_t callback,
|
|
void* data);
|
|
|
|
hsa_status_t SetInternalQueueCreateNotifier(hsa_amd_runtime_queue_notifier callback,
|
|
void* user_data);
|
|
|
|
void InternalQueueCreateNotify(const hsa_queue_t* queue, hsa_agent_t agent);
|
|
|
|
SharedSignalPool_t* GetSharedSignalPool() { return &SharedSignalPool; }
|
|
|
|
InterruptSignal::EventPool* GetEventPool() { return &EventPool; }
|
|
|
|
uint64_t sys_clock_freq() const { return sys_clock_freq_; }
|
|
|
|
protected:
|
|
static void AsyncEventsLoop(void*);
|
|
|
|
struct AllocationRegion {
|
|
AllocationRegion() : region(NULL), size(0), user_ptr(nullptr) {}
|
|
AllocationRegion(const MemoryRegion* region_arg, size_t size_arg)
|
|
: region(region_arg), size(size_arg), user_ptr(nullptr) {}
|
|
|
|
struct notifier_t {
|
|
void* ptr;
|
|
AMD::callback_t<hsa_amd_deallocation_callback_t> callback;
|
|
void* user_data;
|
|
};
|
|
|
|
const MemoryRegion* region;
|
|
size_t size;
|
|
void* user_ptr;
|
|
std::unique_ptr<std::vector<notifier_t>> notifiers;
|
|
};
|
|
|
|
struct AsyncEventsControl {
|
|
AsyncEventsControl() : async_events_thread_(NULL) {}
|
|
void Shutdown();
|
|
|
|
hsa_signal_t wake;
|
|
os::Thread async_events_thread_;
|
|
KernelMutex lock;
|
|
bool exit;
|
|
};
|
|
|
|
struct AsyncEvents {
|
|
void PushBack(hsa_signal_t signal, hsa_signal_condition_t cond,
|
|
hsa_signal_value_t value, hsa_amd_signal_handler handler,
|
|
void* arg);
|
|
|
|
void CopyIndex(size_t dst, size_t src);
|
|
|
|
size_t Size();
|
|
|
|
void PopBack();
|
|
|
|
void Clear();
|
|
|
|
std::vector<hsa_signal_t> signal_;
|
|
std::vector<hsa_signal_condition_t> cond_;
|
|
std::vector<hsa_signal_value_t> value_;
|
|
std::vector<hsa_amd_signal_handler> handler_;
|
|
std::vector<void*> arg_;
|
|
};
|
|
|
|
// Will be created before any user could call hsa_init but also could be
|
|
// destroyed before incorrectly written programs call hsa_shutdown.
|
|
static KernelMutex bootstrap_lock_;
|
|
|
|
Runtime();
|
|
|
|
Runtime(const Runtime&);
|
|
|
|
Runtime& operator=(const Runtime&);
|
|
|
|
~Runtime() {}
|
|
|
|
/// @brief Open connection to kernel driver.
|
|
hsa_status_t Load();
|
|
|
|
/// @brief Close connection to kernel driver and cleanup resources.
|
|
void Unload();
|
|
|
|
/// @brief Dynamically load extension libraries (images, finalizer) and
|
|
/// call OnLoad method on each loaded library.
|
|
void LoadExtensions();
|
|
|
|
/// @brief Call OnUnload method on each extension library then close it.
|
|
void UnloadExtensions();
|
|
|
|
/// @brief Dynamically load tool libraries and call OnUnload method on each
|
|
/// loaded library.
|
|
void LoadTools();
|
|
|
|
/// @brief Call OnUnload method of each tool library.
|
|
void UnloadTools();
|
|
|
|
/// @brief Close tool libraries.
|
|
void CloseTools();
|
|
|
|
// @brief Binds virtual memory access fault handler to this node.
|
|
void BindVmFaultHandler();
|
|
|
|
// @brief Acquire snapshot of system event handlers.
|
|
// Returns a copy to avoid holding a lock during callbacks.
|
|
std::vector<std::pair<AMD::callback_t<hsa_amd_system_event_callback_t>, void*>>
|
|
GetSystemEventHandlers();
|
|
|
|
/// @brief Get the index of ::link_matrix_.
|
|
/// @param [in] node_id_from Node id of the source node.
|
|
/// @param [in] node_id_to Node id of the destination node.
|
|
/// @retval Index in ::link_matrix_.
|
|
uint32_t GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to);
|
|
|
|
// Mutex object to protect multithreaded access to ::allocation_map_,
|
|
// KFD map/unmap, register/unregister, and access to hsaKmtQueryPointerInfo
|
|
// registered & mapped arrays.
|
|
KernelMutex memory_lock_;
|
|
|
|
// Array containing tools library handles.
|
|
std::vector<os::LibHandle> tool_libs_;
|
|
|
|
// Agent list containing all CPU agents in the platform.
|
|
std::vector<Agent*> cpu_agents_;
|
|
|
|
// Agent list containing all compatible GPU agents in the platform.
|
|
std::vector<Agent*> gpu_agents_;
|
|
|
|
// Agent map containing all agents indexed by their KFD node IDs.
|
|
std::map<uint32_t, std::vector<Agent*> > agents_by_node_;
|
|
|
|
// Agent list containing all compatible gpu agent ids in the platform.
|
|
std::vector<uint32_t> gpu_ids_;
|
|
|
|
// List of all fine grain system memory region in the platform.
|
|
std::vector<const MemoryRegion*> system_regions_fine_;
|
|
|
|
// List of all coarse grain system memory region in the platform.
|
|
std::vector<const MemoryRegion*> system_regions_coarse_;
|
|
|
|
// Matrix of IO link.
|
|
std::vector<LinkInfo> link_matrix_;
|
|
|
|
// Loader instance.
|
|
amd::hsa::loader::Loader* loader_;
|
|
|
|
// Loader context.
|
|
amd::LoaderContext loader_context_;
|
|
|
|
// Code object manager.
|
|
amd::hsa::code::AmdHsaCodeManager code_manager_;
|
|
|
|
// Contains the region, address, and size of previously allocated memory.
|
|
std::map<const void*, AllocationRegion> allocation_map_;
|
|
|
|
// Allocator using ::system_region_
|
|
std::function<void*(size_t size, size_t align, MemoryRegion::AllocateFlags flags)> system_allocator_;
|
|
|
|
// Deallocator using ::system_region_
|
|
std::function<void(void*)> system_deallocator_;
|
|
|
|
// Deprecated HSA Region API GPU (for legacy APU support only)
|
|
Agent* region_gpu_;
|
|
|
|
AsyncEventsControl async_events_control_;
|
|
|
|
AsyncEvents async_events_;
|
|
|
|
AsyncEvents new_async_events_;
|
|
|
|
// System clock frequency.
|
|
uint64_t sys_clock_freq_;
|
|
|
|
// Number of Numa Nodes
|
|
size_t num_nodes_;
|
|
|
|
// @brief AMD HSA event to monitor for virtual memory access fault.
|
|
HsaEvent* vm_fault_event_;
|
|
|
|
// @brief HSA signal to contain the VM fault event.
|
|
Signal* vm_fault_signal_;
|
|
|
|
// Custom system event handlers.
|
|
std::vector<std::pair<AMD::callback_t<hsa_amd_system_event_callback_t>, void*>>
|
|
system_event_handlers_;
|
|
|
|
// System event handler lock
|
|
KernelMutex system_event_lock_;
|
|
|
|
// Internal queue creation notifier
|
|
AMD::callback_t<hsa_amd_runtime_queue_notifier> internal_queue_create_notifier_;
|
|
|
|
void* internal_queue_create_notifier_user_data_;
|
|
|
|
// Holds reference count to runtime object.
|
|
std::atomic<uint32_t> ref_count_;
|
|
|
|
// Track environment variables.
|
|
Flag flag_;
|
|
|
|
// Pools memory for SharedSignal (Signal ABI blocks)
|
|
SharedSignalPool_t SharedSignalPool;
|
|
|
|
// Pools KFD Events for InterruptSignal
|
|
InterruptSignal::EventPool EventPool;
|
|
|
|
// Frees runtime memory when the runtime library is unloaded if safe to do so.
|
|
// Failure to release the runtime indicates an incorrect application but is
|
|
// common (example: calls library routines at process exit).
|
|
friend class RuntimeCleanup;
|
|
};
|
|
|
|
} // namespace core
|
|
} // namespace rocr
|
|
#endif // header guard
|