Add hsa_amd_memory_lock_to_pool.
Makes malloc memory accessible to GPUs so that the memory has the
capabilities of the pool it is locked to.
This admits fine grained locked memory and reserves API space for any future
special CPU pools.
Change-Id: If8c3dd8582a43f19d3d36b3763c1a688cc419ef0
[ROCm/ROCR-Runtime commit: a535e18cc1]
このコミットが含まれているのは:
@@ -1015,6 +1015,14 @@ hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size,
|
||||
host_ptr, size, agents, num_agent, agent_ptr);
|
||||
}
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents,
                                                 int num_agent, hsa_amd_memory_pool_t pool,
                                                 uint32_t flags, void** agent_ptr) {
  // Public entry point: forward every argument, unchanged, to the
  // implementation currently installed in the AMD extension table.
  const auto lock_to_pool_fn = amdExtTable->hsa_amd_memory_lock_to_pool_fn;
  return lock_to_pool_fn(host_ptr, size, agents, num_agent, pool, flags, agent_ptr);
}
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_memory_unlock(void* host_ptr) {
|
||||
return amdExtTable->hsa_amd_memory_unlock_fn(host_ptr);
|
||||
|
||||
@@ -82,8 +82,7 @@ class MemoryRegion : public core::MemoryRegion {
|
||||
/// @brief Free agent accessible memory (system / local memory).
|
||||
static void FreeKfdMemory(void* ptr, size_t size);
|
||||
|
||||
static bool RegisterMemory(void* ptr, size_t size, size_t num_nodes,
|
||||
const uint32_t* nodes);
|
||||
static bool RegisterMemory(void* ptr, size_t size, const HsaMemFlags& MemFlags);
|
||||
|
||||
static void DeregisterMemory(void* ptr);
|
||||
|
||||
|
||||
@@ -173,6 +173,11 @@ hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size,
|
||||
hsa_agent_t* agents, int num_agent,
|
||||
void** agent_ptr);
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents,
|
||||
int num_agent, hsa_amd_memory_pool_t pool,
|
||||
uint32_t flags, void** agent_ptr);
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_memory_unlock(void* host_ptr);
|
||||
|
||||
|
||||
@@ -68,15 +68,11 @@ void MemoryRegion::FreeKfdMemory(void* ptr, size_t size) {
|
||||
assert(status == HSAKMT_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
bool MemoryRegion::RegisterMemory(void* ptr, size_t size, size_t num_nodes,
|
||||
const uint32_t* nodes) {
|
||||
bool MemoryRegion::RegisterMemory(void* ptr, size_t size, const HsaMemFlags& MemFlags) {
|
||||
assert(ptr != NULL);
|
||||
assert(size != 0);
|
||||
assert(num_nodes != 0);
|
||||
assert(nodes != NULL);
|
||||
|
||||
const HSAKMT_STATUS status = hsaKmtRegisterMemoryToNodes(
|
||||
ptr, size, num_nodes, const_cast<uint32_t*>(nodes));
|
||||
const HSAKMT_STATUS status = hsaKmtRegisterMemoryWithFlags(ptr, size, MemFlags);
|
||||
return (status == HSAKMT_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
@@ -120,8 +116,6 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owne
|
||||
(mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE) ? 0 : 1;
|
||||
mem_flag_.ui32.NonPaged = 1;
|
||||
|
||||
map_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
||||
|
||||
virtual_size_ = kGpuVmSize;
|
||||
} else if (IsSystem()) {
|
||||
mem_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
||||
@@ -129,9 +123,6 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owne
|
||||
mem_flag_.ui32.HostAccess = 1;
|
||||
mem_flag_.ui32.CachePolicy = HSA_CACHING_CACHED;
|
||||
|
||||
map_flag_.ui32.HostAccess = 1;
|
||||
map_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
||||
|
||||
virtual_size_ =
|
||||
(full_profile) ? os::GetUserModeVirtualMemorySize() : kGpuVmSize;
|
||||
}
|
||||
@@ -584,8 +575,7 @@ hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents,
|
||||
}
|
||||
|
||||
// Call kernel driver to register and pin the memory.
|
||||
if (RegisterMemory(host_ptr, size, whitelist_nodes.size(),
|
||||
&whitelist_nodes[0])) {
|
||||
if (RegisterMemory(host_ptr, size, mem_flag_)) {
|
||||
uint64_t alternate_va = 0;
|
||||
if (MakeKfdMemoryResident(whitelist_nodes.size(), &whitelist_nodes[0],
|
||||
host_ptr, size, &alternate_va, map_flag_)) {
|
||||
|
||||
@@ -386,6 +386,7 @@ void HsaApiTable::UpdateAmdExts() {
|
||||
amd_ext_api.hsa_amd_queue_set_priority_fn = AMD::hsa_amd_queue_set_priority;
|
||||
amd_ext_api.hsa_amd_memory_async_copy_rect_fn = AMD::hsa_amd_memory_async_copy_rect;
|
||||
amd_ext_api.hsa_amd_runtime_queue_create_register_fn = AMD::hsa_amd_runtime_queue_create_register;
|
||||
amd_ext_api.hsa_amd_memory_lock_to_pool_fn = AMD::hsa_amd_memory_lock_to_pool;
|
||||
}
|
||||
|
||||
class Init {
|
||||
|
||||
@@ -549,14 +549,48 @@ hsa_status_t hsa_amd_memory_lock(void* host_ptr, size_t size,
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
const amd::MemoryRegion* system_region =
|
||||
reinterpret_cast<const amd::MemoryRegion*>(
|
||||
core::Runtime::runtime_singleton_->system_regions_fine()[0]);
|
||||
// Check for APU
|
||||
if (core::Runtime::runtime_singleton_->system_regions_coarse().size() == 0) {
|
||||
assert(core::Runtime::runtime_singleton_->system_regions_fine()[0]->full_profile() &&
|
||||
"Missing coarse grain host memory on dGPU system.");
|
||||
*agent_ptr = host_ptr;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
const amd::MemoryRegion* system_region = static_cast<const amd::MemoryRegion*>(
|
||||
core::Runtime::runtime_singleton_->system_regions_coarse()[0]);
|
||||
|
||||
return system_region->Lock(num_agent, agents, host_ptr, size, agent_ptr);
|
||||
CATCH;
|
||||
}
|
||||
|
||||
/// @brief Lock (pin) a host buffer through a caller-selected memory pool.
///
/// Validates the arguments, resolves @p pool to its amd::MemoryRegion, checks
/// that the region is owned by a CPU agent and then delegates to
/// MemoryRegion::Lock on that region.
///
/// @param[in] host_ptr Host buffer to lock. Must not be NULL.
/// @param[in] size Number of bytes to lock. Must not be 0.
/// @param[in] agents Agents that should gain access, or NULL together with
/// @p num_agent == 0.
/// @param[in] num_agent Number of entries in @p agents.
/// @param[in] pool Memory pool whose region performs the lock. Must be owned
/// by a CPU agent.
/// @param[in] flags Reserved, must be 0.
/// @param[out] agent_ptr Receives the locked address. It is reset to NULL
/// before any further validation so it never holds a stale value on failure.
///
/// @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p agent_ptr or @p host_ptr is
/// NULL, @p size is 0, @p flags is not 0, or @p agents / @p num_agent disagree.
/// @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL @p pool does not resolve to a
/// memory region or the region is not owned by a CPU agent.
/// @return Otherwise, the status returned by MemoryRegion::Lock.
hsa_status_t hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents,
                                         int num_agent, hsa_amd_memory_pool_t pool, uint32_t flags,
                                         void** agent_ptr) {
  TRY;
  IS_OPEN();

  // The output pointer must be validated before it is written through;
  // dereferencing it first would crash instead of reporting the error.
  if (agent_ptr == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  *agent_ptr = NULL;

  if (size == 0 || host_ptr == NULL || flags != 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // The agent list and its length must either both be given or both be empty.
  if ((agents != NULL && num_agent == 0) || (agents == NULL && num_agent != 0)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Pool handles and region handles share the same underlying handle value.
  hsa_region_t region = {pool.handle};
  const amd::MemoryRegion* mem_region = amd::MemoryRegion::Convert(region);
  if (mem_region == nullptr) {
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);
  }

  // Only pools owned by a CPU agent are accepted as lock targets.
  if (mem_region->owner()->device_type() != core::Agent::kAmdCpuDevice) {
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);
  }

  return mem_region->Lock(num_agent, agents, host_ptr, size, agent_ptr);
  CATCH;
}
|
||||
|
||||
hsa_status_t hsa_amd_memory_unlock(void* host_ptr) {
|
||||
TRY;
|
||||
IS_OPEN();
|
||||
@@ -615,7 +649,7 @@ hsa_status_t hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool, siz
|
||||
TRY;
|
||||
IS_OPEN();
|
||||
|
||||
if (size == 0 || ptr == NULL) {
|
||||
if (size == 0 || ptr == NULL || flags != 0) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
|
||||
@@ -181,7 +181,9 @@ global:
|
||||
hsa_amd_queue_cu_set_mask;
|
||||
hsa_amd_memory_fill;
|
||||
hsa_amd_memory_async_copy;
|
||||
hsa_amd_memory_async_copy_rect;
|
||||
hsa_amd_memory_lock;
|
||||
hsa_amd_memory_lock_to_pool;
|
||||
hsa_amd_memory_unlock;
|
||||
hsa_amd_agent_iterate_memory_pools;
|
||||
hsa_amd_agent_memory_pool_get_info;
|
||||
@@ -216,7 +218,6 @@ global:
|
||||
hsa_amd_ipc_signal_attach;
|
||||
hsa_amd_register_system_event_handler;
|
||||
hsa_amd_queue_set_priority;
|
||||
hsa_amd_memory_async_copy_rect;
|
||||
|
||||
local:
|
||||
*;
|
||||
|
||||
@@ -179,6 +179,7 @@ struct AmdExtTable {
|
||||
decltype(hsa_amd_queue_set_priority)* hsa_amd_queue_set_priority_fn;
|
||||
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
|
||||
decltype(hsa_amd_runtime_queue_create_register)* hsa_amd_runtime_queue_create_register_fn;
|
||||
decltype(hsa_amd_memory_lock_to_pool)* hsa_amd_memory_lock_to_pool_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Core Runtime Apis
|
||||
|
||||
@@ -778,7 +778,7 @@ hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools(
|
||||
* ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE in @p memory_pool.
|
||||
*
|
||||
* @param[in] flags A bit-field that is used to specify allocation
|
||||
* directives. Must be 0.
|
||||
* directives. Reserved parameter, must be 0.
|
||||
*
|
||||
* @param[out] ptr Pointer to the location where to store the base virtual
|
||||
* address of
|
||||
@@ -799,7 +799,8 @@ hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools(
|
||||
* allocate memory in @p memory_pool, or @p size is greater than the value of
|
||||
* HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0.
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0,
|
||||
* or @p flags is not 0.
|
||||
*
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
@@ -1204,11 +1205,12 @@ hsa_status_t HSA_API hsa_amd_memory_migrate(const void* ptr,
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and return a new
|
||||
* pointer accessible by the @p agents. If the @p host_ptr overlaps with previously locked
|
||||
* memory, then the overlap area is kept locked (i.e multiple mappings are permitted). In this case,
|
||||
* the same input @p host_ptr may give different locked @p agent_ptr and when it does, they
|
||||
* are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent).
|
||||
* @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and
|
||||
* return a new pointer accessible by the @p agents. If the @p host_ptr overlaps with previously
|
||||
* locked memory, then the overlap area is kept locked (i.e. multiple mappings are permitted). In
|
||||
* this case, the same input @p host_ptr may give different locked @p agent_ptr and when it does,
|
||||
* they are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent).
|
||||
* Accesses to @p agent_ptr are coarse grained.
|
||||
*
|
||||
* @param[in] host_ptr A buffer allocated by C/C++ or OS allocator.
|
||||
*
|
||||
@@ -1235,20 +1237,69 @@ hsa_status_t HSA_API hsa_amd_memory_migrate(const void* ptr,
|
||||
* @p agent_ptr is NULL or @p agents not NULL but @p num_agent is 0 or @p agents
|
||||
* is NULL but @p num_agent is not 0.
|
||||
*/
|
||||
|
||||
hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size,
|
||||
hsa_agent_t* agents, int num_agent,
|
||||
void** agent_ptr);
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief Unpin the host pointer previously pinned via ::hsa_amd_memory_lock.
|
||||
* @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and
|
||||
* return a new pointer accessible by the @p agents. If the @p host_ptr overlaps with previously
|
||||
* locked memory, then the overlap area is kept locked (i.e. multiple mappings are permitted).
|
||||
* In this case, the same input @p host_ptr may give different locked @p agent_ptr and when it
|
||||
* does, they are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent).
|
||||
* Accesses to the memory via @p agent_ptr have the same access properties as memory allocated from
|
||||
* @p pool as determined by ::hsa_amd_memory_pool_get_info and ::hsa_amd_agent_memory_pool_get_info
|
||||
* (ex. coarse/fine grain, platform atomic support, link info). Physical composition and placement
|
||||
* of the memory (ex. page size, NUMA binding) is not changed.
|
||||
*
|
||||
* @param[in] host_ptr A buffer allocated by C/C++ or OS allocator.
|
||||
*
|
||||
* @param[in] size The size to be locked.
|
||||
*
|
||||
* @param[in] agents Array of agent handle to gain access to the @p host_ptr.
|
||||
* If this parameter is NULL and the @p num_agent is 0, all agents
|
||||
* in the platform will gain access to the @p host_ptr.
|
||||
*
|
||||
* @param[in] pool Global memory pool owned by a CPU agent.
|
||||
*
|
||||
* @param[in] flags A bit-field that is used to specify allocation
|
||||
* directives. Reserved parameter, must be 0.
|
||||
*
|
||||
* @param[out] agent_ptr Pointer to the location where to store the new address.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
|
||||
* allocating the necessary resources.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT One or more agent in @p agents is
|
||||
* invalid or can not access @p pool.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL @p pool is invalid or not owned
|
||||
* by a CPU agent.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 or @p host_ptr or
|
||||
* @p agent_ptr is NULL or @p agents not NULL but @p num_agent is 0 or @p agents
|
||||
* is NULL but @p num_agent is not 0 or @p flags is not 0.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents,
|
||||
int num_agent, hsa_amd_memory_pool_t pool,
|
||||
uint32_t flags, void** agent_ptr);
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief Unpin the host pointer previously pinned via ::hsa_amd_memory_lock or
|
||||
* ::hsa_amd_memory_lock_to_pool.
|
||||
*
|
||||
* @details The behavior is undefined if the host pointer being unpinned does not
|
||||
* match previous pinned address or if the host pointer was already deallocated.
|
||||
*
|
||||
* @param[in] host_ptr A buffer allocated by C/C++ or OS allocator that was
|
||||
* pinned previously via ::hsa_amd_memory_lock.
|
||||
* pinned previously via ::hsa_amd_memory_lock or ::hsa_amd_memory_lock_to_pool.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする