diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp index 7fcd1d1723..bf501cd39f 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp @@ -1015,6 +1015,14 @@ hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size, host_ptr, size, agents, num_agent, agent_ptr); } +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents, + int num_agent, hsa_amd_memory_pool_t pool, + uint32_t flags, void** agent_ptr) { + return amdExtTable->hsa_amd_memory_lock_to_pool_fn(host_ptr, size, agents, num_agent, pool, flags, + agent_ptr); +} + // Mirrors Amd Extension Apis hsa_status_t HSA_API hsa_amd_memory_unlock(void* host_ptr) { return amdExtTable->hsa_amd_memory_unlock_fn(host_ptr); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h index 8a1ca3ea7c..e96a21c809 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h @@ -82,8 +82,7 @@ class MemoryRegion : public core::MemoryRegion { /// @brief Free agent accessible memory (system / local memory). 
static void FreeKfdMemory(void* ptr, size_t size); - static bool RegisterMemory(void* ptr, size_t size, size_t num_nodes, - const uint32_t* nodes); + static bool RegisterMemory(void* ptr, size_t size, const HsaMemFlags& MemFlags); static void DeregisterMemory(void* ptr); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h index fcbd2eff33..17d927d9a1 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h @@ -173,6 +173,11 @@ hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size, hsa_agent_t* agents, int num_agent, void** agent_ptr); +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents, + int num_agent, hsa_amd_memory_pool_t pool, + uint32_t flags, void** agent_ptr); + // Mirrors Amd Extension Apis hsa_status_t HSA_API hsa_amd_memory_unlock(void* host_ptr); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp index d2bb4e9b61..e0bc2be05c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp @@ -68,15 +68,11 @@ void MemoryRegion::FreeKfdMemory(void* ptr, size_t size) { assert(status == HSAKMT_STATUS_SUCCESS); } -bool MemoryRegion::RegisterMemory(void* ptr, size_t size, size_t num_nodes, - const uint32_t* nodes) { +bool MemoryRegion::RegisterMemory(void* ptr, size_t size, const HsaMemFlags& MemFlags) { assert(ptr != NULL); assert(size != 0); - assert(num_nodes != 0); - assert(nodes != NULL); - const HSAKMT_STATUS status = hsaKmtRegisterMemoryToNodes( - ptr, size, num_nodes, const_cast<uint32_t*>(nodes)); + const HSAKMT_STATUS status = 
hsaKmtRegisterMemoryWithFlags(ptr, size, MemFlags); return (status == HSAKMT_STATUS_SUCCESS); } @@ -120,8 +116,6 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owne (mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE) ? 0 : 1; mem_flag_.ui32.NonPaged = 1; - map_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB; - virtual_size_ = kGpuVmSize; } else if (IsSystem()) { mem_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB; @@ -129,9 +123,6 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owne mem_flag_.ui32.HostAccess = 1; mem_flag_.ui32.CachePolicy = HSA_CACHING_CACHED; - map_flag_.ui32.HostAccess = 1; - map_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB; - virtual_size_ = (full_profile) ? os::GetUserModeVirtualMemorySize() : kGpuVmSize; } @@ -584,8 +575,7 @@ hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents, } // Call kernel driver to register and pin the memory. - if (RegisterMemory(host_ptr, size, whitelist_nodes.size(), - &whitelist_nodes[0])) { + if (RegisterMemory(host_ptr, size, mem_flag_)) { uint64_t alternate_va = 0; if (MakeKfdMemoryResident(whitelist_nodes.size(), &whitelist_nodes[0], host_ptr, size, &alternate_va, map_flag_)) { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp index 6e18860ebb..b7a71d4534 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp @@ -386,6 +386,7 @@ void HsaApiTable::UpdateAmdExts() { amd_ext_api.hsa_amd_queue_set_priority_fn = AMD::hsa_amd_queue_set_priority; amd_ext_api.hsa_amd_memory_async_copy_rect_fn = AMD::hsa_amd_memory_async_copy_rect; amd_ext_api.hsa_amd_runtime_queue_create_register_fn = AMD::hsa_amd_runtime_queue_create_register; + amd_ext_api.hsa_amd_memory_lock_to_pool_fn = AMD::hsa_amd_memory_lock_to_pool; } class Init { diff 
--git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index 3e690ff35a..b4fd546431 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -549,14 +549,48 @@ hsa_status_t hsa_amd_memory_lock(void* host_ptr, size_t size, return HSA_STATUS_ERROR_INVALID_ARGUMENT; } - const amd::MemoryRegion* system_region = - reinterpret_cast<const amd::MemoryRegion*>( - core::Runtime::runtime_singleton_->system_regions_fine()[0]); + // Check for APU + if (core::Runtime::runtime_singleton_->system_regions_coarse().size() == 0) { + assert(core::Runtime::runtime_singleton_->system_regions_fine()[0]->full_profile() && + "Missing coarse grain host memory on dGPU system."); + *agent_ptr = host_ptr; + return HSA_STATUS_SUCCESS; + } + + const amd::MemoryRegion* system_region = static_cast<const amd::MemoryRegion*>( + core::Runtime::runtime_singleton_->system_regions_coarse()[0]); return system_region->Lock(num_agent, agents, host_ptr, size, agent_ptr); CATCH; } +hsa_status_t hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents, + int num_agent, hsa_amd_memory_pool_t pool, uint32_t flags, + void** agent_ptr) { + TRY; + IS_OPEN(); + if (agent_ptr != NULL) *agent_ptr = NULL;  // NULL agent_ptr is rejected below. + + if (size == 0 || host_ptr == NULL || agent_ptr == NULL || flags != 0) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + if ((agents != NULL && num_agent == 0) || (agents == NULL && num_agent != 0)) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + hsa_region_t region = {pool.handle}; + const amd::MemoryRegion* mem_region = amd::MemoryRegion::Convert(region); + if (mem_region == nullptr) { + return (hsa_status_t)HSA_STATUS_ERROR_INVALID_MEMORY_POOL; + } + if (mem_region->owner()->device_type() != core::Agent::kAmdCpuDevice) + return (hsa_status_t)HSA_STATUS_ERROR_INVALID_MEMORY_POOL; + + return mem_region->Lock(num_agent, agents, host_ptr, size, agent_ptr); + 
CATCH; +} + hsa_status_t hsa_amd_memory_unlock(void* host_ptr) { TRY; IS_OPEN(); @@ -615,7 +649,7 @@ hsa_status_t hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool, siz TRY; IS_OPEN(); - if (size == 0 || ptr == NULL) { + if (size == 0 || ptr == NULL || flags != 0) { return HSA_STATUS_ERROR_INVALID_ARGUMENT; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def index cce468b985..b04afb21fe 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def +++ b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def @@ -181,7 +181,9 @@ global: hsa_amd_queue_cu_set_mask; hsa_amd_memory_fill; hsa_amd_memory_async_copy; + hsa_amd_memory_async_copy_rect; hsa_amd_memory_lock; + hsa_amd_memory_lock_to_pool; hsa_amd_memory_unlock; hsa_amd_agent_iterate_memory_pools; hsa_amd_agent_memory_pool_get_info; @@ -216,7 +218,6 @@ global: hsa_amd_ipc_signal_attach; hsa_amd_register_system_event_handler; hsa_amd_queue_set_priority; - hsa_amd_memory_async_copy_rect; local: *; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h index a9971b2299..2bdf229312 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h @@ -179,6 +179,7 @@ struct AmdExtTable { decltype(hsa_amd_queue_set_priority)* hsa_amd_queue_set_priority_fn; decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn; decltype(hsa_amd_runtime_queue_create_register)* hsa_amd_runtime_queue_create_register_fn; + decltype(hsa_amd_memory_lock_to_pool)* hsa_amd_memory_lock_to_pool_fn; }; // Table to export HSA Core Runtime Apis diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h index ccd3f82045..646ccbead2 100644 --- 
a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -778,7 +778,7 @@ hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools( * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE in @p memory_pool. * * @param[in] flags A bit-field that is used to specify allocation - * directives. Must be 0. + * directives. Reserved parameter, must be 0. * * @param[out] ptr Pointer to the location where to store the base virtual * address of @@ -799,7 +799,8 @@ hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools( * allocate memory in @p memory_pool, or @p size is greater than the value of * HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool. * - * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0. + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0, + * or @p flags is not 0. * */ hsa_status_t HSA_API @@ -1204,11 +1205,12 @@ hsa_status_t HSA_API hsa_amd_memory_migrate(const void* ptr, /** * - * @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and return a new - * pointer accessible by the @p agents. If the @p host_ptr overlaps with previously locked - * memory, then the overlap area is kept locked (i.e multiple mappings are permitted). In this case, - * the same input @p host_ptr may give different locked @p agent_ptr and when it does, they - * are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent). + * @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and + * return a new pointer accessible by the @p agents. If the @p host_ptr overlaps with previously + * locked memory, then the overlap area is kept locked (i.e. multiple mappings are permitted). In + * this case, the same input @p host_ptr may give different locked @p agent_ptr and when it does, + * they are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent). 
+ * Accesses to @p agent_ptr are coarse grained. * * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator. * @@ -1235,20 +1237,69 @@ hsa_status_t HSA_API hsa_amd_memory_migrate(const void* ptr, * @p agent_ptr is NULL or @p agents not NULL but @p num_agent is 0 or @p agents * is NULL but @p num_agent is not 0. */ - hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size, hsa_agent_t* agents, int num_agent, void** agent_ptr); /** * - * @brief Unpin the host pointer previously pinned via ::hsa_amd_memory_lock. + * @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and + * return a new pointer accessible by the @p agents. If the @p host_ptr overlaps with previously + * locked memory, then the overlap area is kept locked (i.e. multiple mappings are permitted). + * In this case, the same input @p host_ptr may give different locked @p agent_ptr and when it + * does, they are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent). + * Accesses to the memory via @p agent_ptr have the same access properties as memory allocated from + * @p pool as determined by ::hsa_amd_memory_pool_get_info and ::hsa_amd_agent_memory_pool_get_info + * (ex. coarse/fine grain, platform atomic support, link info). Physical composition and placement + * of the memory (ex. page size, NUMA binding) is not changed. + * + * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator. + * + * @param[in] size The size to be locked. + * + * @param[in] agents Array of agent handle to gain access to the @p host_ptr. + * If this parameter is NULL and the @p num_agent is 0, all agents + * in the platform will gain access to the @p host_ptr. + * + * @param[in] pool Global memory pool owned by a CPU agent. + * + * @param[in] flags A bit-field that is used to specify allocation + * directives. Reserved parameter, must be 0. + * + * @param[out] agent_ptr Pointer to the location where to store the new address. 
+ * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in + * allocating the necessary resources. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT One or more agent in @p agents is + * invalid or can not access @p pool. + * + * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL @p pool is invalid or not owned + * by a CPU agent. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 or @p host_ptr or + * @p agent_ptr is NULL or @p agents not NULL but @p num_agent is 0 or @p agents + * is NULL but @p num_agent is not 0 or @p flags is not 0. + */ +hsa_status_t HSA_API hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents, + int num_agent, hsa_amd_memory_pool_t pool, + uint32_t flags, void** agent_ptr); + +/** + * + * @brief Unpin the host pointer previously pinned via ::hsa_amd_memory_lock or + * ::hsa_amd_memory_lock_to_pool. * * @details The behavior is undefined if the host pointer being unpinned does not * match previous pinned address or if the host pointer was already deallocated. * * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator that was - * pinned previously via ::hsa_amd_memory_lock. + * pinned previously via ::hsa_amd_memory_lock or ::hsa_amd_memory_lock_to_pool. * * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. *