From d5ecfae62ff19601bf91ae1f7a6dff8a79468a23 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Mon, 22 Aug 2016 20:19:21 -0500 Subject: [PATCH] Refactor: Consolidate calls to hsaKmtAllocMemory Route all device-visible system memory allocations through system_allocator. Change-Id: I5e90a1bf491e432678a6d8ab1f9f3770734cbda1 [ROCm/ROCR-Runtime commit: 74f5aca93d23fd00afb3581a28767d2d7b0b16e7] --- .../hsa-runtime/core/common/shared.cpp | 4 +- .../runtime/hsa-runtime/core/common/shared.h | 6 +-- .../runtime/hsa-runtime/core/inc/agent.h | 1 - .../hsa-runtime/core/inc/amd_blit_kernel.h | 1 + .../hsa-runtime/core/inc/amd_memory_region.h | 4 +- .../runtime/hsa-runtime/core/inc/checked.h | 3 +- .../hsa-runtime/core/inc/memory_region.h | 13 ++++- .../runtime/hsa-runtime/core/inc/queue.h | 1 - .../runtime/hsa-runtime/core/inc/runtime.h | 21 +++----- .../core/runtime/amd_aql_queue.cpp | 54 +++++-------------- .../core/runtime/amd_blit_kernel.cpp | 3 +- .../core/runtime/amd_blit_sdma.cpp | 29 +++------- .../core/runtime/amd_gpu_agent.cpp | 30 ++++------- .../core/runtime/amd_memory_region.cpp | 16 +++--- .../runtime/hsa-runtime/core/runtime/hsa.cpp | 4 +- .../hsa-runtime/core/runtime/hsa_ext_amd.cpp | 4 +- .../hsa-runtime/core/runtime/runtime.cpp | 48 ++++++----------- 17 files changed, 89 insertions(+), 153 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/common/shared.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/common/shared.cpp index 07dbc89f19..ca786b775b 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/common/shared.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/common/shared.cpp @@ -43,6 +43,6 @@ #include "core/common/shared.h" namespace core { -std::function BaseShared::allocate_=nullptr; -std::function BaseShared::free_=nullptr; +std::function BaseShared::allocate_ = nullptr; +std::function BaseShared::free_ = nullptr; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/common/shared.h b/projects/rocr-runtime/runtime/hsa-runtime/core/common/shared.h index fdf89b6258..d56b63bcf8 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/common/shared.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/common/shared.h @@ -55,14 +55,14 @@ namespace core { class BaseShared { public: static void SetAllocateAndFree( - const std::function& allocate, + const std::function& allocate, const std::function& free) { allocate_ = allocate; free_ = free; } protected: - static std::function allocate_; + static std::function allocate_; static std::function free_; }; @@ -78,7 +78,7 @@ class Shared : public BaseShared { "Align is less than alignof(T)"); shared_object_ = - reinterpret_cast(allocate_(sizeof(T), Max(__alignof(T), Align))); + reinterpret_cast(allocate_(sizeof(T), Max(__alignof(T), Align), 0)); assert(shared_object_ != NULL && "Failed on allocating shared_object_"); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h index b1c55f6627..4108a06e5d 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h @@ -49,7 +49,6 @@ #include -#include "core/inc/runtime.h" #include "core/inc/checked.h" #include "core/inc/isa.h" #include "core/inc/queue.h" diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_kernel.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_kernel.h index eabbdc768a..9d7090940e 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_kernel.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_kernel.h @@ -44,6 +44,7 @@ #define HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_H_ #include +#include #include #include "core/inc/blit.h" diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h index 95a99364ef..d85d9443a1 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h @@ -99,9 +99,7 @@ class MemoryRegion : public core::MemoryRegion { ~MemoryRegion(); - hsa_status_t Allocate(size_t size, void** address) const; - - hsa_status_t Allocate(bool restrict_access, size_t size, + hsa_status_t Allocate(size_t size, AllocateFlags alloc_flags, void** address) const; hsa_status_t Free(void* address, size_t size) const; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/checked.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/checked.h index 731b75d383..5b8ef8b3c9 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/checked.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/checked.h @@ -43,7 +43,8 @@ #ifndef HSA_RUNTME_CORE_INC_CHECKED_H_ #define HSA_RUNTME_CORE_INC_CHECKED_H_ -#include "stdint.h" +#include +#include namespace core { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h index 0889930deb..502ebb38d4 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/memory_region.h @@ -47,7 +47,6 @@ #include -#include "core/inc/runtime.h" #include "core/inc/agent.h" #include "core/inc/checked.h" @@ -81,7 +80,17 @@ class MemoryRegion : public Checked<0x9C961F19EE175BB3> { return reinterpret_cast(region.handle); } - virtual hsa_status_t Allocate(size_t size, void** address) const = 0; + enum AllocateEnum { + AllocateNoFlags = 0, + AllocateRestrict = (1 << 0), // Don't map system memory to GPU agents + AllocateExecutable = (1 << 1), // Set executable permission + AllocateDoubleMap = (1 << 2), // Map twice VA allocation to backing store + }; + + typedef uint32_t AllocateFlags; + + virtual hsa_status_t Allocate(size_t size, AllocateFlags alloc_flags, + void** address) const = 0; virtual hsa_status_t Free(void* address, size_t size) const = 0; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/queue.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/queue.h index 402843d7c5..b73294faf8 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/queue.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/queue.h @@ -48,7 +48,6 @@ #include "core/common/shared.h" -#include "core/inc/runtime.h" #include "core/inc/checked.h" #include "core/util/utils.h" diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h index c5a66a0517..8ad74bd1b4 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h @@ -151,25 +151,14 @@ class Runtime { /// /// @param [in] region Pointer to region object. /// @param [in] size Allocation size in bytes. + /// @param [in] alloc_flags Modifiers to pass to MemoryRegion allocator. /// @param [out] address Pointer to store the allocation result. /// /// @retval ::HSA_STATUS_SUCCESS If allocation is successful. hsa_status_t AllocateMemory(const MemoryRegion* region, size_t size, + MemoryRegion::AllocateFlags alloc_flags, void** address); - /// @brief Allocate memory on a particular region with option to restrict - /// access to the owning agent. - /// - /// @param [in] restrict_access If true, the allocation result would only be - /// accessible to the agent(s) that own the region object. - /// @param [in] region Pointer to region object. - /// @param [in] size Allocation size in bytes. - /// @param [out] address Pointer to store the allocation result. - /// - /// @retval ::HSA_STATUS_SUCCESS If allocation is successful. - hsa_status_t AllocateMemory(bool restrict_access, const MemoryRegion* region, - size_t size, void** address); - /// @brief Free memory previously allocated with AllocateMemory. /// /// @param [in] ptr Address of the memory to be freed. @@ -292,7 +281,8 @@ class Runtime { amd::hsa::code::AmdHsaCodeManager* code_manager() { return &code_manager_; } - std::function& system_allocator() { + std::function& + system_allocator() { return system_allocator_; } @@ -446,7 +436,8 @@ class Runtime { std::map allocation_map_; // Allocator using ::system_region_ - std::function system_allocator_; + std::function + system_allocator_; // Deallocator using ::system_region_ std::function system_deallocator_; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index 47101c5a2e..a1e57a5272 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -266,21 +266,12 @@ AqlQueue::AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id, SignalGuard.Dismiss(); #endif - HsaMemFlags pm4_ib_buf_flags = {0}; - pm4_ib_buf_flags.ui32.HostAccess = 1; - pm4_ib_buf_flags.ui32.ExecuteAccess = 1; - pm4_ib_buf_flags.ui32.NoSubstitute = 1; - - HSAKMT_STATUS err = - hsaKmtAllocMemory(agent_->node_id(), pm4_ib_size_b_, pm4_ib_buf_flags, &pm4_ib_buf_); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtAllocMemory(PM4 IB) failed"); - - err = hsaKmtMapMemoryToGPU(pm4_ib_buf_, pm4_ib_size_b_, NULL); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtMapMemoryToGPU(PM4 IB) failed"); + pm4_ib_buf_ = core::Runtime::runtime_singleton_->system_allocator()( + pm4_ib_size_b_, 0x1000, core::MemoryRegion::AllocateExecutable); + if (pm4_ib_buf_ == NULL) return; MAKE_NAMED_SCOPE_GUARD(PM4IBGuard, [&]() { - hsaKmtUnmapMemoryToGPU(pm4_ib_buf_); - hsaKmtFreeMemory(pm4_ib_buf_, pm4_ib_size_b_); + core::Runtime::runtime_singleton_->system_deallocator()(pm4_ib_buf_); }); valid_ = true; @@ -314,8 +305,7 @@ AqlQueue::~AqlQueue() { } #endif - hsaKmtUnmapMemoryToGPU(pm4_ib_buf_); - hsaKmtFreeMemory(pm4_ib_buf_, pm4_ib_size_b_); + core::Runtime::runtime_singleton_->system_deallocator()(pm4_ib_buf_); } uint64_t AqlQueue::LoadReadIndexAcquire() { @@ -631,34 +621,19 @@ void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) { #endif } else { // Allocate storage for the ring buffer. - HsaMemFlags flags; - flags.Value = 0; - flags.ui32.HostAccess = 1; - flags.ui32.AtomicAccessPartial = 1; - flags.ui32.ExecuteAccess = 1; - flags.ui32.AQLQueueMemory = 1; - ring_buf_alloc_bytes_ = AlignUp( queue_size_pkts * static_cast(sizeof(core::AqlPacket)), 4096); - auto err = hsaKmtAllocMemory(agent_->node_id(), ring_buf_alloc_bytes_, - flags, (void**)&ring_buf_); - if (err != HSAKMT_STATUS_SUCCESS) { - assert(false && "AQL queue memory allocation failure."); - return; - } + ring_buf_ = core::Runtime::runtime_singleton_->system_allocator()( + ring_buf_alloc_bytes_, 0x1000, + core::MemoryRegion::AllocateExecutable | + core::MemoryRegion::AllocateDoubleMap); - HSAuint64 alternate_va; - err = hsaKmtMapMemoryToGPU(ring_buf_, ring_buf_alloc_bytes_, &alternate_va); + assert(ring_buf_ != NULL && "AQL queue memory allocation failure"); - if (err != HSAKMT_STATUS_SUCCESS) { - assert(false && "AQL queue memory map failure."); - hsaKmtFreeMemory(ring_buf_, ring_buf_alloc_bytes_); - ring_buf_ = NULL; - return; - } - - ring_buf_alloc_bytes_ = 2 * ring_buf_alloc_bytes_; + // The virtual ring allocation is twice as large as requested. + // Each half maps to the same set of physical pages. + ring_buf_alloc_bytes_ *= 2; } } @@ -673,8 +648,7 @@ void AqlQueue::FreeRegisteredRingBuffer() { (void*)(uintptr_t(ring_buf_) + (ring_buf_alloc_bytes_ / 2))); #endif } else { - hsaKmtUnmapMemoryToGPU(ring_buf_); - hsaKmtFreeMemory(ring_buf_, ring_buf_alloc_bytes_ / 2); + core::Runtime::runtime_singleton_->system_deallocator()(ring_buf_); } ring_buf_ = NULL; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_kernel.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_kernel.cpp index 679fce594e..dac4904a57 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_kernel.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_kernel.cpp @@ -537,7 +537,8 @@ hsa_status_t BlitKernel::Initialize(const core::Agent& agent) { kernarg_async_ = reinterpret_cast( core::Runtime::runtime_singleton_->system_allocator()( - queue_->public_handle()->size * AlignUp(sizeof(KernelArgs), 16), 16)); + queue_->public_handle()->size * AlignUp(sizeof(KernelArgs), 16), 16, + core::MemoryRegion::AllocateNoFlags)); kernarg_async_mask_ = queue_->public_handle()->size - 1; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp index b0b28b2c7c..c397bfb4e2 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp @@ -447,26 +447,11 @@ hsa_status_t BlitSdma::Initialize(const core::Agent& agent) { // Allocate queue buffer. queue_size_ = kQueueSize; - HsaMemFlags flags; - flags.Value = 0; - flags.ui32.HostAccess = 1; - flags.ui32.AtomicAccessPartial = 1; - flags.ui32.ExecuteAccess = 1; + queue_start_addr_ = + (char*)core::Runtime::runtime_singleton_->system_allocator()( + queue_size_, 0x1000, core::MemoryRegion::AllocateExecutable); - auto err = hsaKmtAllocMemory(amd_gpu_agent.node_id(), queue_size_, flags, - reinterpret_cast(&queue_start_addr_)); - - if (err != HSAKMT_STATUS_SUCCESS) { - assert(false && "SDMA queue memory allocation failure."); - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - - HSAuint64 alternate_va; - err = hsaKmtMapMemoryToGPU(queue_start_addr_, queue_size_, &alternate_va); - - if (err != HSAKMT_STATUS_SUCCESS) { - assert(false && "AQL queue memory map failure."); - Destroy(agent); + if (queue_start_addr_ == NULL) { return HSA_STATUS_ERROR_OUT_OF_RESOURCES; } @@ -494,7 +479,8 @@ hsa_status_t BlitSdma::Initialize(const core::Agent& agent) { fence_base_addr_ = reinterpret_cast( core::Runtime::runtime_singleton_->system_allocator()( - fence_pool_size_ * sizeof(uint32_t), 256)); + fence_pool_size_ * sizeof(uint32_t), 256, + core::MemoryRegion::AllocateNoFlags)); if (fence_base_addr_ == NULL) { Destroy(agent); @@ -516,8 +502,7 @@ hsa_status_t BlitSdma::Destroy(const core::Agent& agent) { if (queue_start_addr_ != NULL && queue_size_ != 0) { // Release queue buffer. - hsaKmtUnmapMemoryToGPU(queue_start_addr_); - hsaKmtFreeMemory(queue_start_addr_, queue_size_); + core::Runtime::runtime_singleton_->system_deallocator()(queue_start_addr_); } if (fence_base_addr_ != NULL) { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index f669efef82..9e951d7ea7 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -124,14 +124,8 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props) // Populate region list. InitRegionList(); - // Reserve memory for scratch. - InitScratchPool(); - // Populate cache list. InitCacheList(); - - // Bind the second-level trap handler to this node. - BindTrapHandler(); } GpuAgent::~GpuAgent() { @@ -214,21 +208,13 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name, } // Allocate a GPU-visible buffer for the shader. - HsaMemFlags code_buf_flags = {0}; - code_buf_flags.ui32.HostAccess = 1; - code_buf_flags.ui32.ExecuteAccess = 1; - code_buf_flags.ui32.NoSubstitute = 1; - size_t header_size = (assemble_target == AssembleTarget::AQL ? sizeof(amd_kernel_code_t) : 0); code_buf_size = AlignUp(header_size + asic_shader->size, 0x1000); - HSAKMT_STATUS err = - hsaKmtAllocMemory(node_id(), code_buf_size, code_buf_flags, &code_buf); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtAllocMemory(Trap) failed"); - - err = hsaKmtMapMemoryToGPU(code_buf, code_buf_size, NULL); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtMapMemoryToGPU(Trap) failed"); + code_buf = core::Runtime::runtime_singleton_->system_allocator()( + code_buf_size, 0x1000, core::MemoryRegion::AllocateExecutable); + assert(code_buf != NULL && "Code buffer allocation failed"); memset(code_buf, 0, code_buf_size); @@ -265,8 +251,7 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name, } void GpuAgent::ReleaseShader(void* code_buf, size_t code_buf_size) const { - hsaKmtUnmapMemoryToGPU(code_buf); - hsaKmtFreeMemory(code_buf, code_buf_size); + core::Runtime::runtime_singleton_->system_deallocator()(code_buf); } void GpuAgent::InitRegionList() { @@ -415,7 +400,8 @@ bool GpuAgent::InitEndTsPool() { uint64_t* buff = NULL; if (HSA_STATUS_SUCCESS != - runtime->AllocateMemory(true, local_region_, alloc_size, + runtime->AllocateMemory(local_region_, alloc_size, + MemoryRegion::AllocateRestrict, reinterpret_cast(&buff))) { return false; } @@ -589,6 +575,10 @@ void GpuAgent::InitDma() { } hsa_status_t GpuAgent::PostToolsInit() { + // Defer memory allocation until agents have been discovered. + InitScratchPool(); + BindTrapHandler(); + // Defer utility queue creation to allow tools to intercept. queues_[QueueUtility] = CreateInterceptibleQueue(); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp index 62c9bfa16b..77338cf816 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp @@ -149,11 +149,7 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile, MemoryRegion::~MemoryRegion() {} -hsa_status_t MemoryRegion::Allocate(size_t size, void** address) const { - return Allocate(false, size, address); -} - -hsa_status_t MemoryRegion::Allocate(bool restrict_access, size_t size, +hsa_status_t MemoryRegion::Allocate(size_t size, AllocateFlags alloc_flags, void** address) const { if (address == NULL) { return HSA_STATUS_ERROR_INVALID_ARGUMENT; @@ -169,7 +165,13 @@ hsa_status_t MemoryRegion::Allocate(bool restrict_access, size_t size, size = AlignUp(size, kPageSize_); - *address = AllocateKfdMemory(mem_flag_, owner()->node_id(), size); + HsaMemFlags kmt_alloc_flags(mem_flag_); + kmt_alloc_flags.ui32.ExecuteAccess = + (alloc_flags & AllocateExecutable ? 1 : 0); + kmt_alloc_flags.ui32.AQLQueueMemory = + (alloc_flags & AllocateDoubleMap ? 1 : 0); + + *address = AllocateKfdMemory(kmt_alloc_flags, owner()->node_id(), size); if (*address != NULL) { // Commit the memory. @@ -184,7 +186,7 @@ hsa_status_t MemoryRegion::Allocate(bool restrict_access, size_t size, const uint32_t* map_node_id = &owner_node_id; if (IsSystem()) { - if (!restrict_access) { + if ((alloc_flags & AllocateRestrict) == 0) { // Map to all GPU agents. map_node_count = core::Runtime::runtime_singleton_->gpu_ids().size(); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp index 04b322de1d..196e6709d2 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp @@ -903,8 +903,8 @@ hsa_status_t const core::MemoryRegion* mem_region = core::MemoryRegion::Convert(region); IS_VALID(mem_region); - return core::Runtime::runtime_singleton_->AllocateMemory(mem_region, size, - ptr); + return core::Runtime::runtime_singleton_->AllocateMemory( + mem_region, size, core::MemoryRegion::AllocateNoFlags, ptr); } hsa_status_t hsa_memory_free(void* ptr) { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index a31b5a6c35..a35d7368c6 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -464,8 +464,8 @@ hsa_status_t return (hsa_status_t)HSA_STATUS_ERROR_INVALID_MEMORY_POOL; } - return core::Runtime::runtime_singleton_->AllocateMemory(true, mem_region, - size, ptr); + return core::Runtime::runtime_singleton_->AllocateMemory( + mem_region, size, core::MemoryRegion::AllocateRestrict, ptr); } hsa_status_t hsa_amd_memory_pool_free(void* ptr) { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index be5cc744c6..d9fabab782 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -159,29 +159,22 @@ void Runtime::RegisterAgent(Agent* agent) { // Init default fine grain system region allocator using fine grain // system region of the first discovered CPU agent. if (cpu_agents_.size() == 1) { - if (system_regions_fine_[0]->full_profile()) { - system_allocator_ = [](size_t size, size_t alignment) -> void * { - return _aligned_malloc(size, alignment); - }; + // Might need memory pooling to cover allocation that + // requires less than 4096 bytes. + system_allocator_ = + [&](size_t size, size_t alignment, + MemoryRegion::AllocateFlags alloc_flags) -> void* { + assert(alignment <= 4096); + void* ptr = NULL; + return (HSA_STATUS_SUCCESS == + core::Runtime::runtime_singleton_->AllocateMemory( + system_regions_fine_[0], size, alloc_flags, &ptr)) + ? ptr + : NULL; + }; - system_deallocator_ = [](void* ptr) { _aligned_free(ptr); }; - } else { - // Might need memory pooling to cover allocation that - // requires less than 4096 bytes. - system_allocator_ = [&](size_t size, size_t alignment) -> void * { - assert(alignment <= 4096); - void* ptr = NULL; - return (HSA_STATUS_SUCCESS == - core::Runtime::runtime_singleton_->AllocateMemory( - system_regions_fine_[0], size, &ptr)) - ? ptr - : NULL; - }; - - system_deallocator_ = [](void* ptr) { - core::Runtime::runtime_singleton_->FreeMemory(ptr); - }; - } + system_deallocator_ = + [](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); }; BaseShared::SetAllocateAndFree(system_allocator_, system_deallocator_); } @@ -307,16 +300,9 @@ hsa_status_t Runtime::IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent, } hsa_status_t Runtime::AllocateMemory(const MemoryRegion* region, size_t size, - void** ptr) { - return AllocateMemory(false, region, size, ptr); -} - -hsa_status_t Runtime::AllocateMemory(bool restrict_access, - const MemoryRegion* region, size_t size, + MemoryRegion::AllocateFlags alloc_flags, void** address) { - const amd::MemoryRegion* amd_region = - reinterpret_cast(region); - hsa_status_t status = amd_region->Allocate(restrict_access, size, address); + hsa_status_t status = region->Allocate(size, alloc_flags, address); // Track the allocation result so that it could be freed properly. if (status == HSA_STATUS_SUCCESS) {