Refactor: Consolidate calls to hsaKmtAllocMemory
Route all device-visible system memory allocations through system_allocator.
Change-Id: I5e90a1bf491e432678a6d8ab1f9f3770734cbda1
[ROCm/ROCR-Runtime commit: 74f5aca93d]
Tento commit je obsažen v:
@@ -43,6 +43,6 @@
|
||||
#include "core/common/shared.h"
|
||||
|
||||
namespace core {
|
||||
std::function<void*(size_t, size_t)> BaseShared::allocate_=nullptr;
|
||||
std::function<void(void*)> BaseShared::free_=nullptr;
|
||||
std::function<void*(size_t, size_t, uint32_t)> BaseShared::allocate_ = nullptr;
|
||||
std::function<void(void*)> BaseShared::free_ = nullptr;
|
||||
}
|
||||
|
||||
@@ -55,14 +55,14 @@ namespace core {
|
||||
class BaseShared {
|
||||
public:
|
||||
static void SetAllocateAndFree(
|
||||
const std::function<void*(size_t, size_t)>& allocate,
|
||||
const std::function<void*(size_t, size_t, uint32_t)>& allocate,
|
||||
const std::function<void(void*)>& free) {
|
||||
allocate_ = allocate;
|
||||
free_ = free;
|
||||
}
|
||||
|
||||
protected:
|
||||
static std::function<void*(size_t, size_t)> allocate_;
|
||||
static std::function<void*(size_t, size_t, uint32_t)> allocate_;
|
||||
static std::function<void(void*)> free_;
|
||||
};
|
||||
|
||||
@@ -78,7 +78,7 @@ class Shared : public BaseShared {
|
||||
"Align is less than alignof(T)");
|
||||
|
||||
shared_object_ =
|
||||
reinterpret_cast<T*>(allocate_(sizeof(T), Max(__alignof(T), Align)));
|
||||
reinterpret_cast<T*>(allocate_(sizeof(T), Max(__alignof(T), Align), 0));
|
||||
|
||||
assert(shared_object_ != NULL && "Failed on allocating shared_object_");
|
||||
|
||||
|
||||
@@ -49,7 +49,6 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/checked.h"
|
||||
#include "core/inc/isa.h"
|
||||
#include "core/inc/queue.h"
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_H_
|
||||
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "core/inc/blit.h"
|
||||
|
||||
@@ -99,9 +99,7 @@ class MemoryRegion : public core::MemoryRegion {
|
||||
|
||||
~MemoryRegion();
|
||||
|
||||
hsa_status_t Allocate(size_t size, void** address) const;
|
||||
|
||||
hsa_status_t Allocate(bool restrict_access, size_t size,
|
||||
hsa_status_t Allocate(size_t size, AllocateFlags alloc_flags,
|
||||
void** address) const;
|
||||
|
||||
hsa_status_t Free(void* address, size_t size) const;
|
||||
|
||||
@@ -43,7 +43,8 @@
|
||||
#ifndef HSA_RUNTME_CORE_INC_CHECKED_H_
|
||||
#define HSA_RUNTME_CORE_INC_CHECKED_H_
|
||||
|
||||
#include "stdint.h"
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace core {
|
||||
|
||||
|
||||
@@ -47,7 +47,6 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/agent.h"
|
||||
#include "core/inc/checked.h"
|
||||
|
||||
@@ -81,7 +80,17 @@ class MemoryRegion : public Checked<0x9C961F19EE175BB3> {
|
||||
return reinterpret_cast<MemoryRegion*>(region.handle);
|
||||
}
|
||||
|
||||
virtual hsa_status_t Allocate(size_t size, void** address) const = 0;
|
||||
enum AllocateEnum {
|
||||
AllocateNoFlags = 0,
|
||||
AllocateRestrict = (1 << 0), // Don't map system memory to GPU agents
|
||||
AllocateExecutable = (1 << 1), // Set executable permission
|
||||
AllocateDoubleMap = (1 << 2), // Map a virtual range twice the allocation size onto the same backing store
|
||||
};
|
||||
|
||||
typedef uint32_t AllocateFlags;
|
||||
|
||||
virtual hsa_status_t Allocate(size_t size, AllocateFlags alloc_flags,
|
||||
void** address) const = 0;
|
||||
|
||||
virtual hsa_status_t Free(void* address, size_t size) const = 0;
|
||||
|
||||
|
||||
@@ -48,7 +48,6 @@
|
||||
|
||||
#include "core/common/shared.h"
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/checked.h"
|
||||
|
||||
#include "core/util/utils.h"
|
||||
|
||||
@@ -151,25 +151,14 @@ class Runtime {
|
||||
///
|
||||
/// @param [in] region Pointer to region object.
|
||||
/// @param [in] size Allocation size in bytes.
|
||||
/// @param [in] alloc_flags Modifiers to pass to MemoryRegion allocator.
|
||||
/// @param [out] address Pointer to store the allocation result.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_SUCCESS If allocation is successful.
|
||||
hsa_status_t AllocateMemory(const MemoryRegion* region, size_t size,
|
||||
MemoryRegion::AllocateFlags alloc_flags,
|
||||
void** address);
|
||||
|
||||
/// @brief Allocate memory on a particular region with option to restrict
|
||||
/// access to the owning agent.
|
||||
///
|
||||
/// @param [in] restrict_access If true, the allocation result would only be
|
||||
/// accessible to the agent(s) that own the region object.
|
||||
/// @param [in] region Pointer to region object.
|
||||
/// @param [in] size Allocation size in bytes.
|
||||
/// @param [out] address Pointer to store the allocation result.
|
||||
///
|
||||
/// @retval ::HSA_STATUS_SUCCESS If allocation is successful.
|
||||
hsa_status_t AllocateMemory(bool restrict_access, const MemoryRegion* region,
|
||||
size_t size, void** address);
|
||||
|
||||
/// @brief Free memory previously allocated with AllocateMemory.
|
||||
///
|
||||
/// @param [in] ptr Address of the memory to be freed.
|
||||
@@ -292,7 +281,8 @@ class Runtime {
|
||||
|
||||
amd::hsa::code::AmdHsaCodeManager* code_manager() { return &code_manager_; }
|
||||
|
||||
std::function<void*(size_t, size_t)>& system_allocator() {
|
||||
std::function<void*(size_t, size_t, MemoryRegion::AllocateFlags)>&
|
||||
system_allocator() {
|
||||
return system_allocator_;
|
||||
}
|
||||
|
||||
@@ -446,7 +436,8 @@ class Runtime {
|
||||
std::map<const void*, AllocationRegion> allocation_map_;
|
||||
|
||||
// Allocator using ::system_region_
|
||||
std::function<void*(size_t, size_t)> system_allocator_;
|
||||
std::function<void*(size_t, size_t, MemoryRegion::AllocateFlags)>
|
||||
system_allocator_;
|
||||
|
||||
// Deallocator using ::system_region_
|
||||
std::function<void(void*)> system_deallocator_;
|
||||
|
||||
@@ -266,21 +266,12 @@ AqlQueue::AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id,
|
||||
SignalGuard.Dismiss();
|
||||
#endif
|
||||
|
||||
HsaMemFlags pm4_ib_buf_flags = {0};
|
||||
pm4_ib_buf_flags.ui32.HostAccess = 1;
|
||||
pm4_ib_buf_flags.ui32.ExecuteAccess = 1;
|
||||
pm4_ib_buf_flags.ui32.NoSubstitute = 1;
|
||||
|
||||
HSAKMT_STATUS err =
|
||||
hsaKmtAllocMemory(agent_->node_id(), pm4_ib_size_b_, pm4_ib_buf_flags, &pm4_ib_buf_);
|
||||
assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtAllocMemory(PM4 IB) failed");
|
||||
|
||||
err = hsaKmtMapMemoryToGPU(pm4_ib_buf_, pm4_ib_size_b_, NULL);
|
||||
assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtMapMemoryToGPU(PM4 IB) failed");
|
||||
pm4_ib_buf_ = core::Runtime::runtime_singleton_->system_allocator()(
|
||||
pm4_ib_size_b_, 0x1000, core::MemoryRegion::AllocateExecutable);
|
||||
if (pm4_ib_buf_ == NULL) return;
|
||||
|
||||
MAKE_NAMED_SCOPE_GUARD(PM4IBGuard, [&]() {
|
||||
hsaKmtUnmapMemoryToGPU(pm4_ib_buf_);
|
||||
hsaKmtFreeMemory(pm4_ib_buf_, pm4_ib_size_b_);
|
||||
core::Runtime::runtime_singleton_->system_deallocator()(pm4_ib_buf_);
|
||||
});
|
||||
|
||||
valid_ = true;
|
||||
@@ -314,8 +305,7 @@ AqlQueue::~AqlQueue() {
|
||||
}
|
||||
#endif
|
||||
|
||||
hsaKmtUnmapMemoryToGPU(pm4_ib_buf_);
|
||||
hsaKmtFreeMemory(pm4_ib_buf_, pm4_ib_size_b_);
|
||||
core::Runtime::runtime_singleton_->system_deallocator()(pm4_ib_buf_);
|
||||
}
|
||||
|
||||
uint64_t AqlQueue::LoadReadIndexAcquire() {
|
||||
@@ -631,34 +621,19 @@ void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) {
|
||||
#endif
|
||||
} else {
|
||||
// Allocate storage for the ring buffer.
|
||||
HsaMemFlags flags;
|
||||
flags.Value = 0;
|
||||
flags.ui32.HostAccess = 1;
|
||||
flags.ui32.AtomicAccessPartial = 1;
|
||||
flags.ui32.ExecuteAccess = 1;
|
||||
flags.ui32.AQLQueueMemory = 1;
|
||||
|
||||
ring_buf_alloc_bytes_ = AlignUp(
|
||||
queue_size_pkts * static_cast<uint32_t>(sizeof(core::AqlPacket)), 4096);
|
||||
auto err = hsaKmtAllocMemory(agent_->node_id(), ring_buf_alloc_bytes_,
|
||||
flags, (void**)&ring_buf_);
|
||||
|
||||
if (err != HSAKMT_STATUS_SUCCESS) {
|
||||
assert(false && "AQL queue memory allocation failure.");
|
||||
return;
|
||||
}
|
||||
ring_buf_ = core::Runtime::runtime_singleton_->system_allocator()(
|
||||
ring_buf_alloc_bytes_, 0x1000,
|
||||
core::MemoryRegion::AllocateExecutable |
|
||||
core::MemoryRegion::AllocateDoubleMap);
|
||||
|
||||
HSAuint64 alternate_va;
|
||||
err = hsaKmtMapMemoryToGPU(ring_buf_, ring_buf_alloc_bytes_, &alternate_va);
|
||||
assert(ring_buf_ != NULL && "AQL queue memory allocation failure");
|
||||
|
||||
if (err != HSAKMT_STATUS_SUCCESS) {
|
||||
assert(false && "AQL queue memory map failure.");
|
||||
hsaKmtFreeMemory(ring_buf_, ring_buf_alloc_bytes_);
|
||||
ring_buf_ = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
ring_buf_alloc_bytes_ = 2 * ring_buf_alloc_bytes_;
|
||||
// The virtual ring allocation is twice as large as requested.
|
||||
// Each half maps to the same set of physical pages.
|
||||
ring_buf_alloc_bytes_ *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -673,8 +648,7 @@ void AqlQueue::FreeRegisteredRingBuffer() {
|
||||
(void*)(uintptr_t(ring_buf_) + (ring_buf_alloc_bytes_ / 2)));
|
||||
#endif
|
||||
} else {
|
||||
hsaKmtUnmapMemoryToGPU(ring_buf_);
|
||||
hsaKmtFreeMemory(ring_buf_, ring_buf_alloc_bytes_ / 2);
|
||||
core::Runtime::runtime_singleton_->system_deallocator()(ring_buf_);
|
||||
}
|
||||
|
||||
ring_buf_ = NULL;
|
||||
|
||||
@@ -537,7 +537,8 @@ hsa_status_t BlitKernel::Initialize(const core::Agent& agent) {
|
||||
|
||||
kernarg_async_ = reinterpret_cast<KernelArgs*>(
|
||||
core::Runtime::runtime_singleton_->system_allocator()(
|
||||
queue_->public_handle()->size * AlignUp(sizeof(KernelArgs), 16), 16));
|
||||
queue_->public_handle()->size * AlignUp(sizeof(KernelArgs), 16), 16,
|
||||
core::MemoryRegion::AllocateNoFlags));
|
||||
|
||||
kernarg_async_mask_ = queue_->public_handle()->size - 1;
|
||||
|
||||
|
||||
@@ -447,26 +447,11 @@ hsa_status_t BlitSdma::Initialize(const core::Agent& agent) {
|
||||
// Allocate queue buffer.
|
||||
queue_size_ = kQueueSize;
|
||||
|
||||
HsaMemFlags flags;
|
||||
flags.Value = 0;
|
||||
flags.ui32.HostAccess = 1;
|
||||
flags.ui32.AtomicAccessPartial = 1;
|
||||
flags.ui32.ExecuteAccess = 1;
|
||||
queue_start_addr_ =
|
||||
(char*)core::Runtime::runtime_singleton_->system_allocator()(
|
||||
queue_size_, 0x1000, core::MemoryRegion::AllocateExecutable);
|
||||
|
||||
auto err = hsaKmtAllocMemory(amd_gpu_agent.node_id(), queue_size_, flags,
|
||||
reinterpret_cast<void**>(&queue_start_addr_));
|
||||
|
||||
if (err != HSAKMT_STATUS_SUCCESS) {
|
||||
assert(false && "SDMA queue memory allocation failure.");
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
HSAuint64 alternate_va;
|
||||
err = hsaKmtMapMemoryToGPU(queue_start_addr_, queue_size_, &alternate_va);
|
||||
|
||||
if (err != HSAKMT_STATUS_SUCCESS) {
|
||||
assert(false && "AQL queue memory map failure.");
|
||||
Destroy(agent);
|
||||
if (queue_start_addr_ == NULL) {
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
@@ -494,7 +479,8 @@ hsa_status_t BlitSdma::Initialize(const core::Agent& agent) {
|
||||
|
||||
fence_base_addr_ = reinterpret_cast<uint32_t*>(
|
||||
core::Runtime::runtime_singleton_->system_allocator()(
|
||||
fence_pool_size_ * sizeof(uint32_t), 256));
|
||||
fence_pool_size_ * sizeof(uint32_t), 256,
|
||||
core::MemoryRegion::AllocateNoFlags));
|
||||
|
||||
if (fence_base_addr_ == NULL) {
|
||||
Destroy(agent);
|
||||
@@ -516,8 +502,7 @@ hsa_status_t BlitSdma::Destroy(const core::Agent& agent) {
|
||||
|
||||
if (queue_start_addr_ != NULL && queue_size_ != 0) {
|
||||
// Release queue buffer.
|
||||
hsaKmtUnmapMemoryToGPU(queue_start_addr_);
|
||||
hsaKmtFreeMemory(queue_start_addr_, queue_size_);
|
||||
core::Runtime::runtime_singleton_->system_deallocator()(queue_start_addr_);
|
||||
}
|
||||
|
||||
if (fence_base_addr_ != NULL) {
|
||||
|
||||
@@ -124,14 +124,8 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props)
|
||||
// Populate region list.
|
||||
InitRegionList();
|
||||
|
||||
// Reserve memory for scratch.
|
||||
InitScratchPool();
|
||||
|
||||
// Populate cache list.
|
||||
InitCacheList();
|
||||
|
||||
// Bind the second-level trap handler to this node.
|
||||
BindTrapHandler();
|
||||
}
|
||||
|
||||
GpuAgent::~GpuAgent() {
|
||||
@@ -214,21 +208,13 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
|
||||
}
|
||||
|
||||
// Allocate a GPU-visible buffer for the shader.
|
||||
HsaMemFlags code_buf_flags = {0};
|
||||
code_buf_flags.ui32.HostAccess = 1;
|
||||
code_buf_flags.ui32.ExecuteAccess = 1;
|
||||
code_buf_flags.ui32.NoSubstitute = 1;
|
||||
|
||||
size_t header_size =
|
||||
(assemble_target == AssembleTarget::AQL ? sizeof(amd_kernel_code_t) : 0);
|
||||
code_buf_size = AlignUp(header_size + asic_shader->size, 0x1000);
|
||||
|
||||
HSAKMT_STATUS err =
|
||||
hsaKmtAllocMemory(node_id(), code_buf_size, code_buf_flags, &code_buf);
|
||||
assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtAllocMemory(Trap) failed");
|
||||
|
||||
err = hsaKmtMapMemoryToGPU(code_buf, code_buf_size, NULL);
|
||||
assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtMapMemoryToGPU(Trap) failed");
|
||||
code_buf = core::Runtime::runtime_singleton_->system_allocator()(
|
||||
code_buf_size, 0x1000, core::MemoryRegion::AllocateExecutable);
|
||||
assert(code_buf != NULL && "Code buffer allocation failed");
|
||||
|
||||
memset(code_buf, 0, code_buf_size);
|
||||
|
||||
@@ -265,8 +251,7 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
|
||||
}
|
||||
|
||||
void GpuAgent::ReleaseShader(void* code_buf, size_t code_buf_size) const {
|
||||
hsaKmtUnmapMemoryToGPU(code_buf);
|
||||
hsaKmtFreeMemory(code_buf, code_buf_size);
|
||||
core::Runtime::runtime_singleton_->system_deallocator()(code_buf);
|
||||
}
|
||||
|
||||
void GpuAgent::InitRegionList() {
|
||||
@@ -415,7 +400,8 @@ bool GpuAgent::InitEndTsPool() {
|
||||
|
||||
uint64_t* buff = NULL;
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
runtime->AllocateMemory(true, local_region_, alloc_size,
|
||||
runtime->AllocateMemory(local_region_, alloc_size,
|
||||
MemoryRegion::AllocateRestrict,
|
||||
reinterpret_cast<void**>(&buff))) {
|
||||
return false;
|
||||
}
|
||||
@@ -589,6 +575,10 @@ void GpuAgent::InitDma() {
|
||||
}
|
||||
|
||||
hsa_status_t GpuAgent::PostToolsInit() {
|
||||
// Defer memory allocation until agents have been discovered.
|
||||
InitScratchPool();
|
||||
BindTrapHandler();
|
||||
|
||||
// Defer utility queue creation to allow tools to intercept.
|
||||
queues_[QueueUtility] = CreateInterceptibleQueue();
|
||||
|
||||
|
||||
@@ -149,11 +149,7 @@ MemoryRegion::MemoryRegion(bool fine_grain, bool full_profile,
|
||||
|
||||
MemoryRegion::~MemoryRegion() {}
|
||||
|
||||
hsa_status_t MemoryRegion::Allocate(size_t size, void** address) const {
|
||||
return Allocate(false, size, address);
|
||||
}
|
||||
|
||||
hsa_status_t MemoryRegion::Allocate(bool restrict_access, size_t size,
|
||||
hsa_status_t MemoryRegion::Allocate(size_t size, AllocateFlags alloc_flags,
|
||||
void** address) const {
|
||||
if (address == NULL) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
@@ -169,7 +165,13 @@ hsa_status_t MemoryRegion::Allocate(bool restrict_access, size_t size,
|
||||
|
||||
size = AlignUp(size, kPageSize_);
|
||||
|
||||
*address = AllocateKfdMemory(mem_flag_, owner()->node_id(), size);
|
||||
HsaMemFlags kmt_alloc_flags(mem_flag_);
|
||||
kmt_alloc_flags.ui32.ExecuteAccess =
|
||||
(alloc_flags & AllocateExecutable ? 1 : 0);
|
||||
kmt_alloc_flags.ui32.AQLQueueMemory =
|
||||
(alloc_flags & AllocateDoubleMap ? 1 : 0);
|
||||
|
||||
*address = AllocateKfdMemory(kmt_alloc_flags, owner()->node_id(), size);
|
||||
|
||||
if (*address != NULL) {
|
||||
// Commit the memory.
|
||||
@@ -184,7 +186,7 @@ hsa_status_t MemoryRegion::Allocate(bool restrict_access, size_t size,
|
||||
const uint32_t* map_node_id = &owner_node_id;
|
||||
|
||||
if (IsSystem()) {
|
||||
if (!restrict_access) {
|
||||
if ((alloc_flags & AllocateRestrict) == 0) {
|
||||
// Map to all GPU agents.
|
||||
map_node_count = core::Runtime::runtime_singleton_->gpu_ids().size();
|
||||
|
||||
|
||||
@@ -903,8 +903,8 @@ hsa_status_t
|
||||
const core::MemoryRegion* mem_region = core::MemoryRegion::Convert(region);
|
||||
IS_VALID(mem_region);
|
||||
|
||||
return core::Runtime::runtime_singleton_->AllocateMemory(mem_region, size,
|
||||
ptr);
|
||||
return core::Runtime::runtime_singleton_->AllocateMemory(
|
||||
mem_region, size, core::MemoryRegion::AllocateNoFlags, ptr);
|
||||
}
|
||||
|
||||
hsa_status_t hsa_memory_free(void* ptr) {
|
||||
|
||||
@@ -464,8 +464,8 @@ hsa_status_t
|
||||
return (hsa_status_t)HSA_STATUS_ERROR_INVALID_MEMORY_POOL;
|
||||
}
|
||||
|
||||
return core::Runtime::runtime_singleton_->AllocateMemory(true, mem_region,
|
||||
size, ptr);
|
||||
return core::Runtime::runtime_singleton_->AllocateMemory(
|
||||
mem_region, size, core::MemoryRegion::AllocateRestrict, ptr);
|
||||
}
|
||||
|
||||
hsa_status_t hsa_amd_memory_pool_free(void* ptr) {
|
||||
|
||||
@@ -159,29 +159,22 @@ void Runtime::RegisterAgent(Agent* agent) {
|
||||
// Init default fine grain system region allocator using fine grain
|
||||
// system region of the first discovered CPU agent.
|
||||
if (cpu_agents_.size() == 1) {
|
||||
if (system_regions_fine_[0]->full_profile()) {
|
||||
system_allocator_ = [](size_t size, size_t alignment) -> void * {
|
||||
return _aligned_malloc(size, alignment);
|
||||
};
|
||||
// Might need memory pooling to cover allocations that
|
||||
// require less than 4096 bytes.
|
||||
system_allocator_ =
|
||||
[&](size_t size, size_t alignment,
|
||||
MemoryRegion::AllocateFlags alloc_flags) -> void* {
|
||||
assert(alignment <= 4096);
|
||||
void* ptr = NULL;
|
||||
return (HSA_STATUS_SUCCESS ==
|
||||
core::Runtime::runtime_singleton_->AllocateMemory(
|
||||
system_regions_fine_[0], size, alloc_flags, &ptr))
|
||||
? ptr
|
||||
: NULL;
|
||||
};
|
||||
|
||||
system_deallocator_ = [](void* ptr) { _aligned_free(ptr); };
|
||||
} else {
|
||||
// Might need memory pooling to cover allocations that
|
||||
// require less than 4096 bytes.
|
||||
system_allocator_ = [&](size_t size, size_t alignment) -> void * {
|
||||
assert(alignment <= 4096);
|
||||
void* ptr = NULL;
|
||||
return (HSA_STATUS_SUCCESS ==
|
||||
core::Runtime::runtime_singleton_->AllocateMemory(
|
||||
system_regions_fine_[0], size, &ptr))
|
||||
? ptr
|
||||
: NULL;
|
||||
};
|
||||
|
||||
system_deallocator_ = [](void* ptr) {
|
||||
core::Runtime::runtime_singleton_->FreeMemory(ptr);
|
||||
};
|
||||
}
|
||||
system_deallocator_ =
|
||||
[](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); };
|
||||
|
||||
BaseShared::SetAllocateAndFree(system_allocator_, system_deallocator_);
|
||||
}
|
||||
@@ -307,16 +300,9 @@ hsa_status_t Runtime::IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent,
|
||||
}
|
||||
|
||||
hsa_status_t Runtime::AllocateMemory(const MemoryRegion* region, size_t size,
|
||||
void** ptr) {
|
||||
return AllocateMemory(false, region, size, ptr);
|
||||
}
|
||||
|
||||
hsa_status_t Runtime::AllocateMemory(bool restrict_access,
|
||||
const MemoryRegion* region, size_t size,
|
||||
MemoryRegion::AllocateFlags alloc_flags,
|
||||
void** address) {
|
||||
const amd::MemoryRegion* amd_region =
|
||||
reinterpret_cast<const amd::MemoryRegion*>(region);
|
||||
hsa_status_t status = amd_region->Allocate(restrict_access, size, address);
|
||||
hsa_status_t status = region->Allocate(size, alloc_flags, address);
|
||||
|
||||
// Track the allocation result so that it could be freed properly.
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele