Create fine-grained allocator
Create an allocator helper function to provide fine-grained memory on a specific agent. Change-Id: I32ba9aceb9c9dc708b140a0c45158e6e7a018844
This commit is contained in:
@@ -411,6 +411,13 @@ class GpuAgent : public GpuAgentInt {
|
||||
|
||||
// Read-only access to the deallocation callback held in system_deallocator_.
// The callback is returned by const reference, so no std::function copy is made.
const std::function<void(void*)>& system_deallocator() const {
  return system_deallocator_;
}
|
||||
|
||||
const std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)>&
|
||||
finegrain_allocator() const {
|
||||
return finegrain_allocator_;
|
||||
}
|
||||
|
||||
// Read-only access to the deallocation callback held in finegrain_deallocator_.
// The callback is returned by const reference, so no std::function copy is made.
const std::function<void(void*)>& finegrain_deallocator() const {
  return finegrain_deallocator_;
}
|
||||
|
||||
protected:
|
||||
// Sizes are in packets.
|
||||
static const uint32_t minAqlSize_ = 0x40; // 4KB min
|
||||
@@ -581,8 +588,8 @@ class GpuAgent : public GpuAgentInt {
|
||||
// @brief Setup GWS accessing queue.
|
||||
void InitGWS();
|
||||
|
||||
// @brief Setup NUMA aware system memory allocator.
|
||||
void InitNumaAllocator();
|
||||
// @brief Set up the system and fine-grain memory allocators.
|
||||
void InitAllocators();
|
||||
|
||||
// @brief Initialize scratch handler thresholds
|
||||
void InitAsyncScratchThresholds();
|
||||
@@ -657,6 +664,10 @@ class GpuAgent : public GpuAgentInt {
|
||||
|
||||
std::function<void(void*)> system_deallocator_;
|
||||
|
||||
// Fine-grain allocator and deallocator for this device
|
||||
std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)> finegrain_allocator_;
|
||||
|
||||
std::function<void(void*)> finegrain_deallocator_;
|
||||
// @brief device handle
|
||||
amdgpu_device_handle ldrm_dev_;
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ class MemoryRegion : public core::MemoryRegion {
|
||||
static void MakeKfdMemoryUnresident(const void* ptr);
|
||||
|
||||
MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, bool extended_scope_fine_grain,
|
||||
core::Agent* owner, const HsaMemoryProperties& mem_props);
|
||||
bool user_visible, core::Agent* owner, const HsaMemoryProperties& mem_props);
|
||||
|
||||
~MemoryRegion();
|
||||
|
||||
|
||||
@@ -58,11 +58,12 @@ class Agent;
|
||||
class MemoryRegion : public Checked<0x9C961F19EE175BB3> {
|
||||
public:
|
||||
// Constructs a region descriptor: each boolean argument and the owner pointer is copied
// into the member of the same name (fine_grain_, kernarg_, full_profile_,
// extended_scope_fine_grain_, user_visible_, owner_).
// NOTE(review): this span is diff output; the two consecutive parameter-list endings below
// appear to be the pre-change and post-change forms (the latter adds user_visible) —
// confirm against the repository before treating either as the live signature.
MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, bool extended_scope_fine_grain,
|
||||
core::Agent* owner)
|
||||
bool user_visible, core::Agent* owner)
|
||||
: fine_grain_(fine_grain),
|
||||
kernarg_(kernarg),
|
||||
full_profile_(full_profile),
|
||||
|
||||
extended_scope_fine_grain_(extended_scope_fine_grain),
|
||||
user_visible_(user_visible),
|
||||
owner_(owner) {
|
||||
// A null owner is treated as a programming error.
assert(owner_ != NULL);
|
||||
}
|
||||
@@ -132,6 +133,8 @@ class MemoryRegion : public Checked<0x9C961F19EE175BB3> {
|
||||
|
||||
// Reports the full_profile_ flag, which is const and set by the constructor.
__forceinline bool full_profile() const {
  return full_profile_;
}
|
||||
|
||||
// Reports the user_visible_ flag, which is const and set by the constructor.
// The VisitRegion loops in this change skip any region for which this returns false.
__forceinline bool user_visible() const {
  return user_visible_;
}
|
||||
|
||||
// Returns the agent pointer stored in owner_ (the constructor asserts it is non-null).
__forceinline core::Agent* owner() const {
  return owner_;
}
|
||||
|
||||
private:
|
||||
@@ -139,6 +142,8 @@ class MemoryRegion : public Checked<0x9C961F19EE175BB3> {
|
||||
const bool kernarg_;
|
||||
const bool full_profile_;
|
||||
const bool extended_scope_fine_grain_;
|
||||
const bool user_visible_;
|
||||
|
||||
core::Agent* owner_;
|
||||
};
|
||||
} // namespace core
|
||||
|
||||
@@ -85,15 +85,15 @@ void CpuAgent::InitRegionList() {
|
||||
if (system_prop != mem_props.end()) system_props = *system_prop;
|
||||
|
||||
MemoryRegion* system_region_fine =
|
||||
new MemoryRegion(true, false, is_apu_node, false, this, system_props);
|
||||
new MemoryRegion(true, false, is_apu_node, false, true, this, system_props);
|
||||
regions_.push_back(system_region_fine);
|
||||
MemoryRegion* system_region_kernarg =
|
||||
new MemoryRegion(true, true, is_apu_node, false, this, system_props);
|
||||
new MemoryRegion(true, true, is_apu_node, false, true, this, system_props);
|
||||
regions_.push_back(system_region_kernarg);
|
||||
|
||||
if (!is_apu_node) {
|
||||
MemoryRegion* system_region_coarse =
|
||||
new MemoryRegion(false, false, is_apu_node, false, this, system_props);
|
||||
new MemoryRegion(false, false, is_apu_node, false, true, this, system_props);
|
||||
regions_.push_back(system_region_coarse);
|
||||
}
|
||||
}
|
||||
@@ -152,6 +152,7 @@ hsa_status_t CpuAgent::VisitRegion(
|
||||
hsa_status_t (*callback)(hsa_region_t region, void* data),
|
||||
void* data) const {
|
||||
for (const core::MemoryRegion* region : regions) {
|
||||
if (!region->user_visible()) continue;
|
||||
hsa_region_t region_handle = core::MemoryRegion::Convert(region);
|
||||
hsa_status_t status = callback(region_handle, data);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
|
||||
@@ -448,19 +448,20 @@ void GpuAgent::InitRegionList() {
|
||||
case HSA_HEAPTYPE_GPU_LDS:
|
||||
case HSA_HEAPTYPE_GPU_SCRATCH: {
|
||||
MemoryRegion* region =
|
||||
new MemoryRegion(false, false, false, false, this, mem_props[mem_idx]);
|
||||
new MemoryRegion(false, false, false, false, true, this, mem_props[mem_idx]);
|
||||
|
||||
regions_.push_back(region);
|
||||
|
||||
if (region->IsLocalMemory()) {
|
||||
regions_.push_back(
|
||||
new MemoryRegion(false, false, false, true, this, mem_props[mem_idx]));
|
||||
new MemoryRegion(false, false, false, true, true, this, mem_props[mem_idx]));
|
||||
|
||||
// Expose VRAM as uncached/fine grain over PCIe (if enabled) or XGMI.
|
||||
if ((properties_.HiveID != 0) ||
|
||||
(core::Runtime::runtime_singleton_->flag().fine_grain_pcie())) {
|
||||
regions_.push_back(
|
||||
new MemoryRegion(true, false, false, false, this, mem_props[mem_idx]));
|
||||
}
|
||||
bool user_visible = (properties_.HiveID != 0) ||
|
||||
core::Runtime::runtime_singleton_->flag().fine_grain_pcie();
|
||||
|
||||
regions_.push_back(new MemoryRegion(true, false, false, false, user_visible, this,
|
||||
mem_props[mem_idx]));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -650,6 +651,8 @@ hsa_status_t GpuAgent::VisitRegion(
|
||||
void* data) const {
|
||||
AMD::callback_t<decltype(callback)> call(callback);
|
||||
for (const core::MemoryRegion* region : regions) {
|
||||
if (!region->user_visible()) continue;
|
||||
|
||||
const AMD::MemoryRegion* amd_region =
|
||||
reinterpret_cast<const AMD::MemoryRegion*>(region);
|
||||
|
||||
@@ -850,7 +853,7 @@ void GpuAgent::PreloadBlits() {
|
||||
|
||||
hsa_status_t GpuAgent::PostToolsInit() {
|
||||
// Defer memory allocation until agents have been discovered.
|
||||
InitNumaAllocator();
|
||||
InitAllocators();
|
||||
InitScratchPool();
|
||||
BindTrapHandler();
|
||||
InitDma();
|
||||
@@ -2241,7 +2244,7 @@ void GpuAgent::Trim() {
|
||||
scratch_cache_.trim(false);
|
||||
}
|
||||
|
||||
void GpuAgent::InitNumaAllocator() {
|
||||
void GpuAgent::InitAllocators() {
|
||||
for (auto pool : GetNearestCpuAgent()->regions()) {
|
||||
if (pool->kernarg()) {
|
||||
system_allocator_ = [pool](size_t size, size_t alignment,
|
||||
@@ -2255,11 +2258,29 @@ void GpuAgent::InitNumaAllocator() {
|
||||
};
|
||||
|
||||
system_deallocator_ = [](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); };
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
assert(false && "Nearest NUMA node did not have a kernarg pool.");
|
||||
assert(system_allocator_ && "Nearest NUMA node did not have a kernarg pool.");
|
||||
|
||||
// Setup fine-grain allocator
|
||||
for (auto region : regions()) {
|
||||
const AMD::MemoryRegion* amd_region = (const AMD::MemoryRegion*)region;
|
||||
if (amd_region->IsLocalMemory() && amd_region->fine_grain()) {
|
||||
finegrain_allocator_ = [region](size_t size,
|
||||
MemoryRegion::AllocateFlags alloc_flags) -> void* {
|
||||
void* ptr = nullptr;
|
||||
return (HSA_STATUS_SUCCESS ==
|
||||
core::Runtime::runtime_singleton_->AllocateMemory(region, size, alloc_flags, &ptr))
|
||||
? ptr
|
||||
: nullptr;
|
||||
};
|
||||
|
||||
finegrain_deallocator_ = [](void* ptr) {
|
||||
core::Runtime::runtime_singleton_->FreeMemory(ptr);
|
||||
};
|
||||
}
|
||||
}
|
||||
assert(finegrain_deallocator_ && "Agent does not have a fine-grain allocator");
|
||||
}
|
||||
|
||||
core::Agent* GpuAgent::GetNearestCpuAgent() const {
|
||||
|
||||
@@ -102,9 +102,10 @@ void MemoryRegion::MakeKfdMemoryUnresident(const void* ptr) {
|
||||
}
|
||||
|
||||
MemoryRegion::MemoryRegion(bool fine_grain, bool kernarg, bool full_profile,
|
||||
bool extended_scope_fine_grain, core::Agent* owner,
|
||||
bool extended_scope_fine_grain, bool user_visible, core::Agent* owner,
|
||||
const HsaMemoryProperties& mem_props)
|
||||
: core::MemoryRegion(fine_grain, kernarg, full_profile, extended_scope_fine_grain, owner),
|
||||
: core::MemoryRegion(fine_grain, kernarg, full_profile, extended_scope_fine_grain, user_visible,
|
||||
owner),
|
||||
mem_props_(mem_props),
|
||||
max_single_alloc_size_(0),
|
||||
virtual_size_(0),
|
||||
|
||||
مرجع در شماره جدید
Block a user