rocr/libhsakmt: Add coarse-grain allocator to GPU
[ROCm/ROCR-Runtime commit: adbc0495e2]
Этот коммит содержится в:
коммит произвёл
Yat Sin, David
родитель
945d6da90b
Коммит
3ebcf3020f
@@ -479,6 +479,18 @@ class GpuAgent : public GpuAgentInt {
|
||||
|
||||
/// @brief Accessor for the fine-grain deallocator of this GPU agent.
/// @return Const reference to the callable stored in finegrain_deallocator_,
/// which takes the pointer to release.
const std::function<void(void*)>& finegrain_deallocator() const {
  return finegrain_deallocator_;
}
|
||||
|
||||
/// @brief Accessor for the allocator used to allocate coarse-grain device
/// memory on this GPU agent.
/// @return Const reference to the callable stored in coarsegrain_allocator_.
/// The callable takes the allocation size and core::MemoryRegion::AllocateFlags
/// and returns a void* to the allocation.
|
||||
const std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)>&
|
||||
coarsegrain_allocator() const {
|
||||
return coarsegrain_allocator_;
|
||||
}
|
||||
|
||||
/// @brief Deallocate memory allocated from the coarsegrain_allocator
|
||||
/// on this GPU agent.
/// @return Const reference to the callable stored in coarsegrain_deallocator_,
/// which takes the pointer to release.
|
||||
const std::function<void(void*)>& coarsegrain_deallocator() const {
|
||||
return coarsegrain_deallocator_;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Sizes are in packets.
|
||||
const uint32_t minAqlSize_ = 0x40; // 4KB min
|
||||
@@ -731,16 +743,19 @@ class GpuAgent : public GpuAgentInt {
|
||||
|
||||
ScratchCache scratch_cache_;
|
||||
|
||||
// System memory allocator in the nearest NUMA node.
|
||||
/// @brief System memory allocator in the nearest NUMA node.
|
||||
std::function<void*(size_t size, size_t align, core::MemoryRegion::AllocateFlags flags)>
|
||||
system_allocator_;
|
||||
|
||||
/// @brief System memory deallocator in the nearest NUMA node.
|
||||
std::function<void(void*)> system_deallocator_;
|
||||
|
||||
// Fine grain allocator on this device
|
||||
/// @brief Fine-grain allocator on this GPU.
|
||||
std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)> finegrain_allocator_;
|
||||
|
||||
/// @brief Fine-grain deallocator on this GPU.
|
||||
std::function<void(void*)> finegrain_deallocator_;
|
||||
/// @brief Coarse-grain allocator on this GPU.
|
||||
std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)> coarsegrain_allocator_;
|
||||
/// @brief Coarse-grain deallocator on this GPU.
|
||||
std::function<void(void*)> coarsegrain_deallocator_;
|
||||
|
||||
void* trap_handler_tma_region_;
|
||||
|
||||
|
||||
@@ -2456,25 +2456,32 @@ void GpuAgent::InitAllocators() {
|
||||
}
|
||||
assert(system_allocator_ && "Nearest NUMA node did not have a kernarg pool.");
|
||||
|
||||
// Setup fine-grain allocator
|
||||
// Setup this GPU's fine-grain and coarse-grain allocators.
|
||||
for (auto region : regions()) {
|
||||
const AMD::MemoryRegion* amd_region = (const AMD::MemoryRegion*)region;
|
||||
if (amd_region->IsLocalMemory() && amd_region->fine_grain()) {
|
||||
finegrain_allocator_ = [region](size_t size,
|
||||
MemoryRegion::AllocateFlags alloc_flags) -> void* {
|
||||
void* ptr = nullptr;
|
||||
return (HSA_STATUS_SUCCESS ==
|
||||
core::Runtime::runtime_singleton_->AllocateMemory(region, size, alloc_flags, &ptr))
|
||||
? ptr
|
||||
: nullptr;
|
||||
};
|
||||
const AMD::MemoryRegion* amd_region = static_cast<const AMD::MemoryRegion*>(region);
|
||||
|
||||
finegrain_deallocator_ = [](void* ptr) {
|
||||
core::Runtime::runtime_singleton_->FreeMemory(ptr);
|
||||
};
|
||||
auto region_allocator = [region](size_t size,
|
||||
MemoryRegion::AllocateFlags alloc_flags) -> void* {
|
||||
void* ptr = nullptr;
|
||||
return (HSA_STATUS_SUCCESS ==
|
||||
core::Runtime::runtime_singleton_->AllocateMemory(region, size, alloc_flags, &ptr))
|
||||
? ptr
|
||||
: nullptr;
|
||||
};
|
||||
|
||||
auto region_deallocator = [](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); };
|
||||
|
||||
if (amd_region->IsLocalMemory() && amd_region->fine_grain()) {
|
||||
finegrain_allocator_ = region_allocator;
|
||||
finegrain_deallocator_ = region_deallocator;
|
||||
} else if (amd_region->IsLocalMemory() &&
|
||||
!(amd_region->fine_grain() || amd_region->extended_scope_fine_grain())) {
|
||||
coarsegrain_allocator_ = region_allocator;
|
||||
coarsegrain_deallocator_ = region_deallocator;
|
||||
}
|
||||
}
|
||||
assert(finegrain_deallocator_ && "Agent does not have a fine-grain allocator");
|
||||
assert(finegrain_allocator_ && "GPU agent does not have a fine-grain allocator");
|
||||
assert(coarsegrain_allocator_ && "GPU agent does not have a coarse-grain allocator");
|
||||
}
|
||||
|
||||
core::Agent* GpuAgent::GetNearestCpuAgent() const {
|
||||
|
||||
Ссылка в новой задаче
Block a user