rocr/libhsakmt: Add coarse-grain allocator to GPU

[ROCm/ROCR-Runtime commit: adbc0495e2]
Этот коммит содержится в:
Tony Gutierrez
2025-02-18 14:02:37 -08:00
коммит произвёл Yat Sin, David
родитель 945d6da90b
Коммит 3ebcf3020f
2 изменённых файлов: 42 добавлений и 20 удалений
+20 -5
Просмотреть файл
@@ -479,6 +479,18 @@ class GpuAgent : public GpuAgentInt {
auto finegrain_deallocator() const -> const std::function<void(void*)>& {
  // Expose the stored callable by const reference; no copy is made.
  return finegrain_deallocator_;
}
/// @brief Allocate coarse grain device memory on this GPU agent.
const std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)>&
coarsegrain_allocator() const {
return coarsegrain_allocator_;
}
/// @brief Accessor for the deallocator paired with coarsegrain_allocator().
///
/// The returned callable takes a pointer that was obtained from the
/// coarse-grain allocator on this GPU agent and releases it.
auto coarsegrain_deallocator() const -> const std::function<void(void*)>& {
  return coarsegrain_deallocator_;
}
protected:
// Sizes are in packets, not bytes.
const uint32_t minAqlSize_ = 0x40; // 0x40 = 64 packets; 4KB min (presumably 64-byte AQL packets - confirm)
@@ -731,16 +743,19 @@ class GpuAgent : public GpuAgentInt {
ScratchCache scratch_cache_;
/// @brief System memory allocator in the nearest NUMA node.
/// Callable taking (size, alignment, allocation flags) and returning the
/// allocated pointer. InitAllocators() asserts that it has been set.
std::function<void*(size_t size, size_t align, core::MemoryRegion::AllocateFlags flags)>
system_allocator_;
/// @brief System memory deallocator in the nearest NUMA node.
std::function<void(void*)> system_deallocator_;
/// @brief Fine-grain allocator on this GPU.
/// Set in InitAllocators() for a region that is local memory and fine-grained.
/// The installed callable returns nullptr when the runtime allocation fails.
std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)> finegrain_allocator_;
/// @brief Fine-grain deallocator on this GPU.
/// The installed callable forwards the pointer to the runtime's FreeMemory().
std::function<void(void*)> finegrain_deallocator_;
/// @brief Coarse-grain allocator on this GPU.
/// Set in InitAllocators() for a region that is local memory and is neither
/// fine-grained nor extended-scope fine-grained. The installed callable
/// returns nullptr when the runtime allocation fails.
std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)> coarsegrain_allocator_;
/// @brief Coarse-grain deallocator on this GPU.
/// The installed callable forwards the pointer to the runtime's FreeMemory().
std::function<void(void*)> coarsegrain_deallocator_;
void* trap_handler_tma_region_;
+22 -15
Просмотреть файл
@@ -2456,25 +2456,32 @@ void GpuAgent::InitAllocators() {
}
assert(system_allocator_ && "Nearest NUMA node did not have a kernarg pool.");
// Setup fine-grain allocator
// Setup this GPU's fine-grain and coarse-grain allocators.
for (auto region : regions()) {
const AMD::MemoryRegion* amd_region = (const AMD::MemoryRegion*)region;
if (amd_region->IsLocalMemory() && amd_region->fine_grain()) {
finegrain_allocator_ = [region](size_t size,
MemoryRegion::AllocateFlags alloc_flags) -> void* {
void* ptr = nullptr;
return (HSA_STATUS_SUCCESS ==
core::Runtime::runtime_singleton_->AllocateMemory(region, size, alloc_flags, &ptr))
? ptr
: nullptr;
};
const AMD::MemoryRegion* amd_region = static_cast<const AMD::MemoryRegion*>(region);
finegrain_deallocator_ = [](void* ptr) {
core::Runtime::runtime_singleton_->FreeMemory(ptr);
};
auto region_allocator = [region](size_t size,
MemoryRegion::AllocateFlags alloc_flags) -> void* {
void* ptr = nullptr;
return (HSA_STATUS_SUCCESS ==
core::Runtime::runtime_singleton_->AllocateMemory(region, size, alloc_flags, &ptr))
? ptr
: nullptr;
};
auto region_deallocator = [](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); };
if (amd_region->IsLocalMemory() && amd_region->fine_grain()) {
finegrain_allocator_ = region_allocator;
finegrain_deallocator_ = region_deallocator;
} else if (amd_region->IsLocalMemory() &&
!(amd_region->fine_grain() || amd_region->extended_scope_fine_grain())) {
coarsegrain_allocator_ = region_allocator;
coarsegrain_deallocator_ = region_deallocator;
}
}
assert(finegrain_deallocator_ && "Agent does not have a fine-grain allocator");
assert(finegrain_allocator_ && "GPU agent does not have a fine-grain allocator");
assert(coarsegrain_allocator_ && "GPU agent does not have a coarse-grain allocator");
}
core::Agent* GpuAgent::GetNearestCpuAgent() const {