44b6b6813d
Change-Id: I94acbca606fd4c575e2e1a9e34959ce650571867
528 lines
19 KiB
C++
528 lines
19 KiB
C++
/* Copyright (c) 2022-2023 Advanced Micro Devices, Inc.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE. */
|
|
|
|
#include "hip_mempool_impl.hpp"
|
|
#include "hip_vm.hpp"
|
|
#include "platform/command.hpp"
|
|
|
|
namespace hip {
|
|
|
|
// ================================================================================================
|
|
void Heap::AddMemory(amd::Memory* memory, Stream* stream) {
|
|
auto mem_size = memory->getSize();
|
|
allocations_.insert({{mem_size, memory}, {stream}});
|
|
total_size_ += mem_size;
|
|
max_total_size_ = std::max(max_total_size_, total_size_);
|
|
}
|
|
|
|
// ================================================================================================
|
|
void Heap::AddMemory(amd::Memory* memory, const MemoryTimestamp& ts) {
|
|
auto mem_size = memory->getSize();
|
|
allocations_.insert({{mem_size, memory}, ts});
|
|
total_size_ += mem_size;
|
|
max_total_size_ = std::max(max_total_size_, total_size_);
|
|
}
|
|
|
|
// ================================================================================================
|
|
amd::Memory* Heap::FindMemory(size_t size, Stream* stream, bool opportunistic,
|
|
void* dptr, MemoryTimestamp* ts) {
|
|
amd::Memory* memory = nullptr;
|
|
auto start = allocations_.lower_bound({size, nullptr});
|
|
for (auto it = start; it != allocations_.end();) {
|
|
bool check_address = (dptr == nullptr);
|
|
if (it->first.second->getSvmPtr() == dptr) {
|
|
// If the search is done for the specified address then runtime must wait
|
|
it->second.Wait();
|
|
check_address = true;
|
|
}
|
|
// Runtime can accept an allocation with 12.5% on the size threshold
|
|
if (it->first.first > (size / 8.0) * 9) {
|
|
return nullptr;
|
|
}
|
|
// Check if size can match and it's safe to use this resource.
|
|
if (check_address && (it->second.IsSafeFind(stream, opportunistic))) {
|
|
memory = it->first.second;
|
|
total_size_ -= memory->getSize();
|
|
// Preserve event, since the logic could skip GPU wait on reuse
|
|
ts->event_ = it->second.event_;
|
|
// Remove found allocation from the map
|
|
it = allocations_.erase(it);
|
|
break;
|
|
} else {
|
|
++it;
|
|
}
|
|
}
|
|
return memory;
|
|
}
|
|
|
|
// ================================================================================================
|
|
bool Heap::RemoveMemory(amd::Memory* memory, MemoryTimestamp* ts) {
|
|
auto mem_size = memory->getSize();
|
|
if (auto it = allocations_.find({mem_size, memory}); it != allocations_.end()) {
|
|
if (ts != nullptr) {
|
|
// Preserve timestamp info for possible reuse later
|
|
*ts = it->second;
|
|
} else {
|
|
it->second.SetEvent(nullptr);
|
|
}
|
|
total_size_ -= mem_size;
|
|
allocations_.erase(it);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// ================================================================================================
|
|
Heap::SortedMap::iterator Heap::EraseAllocaton(Heap::SortedMap::iterator& it) {
|
|
auto memory = it->first.second;
|
|
const device::Memory* dev_mem = memory->getDeviceMemory(*device_->devices()[0]);
|
|
void* dev_mem_vaddr = reinterpret_cast<void*>(dev_mem->virtualAddress());
|
|
total_size_ -= it->first.first;
|
|
|
|
if (dev_mem_vaddr != nullptr) {
|
|
amd::SvmBuffer::free(memory->getContext(), dev_mem_vaddr);
|
|
} else {
|
|
amd::SvmBuffer::free(memory->getContext(), memory->getSvmPtr());
|
|
}
|
|
// Clear HIP event
|
|
it->second.SetEvent(nullptr);
|
|
// Remove the allocation from the map
|
|
return allocations_.erase(it);
|
|
}
|
|
|
|
// ================================================================================================
|
|
bool Heap::ReleaseAllMemory(size_t min_bytes_to_hold, bool safe_release) {
|
|
for (auto it = allocations_.begin(); it != allocations_.end();) {
|
|
// Make sure the heap is smaller than the minimum value to hold
|
|
if (total_size_ <= min_bytes_to_hold) {
|
|
return true;
|
|
}
|
|
// Safe release forces unconditional wait for memory
|
|
if (safe_release) {
|
|
it->second.Wait();
|
|
}
|
|
if (it->second.IsSafeRelease()) {
|
|
it = EraseAllocaton(it);
|
|
} else {
|
|
++it;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// ================================================================================================
|
|
bool Heap::ReleaseAllMemory() {
|
|
for (auto it = allocations_.begin(); it != allocations_.end();) {
|
|
// Make sure the heap holds the minimum number of bytes
|
|
if (total_size_ <= release_threshold_) {
|
|
return true;
|
|
}
|
|
if (it->second.IsSafeRelease()) {
|
|
it = EraseAllocaton(it);
|
|
} else {
|
|
++it;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// ================================================================================================
|
|
void Heap::RemoveStream(Stream* stream) {
|
|
for (auto it : allocations_) {
|
|
it.second.safe_streams_.erase(stream);
|
|
}
|
|
}
|
|
|
|
// ================================================================================================
|
|
void Heap::SetAccess(hip::Device* device, bool enable) {
|
|
for (const auto& it : allocations_) {
|
|
auto peer_device = device->asContext()->devices()[0];
|
|
device::Memory* mem = it.first.second->getDeviceMemory(*peer_device);
|
|
if (mem != nullptr) {
|
|
if (!mem->getAllowedPeerAccess() && enable) {
|
|
// Enable p2p access for the specified device
|
|
peer_device->allowPeerAccess(mem);
|
|
mem->setAllowedPeerAccess(true);
|
|
} else if (mem->getAllowedPeerAccess() && !enable) {
|
|
mem->setAllowedPeerAccess(false);
|
|
}
|
|
} else {
|
|
LogError("Couldn't find device memory for P2P access");
|
|
}
|
|
}
|
|
}
|
|
|
|
// ================================================================================================
|
|
void* MemoryPool::AllocateMemory(size_t size, Stream* stream, void* dptr) {
|
|
amd::ScopedLock lock(lock_pool_ops_);
|
|
|
|
void* dev_ptr = nullptr;
|
|
MemoryTimestamp ts;
|
|
amd::Memory* memory = free_heap_.FindMemory(size, stream, Opportunistic(), dptr, &ts);
|
|
if (memory == nullptr) {
|
|
if (Properties().maxSize != 0 && (max_total_size_ + size) > Properties().maxSize) {
|
|
return nullptr;
|
|
}
|
|
amd::Context* context = device_->asContext();
|
|
const auto& dev_info = context->devices()[0]->info();
|
|
if (dev_info.maxMemAllocSize_ < size) {
|
|
return nullptr;
|
|
}
|
|
cl_svm_mem_flags flags = (state_.interprocess_) ? ROCCLR_MEM_INTERPROCESS : 0;
|
|
flags |= (state_.phys_mem_) ? ROCCLR_MEM_PHYMEM : 0;
|
|
dev_ptr = amd::SvmBuffer::malloc(*context, flags, size, dev_info.memBaseAddrAlign_, nullptr);
|
|
if (dev_ptr == nullptr) {
|
|
size_t free = 0, total =0;
|
|
hipError_t err = hipMemGetInfo(&free, &total);
|
|
if (err == hipSuccess) {
|
|
LogPrintfError("Allocation failed : Device memory : required :%zu | free :%zu | total :%zu",
|
|
size, free, total);
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
size_t offset = 0;
|
|
memory = getMemoryObject(dev_ptr, offset);
|
|
// Saves the current device id so that it can be accessed later
|
|
memory->getUserData().deviceId = device_->deviceId();
|
|
|
|
// Update access for the new allocation from other devices
|
|
for (const auto& it : access_map_) {
|
|
auto vdi_device = it.first->asContext()->devices()[0];
|
|
device::Memory* mem = memory->getDeviceMemory(*vdi_device);
|
|
if ((mem != nullptr) && (it.second != hipMemAccessFlagsProtNone)) {
|
|
vdi_device->allowPeerAccess(mem);
|
|
mem->setAllowedPeerAccess(true);
|
|
}
|
|
}
|
|
} else {
|
|
dev_ptr = memory->getSvmPtr();
|
|
if (!amd::MemObjMap::FindMemObj(dev_ptr))
|
|
amd::MemObjMap::AddMemObj(dev_ptr, memory);
|
|
}
|
|
// Place the allocated memory into the busy heap
|
|
ts.AddSafeStream(stream);
|
|
busy_heap_.AddMemory(memory, ts);
|
|
|
|
max_total_size_ = std::max(max_total_size_, busy_heap_.GetTotalSize() +
|
|
free_heap_.GetTotalSize());
|
|
// Increment the reference counter on the pool
|
|
retain();
|
|
|
|
ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Pool AllocMem: %p, %p", memory->getSvmPtr(), memory);
|
|
|
|
return dev_ptr;
|
|
}
|
|
|
|
// ================================================================================================
|
|
bool MemoryPool::FreeMemory(amd::Memory* memory, Stream* stream, Event* event) {
|
|
{
|
|
amd::ScopedLock lock(lock_pool_ops_);
|
|
|
|
if (memory->getUserData().phys_mem_obj != nullptr) {
|
|
memory = memory->getUserData().phys_mem_obj;
|
|
}
|
|
|
|
// If the free heap grows over the busy heap, then force release
|
|
if (AMD_DIRECT_DISPATCH && (free_heap_.GetTotalSize() > busy_heap_.GetTotalSize())) {
|
|
// Use event base release to reduce memory pressure
|
|
constexpr size_t kBytesToHold = 0;
|
|
free_heap_.ReleaseAllMemory(kBytesToHold);
|
|
|
|
// If free mmeory is less than 12.5% of total, then force wait release
|
|
size_t free = 0;
|
|
size_t total = 0;
|
|
hipError_t err = hipMemGetInfo(&free, &total);
|
|
if ((err == hipSuccess) && (free < (total >> 3))) {
|
|
constexpr bool kSafeRelease = true;
|
|
free_heap_.ReleaseAllMemory(free_heap_.GetTotalSize() >> 1, kSafeRelease);
|
|
}
|
|
}
|
|
|
|
MemoryTimestamp ts;
|
|
// Remove memory object from the busy pool
|
|
if (!busy_heap_.RemoveMemory(memory, &ts)) {
|
|
// This pool doesn't contain memory
|
|
return false;
|
|
}
|
|
ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Pool FreeMem: %p, %p", memory->getSvmPtr(), memory);
|
|
|
|
if (memory->getUserData().vaddr_mem_obj != nullptr) {
|
|
auto va_mem = memory->getUserData().vaddr_mem_obj;
|
|
if (stream == nullptr) {
|
|
stream = g_devices[memory->getUserData().deviceId]->NullStream();
|
|
}
|
|
// Unmap virtual address from memory
|
|
auto cmd = new amd::VirtualMapCommand(*stream, amd::Command::EventWaitList{},
|
|
va_mem->getSvmPtr(), va_mem->getSize(), nullptr);
|
|
cmd->enqueue();
|
|
cmd->release();
|
|
}
|
|
|
|
if (stream != nullptr) {
|
|
// The stream of destruction is a safe stream, because the app must handle sync
|
|
ts.AddSafeStream(stream);
|
|
|
|
if (event == nullptr) {
|
|
// Add a marker to the stream to trace availability of this memory
|
|
Event* e = new hip::Event(0);
|
|
if (e != nullptr) {
|
|
if (hipSuccess == e->addMarker(reinterpret_cast<hipStream_t>(stream), nullptr, true)) {
|
|
ts.SetEvent(e);
|
|
// Make sure runtime sends a notification
|
|
auto result = e->ready();
|
|
}
|
|
}
|
|
} else {
|
|
ts.SetEvent(event);
|
|
}
|
|
} else {
|
|
// Assume a safe release from hipFree() if stream is nullptr
|
|
ts.SetEvent(nullptr);
|
|
}
|
|
free_heap_.AddMemory(memory, ts);
|
|
}
|
|
|
|
// Decrement the reference counter on the pool.
|
|
// Note: It may delete memory pool for the last allocation. Thus, the scope lock can't include
|
|
// this call.
|
|
release();
|
|
|
|
return true;
|
|
}
|
|
|
|
// ================================================================================================
|
|
void MemoryPool::ReleaseAllMemory() {
|
|
constexpr bool kSafeRelease = true;
|
|
free_heap_.ReleaseAllMemory(0, kSafeRelease);
|
|
busy_heap_.ReleaseAllMemory(0, kSafeRelease);
|
|
}
|
|
|
|
// ================================================================================================
|
|
void MemoryPool::ReleaseFreedMemory() {
|
|
amd::ScopedLock lock(lock_pool_ops_);
|
|
|
|
free_heap_.ReleaseAllMemory();
|
|
}
|
|
|
|
// ================================================================================================
|
|
void MemoryPool::RemoveStream(Stream* stream) {
|
|
amd::ScopedLock lock(lock_pool_ops_);
|
|
|
|
free_heap_.RemoveStream(stream);
|
|
}
|
|
|
|
// ================================================================================================
|
|
void MemoryPool::TrimTo(size_t min_bytes_to_hold) {
|
|
amd::ScopedLock lock(lock_pool_ops_);
|
|
|
|
free_heap_.ReleaseAllMemory(min_bytes_to_hold);
|
|
}
|
|
|
|
// ================================================================================================
|
|
hipError_t MemoryPool::SetAttribute(hipMemPoolAttr attr, void* value) {
|
|
amd::ScopedLock lock(lock_pool_ops_);
|
|
uint64_t reset;
|
|
|
|
switch (attr) {
|
|
case hipMemPoolReuseFollowEventDependencies:
|
|
// Enable/disable HIP events tracking from the app's dependencies
|
|
state_.event_dependencies_ = *reinterpret_cast<int32_t*>(value);
|
|
break;
|
|
case hipMemPoolReuseAllowOpportunistic:
|
|
// Enable/disable HIP event check for freed memory
|
|
state_.opportunistic_ = *reinterpret_cast<int32_t*>(value);
|
|
break;
|
|
case hipMemPoolReuseAllowInternalDependencies:
|
|
// Enable/disable internal extra dependencies introduced in runtime
|
|
state_.internal_dependencies_ = *reinterpret_cast<int32_t*>(value);
|
|
break;
|
|
case hipMemPoolAttrReleaseThreshold:
|
|
free_heap_.SetReleaseThreshold(*reinterpret_cast<uint64_t*>(value));
|
|
break;
|
|
case hipMemPoolAttrReservedMemCurrent:
|
|
// Should be GetAttribute only
|
|
return hipErrorInvalidValue;
|
|
break;
|
|
case hipMemPoolAttrReservedMemHigh:
|
|
reset = *reinterpret_cast<uint64_t*>(value);
|
|
// Only 0 is accepted
|
|
if (reset != 0) {
|
|
return hipErrorInvalidValue;
|
|
}
|
|
max_total_size_ = reset;
|
|
break;
|
|
case hipMemPoolAttrUsedMemCurrent:
|
|
// Should be GetAttribute only
|
|
return hipErrorInvalidValue;
|
|
break;
|
|
case hipMemPoolAttrUsedMemHigh:
|
|
reset = *reinterpret_cast<uint64_t*>(value);
|
|
// Only 0 is accepted
|
|
if (reset != 0) {
|
|
return hipErrorInvalidValue;
|
|
}
|
|
busy_heap_.SetMaxTotalSize(reset);
|
|
break;
|
|
default:
|
|
return hipErrorInvalidValue;
|
|
}
|
|
return hipSuccess;
|
|
}
|
|
|
|
// ================================================================================================
|
|
hipError_t MemoryPool::GetAttribute(hipMemPoolAttr attr, void* value) {
|
|
amd::ScopedLock lock(lock_pool_ops_);
|
|
|
|
switch (attr) {
|
|
case hipMemPoolReuseFollowEventDependencies:
|
|
// Enable/disable HIP events tracking from the app's dependencies
|
|
*reinterpret_cast<int32_t*>(value) = EventDependencies();
|
|
break;
|
|
case hipMemPoolReuseAllowOpportunistic:
|
|
// Enable/disable HIP event check for freed memory
|
|
*reinterpret_cast<int32_t*>(value) = Opportunistic();
|
|
break;
|
|
case hipMemPoolReuseAllowInternalDependencies:
|
|
// Enable/disable internal extra dependencies introduced in runtime
|
|
*reinterpret_cast<int32_t*>(value) = InternalDependencies();
|
|
break;
|
|
case hipMemPoolAttrReleaseThreshold:
|
|
*reinterpret_cast<uint64_t*>(value) = free_heap_.GetReleaseThreshold();
|
|
break;
|
|
case hipMemPoolAttrReservedMemCurrent:
|
|
// All allocate memory by the pool in OS
|
|
*reinterpret_cast<uint64_t*>(value) = busy_heap_.GetTotalSize() + free_heap_.GetTotalSize();
|
|
break;
|
|
case hipMemPoolAttrReservedMemHigh:
|
|
// High watermark of all allocated memory in OS, since the last reset
|
|
*reinterpret_cast<uint64_t*>(value) = max_total_size_;
|
|
break;
|
|
case hipMemPoolAttrUsedMemCurrent:
|
|
// Total currently used memory by the pool
|
|
*reinterpret_cast<uint64_t*>(value) = busy_heap_.GetTotalSize();
|
|
break;
|
|
case hipMemPoolAttrUsedMemHigh:
|
|
// High watermark of all used memoryS, since the last reset
|
|
*reinterpret_cast<uint64_t*>(value) = busy_heap_.GetMaxTotalSize();
|
|
break;
|
|
default:
|
|
return hipErrorInvalidValue;
|
|
}
|
|
return hipSuccess;
|
|
}
|
|
|
|
// ================================================================================================
|
|
void MemoryPool::SetAccess(hip::Device* device, hipMemAccessFlags flags) {
|
|
amd::ScopedLock lock(lock_pool_ops_);
|
|
|
|
// Check if the requested device is the pool device where memory was allocated
|
|
if (device == device_) {
|
|
return;
|
|
}
|
|
|
|
hipMemAccessFlags current_flags = hipMemAccessFlagsProtNone;
|
|
|
|
// Check if access was enabled before
|
|
if (access_map_.find(device) != access_map_.end()) {
|
|
current_flags = access_map_[device];
|
|
}
|
|
|
|
if (current_flags != flags) {
|
|
bool enable_access = false;
|
|
// Save the access state in the device map
|
|
access_map_[device] = flags;
|
|
// Check if access is enabled
|
|
if ((flags == hipMemAccessFlagsProtRead) || (flags == hipMemAccessFlagsProtReadWrite)) {
|
|
enable_access = true;
|
|
}
|
|
// Update device access on the both pools
|
|
busy_heap_.SetAccess(device, enable_access);
|
|
free_heap_.SetAccess(device, enable_access);
|
|
}
|
|
}
|
|
|
|
// ================================================================================================
|
|
void MemoryPool::GetAccess(hip::Device* device, hipMemAccessFlags* flags) {
|
|
amd::ScopedLock lock(lock_pool_ops_);
|
|
|
|
// Current pool device has full access to memory allocation
|
|
*flags = (device == device_) ? hipMemAccessFlagsProtReadWrite : hipMemAccessFlagsProtNone;
|
|
|
|
// Check if access was enabled before
|
|
if (access_map_.find(device) != access_map_.end()) {
|
|
*flags = access_map_[device];
|
|
}
|
|
}
|
|
|
|
// ================================================================================================
|
|
void MemoryPool::FreeAllMemory(Stream* stream) {
|
|
while (!busy_heap_.Allocations().empty()) {
|
|
FreeMemory(busy_heap_.Allocations().begin()->first.second, stream);
|
|
}
|
|
}
|
|
|
|
// ================================================================================================
|
|
amd::Os::FileDesc MemoryPool::Export() {
|
|
amd::ScopedLock lock(lock_pool_ops_);
|
|
if (shared_ != nullptr) {
|
|
return shared_->handle_;
|
|
}
|
|
|
|
constexpr uint32_t kFileNameSize = 20;
|
|
char file_name[kFileNameSize];
|
|
// Generate a unique name from the mempool pointer
|
|
// Note: Windows can accept an unnamed allocation
|
|
snprintf(file_name, kFileNameSize, "%p", this);
|
|
amd::Os::FileDesc handle{};
|
|
shared_ = reinterpret_cast<SharedMemPool*>(amd::Os::CreateIpcMemory(
|
|
file_name, sizeof(SharedMemPool), &handle));
|
|
if (shared_ != nullptr) {
|
|
shared_->handle_ = handle;
|
|
shared_->state_ = state_.value_;
|
|
shared_->access_size_ = 0;
|
|
memset(shared_->access_, 0, sizeof(SharedAccess) * kMaxMgpuAccess);
|
|
assert((access_map_.size() <= kMaxMgpuAccess) && "Can't support more GPU(s) in shared access" );
|
|
for (auto it : access_map_) {
|
|
shared_->access_[shared_->access_size_] = SharedAccess{it.first->deviceId(), it.second};
|
|
shared_->access_size_++;
|
|
}
|
|
}
|
|
return handle;
|
|
}
|
|
|
|
// ================================================================================================
|
|
bool MemoryPool::Import(amd::Os::FileDesc handle) {
|
|
amd::ScopedLock lock(lock_pool_ops_);
|
|
bool result = false;
|
|
auto shared = reinterpret_cast<SharedMemPool*>(
|
|
amd::Os::OpenIpcMemory(nullptr, handle, sizeof(SharedMemPool)));
|
|
|
|
if (shared != nullptr) {
|
|
state_.value_ = shared->state_;
|
|
for (uint32_t i = 0; i < shared->access_size_; ++i) {
|
|
access_map_[g_devices[shared->access_[i].device_id_]] = shared->access_[i].flags_;
|
|
}
|
|
result = true;
|
|
}
|
|
return result;
|
|
}
|
|
}
|