diff --git a/hipamd/src/hip_mempool_impl.cpp b/hipamd/src/hip_mempool_impl.cpp index 6c6b98ae56..851836039f 100644 --- a/hipamd/src/hip_mempool_impl.cpp +++ b/hipamd/src/hip_mempool_impl.cpp @@ -41,7 +41,8 @@ void Heap::AddMemory(amd::Memory* memory, const MemoryTimestamp& ts) { } // ================================================================================================ -amd::Memory* Heap::FindMemory(size_t size, Stream* stream, bool opportunistic, void* dptr) { +amd::Memory* Heap::FindMemory(size_t size, Stream* stream, bool opportunistic, + void* dptr, MemoryTimestamp* ts) { amd::Memory* memory = nullptr; auto start = allocations_.lower_bound({size, nullptr}); for (auto it = start; it != allocations_.end();) { @@ -61,6 +62,8 @@ amd::Memory* Heap::FindMemory(size_t size, Stream* stream, bool opportunistic, v if (check_address && (it->second.IsSafeFind(stream, opp_mode))) { memory = it->first.second; total_size_ -= memory->getSize(); + // Preserve event, since the logic could skip GPU wait on reuse + ts->event_ = it->second.event_; // Remove found allocation from the map it = allocations_.erase(it); break; @@ -79,8 +82,6 @@ bool Heap::RemoveMemory(amd::Memory* memory, MemoryTimestamp* ts) { // Preserve timestamp info for possible reuse later *ts = it->second; } else { - // Runtime will delete the timestamp object, hence make sure HIP event is released - it->second.Wait(); it->second.SetEvent(nullptr); } total_size_ -= mem_size; @@ -169,7 +170,8 @@ void* MemoryPool::AllocateMemory(size_t size, Stream* stream, void* dptr) { amd::ScopedLock lock(lock_pool_ops_); void* dev_ptr = nullptr; - amd::Memory* memory = free_heap_.FindMemory(size, stream, Opportunistic(), dptr); + MemoryTimestamp ts; + amd::Memory* memory = free_heap_.FindMemory(size, stream, Opportunistic(), dptr, &ts); if (memory == nullptr) { if (Properties().maxSize != 0 && (max_total_size_ + size) > Properties().maxSize) { return nullptr; @@ -207,12 +209,12 @@ void* 
MemoryPool::AllocateMemory(size_t size, Stream* stream, void* dptr) { } } } else { - free_heap_.RemoveMemory(memory); const device::Memory* dev_mem = memory->getDeviceMemory(*device_->devices()[0]); dev_ptr = reinterpret_cast(dev_mem->virtualAddress()); } // Place the allocated memory into the busy heap - busy_heap_.AddMemory(memory, stream); + ts.AddSafeStream(stream); + busy_heap_.AddMemory(memory, ts); max_total_size_ = std::max(max_total_size_, busy_heap_.GetTotalSize() + free_heap_.GetTotalSize()); diff --git a/hipamd/src/hip_mempool_impl.hpp b/hipamd/src/hip_mempool_impl.hpp index 0662d1dc96..e397ceb79b 100644 --- a/hipamd/src/hip_mempool_impl.hpp +++ b/hipamd/src/hip_mempool_impl.hpp @@ -38,13 +38,11 @@ struct SharedMemPointer { }; struct MemoryTimestamp { - MemoryTimestamp(hip::Stream* stream): event_(nullptr) { + MemoryTimestamp(hip::Stream* stream = nullptr) { if (stream != nullptr) { safe_streams_.insert(stream); } } - MemoryTimestamp(): event_(nullptr) {} - /// Adds a safe stream to the list of streams for possible reuse void AddSafeStream(Stream* event_stream, Stream* wait_stream = nullptr) { if (wait_stream == nullptr) { @@ -59,6 +57,8 @@ struct MemoryTimestamp { } /// Changes last known valid event associated with memory void SetEvent(hip::Event* event) { + // Runtime will delete the HIP event, hence make sure GPU is done with it + Wait(); delete event_; event_ = event; } @@ -94,7 +94,7 @@ struct MemoryTimestamp { } std::unordered_set safe_streams_; //!< Safe streams for memory reuse - hip::Event* event_; //!< Last known HIP event, associated with the memory object + hip::Event* event_ = nullptr; //!< Last known HIP event, associated with the memory object }; class Heap : public amd::EmbeddedObject { @@ -112,7 +112,8 @@ public: void AddMemory(amd::Memory* memory, const MemoryTimestamp& ts); /// Finds memory object with the specified size - amd::Memory* FindMemory(size_t size, Stream* stream, bool opportunistic, void* dptr = nullptr); + amd::Memory* 
FindMemory(size_t size, Stream* stream, bool opportunistic, + void* dptr, MemoryTimestamp* ts); /// Removes allocation from the map bool RemoveMemory(amd::Memory* memory, MemoryTimestamp* ts = nullptr); @@ -157,7 +158,6 @@ public: } } - /// Checks if memory belongs to this heap bool IsActiveMemory(amd::Memory* memory) const { return (allocations_.find({memory->getSize(), memory}) != allocations_.end());