SWDEV-460242 - Add system memory suballocator

Switch command creation to the new suballocator to avoid
frequent, expensive OS allocation calls

Change-Id: I3597c811820e577c15708bad8b8a41aa53acc400


[ROCm/clr commit: 5b0bfdcbad]
Этот коммит содержится в:
German Andryeyev
2024-05-03 17:06:10 -04:00
коммит произвёл Maneesh Gupta
родитель 3ca0dbc4d7
Коммит 68344576d3
3 изменённых файлов: 130 добавлений и 18 удалений
+13 -9
Просмотреть файл
@@ -1,4 +1,4 @@
/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
/* Copyright (c) 2008 - 2024 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -18,14 +18,6 @@
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
/*!
* \file command.cpp
* \brief Definitions for Event, Command and HostQueue objects.
*
* \author Laurent Morichetti
* \date October 2008
*/
#include "platform/activity.hpp"
#include "platform/command.hpp"
#include "platform/commandqueue.hpp"
@@ -326,6 +318,18 @@ Command::Command(HostQueue& queue, cl_command_type type, const EventWaitList& ev
}
}
// Definition of the static pool shared by all Command objects. Command::operator new and
// Command::operator delete forward to it; each slot has the size of the ComputeCommand union.
SysmemPool<ComputeCommand> Command::command_pool_;
// ================================================================================================
// Class-specific deallocation: hands the command's storage back to the shared
// command pool rather than to the global heap.
void Command::operator delete(void* ptr) {
  Command::command_pool_.Free(ptr);
}
// ================================================================================================
// Class-specific allocation: takes one slot from the shared command pool.
// The pool itself validates that `size` fits into a single slot.
void* Command::operator new(size_t size) {
  void* const slot = Command::command_pool_.Alloc(size);
  return slot;
}
// ================================================================================================
void Command::releaseResources() {
const Command::EventWaitList& events = eventWaitList();
+40 -8
Просмотреть файл
@@ -1,4 +1,4 @@
/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc.
/* Copyright (c) 2010 - 2024 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -18,13 +18,6 @@
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
/*! \file command.hpp
* \brief Declarations for Event, Command and HostQueue objects.
*
* \author Laurent Morichetti
* \date October 2008
*/
#ifndef COMMAND_HPP_
#define COMMAND_HPP_
@@ -62,6 +55,7 @@ namespace amd {
class Command;
class HostQueue;
union ComputeCommand;
/*! \brief Encapsulates the status of a command.
*
@@ -254,6 +248,7 @@ union CopyMetadata {
*/
class Command : public Event {
private:
static SysmemPool<ComputeCommand> command_pool_; //!< Pool of active commands
HostQueue* queue_; //!< The command queue this command is enqueue into
Command* next_; //!< Next GPU command in the queue list
Command* batch_head_ = nullptr; //!< The head of the batch commands
@@ -297,6 +292,10 @@ class Command : public Event {
}
public:
//! Overload new/delete for fast commands allocation/destruction
void* operator new(size_t size);
void operator delete(void* ptr);
//! Return the queue this command is enqueued into.
HostQueue* queue() const { return queue_; }
@@ -1787,6 +1786,39 @@ public:
const void* ptr() const { return ptr_; }
};
//! Union used in memory suballocator, must be updated with the new commands.
//!
//! The union is never used to access a command; it only serves as the slot type of
//! SysmemPool<ComputeCommand>, so its size is the upper bound for any object allocated
//! through Command::operator new (the pool rejects requests larger than sizeof(T)).
//! Every command class allocated from the pool therefore needs a member here.
//!
//! NOTE(review): there is no cmd12 - the numbering jumps from cmd11 to cmd13. Confirm
//! whether a command type was dropped on purpose or is missing from the list.
union ComputeCommand {
ReadMemoryCommand cmd0;
WriteMemoryCommand cmd1;
FillMemoryCommand cmd2;
CopyMemoryCommand cmd3;
MapMemoryCommand cmd4;
UnmapMemoryCommand cmd5;
MigrateMemObjectsCommand cmd6;
NDRangeKernelCommand cmd7;
NativeFnCommand cmd8;
ExternalSemaphoreCmd cmd9;
Marker cmd10;
AccumulateCommand cmd11;
AcquireExtObjectsCommand cmd13;
ReleaseExtObjectsCommand cmd14;
PerfCounterCommand cmd15;
ThreadTraceMemObjectsCommand cmd16;
ThreadTraceCommand cmd17;
SignalCommand cmd18;
MakeBuffersResidentCommand cmd19;
SvmFreeMemoryCommand cmd20;
SvmCopyMemoryCommand cmd21;
SvmFillMemoryCommand cmd22;
SvmMapMemoryCommand cmd23;
SvmUnmapMemoryCommand cmd24;
CopyMemoryP2PCommand cmd25;
SvmPrefetchAsyncCommand cmd26;
VirtualMapCommand cmd27;
// User-provided and intentionally empty: the union itself neither constructs nor
// destroys any member, it only provides raw, suitably sized storage.
ComputeCommand() {}
~ComputeCommand() {}
};
/*! @}
* @}
*/
+77 -1
Просмотреть файл
@@ -1,4 +1,4 @@
/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
/* Copyright (c) 2008 - 2024 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -21,6 +21,8 @@
#ifndef OBJECT_HPP_
#define OBJECT_HPP_
#include <set>
#include "top.hpp"
#include "os/alloc.hpp"
#include "thread/monitor.hpp"
@@ -190,6 +192,80 @@ struct Coord3D {
}
};
//! \brief Suballocator that hands out fixed-size slots of system memory.
//!
//! Memory is obtained from the OS in chunks of kAllocChunkSize slots, each slot being
//! sizeof(T) bytes. Slots are assigned by a monotonically increasing global counter and
//! are never reused; a chunk is returned to the OS once every one of its slots was freed.
//!
//! \tparam T Slot type. Only its size/alignment and default construction matter here.
//!
//! NOTE(review): only kActiveAllocSize chunk base pointers are kept for lock-free lookup
//! in Alloc(). This assumes an allocating thread never lags more than kActiveAllocSize
//! chunks behind the newest chunk - confirm this holds for the expected workloads.
template <class T>
class SysmemPool {
public:
  SysmemPool() : chunk_access_("Sysmem Pool Lock", true) {}

  // The pool owns raw chunk memory, hence it must never be copied.
  SysmemPool(const SysmemPool&) = delete;
  SysmemPool& operator=(const SysmemPool&) = delete;

  //! Releases the current (partially used) chunk if nothing is outstanding.
  //! Chunks that still contain live allocations are intentionally left alone.
  ~SysmemPool() {
    if (chunks_.size() != 1) {
      return;
    }
    const size_t used = current_alloc_.load() % kAllocChunkSize;
    // used == 0 means there is no partially handed out chunk. A chunk that is still
    // registered in that state has live allocations and must not be released.
    if (used == 0) {
      return;
    }
    auto it = chunks_.begin();
    AllocChunk* chunk = *it;
    // free_ starts at kAllocChunkSize and drops by one per Free(). It equals the number
    // of never handed out slots exactly when every handed out slot was freed again.
    if (chunk->free_ == kAllocChunkSize - used) {
      chunks_.erase(it);
      delete [] chunk->allocs_;
      delete chunk;
    }
  }

  //! Returns storage for one object of at most sizeof(T) bytes.
  //! \param size Requested size in bytes; larger requests than sizeof(T) are fatal.
  //! \return Pointer to an unused slot. The slot must be returned with Free().
  void* Alloc(size_t size) {
    guarantee(size <= sizeof(T), "Bigger size than pool allows!");
    const size_t current = current_alloc_++;
    const size_t chunk_idx = current / kAllocChunkSize;
    // max_chunk_idx_ is atomic: it is read here without the lock, and its update
    // publishes the matching active_allocs_ entry written just before it.
    while (chunk_idx >= max_chunk_idx_.load()) {
      ScopedLock lock(chunk_access_);
      // Second check in a case of multiple waiters. Chunks are created strictly in
      // order, a thread that is ahead keeps looping until its predecessor is done.
      if (chunk_idx == max_chunk_idx_.load()) {
        T* allocs = new T[kAllocChunkSize];
        chunks_.emplace(new AllocChunk(allocs));
        active_allocs_[chunk_idx % kActiveAllocSize] = allocs;
        max_chunk_idx_++;
      }
    }
    return &active_allocs_[chunk_idx % kActiveAllocSize][current % kAllocChunkSize];
  }

  //! Returns a slot to the pool and destroys its chunk when the chunk became unused.
  //! \param ptr Pointer obtained from Alloc(); nullptr is accepted and ignored.
  //! Passing any other pointer that does not belong to the pool is fatal.
  void Free(void* ptr) {
    // Deallocation functions may legally be invoked with a null pointer
    if (ptr == nullptr) {
      return;
    }
    const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
    ScopedLock lock(chunk_access_);
    bool found = false;
    // Search for the pointer in all valid chunks
    for (auto it = chunks_.begin(); it != chunks_.end(); ++it) {
      AllocChunk* chunk = *it;
      const uintptr_t base = reinterpret_cast<uintptr_t>(chunk->allocs_);
      if (addr >= base && addr < (base + sizeof(T) * kAllocChunkSize)) {
        found = true;
        // Destroy current chunk if all allocations are freed
        if (--chunk->free_ == 0) {
          // Unlink first, so the set never holds a pointer to released memory
          chunks_.erase(it);
          delete [] chunk->allocs_;
          delete chunk;
        }
        break;
      }
    }
    guarantee(found, "Mempool releases incorrect memory!\n");
  }

private:
  static constexpr size_t kAllocChunkSize = 1024;  //!< The total number of allocations in a chunk
  static constexpr size_t kActiveAllocSize = 32;   //!< The number of active chunks

  struct AllocChunk {
    T* allocs_;      //!< Array of allocations
    uint32_t free_;  //!< Countdown of slots not yet freed; the chunk is destroyed at zero
    AllocChunk(T* alloc): allocs_(alloc), free_(kAllocChunkSize) {}
  };

  std::atomic<uint64_t> current_alloc_{0};   //!< Current allocation, global index
  std::atomic<size_t> max_chunk_idx_{0};     //!< Number of chunks created so far
  amd::Monitor chunk_access_;                //!< Lock for the chunk list access
  std::set<AllocChunk*> chunks_;             //!< List of allocated memory chunks
  T* active_allocs_[kActiveAllocSize] = {};  //!< Active chunks for fast access
};
} // namespace amd
template <typename CL> typename amd::as_internal<CL>::type* as_amd(CL* cl_obj) {