SWDEV-460242 - Add system memory suballocator
Switch command creation to the new suballocator to avoid
frequent, expensive OS calls.
Change-Id: I3597c811820e577c15708bad8b8a41aa53acc400
[ROCm/clr commit: 5b0bfdcbad]
Этот коммит содержится в:
коммит произвёл
Maneesh Gupta
родитель
3ca0dbc4d7
Коммит
68344576d3
@@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
|
||||
/* Copyright (c) 2008 - 2024 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -18,14 +18,6 @@
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
/*!
|
||||
* \file command.cpp
|
||||
* \brief Definitions for Event, Command and HostQueue objects.
|
||||
*
|
||||
* \author Laurent Morichetti
|
||||
* \date October 2008
|
||||
*/
|
||||
|
||||
#include "platform/activity.hpp"
|
||||
#include "platform/command.hpp"
|
||||
#include "platform/commandqueue.hpp"
|
||||
@@ -326,6 +318,18 @@ Command::Command(HostQueue& queue, cl_command_type type, const EventWaitList& ev
|
||||
}
|
||||
}
|
||||
|
||||
// Definition of the static pool used by Command::operator new and Command::operator delete.
SysmemPool<ComputeCommand> Command::command_pool_;
|
||||
|
||||
// ================================================================================================
|
||||
//! Class-specific deallocation function: hands the storage of a destroyed command back to
//! command_pool_.
//!
//! \param ptr Storage previously returned by Command::operator new, or nullptr.
void Command::operator delete(void* ptr) {
  // A delete-expression applied to a null pointer is allowed to call the deallocation function.
  // There is nothing to give back in that case, so skip the pool entirely.
  if (ptr == nullptr) {
    return;
  }
  command_pool_.Free(ptr);
}
|
||||
|
||||
// ================================================================================================
|
||||
void* Command::operator new(size_t size) {
|
||||
return command_pool_.Alloc(size);
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void Command::releaseResources() {
|
||||
const Command::EventWaitList& events = eventWaitList();
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc.
|
||||
/* Copyright (c) 2010 - 2024 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -18,13 +18,6 @@
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
/*! \file command.hpp
|
||||
* \brief Declarations for Event, Command and HostQueue objects.
|
||||
*
|
||||
* \author Laurent Morichetti
|
||||
* \date October 2008
|
||||
*/
|
||||
|
||||
#ifndef COMMAND_HPP_
|
||||
#define COMMAND_HPP_
|
||||
|
||||
@@ -62,6 +55,7 @@ namespace amd {
|
||||
|
||||
class Command;
|
||||
class HostQueue;
|
||||
union ComputeCommand;
|
||||
|
||||
/*! \brief Encapsulates the status of a command.
|
||||
*
|
||||
@@ -254,6 +248,7 @@ union CopyMetadata {
|
||||
*/
|
||||
class Command : public Event {
|
||||
private:
|
||||
static SysmemPool<ComputeCommand> command_pool_; //!< Pool of active commands
|
||||
HostQueue* queue_; //!< The command queue this command is enqueue into
|
||||
Command* next_; //!< Next GPU command in the queue list
|
||||
Command* batch_head_ = nullptr; //!< The head of the batch commands
|
||||
@@ -297,6 +292,10 @@ class Command : public Event {
|
||||
}
|
||||
|
||||
public:
|
||||
//! Overload new/delete for fast commands allocation/destruction
|
||||
void* operator new(size_t size);
|
||||
void operator delete(void* ptr);
|
||||
|
||||
//! Return the queue this command is enqueued into.
|
||||
HostQueue* queue() const { return queue_; }
|
||||
|
||||
@@ -1787,6 +1786,39 @@ public:
|
||||
//! Read-only accessor: returns the stored ptr_ as a pointer-to-const.
const void* ptr() const { return ptr_; }
|
||||
};
|
||||
|
||||
//! Union used in memory suballocator, must be updated with the new commands
|
||||
union ComputeCommand {
|
||||
ReadMemoryCommand cmd0;
|
||||
WriteMemoryCommand cmd1;
|
||||
FillMemoryCommand cmd2;
|
||||
CopyMemoryCommand cmd3;
|
||||
MapMemoryCommand cmd4;
|
||||
UnmapMemoryCommand cmd5;
|
||||
MigrateMemObjectsCommand cmd6;
|
||||
NDRangeKernelCommand cmd7;
|
||||
NativeFnCommand cmd8;
|
||||
ExternalSemaphoreCmd cmd9;
|
||||
Marker cmd10;
|
||||
AccumulateCommand cmd11;
|
||||
AcquireExtObjectsCommand cmd13;
|
||||
ReleaseExtObjectsCommand cmd14;
|
||||
PerfCounterCommand cmd15;
|
||||
ThreadTraceMemObjectsCommand cmd16;
|
||||
ThreadTraceCommand cmd17;
|
||||
SignalCommand cmd18;
|
||||
MakeBuffersResidentCommand cmd19;
|
||||
SvmFreeMemoryCommand cmd20;
|
||||
SvmCopyMemoryCommand cmd21;
|
||||
SvmFillMemoryCommand cmd22;
|
||||
SvmMapMemoryCommand cmd23;
|
||||
SvmUnmapMemoryCommand cmd24;
|
||||
CopyMemoryP2PCommand cmd25;
|
||||
SvmPrefetchAsyncCommand cmd26;
|
||||
VirtualMapCommand cmd27;
|
||||
ComputeCommand() {}
|
||||
~ComputeCommand() {}
|
||||
};
|
||||
|
||||
/*! @}
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
|
||||
/* Copyright (c) 2008 - 2024 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -21,6 +21,8 @@
|
||||
#ifndef OBJECT_HPP_
|
||||
#define OBJECT_HPP_
|
||||
|
||||
#include <set>
|
||||
|
||||
#include "top.hpp"
|
||||
#include "os/alloc.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
@@ -190,6 +192,80 @@ struct Coord3D {
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
class SysmemPool {
|
||||
public:
|
||||
SysmemPool(): chunk_access_("Sysmem Pool Lock", true) {}
|
||||
~SysmemPool() {
|
||||
// Release current chunk
|
||||
if (chunks_.size() == 1) {
|
||||
auto it = chunks_.begin();
|
||||
auto idx = kAllocChunkSize - (current_alloc_.load() % kAllocChunkSize);
|
||||
// Make sure all allocations were released
|
||||
if (idx == (*it)->free_) {
|
||||
delete [] (*it)->allocs_;
|
||||
delete (*it);
|
||||
chunks_.erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
void* Alloc(size_t size) {
|
||||
guarantee(size <= sizeof(T), "Bigger size than pool allows!");
|
||||
size_t current = current_alloc_++;
|
||||
auto idx = current / kAllocChunkSize;
|
||||
while (idx >= max_chunk_idx_) {
|
||||
ScopedLock lock(chunk_access_);
|
||||
// Second check in a case of multiple waiters
|
||||
if (idx == max_chunk_idx_) {
|
||||
auto allocs = new T[kAllocChunkSize];
|
||||
chunks_.emplace(new AllocChunk(allocs));
|
||||
active_allocs_[idx % kActiveAllocSize] = allocs;
|
||||
max_chunk_idx_++;
|
||||
}
|
||||
}
|
||||
return &active_allocs_[idx % kActiveAllocSize][current % kAllocChunkSize];
|
||||
}
|
||||
|
||||
void Free(void* ptr) {
|
||||
ScopedLock lock(chunk_access_);
|
||||
bool found = false;
|
||||
// Search for the pointer in all valid chunks
|
||||
for (auto it : chunks_) {
|
||||
if (reinterpret_cast<uintptr_t>(ptr) >= reinterpret_cast<uintptr_t>(it->allocs_) &&
|
||||
reinterpret_cast<uintptr_t>(ptr) <
|
||||
(reinterpret_cast<uintptr_t>(it->allocs_) + sizeof(T) * kAllocChunkSize)) {
|
||||
it->free_--;
|
||||
found = true;
|
||||
// Destory current chunk if all allocations are freed
|
||||
if (it->free_ == 0) {
|
||||
delete [] it->allocs_;
|
||||
delete it;
|
||||
chunks_.erase(it);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
guarantee(true, "Mempool releases incorrect memory!\n");
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr size_t kAllocChunkSize = 1024; //!< The total number of allocations in a chunk
|
||||
static constexpr size_t kActiveAllocSize = 32; //!< The number of active chunks
|
||||
struct AllocChunk {
|
||||
T* allocs_; //! Array of allocations
|
||||
uint32_t free_; //! The number of commands still available for usage
|
||||
AllocChunk(T* alloc): allocs_(alloc), free_(kAllocChunkSize) {}
|
||||
};
|
||||
|
||||
std::atomic<uint64_t> current_alloc_ = 0; //!< Current allocation, global index
|
||||
size_t max_chunk_idx_ = 0; //!< Current max chunk index
|
||||
amd::Monitor chunk_access_; //!< Lock for the chunk list access
|
||||
std::set<AllocChunk*> chunks_; //!< List of allocated memory chunks
|
||||
T* active_allocs_[kActiveAllocSize] = {}; //!< Active chunks for fast access
|
||||
};
|
||||
|
||||
} // namespace amd
|
||||
|
||||
template <typename CL> typename amd::as_internal<CL>::type* as_amd(CL* cl_obj) {
|
||||
|
||||
Ссылка в новой задаче
Block a user