diff --git a/projects/clr/rocclr/platform/command.cpp b/projects/clr/rocclr/platform/command.cpp
index 1bf3b40852..670c4980c3 100644
--- a/projects/clr/rocclr/platform/command.cpp
+++ b/projects/clr/rocclr/platform/command.cpp
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
+/* Copyright (c) 2008 - 2024 Advanced Micro Devices, Inc.
 
  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
@@ -18,14 +18,6 @@
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE. */
 
-/*!
- * \file command.cpp
- * \brief Definitions for Event, Command and HostQueue objects.
- *
- * \author Laurent Morichetti
- * \date October 2008
- */
-
 #include "platform/activity.hpp"
 #include "platform/command.hpp"
 #include "platform/commandqueue.hpp"
@@ -326,6 +318,21 @@ Command::Command(HostQueue& queue, cl_command_type type, const EventWaitList& ev
   }
 }
 
+//! Pool shared by all commands; its slots are as big as the ComputeCommand union
+SysmemPool<ComputeCommand> Command::command_pool_;
+
+// ================================================================================================
+//! Returns the memory of a destroyed command back to the pool
+void Command::operator delete(void* ptr) {
+  command_pool_.Free(ptr);
+}
+
+// ================================================================================================
+//! Takes the memory for a new command from the pool
+void* Command::operator new(size_t size) {
+  return command_pool_.Alloc(size);
+}
+
 // ================================================================================================
 void Command::releaseResources() {
   const Command::EventWaitList& events = eventWaitList();
diff --git a/projects/clr/rocclr/platform/command.hpp b/projects/clr/rocclr/platform/command.hpp
index 38cd2af186..06fd4d81cd 100644
--- a/projects/clr/rocclr/platform/command.hpp
+++ b/projects/clr/rocclr/platform/command.hpp
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc.
+/* Copyright (c) 2010 - 2024 Advanced Micro Devices, Inc.
 
  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
@@ -18,13 +18,6 @@
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE. */
 
-/*! \file command.hpp
- * \brief Declarations for Event, Command and HostQueue objects.
- *
- * \author Laurent Morichetti
- * \date October 2008
- */
-
 #ifndef COMMAND_HPP_
 #define COMMAND_HPP_
 
@@ -62,6 +55,7 @@ namespace amd {
 
 class Command;
 class HostQueue;
+union ComputeCommand;
 
 /*! \brief Encapsulates the status of a command.
  *
@@ -254,6 +248,7 @@ union CopyMetadata {
  */
 class Command : public Event {
  private:
+  static SysmemPool<ComputeCommand> command_pool_;  //!< Pool of active commands
   HostQueue* queue_;  //!< The command queue this command is enqueue into
   Command* next_;  //!< Next GPU command in the queue list
   Command* batch_head_ = nullptr;  //!< The head of the batch commands
@@ -297,6 +292,10 @@ class Command : public Event {
   }
 
  public:
+  //! Overload new/delete for fast commands allocation/destruction
+  void* operator new(size_t size);
+  void operator delete(void* ptr);
+
   //! Return the queue this command is enqueued into.
   HostQueue* queue() const { return queue_; }
 
@@ -1787,6 +1786,42 @@ public:
   const void* ptr() const { return ptr_; }
 };
 
+//! Union used in memory suballocator, must be updated with the new commands.
+//! It is the slot type of Command::command_pool_, and the pool rejects any allocation
+//! bigger than the union, so every class created with Command::operator new has to be listed
+union ComputeCommand {
+  ReadMemoryCommand cmd0;
+  WriteMemoryCommand cmd1;
+  FillMemoryCommand cmd2;
+  CopyMemoryCommand cmd3;
+  MapMemoryCommand cmd4;
+  UnmapMemoryCommand cmd5;
+  MigrateMemObjectsCommand cmd6;
+  NDRangeKernelCommand cmd7;
+  NativeFnCommand cmd8;
+  ExternalSemaphoreCmd cmd9;
+  Marker cmd10;
+  AccumulateCommand cmd11;
+  AcquireExtObjectsCommand cmd13;
+  ReleaseExtObjectsCommand cmd14;
+  PerfCounterCommand cmd15;
+  ThreadTraceMemObjectsCommand cmd16;
+  ThreadTraceCommand cmd17;
+  SignalCommand cmd18;
+  MakeBuffersResidentCommand cmd19;
+  SvmFreeMemoryCommand cmd20;
+  SvmCopyMemoryCommand cmd21;
+  SvmFillMemoryCommand cmd22;
+  SvmMapMemoryCommand cmd23;
+  SvmUnmapMemoryCommand cmd24;
+  CopyMemoryP2PCommand cmd25;
+  SvmPrefetchAsyncCommand cmd26;
+  VirtualMapCommand cmd27;
+  //! Empty on purpose: the union provides only the storage and never constructs a member
+  ComputeCommand() {}
+  ~ComputeCommand() {}
+};
+
 /*! @}
  * @}
  */
diff --git a/projects/clr/rocclr/platform/object.hpp b/projects/clr/rocclr/platform/object.hpp
index 6d3e5b0411..73e167c2c7 100644
--- a/projects/clr/rocclr/platform/object.hpp
+++ b/projects/clr/rocclr/platform/object.hpp
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
+/* Copyright (c) 2008 - 2024 Advanced Micro Devices, Inc.
 
  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
@@ -21,6 +21,9 @@
 #ifndef OBJECT_HPP_
 #define OBJECT_HPP_
 
+#include <atomic>
+#include <set>
+
 #include "top.hpp"
 #include "os/alloc.hpp"
 #include "thread/monitor.hpp"
@@ -190,6 +193,96 @@ struct Coord3D {
   }
 };
 
+/*! \brief Pool of fixed-size slots for fast allocation of objects that fit into T
+ *
+ *  Memory is requested from the system in chunks of kAllocChunkSize slots. The slots are handed
+ *  out sequentially and never reused; a chunk is destroyed as soon as all of its slots were
+ *  passed to Free().
+ */
+template <class T>
+class SysmemPool {
+public:
+  SysmemPool(): chunk_access_("Sysmem Pool Lock", true) {}
+
+  //! Destroys the remaining chunk, but only if every allocation made from it was released
+  ~SysmemPool() {
+    // Release current chunk
+    if (chunks_.size() == 1) {
+      auto it = chunks_.begin();
+      auto idx = kAllocChunkSize - (current_alloc_.load() % kAllocChunkSize);
+      // Make sure all allocations were released
+      if (idx == (*it)->free_) {
+        delete [] (*it)->allocs_;
+        delete (*it);
+        chunks_.erase(it);
+      }
+    }
+  }
+
+  //! Returns the storage for a single object; size can't exceed sizeof(T)
+  void* Alloc(size_t size) {
+    guarantee(size <= sizeof(T), "Bigger size than pool allows!");
+    size_t current = current_alloc_++;
+    auto idx = current / kAllocChunkSize;
+    // max_chunk_idx_ is atomic, hence this check is safe without the lock
+    while (idx >= max_chunk_idx_) {
+      ScopedLock lock(chunk_access_);
+      // Second check in a case of multiple waiters. Chunks are created strictly in order,
+      // so a thread that is more than one chunk ahead drops the lock and retries
+      if (idx == max_chunk_idx_) {
+        auto allocs = new T[kAllocChunkSize];
+        chunks_.emplace(new AllocChunk(allocs));
+        active_allocs_[idx % kActiveAllocSize] = allocs;
+        // Publish the new chunk only after active_allocs_ was updated
+        max_chunk_idx_++;
+      }
+    }
+    return &active_allocs_[idx % kActiveAllocSize][current % kAllocChunkSize];
+  }
+
+  //! Releases an allocation returned by Alloc(); a null pointer is ignored
+  void Free(void* ptr) {
+    // A deallocation function can be legally called with a null pointer
+    if (ptr == nullptr) {
+      return;
+    }
+    const auto addr = reinterpret_cast<uintptr_t>(ptr);
+    ScopedLock lock(chunk_access_);
+    // Search for the pointer in all valid chunks
+    for (auto it : chunks_) {
+      const auto begin = reinterpret_cast<uintptr_t>(it->allocs_);
+      if (addr >= begin && addr < (begin + sizeof(T) * kAllocChunkSize)) {
+        it->free_--;
+        // Destroy current chunk if all allocations are freed
+        if (it->free_ == 0) {
+          // Unlink the chunk first, so the set never holds a dangling pointer
+          chunks_.erase(it);
+          delete [] it->allocs_;
+          delete it;
+        }
+        return;
+      }
+    }
+    // The pointer doesn't belong to any chunk of this pool
+    guarantee(false, "Mempool releases incorrect memory!\n");
+  }
+
+private:
+  static constexpr size_t kAllocChunkSize = 1024;   //!< The total number of allocations in a chunk
+  static constexpr size_t kActiveAllocSize = 32;    //!< The number of active chunks
+  struct AllocChunk {
+    T* allocs_;       //!< Array of allocations
+    uint32_t free_;   //!< The number of allocations not yet released with Free()
+    explicit AllocChunk(T* alloc): allocs_(alloc), free_(kAllocChunkSize) {}
+  };
+
+  std::atomic<size_t> current_alloc_{0};    //!< Current allocation, global index
+  std::atomic<size_t> max_chunk_idx_{0};    //!< Current max chunk index, read without the lock
+  amd::Monitor chunk_access_;               //!< Lock for the chunk list access
+  std::set<AllocChunk*> chunks_;            //!< List of allocated memory chunks
+  T* active_allocs_[kActiveAllocSize] = {}; //!< Active chunks for fast access
+};
+
 } // namespace amd
 
 template <typename CL> typename amd::as_internal<CL>::type* as_amd(CL* cl_obj) {