clr: Update signal count and pool size for staging buffer (#2889)
* clr: Update signal count and pool size for staging buffer * Change to naming of variables etc --------- Co-authored-by: Rahul Manocha <rmanocha@amd.com>
This commit is contained in:
gecommit door
GitHub
bovenliggende
58c203e252
commit
c4f7593001
@@ -1730,8 +1730,8 @@ VirtualGPU::VirtualGPU(Device& device, bool profiling, bool cooperative,
|
||||
schedulerThreads_(0),
|
||||
schedulerQueue_(nullptr),
|
||||
barriers_(*this),
|
||||
managed_buffer_(*this, ManagedBuffer::kPoolNumSignals * device.settings().stagedXferSize_),
|
||||
managed_kernarg_buffer_(*this, device.settings().kernargPoolSize_),
|
||||
managed_buffer_(*this, kStagingPoolNumSignals * device.settings().stagedXferSize_, kStagingPoolNumSignals),
|
||||
managed_kernarg_buffer_(*this, device.settings().kernargPoolSize_, kKernArgPoolNumSignals),
|
||||
cuMask_(cuMask),
|
||||
priority_(priority),
|
||||
copy_command_type_(0),
|
||||
@@ -1912,7 +1912,7 @@ VirtualGPU::ManagedBuffer::~ManagedBuffer() {
|
||||
|
||||
// ================================================================================================
|
||||
bool VirtualGPU::ManagedBuffer::Create(Device::MemorySegment mem_segment) {
|
||||
pool_chunk_end_ = pool_size_ / kPoolNumSignals;
|
||||
pool_chunk_end_ = pool_size_ / num_chunk_signals_;
|
||||
active_chunk_ = 0;
|
||||
// Allocate memory for managed buffer
|
||||
if (mem_segment == Device::MemorySegment::kKernArg &&
|
||||
@@ -1965,14 +1965,14 @@ address VirtualGPU::ManagedBuffer::Acquire(uint32_t size, uint32_t alignment) {
|
||||
// Dispatch a barrier packet into the queue
|
||||
gpu_.dispatchBarrierPacket(kBarrierPacketHeader, kSkipSignal, pool_signal_[active_chunk_]);
|
||||
// Get the next chunk
|
||||
active_chunk_ = ++active_chunk_ % kPoolNumSignals;
|
||||
active_chunk_ = ++active_chunk_ % num_chunk_signals_;
|
||||
// Make sure the new active chunk is free
|
||||
bool test = WaitForSignal(pool_signal_[active_chunk_], gpu_.ActiveWait());
|
||||
assert(test && "Runtime can't fail a wait for chunk!");
|
||||
// Make sure the current offset matches the new chunk to avoid possible overlaps
|
||||
// between chunks and issues during recycle
|
||||
pool_cur_offset_ = (active_chunk_ == 0) ? 0 : pool_chunk_end_;
|
||||
pool_chunk_end_ = pool_cur_offset_ + pool_size_ / kPoolNumSignals;
|
||||
pool_chunk_end_ = pool_cur_offset_ + pool_size_ / num_chunk_signals_;
|
||||
result = amd::alignUp(pool_base_ + pool_cur_offset_, alignment);
|
||||
pool_cur_offset_ = (result + size) - pool_base_;
|
||||
}
|
||||
@@ -1983,7 +1983,7 @@ address VirtualGPU::ManagedBuffer::Acquire(uint32_t size, uint32_t alignment) {
|
||||
// ================================================================================================
|
||||
void VirtualGPU::ManagedBuffer::ResetPool() {
|
||||
pool_cur_offset_ = 0;
|
||||
pool_chunk_end_ = pool_size_ / kPoolNumSignals;
|
||||
pool_chunk_end_ = pool_size_ / num_chunk_signals_;
|
||||
active_chunk_ = 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -203,9 +203,9 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
class ManagedBuffer : public amd::EmbeddedObject {
|
||||
public:
|
||||
//! The number of chunks the arg pool will be divided
|
||||
static constexpr uint32_t kPoolNumSignals = 16;
|
||||
ManagedBuffer(VirtualGPU& gpu, uint32_t pool_size)
|
||||
: gpu_(gpu), pool_size_(pool_size), pool_signal_(kPoolNumSignals) {}
|
||||
ManagedBuffer(VirtualGPU& gpu, uint32_t pool_size, uint32_t num_signals)
|
||||
: gpu_(gpu), pool_size_(pool_size), pool_signal_(num_signals),
|
||||
num_chunk_signals_(num_signals) {}
|
||||
~ManagedBuffer();
|
||||
|
||||
//! Allocates all necessary resources to manage memory
|
||||
@@ -228,6 +228,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
uint32_t active_chunk_ = 0; //!< The index of the current active chunk
|
||||
uint32_t pool_cur_offset_ = 0; //!< Current active offset for update
|
||||
std::vector<hsa_signal_t> pool_signal_; //!< Pool of HSA signals to manage multiple chunks
|
||||
uint32_t num_chunk_signals_; //!< Number of signals used per chunk
|
||||
};
|
||||
class MemoryDependency : public amd::EmbeddedObject {
|
||||
public:
|
||||
@@ -622,6 +623,9 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
ManagedBuffer managed_buffer_; //!< Memory manager for staging copies
|
||||
ManagedBuffer managed_kernarg_buffer_; //!< Managed memory for kernel args
|
||||
|
||||
static constexpr uint32_t kStagingPoolNumSignals = 4; //!< Hsa Signal count for Staging Buffer
|
||||
static constexpr uint32_t kKernArgPoolNumSignals = 16; //!< Hsa Signal count for KernArg Buffer
|
||||
|
||||
friend class Timestamp;
|
||||
|
||||
// PM4 packet for gfx8 performance counter
|
||||
|
||||
Verwijs in nieuw issue
Block a user