clr: Update signal count and pool size for staging buffer (#2889)
* clr: Update signal count and pool size for staging buffer * Change naming of variables, etc. --------- Co-authored-by: Rahul Manocha <rmanocha@amd.com>
Cette révision appartient à :
@@ -1730,8 +1730,8 @@ VirtualGPU::VirtualGPU(Device& device, bool profiling, bool cooperative,
|
|||||||
schedulerThreads_(0),
|
schedulerThreads_(0),
|
||||||
schedulerQueue_(nullptr),
|
schedulerQueue_(nullptr),
|
||||||
barriers_(*this),
|
barriers_(*this),
|
||||||
managed_buffer_(*this, ManagedBuffer::kPoolNumSignals * device.settings().stagedXferSize_),
|
managed_buffer_(*this, kStagingPoolNumSignals * device.settings().stagedXferSize_, kStagingPoolNumSignals),
|
||||||
managed_kernarg_buffer_(*this, device.settings().kernargPoolSize_),
|
managed_kernarg_buffer_(*this, device.settings().kernargPoolSize_, kKernArgPoolNumSignals),
|
||||||
cuMask_(cuMask),
|
cuMask_(cuMask),
|
||||||
priority_(priority),
|
priority_(priority),
|
||||||
copy_command_type_(0),
|
copy_command_type_(0),
|
||||||
@@ -1912,7 +1912,7 @@ VirtualGPU::ManagedBuffer::~ManagedBuffer() {
|
|||||||
|
|
||||||
// ================================================================================================
|
// ================================================================================================
|
||||||
bool VirtualGPU::ManagedBuffer::Create(Device::MemorySegment mem_segment) {
|
bool VirtualGPU::ManagedBuffer::Create(Device::MemorySegment mem_segment) {
|
||||||
pool_chunk_end_ = pool_size_ / kPoolNumSignals;
|
pool_chunk_end_ = pool_size_ / num_chunk_signals_;
|
||||||
active_chunk_ = 0;
|
active_chunk_ = 0;
|
||||||
// Allocate memory for managed buffer
|
// Allocate memory for managed buffer
|
||||||
if (mem_segment == Device::MemorySegment::kKernArg &&
|
if (mem_segment == Device::MemorySegment::kKernArg &&
|
||||||
@@ -1965,14 +1965,14 @@ address VirtualGPU::ManagedBuffer::Acquire(uint32_t size, uint32_t alignment) {
|
|||||||
// Dispatch a barrier packet into the queue
|
// Dispatch a barrier packet into the queue
|
||||||
gpu_.dispatchBarrierPacket(kBarrierPacketHeader, kSkipSignal, pool_signal_[active_chunk_]);
|
gpu_.dispatchBarrierPacket(kBarrierPacketHeader, kSkipSignal, pool_signal_[active_chunk_]);
|
||||||
// Get the next chunk
|
// Get the next chunk
|
||||||
active_chunk_ = ++active_chunk_ % kPoolNumSignals;
|
active_chunk_ = ++active_chunk_ % num_chunk_signals_;
|
||||||
// Make sure the new active chunk is free
|
// Make sure the new active chunk is free
|
||||||
bool test = WaitForSignal(pool_signal_[active_chunk_], gpu_.ActiveWait());
|
bool test = WaitForSignal(pool_signal_[active_chunk_], gpu_.ActiveWait());
|
||||||
assert(test && "Runtime can't fail a wait for chunk!");
|
assert(test && "Runtime can't fail a wait for chunk!");
|
||||||
// Make sure the current offset matches the new chunk to avoid possible overlaps
|
// Make sure the current offset matches the new chunk to avoid possible overlaps
|
||||||
// between chunks and issues during recycle
|
// between chunks and issues during recycle
|
||||||
pool_cur_offset_ = (active_chunk_ == 0) ? 0 : pool_chunk_end_;
|
pool_cur_offset_ = (active_chunk_ == 0) ? 0 : pool_chunk_end_;
|
||||||
pool_chunk_end_ = pool_cur_offset_ + pool_size_ / kPoolNumSignals;
|
pool_chunk_end_ = pool_cur_offset_ + pool_size_ / num_chunk_signals_;
|
||||||
result = amd::alignUp(pool_base_ + pool_cur_offset_, alignment);
|
result = amd::alignUp(pool_base_ + pool_cur_offset_, alignment);
|
||||||
pool_cur_offset_ = (result + size) - pool_base_;
|
pool_cur_offset_ = (result + size) - pool_base_;
|
||||||
}
|
}
|
||||||
@@ -1983,7 +1983,7 @@ address VirtualGPU::ManagedBuffer::Acquire(uint32_t size, uint32_t alignment) {
|
|||||||
// ================================================================================================
|
// ================================================================================================
|
||||||
void VirtualGPU::ManagedBuffer::ResetPool() {
|
void VirtualGPU::ManagedBuffer::ResetPool() {
|
||||||
pool_cur_offset_ = 0;
|
pool_cur_offset_ = 0;
|
||||||
pool_chunk_end_ = pool_size_ / kPoolNumSignals;
|
pool_chunk_end_ = pool_size_ / num_chunk_signals_;
|
||||||
active_chunk_ = 0;
|
active_chunk_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -203,9 +203,9 @@ class VirtualGPU : public device::VirtualDevice {
|
|||||||
class ManagedBuffer : public amd::EmbeddedObject {
|
class ManagedBuffer : public amd::EmbeddedObject {
|
||||||
public:
|
public:
|
||||||
//! The number of chunks the arg pool will be divided
|
//! Constructs a managed buffer of the given pool size, divided into num_signals chunks
|
||||||
static constexpr uint32_t kPoolNumSignals = 16;
|
ManagedBuffer(VirtualGPU& gpu, uint32_t pool_size, uint32_t num_signals)
|
||||||
ManagedBuffer(VirtualGPU& gpu, uint32_t pool_size)
|
: gpu_(gpu), pool_size_(pool_size), pool_signal_(num_signals),
|
||||||
: gpu_(gpu), pool_size_(pool_size), pool_signal_(kPoolNumSignals) {}
|
num_chunk_signals_(num_signals) {}
|
||||||
~ManagedBuffer();
|
~ManagedBuffer();
|
||||||
|
|
||||||
//! Allocates all necessary resources to manage memory
|
//! Allocates all necessary resources to manage memory
|
||||||
@@ -228,6 +228,7 @@ class VirtualGPU : public device::VirtualDevice {
|
|||||||
uint32_t active_chunk_ = 0; //!< The index of the current active chunk
|
uint32_t active_chunk_ = 0; //!< The index of the current active chunk
|
||||||
uint32_t pool_cur_offset_ = 0; //!< Current active offset for update
|
uint32_t pool_cur_offset_ = 0; //!< Current active offset for update
|
||||||
std::vector<hsa_signal_t> pool_signal_; //!< Pool of HSA signals to manage multiple chunks
|
std::vector<hsa_signal_t> pool_signal_; //!< Pool of HSA signals to manage multiple chunks
|
||||||
|
uint32_t num_chunk_signals_; //!< Number of chunks the pool is divided into (one signal per chunk)
|
||||||
};
|
};
|
||||||
class MemoryDependency : public amd::EmbeddedObject {
|
class MemoryDependency : public amd::EmbeddedObject {
|
||||||
public:
|
public:
|
||||||
@@ -622,6 +623,9 @@ class VirtualGPU : public device::VirtualDevice {
|
|||||||
ManagedBuffer managed_buffer_; //!< Memory manager for staging copies
|
ManagedBuffer managed_buffer_; //!< Memory manager for staging copies
|
||||||
ManagedBuffer managed_kernarg_buffer_; //!< Managed memory for kernel args
|
ManagedBuffer managed_kernarg_buffer_; //!< Managed memory for kernel args
|
||||||
|
|
||||||
|
static constexpr uint32_t kStagingPoolNumSignals = 4; //!< Hsa Signal count for Staging Buffer
|
||||||
|
static constexpr uint32_t kKernArgPoolNumSignals = 16; //!< Hsa Signal count for KernArg Buffer
|
||||||
|
|
||||||
friend class Timestamp;
|
friend class Timestamp;
|
||||||
|
|
||||||
// PM4 packet for gfx8 performance counter
|
// PM4 packet for gfx8 performance counter
|
||||||
|
|||||||
Référencer dans un nouveau ticket
Bloquer un utilisateur