Files
rocm-systems/rocclr/runtime/device/pal/palconstbuf.cpp
T
foreman 79ba5904dc P4 to Git Change 1546657 by gandryey@gera-w8 on 2018/04/26 10:59:34
SWDEV-151739 - [CQE OCL][DTB][Perf][QR][DTB-BLOCKER][VEGA10] Upto 18% performance drop observed while running Video Composition test sub test of Compubench due to faulty CL#1544622
	- Implement customized TS tracking for managed buffers. The common TS tracking mechanism saves the event of the last command, assuming SDMA and compute operations occur in order, but for managed buffers it's not the case. Also managed buffer doesn't have to validate TS for the parent resource.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#22 edit
2018-04-26 11:13:29 -04:00

163 строки
5.8 KiB
C++

//
// Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
//
#include "device/pal/palconstbuf.hpp"
#include "device/pal/palvirtual.hpp"
#include "device/pal/paldevice.hpp"
#include "device/pal/palsettings.hpp"
namespace pal {
// ================================================================================================
ManagedBuffer::ManagedBuffer(VirtualGPU& gpu, uint32_t size)
: gpu_(gpu)
, pool_(MaxNumberOfBuffers)
, activeBuffer_(0)
, size_(size)
, wrtOffset_(0)
, wrtAddress_(nullptr) {}
// ================================================================================================
void ManagedBuffer::release() {
for (auto it : pool_) {
if ((it.buf != nullptr) && (it.buf->data() != nullptr)) {
it.buf->unmap(&gpu_);
}
delete it.buf;
}
}
// ================================================================================================
bool ManagedBuffer::create(Resource::MemoryType type) {
for (uint i = 0; i < pool_.size(); ++i) {
pool_[i].buf = new Memory(const_cast<pal::Device&>(gpu_.dev()), size_);
if (nullptr == pool_[i].buf || !pool_[i].buf->create(type)) {
LogPrintfError("We couldn't create HW constant buffer, size(%d)!", size_);
return false;
}
// Assign virtual gpu to the allocation. Buffer will be used only on a particular queue
pool_[i].buf->memRef()->gpu_ = &gpu_;
void* wrtAddress = pool_[i].buf->map(&gpu_);
if (wrtAddress == nullptr) {
LogPrintfError("We couldn't map HW constant buffer, size(%d)!", size_);
return false;
}
// Make sure OCL touches every buffer in the queue to avoid delays on the first submit
uint dummy = 0;
static constexpr bool Wait = true;
// Write 0 for the buffer paging by VidMM
pool_[i].buf->writeRawData(gpu_, 0, sizeof(dummy), &dummy, Wait);
}
wrtAddress_ = pool_[activeBuffer_].buf->data();
return true;
}
// ================================================================================================
address ManagedBuffer::reserve(uint32_t size, uint64_t* gpu_address) {
// Align to the maximum data size available in OpenCL
static constexpr uint32_t MemAlignment = sizeof(cl_double16);
// Align reserve size on the vector's boundary
uint32_t count = amd::alignUp(size, MemAlignment);
// Save previous event
pinGpuEvent();
// Check if buffer has enough space for reservation
if ((wrtOffset_ + count) > size_) {
// Get the next buffer in the list
++activeBuffer_;
activeBuffer_ %= MaxNumberOfBuffers;
// Make sure the buffer isn't busy
gpu().waitForEvent(&pool_[activeBuffer_].events[SdmaEngine]);
gpu().waitForEvent(&pool_[activeBuffer_].events[MainEngine]);
wrtAddress_ = pool_[activeBuffer_].buf->data();
wrtOffset_ = 0;
}
*gpu_address = pool_[activeBuffer_].buf->vmAddress() + wrtOffset_;
address cpu_address = wrtAddress_ + wrtOffset_;
// Adjust the offset by the reserved size
wrtOffset_ += count;
return cpu_address;
}
// ================================================================================================
void ManagedBuffer::pinGpuEvent() {
GpuEvent* event = activeMemory()->getGpuEvent(gpu());
pool_[activeBuffer_].events[event->engineId_] = *event;
activeMemory()->setBusy(gpu(), GpuEvent::InvalidID);
}
// ================================================================================================
ConstantBuffer::ConstantBuffer(ManagedBuffer& mbuf, uint32_t size)
: mbuf_(mbuf)
, sys_mem_copy_(nullptr)
, size_(size)
{}
// ================================================================================================
ConstantBuffer::~ConstantBuffer() {
amd::AlignedMemory::deallocate(sys_mem_copy_);
}
// ================================================================================================
bool ConstantBuffer::Create() {
// Create sysmem copy for the constant buffer.
sys_mem_copy_ = reinterpret_cast<address>(amd::AlignedMemory::allocate(size_, 256));
if (sys_mem_copy_ == nullptr) {
LogPrintfError("We couldn't allocate sysmem copy for constant buffer, size(%d)!", size_);
return false;
}
memset(sys_mem_copy_, 0, size_);
return true;
}
// ================================================================================================
uint64_t ConstantBuffer::UploadDataToHw(uint32_t size) const {
uint64_t vm_address;
address cpu_address = mbuf_.reserve(size, &vm_address);
// Update memory with new CB data
memcpy(cpu_address, sys_mem_copy_, size);
return vm_address;
}
// ================================================================================================
uint64_t ConstantBuffer::UploadDataToHw(const void* sysmem, uint32_t size) const {
uint64_t vm_address;
address cpu_address = mbuf_.reserve(size, &vm_address);
// Update memory with new CB data
memcpy(cpu_address, sysmem, size);
return vm_address;
}
// ================================================================================================
XferBuffer::XferBuffer(const Device& device, ManagedBuffer& mbuf, uint32_t size)
: buffer_view_(device, size)
, mbuf_(mbuf)
, size_(size) {
// Create a view for access
Resource::ViewParams params = {};
params.gpu_ = &mbuf_.gpu();
params.offset_ = 0;
params.size_ = size_;
params.resource_ = mbuf_.activeMemory();
bool result = buffer_view_.create(Resource::View, &params);
assert(result && "View creaiton should never return an error!");
}
// ================================================================================================
Memory& XferBuffer::Acquire(uint32_t size) {
uint64_t vm_address;
// Reserve space in the managed buffer
address cpu_address = mbuf_.reserve(size, &vm_address);
// Update a view for access
buffer_view_.updateView(mbuf_.activeMemory(), vm_address - mbuf_.vmAddress(), size);
return buffer_view_;
}
} // namespace pal