1cb5415538
SWDEV-76870 - Add write feature to AMD extension (Merging //depot/rel/r6/15.30.1023/stream/opencl/... to //depot/stg/opencl/drivers/opencl/...) Add clEnqueueReadBufferToFileAMD function to AMD extension (added declaration in cl_context.cpp) 1. Added clEnqueueReadBufferToFileAMD_fn function declaration to cl_ext.h 2. Added LiquidFlashFile::writeBlock method, which implements transfer from GPU to SSD. In order to avoid code duplication, LiquidFlashFile::readBlock and LiquidFlashFile::writeBlock are called from the new method LiquidFlashFile::transferBlock (changes in cl_lqdflash_amd.cpp) 3. clEnqueueReadBufferToFileAMD and clEnqueueWriteBufferFromFileAMD call the internal function EnqueueTransferBufferFromFileAMD, which makes the same preparations as clEnqueueWriteBufferFromFileAMD in the previous release 4. WriteBufferFromFileCommand class is renamed to TransferBufferFromFileCommand and the new class makes the same preparation except assigning transfer direction (read or write) Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#48 integrate ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.h#4 integrate ... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_ext.h#24 integrate ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#267 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#396 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#138 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#74 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#81 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#96 edit
644 linhas
19 KiB
C++
644 linhas
19 KiB
C++
//
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
/*!
|
|
* \file command.cpp
|
|
* \brief Definitions for Event, Command and HostQueue objects.
|
|
*
|
|
* \author Laurent Morichetti (laurent.morichetti@amd.com)
|
|
* \date October 2008
|
|
*/
|
|
|
|
#include "platform/command.hpp"
|
|
#include "platform/commandqueue.hpp"
|
|
#include "device/device.hpp"
|
|
#include "platform/context.hpp"
|
|
#include "platform/kernel.hpp"
|
|
#include "thread/monitor.hpp"
|
|
#include "platform/memory.hpp"
|
|
#include "platform/agent.hpp"
|
|
#include "os/alloc.hpp"
|
|
|
|
#include <cstring>
|
|
#include <algorithm>
|
|
|
|
namespace amd {
|
|
|
|
// Builds an event associated with a host queue.
// The status starts at CL_INT_MAX and the callback list is empty.
// profilingInfo_ is constructed from a flag that is true when the queue has
// CL_QUEUE_PROFILING_ENABLE set or the agent wants event notifications.
Event::Event(HostQueue& queue)
    : callbacks_(NULL),
      status_(CL_INT_MAX),
      profilingInfo_(queue.properties().test(CL_QUEUE_PROFILING_ENABLE) ||
                     Agent::shouldPostEventEvents())
{
    // Start with the "command queue notified" flag cleared.
    notified_.clear();
}
|
|
|
|
// Builds an event that is not tied to a queue.
// The status starts at CL_SUBMITTED and the callback list is empty;
// profilingInfo_ is left to its default construction.
Event::Event()
    : callbacks_(NULL),
      status_(CL_SUBMITTED)
{
    // Start with the "command queue notified" flag cleared.
    notified_.clear();
}
|
|
|
|
// Destroys the event and frees every node of the callback linked list.
Event::~Event()
{
    for (CallBackEntry* node = callbacks_; node != NULL; ) {
        // Read the link before the node is destroyed.
        CallBackEntry* following = node->next_;
        delete node;
        node = following;
    }
}
|
|
|
|
// Stores a time stamp in the profiling slot that matches 'status'.
//
// \param status    Execution status being recorded. CL_QUEUED, CL_SUBMITTED
//                  and CL_RUNNING select their own slots; every other value
//                  is recorded as the end time.
// \param timeStamp Time to record; 0 means "use Os::timeNanos()".
// \return The time stamp that was actually recorded.
uint64_t
Event::recordProfilingInfo(cl_int status, uint64_t timeStamp)
{
    if (timeStamp == 0) {
        timeStamp = Os::timeNanos();
    }

    if (status == CL_QUEUED) {
        profilingInfo_.queued_ = timeStamp;
    }
    else if (status == CL_SUBMITTED) {
        profilingInfo_.submitted_ = timeStamp;
    }
    else if (status == CL_RUNNING) {
        profilingInfo_.start_ = timeStamp;
    }
    else {
        profilingInfo_.end_ = timeStamp;
        // Hand the elapsed time (end - start) to the profiling callback,
        // when one is installed.
        if (profilingInfo_.callback_ != NULL) {
            profilingInfo_.callback_->callback(
                timeStamp - profilingInfo_.start_);
        }
    }

    return timeStamp;
}
|
|
|
|
bool
|
|
Event::setStatus(cl_int status, uint64_t timeStamp)
|
|
{
|
|
assert(status <= CL_QUEUED && "invalid status");
|
|
|
|
cl_int currentStatus = status_;
|
|
if (currentStatus <= CL_COMPLETE || currentStatus <= status) {
|
|
// We can only move forward in the execution status.
|
|
return false;
|
|
}
|
|
|
|
if (profilingInfo().enabled_) {
|
|
timeStamp = recordProfilingInfo(status, timeStamp);
|
|
}
|
|
|
|
if (!make_atomic(status_).compareAndSet(currentStatus, status)) {
|
|
// Somebody else beat us to it, let them deal with the release/signal.
|
|
return false;
|
|
}
|
|
|
|
if (callbacks_ != (CallBackEntry*)0) {
|
|
processCallbacks(status);
|
|
}
|
|
|
|
if (Agent::shouldPostEventEvents() && command().type() != 0) {
|
|
Agent::postEventStatusChanged(
|
|
as_cl(this), status, timeStamp + Os::offsetToEpochNanos());
|
|
}
|
|
|
|
if (status <= CL_COMPLETE) {
|
|
// Before we notify the waiters that this event reached the CL_COMPLETE
|
|
// status, we release all the resources associated with this instance.
|
|
releaseResources();
|
|
|
|
// Broadcast all the waiters.
|
|
if (referenceCount() > 1) {
|
|
signal();
|
|
}
|
|
release();
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
bool
|
|
Event::setCallback(cl_int status, Event::CallBackFunction callback, void* data)
|
|
{
|
|
assert(status >= CL_COMPLETE && status <= CL_QUEUED && "invalid status");
|
|
|
|
CallBackEntry* entry = new CallBackEntry(status, callback, data);
|
|
if (entry == NULL) {
|
|
return false;
|
|
}
|
|
|
|
entry->next_ = callbacks_;
|
|
while (!callbacks_.compare_exchange_weak(entry->next_, entry))
|
|
; // Someone else is also updating the head of the linked list! reload.
|
|
|
|
// Check if the event has already reached 'status'
|
|
if (status_ <= status && entry->callback_ != CallBackFunction(0)) {
|
|
if (entry->callback_.exchange(NULL) != NULL) {
|
|
callback(as_cl(this), status, entry->data_);
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
void
|
|
Event::processCallbacks(cl_int status) const
|
|
{
|
|
cl_event event = const_cast<cl_event>(as_cl(this));
|
|
const cl_int mask = (status > CL_COMPLETE) ? status : CL_COMPLETE;
|
|
|
|
// For_each callback:
|
|
CallBackEntry* entry;
|
|
for (entry = callbacks_; entry != NULL; entry = entry->next_) {
|
|
// If the entry's status matches the mask,
|
|
if (entry->status_ == mask && entry->callback_ != CallBackFunction(0)) {
|
|
// invoke the callback function.
|
|
CallBackFunction callback = entry->callback_.exchange(NULL);
|
|
if (callback != NULL) {
|
|
callback(event, status, entry->data_);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool
|
|
Event::awaitCompletion()
|
|
{
|
|
if (status_ > CL_COMPLETE) {
|
|
// Notifies current command queue about waiting
|
|
if (!notifyCmdQueue()) {
|
|
return false;
|
|
}
|
|
|
|
ScopedLock lock(lock_);
|
|
|
|
// Wait until the status becomes CL_COMPLETE or negative.
|
|
while (status_ > CL_COMPLETE) {
|
|
lock_.wait();
|
|
}
|
|
}
|
|
|
|
return status_ == CL_COMPLETE;
|
|
}
|
|
|
|
bool
|
|
Event::notifyCmdQueue()
|
|
{
|
|
HostQueue* queue = command().queue();
|
|
if ((NULL != queue) && !notified_.test_and_set()) {
|
|
// Make sure the queue is draining the enqueued commands.
|
|
amd::Command* command = new amd::Marker(*queue, false, nullWaitList, this);
|
|
if (command == NULL) {
|
|
notified_.clear();
|
|
return false;
|
|
}
|
|
command->enqueue();
|
|
command->release();
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Shared wait list passed where a command has no dependencies (used by
// notifyCmdQueue() when it creates a Marker). Constructed with the argument 0
// -- presumably "zero elements"; confirm against EventWaitList's definition.
const Event::EventWaitList Event::nullWaitList(0);
|
|
|
|
// Creates a command of the given type for 'queue'.
//
// \param queue         Host queue the command will be enqueued on.
// \param type          OpenCL command type.
// \param eventWaitList Events this command depends on; the list is copied
//                      into eventWaitList_ and every entry is retained here
//                      (the matching release is in releaseResources()).
Command::Command(
    HostQueue& queue,
    cl_command_type type,
    const EventWaitList& eventWaitList) :
    Event(queue), queue_(&queue), next_(NULL), type_(type),
    exception_(0), data_(NULL), eventWaitList_(eventWaitList)
{
    // Retain the commands from the event wait list.
    // A plain range-for replaces std::mem_fun, which is deprecated since
    // C++11 and removed in C++17.
    for (auto* event : eventWaitList) {
        event->retain();
    }
}
|
|
|
|
void
|
|
Command::releaseResources()
|
|
{
|
|
const Command::EventWaitList& events = eventWaitList();
|
|
|
|
// Release the commands from the event wait list.
|
|
std::for_each(
|
|
events.begin(),
|
|
events.end(),
|
|
std::mem_fun(&Command::release));
|
|
}
|
|
|
|
void
|
|
Command::enqueue()
|
|
{
|
|
assert(queue_ != NULL && "Cannot be enqueued");
|
|
|
|
if (Agent::shouldPostEventEvents() && type_ != 0) {
|
|
Agent::postEventCreate(as_cl(static_cast<Event*>(this)), type_);
|
|
}
|
|
|
|
queue_->append(*this);
|
|
queue_->flush();
|
|
if (queue_->device().settings().waitCommand_ && (type_ != 0)) {
|
|
awaitCompletion();
|
|
}
|
|
}
|
|
|
|
// Returns the context of the queue this command belongs to.
// queue_ is dereferenced without a check, so the command must have a queue.
const Context&
Command::context() const
{
    const Context& owner = queue_->context();
    return owner;
}
|
|
|
|
// Creates an ND-range kernel launch command.
//
// \param queue         Queue the kernel will be launched on.
// \param eventWaitList Events the launch depends on.
// \param kernel        Kernel to execute; retained for the command's life.
// \param sizes         ND-range dimensions of the launch.
//
// The current kernel arguments are captured for the queue's device, and the
// device kernel's profiling callback for the queue's virtual device is
// installed in profilingInfo_.
NDRangeKernelCommand::NDRangeKernelCommand(
    HostQueue& queue,
    const EventWaitList& eventWaitList,
    Kernel& kernel,
    const NDRangeContainer& sizes)
    : Command(queue, CL_COMMAND_NDRANGE_KERNEL, eventWaitList),
      kernel_(kernel),
      sizes_(sizes)
{
    // Snapshot of the kernel arguments at enqueue time.
    parameters_ = kernel.parameters().capture(queue.device());

    auto& dev = queue.device();
    auto deviceKernel =
        const_cast<device::Kernel*>(kernel.getDeviceKernel(dev));
    profilingInfo_.setCallback(
        deviceKernel->getProfilingCallback(queue.vdev()));

    fixme_guarantee(parameters_ != NULL && "out of memory");

    kernel_.retain();
}
|
|
|
|
// Gives back the captured kernel arguments, drops the reference on the
// kernel taken by the constructor, then runs the base-class release.
void NDRangeKernelCommand::releaseResources()
{
    kernel_.parameters().release(parameters_, queue()->device());
    // Debug builds only: clear the pointer that was just released.
    DEBUG_ONLY(parameters_ = NULL);

    kernel_.release();

    Command::releaseResources();
}
|
|
|
|
// Creates a native (host function) kernel command.
//
// \param queue         Queue the command is enqueued on.
// \param eventWaitList Events the command depends on.
// \param nativeFn      Host function to call.
// \param args          Argument block; copied into a private buffer.
// \param argsSize      Size of the argument block in bytes.
// \param numMemObjs    Number of entries in memObjs / memLocs.
// \param memObjs       Memory objects referenced by the argument block.
// \param memLocs       For each memory object, the address inside 'args'
//                      where its pointer has to be stored at invoke() time.
NativeFnCommand::NativeFnCommand(
    HostQueue& queue, const EventWaitList& eventWaitList,
    void (CL_CALLBACK *nativeFn)(void*), const void* args, size_t argsSize,
    size_t numMemObjs, const cl_mem* memObjs, const void** memLocs)
    : Command(queue, CL_COMMAND_NATIVE_KERNEL, eventWaitList),
      nativeFn_(nativeFn),
      argsSize_(argsSize)
{
    args_ = new char[argsSize_];
    if (args_ == NULL) {
        return;
    }
    ::memcpy(args_, args, argsSize_);

    memObjects_.resize(numMemObjs);
    memOffsets_.resize(numMemObjs);

    for (size_t idx = 0; idx < numMemObjs; ++idx) {
        Memory* memory = as_amd(memObjs[idx]);
        memory->retain();

        memObjects_[idx] = memory;
        // Remember where inside the argument block the pointer lives,
        // expressed as a byte offset from the start of 'args'.
        memOffsets_[idx] =
            (const_address) memLocs[idx] - (const_address) args;
    }
}
|
|
|
|
// Patches the host pointers of all memory objects into the private argument
// block and calls the native function with it.
//
// \return CL_MEM_OBJECT_ALLOCATION_FAILURE if any memory object has no host
//         memory (the native function is then not called), CL_SUCCESS
//         otherwise.
cl_int
NativeFnCommand::invoke()
{
    size_t numMemObjs = memObjects_.size();
    for (size_t i = 0; i < numMemObjs; ++i) {
        void* hostMemPtr = memObjects_[i]->getHostMem();
        if (hostMemPtr == NULL) {
            return CL_MEM_OBJECT_ALLOCATION_FAILURE;
        }
        // The offset comes from the caller's argument layout and is not
        // guaranteed to be pointer-aligned, so copy the pointer bytes
        // instead of storing through a reinterpret_cast'ed void**.
        ::memcpy(&args_[memOffsets_[i]], &hostMemPtr, sizeof(hostMemPtr));
    }
    nativeFn_(args_);
    return CL_SUCCESS;
}
|
|
|
|
bool
|
|
OneMemoryArgCommand::validateMemory()
|
|
{
|
|
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
|
device::Memory* mem = memory_->getDeviceMemory(queue()->device());
|
|
if (NULL == mem) {
|
|
LogPrintfError("Can't allocate memory size - 0x%08X bytes!",
|
|
memory_->getSize());
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
TwoMemoryArgsCommand::validateMemory()
|
|
{
|
|
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
|
device::Memory* mem = memory1_->getDeviceMemory(queue()->device());
|
|
if (NULL == mem) {
|
|
LogPrintfError("Can't allocate memory size - 0x%08X bytes!",
|
|
memory1_->getSize());
|
|
return false;
|
|
}
|
|
mem = memory2_->getDeviceMemory(queue()->device());
|
|
if (NULL == mem) {
|
|
LogPrintfError("Can't allocate memory size - 0x%08X bytes!",
|
|
memory2_->getSize());
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
bool
|
|
ReadMemoryCommand::isEntireMemory() const
|
|
{
|
|
return source().isEntirelyCovered(origin(), size());
|
|
}
|
|
|
|
bool
|
|
WriteMemoryCommand::isEntireMemory() const
|
|
{
|
|
return destination().isEntirelyCovered(origin(), size());
|
|
}
|
|
|
|
bool
|
|
SvmMapMemoryCommand::isEntireMemory() const
|
|
{
|
|
return getSvmMem()->isEntirelyCovered(origin(), size());
|
|
}
|
|
|
|
bool
|
|
FillMemoryCommand::isEntireMemory() const
|
|
{
|
|
return memory().isEntirelyCovered(origin(), size());
|
|
}
|
|
|
|
bool
|
|
CopyMemoryCommand::isEntireMemory() const
|
|
{
|
|
bool result = false;
|
|
|
|
switch (type()) {
|
|
case CL_COMMAND_COPY_IMAGE_TO_BUFFER: {
|
|
Coord3D imageSize(size()[0] * size()[1] * size()[2] *
|
|
source().asImage()->getImageFormat().getElementSize());
|
|
result = source().isEntirelyCovered(srcOrigin(), size()) &&
|
|
destination().isEntirelyCovered(dstOrigin(), imageSize);
|
|
}
|
|
break;
|
|
case CL_COMMAND_COPY_BUFFER_TO_IMAGE: {
|
|
Coord3D imageSize(size()[0] * size()[1] * size()[2] *
|
|
destination().asImage()->getImageFormat().getElementSize());
|
|
result = source().isEntirelyCovered(srcOrigin(), imageSize) &&
|
|
destination().isEntirelyCovered(dstOrigin(), size());
|
|
}
|
|
break;
|
|
case CL_COMMAND_COPY_BUFFER_RECT: {
|
|
Coord3D rectSize(size()[0] * size()[1] * size()[2]);
|
|
Coord3D srcOffs(srcRect().start_);
|
|
Coord3D dstOffs(dstRect().start_);
|
|
result = source().isEntirelyCovered(srcOffs, rectSize) &&
|
|
destination().isEntirelyCovered(dstOffs, rectSize);
|
|
}
|
|
break;
|
|
default:
|
|
result = source().isEntirelyCovered(srcOrigin(), size()) &&
|
|
destination().isEntirelyCovered(dstOrigin(), size());
|
|
break;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
bool
|
|
MapMemoryCommand::isEntireMemory() const
|
|
{
|
|
return memory().isEntirelyCovered(origin(), size());
|
|
}
|
|
|
|
void
|
|
UnmapMemoryCommand::releaseResources()
|
|
{
|
|
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
|
//! @todo This is a workaround to a deadlock on indirect map release.
|
|
//! Remove this code when CAL will have a refcounter on memory.
|
|
//! decIndMapCount() has to go back to submitUnmapMemory()
|
|
device::Memory* mem = memory_->getDeviceMemory(queue()->device());
|
|
if (NULL != mem) {
|
|
mem->releaseIndirectMap();
|
|
}
|
|
}
|
|
OneMemoryArgCommand::releaseResources();
|
|
}
|
|
|
|
bool
|
|
MigrateMemObjectsCommand::validateMemory()
|
|
{
|
|
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
|
std::vector<amd::Memory*>::const_iterator itr;
|
|
for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) {
|
|
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
|
|
if (NULL == mem) {
|
|
LogPrintfError("Can't allocate memory size - 0x%08X bytes!",
|
|
(*itr)->getSize());
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Validates the kernel and all of its memory arguments before submission.
// Only done for GPU devices; other device types return CL_SUCCESS directly.
//
// \return CL_OUT_OF_RESOURCES if the device rejects the kernel or a
//         __constant argument exceeds maxConstantBufferSize_;
//         CL_MEM_OBJECT_ALLOCATION_FAILURE if an argument has no device
//         memory (including after a failed reallocation);
//         CL_SUCCESS otherwise.
cl_int
NDRangeKernelCommand::validateMemory()
{
    const amd::Device& device = queue()->device();
    if (!(device.info().type_ & CL_DEVICE_TYPE_GPU)) {
        return CL_SUCCESS;
    }

    // Validate the kernel before submission
    if (!queue()->device().validateKernel(kernel(), queue()->vdev())) {
        return CL_OUT_OF_RESOURCES;
    }

    const amd::KernelSignature& signature = kernel().signature();
    for (uint i = 0; i != signature.numParameters(); ++i) {
        const amd::KernelParameterDescriptor& desc = signature.at(i);

        // Only pointer arguments with a non-zero size are memory objects.
        if (!((desc.type_ == T_POINTER) && (desc.size_ != 0))) {
            continue;
        }

        amd::Memory* amdMemory;
        if (kernel().parameters().boundToSvmPointer(device, parameters_, i)) {
            // The argument slot holds an SVM pointer: look up the memory
            // object that backs it.
            amdMemory = amd::SvmManager::FindSvmBuffer(
                *reinterpret_cast<void* const*>(parameters() + desc.offset_));
        }
        else {
            // The argument slot holds the memory object pointer itself.
            amdMemory = *reinterpret_cast<amd::Memory* const*>
                (parameters() + desc.offset_);
        }

        if (amdMemory == NULL) {
            continue;
        }

        if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) {
            // Make sure argument size isn't bigger than the device limit
            if (amdMemory->getSize() > device.info().maxConstantBufferSize_) {
                LogPrintfError("HW constant buffer is too big (0x%X bytes)!",
                    amdMemory->getSize());
                return CL_OUT_OF_RESOURCES;
            }
        }

        device::Memory* mem = amdMemory->getDeviceMemory(device);

        // If the device kernel does not accept the memory as it is, try to
        // reallocate it and fetch the device memory again.
        if (!kernel().getDeviceKernel(device)->validateMemory(i, amdMemory)) {
            if (device.reallocMemory(*amdMemory)) {
                mem = amdMemory->getDeviceMemory(device);
            }
            else {
                mem = NULL;
            }
        }

        if (NULL == mem) {
            LogPrintfError("Can't allocate memory size - 0x%08X bytes!",
                amdMemory->getSize());
            return CL_MEM_OBJECT_ALLOCATION_FAILURE;
        }
    }

    return CL_SUCCESS;
}
|
|
|
|
bool ExtObjectsCommand::validateMemory()
|
|
{
|
|
bool retVal = true;
|
|
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
|
for(std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
|
|
itr != memObjects_.end(); itr++) {
|
|
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
|
|
if (NULL == mem) {
|
|
LogPrintfError("Can't allocate memory size - 0x%08X bytes!",
|
|
(*itr)->getSize());
|
|
return false;
|
|
}
|
|
retVal = processGLResource(mem);
|
|
}
|
|
}
|
|
return retVal;
|
|
}
|
|
|
|
// Acquire path: asks the device memory to process its GL resource with the
// GLDecompressResource operation and forwards the result.
bool AcquireExtObjectsCommand::processGLResource(device::Memory * mem)
{
    const bool processed =
        mem->processGLResource(device::Memory::GLDecompressResource);
    return processed;
}
|
|
|
|
// Release path: asks the device memory to process its GL resource with the
// GLInvalidateFBO operation and forwards the result.
bool ReleaseExtObjectsCommand::processGLResource(device::Memory * mem)
{
    const bool processed =
        mem->processGLResource(device::Memory::GLInvalidateFBO);
    return processed;
}
|
|
|
|
bool
|
|
MakeBuffersResidentCommand::validateMemory()
|
|
{
|
|
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
|
for(std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
|
|
itr != memObjects_.end(); itr++) {
|
|
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
|
|
if (NULL == mem) {
|
|
LogPrintfError("Can't allocate memory size - 0x%08X bytes!",
|
|
(*itr)->getSize());
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
bool
|
|
ThreadTraceMemObjectsCommand::validateMemory()
|
|
{
|
|
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
|
for(std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
|
|
itr != memObjects_.end(); itr++) {
|
|
device::Memory* mem = (*itr)->getDeviceMemory(queue()->device());
|
|
if (NULL == mem) {
|
|
std::vector<amd::Memory*>::const_iterator tmpItr;
|
|
for (tmpItr = memObjects_.begin(); tmpItr != itr; tmpItr++) {
|
|
device::Memory* tmpMem = (*tmpItr)->getDeviceMemory(queue()->device());
|
|
delete tmpMem;
|
|
}
|
|
LogPrintfError("Can't allocate memory size - 0x%08X bytes!",
|
|
(*itr)->getSize());
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
TransferBufferFileCommand::releaseResources()
|
|
{
|
|
for (uint i = 0; i < NumStagingBuffers; ++i) {
|
|
if (NULL != staging_[i]) {
|
|
staging_[i]->release();
|
|
}
|
|
}
|
|
|
|
// Call the parent
|
|
OneMemoryArgCommand::releaseResources();
|
|
}
|
|
|
|
void
|
|
TransferBufferFileCommand::submit(device::VirtualDevice& device)
|
|
{
|
|
device::Memory* mem = memory_->getDeviceMemory(queue()->device());
|
|
if (memory_->getMemFlags() & (CL_MEM_USE_HOST_PTR |
|
|
CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_PERSISTENT_MEM_AMD)) {
|
|
void* srcDstBuffer = mem->cpuMap(device);
|
|
// Make HD transfer to the host accessible memory
|
|
bool writeBuffer(type() == CL_COMMAND_WRITE_BUFFER_FROM_FILE_AMD);
|
|
if (!file()->transferBlock(writeBuffer, srcDstBuffer,
|
|
fileOffset(), origin()[0], size()[0])) {
|
|
return;
|
|
}
|
|
mem->cpuUnmap(device);
|
|
}
|
|
else {
|
|
device.submitTransferBufferFromFile(*this);
|
|
}
|
|
}
|
|
|
|
bool
|
|
TransferBufferFileCommand::validateMemory()
|
|
{
|
|
if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
|
|
// Check if the destination buffer has direct host access
|
|
if (!(memory_->getMemFlags() & (CL_MEM_USE_HOST_PTR |
|
|
CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_PERSISTENT_MEM_AMD))) {
|
|
// Allocate staging buffers
|
|
for (uint i = 0; i < NumStagingBuffers; ++i) {
|
|
staging_[i] = new (memory_->getContext())
|
|
Buffer(memory_->getContext(),
|
|
StagingBufferMemType, StagingBufferSize);
|
|
if (NULL == staging_[i] || !staging_[i]->create(nullptr)) {
|
|
return false;
|
|
}
|
|
device::Memory* mem = staging_[i]->getDeviceMemory(queue()->device());
|
|
if (NULL == mem) {
|
|
LogPrintfError("Can't allocate staging buffer - 0x%08X bytes!",
|
|
staging_[i]->getSize());
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
device::Memory* mem = memory_->getDeviceMemory(queue()->device());
|
|
if (NULL == mem) {
|
|
LogPrintfError("Can't allocate memory size - 0x%08X bytes!",
|
|
memory_->getSize());
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
} // namespace amd
|