b0631f7ab9
SWDEV-92049 - Forum [2712399]: clEnqueueMapBuffer in parallel - Handle multiple unmapInfo structures of multiple simultaneous maps of the same buffer - The change didn't affect images path, since it requires extra handling Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#79 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#194 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#271 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#126 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#399 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.cpp#25 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#64 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#82 edit
1550 righe
46 KiB
C++
1550 righe
46 KiB
C++
//
|
|
// Copyright 2010 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
/*! \file command.hpp
|
|
* \brief Declarations for Event, Command and HostQueue objects.
|
|
*
|
|
* \author Laurent Morichetti (laurent.morichetti@amd.com)
|
|
* \date October 2008
|
|
*/
|
|
|
|
#ifndef COMMAND_HPP_
|
|
#define COMMAND_HPP_
|
|
|
|
#include "top.hpp"
|
|
#include "thread/monitor.hpp"
|
|
#include "thread/thread.hpp"
|
|
#include "platform/agent.hpp"
|
|
#include "platform/object.hpp"
|
|
#include "platform/context.hpp"
|
|
#include "platform/ndrange.hpp"
|
|
#include "platform/kernel.hpp"
|
|
#include "device/device.hpp"
|
|
#include "utils/concurrent.hpp"
|
|
#include "platform/memory.hpp"
|
|
#include "platform/perfctr.hpp"
|
|
#include "platform/threadtrace.hpp"
|
|
|
|
#include "CL/cl_ext.h"
|
|
|
|
#include <algorithm>
|
|
#include <atomic>
|
|
#include <functional>
|
|
#include <vector>
|
|
|
|
namespace amd {
|
|
|
|
/*! \addtogroup Runtime
|
|
* @{
|
|
*
|
|
* \addtogroup Commands Event, Commands and Command-Queue
|
|
* @{
|
|
*/
|
|
|
|
class Command;
|
|
class HostQueue;
|
|
|
|
/*! \brief Encapsulates the status of a command.
|
|
*
|
|
* \details An event object encapsulates the status of a Command
|
|
* it is associated with and can be used to synchronize operations
|
|
* in a Context.
|
|
*/
|
|
class Event : public RuntimeObject
|
|
{
|
|
typedef void (CL_CALLBACK * CallBackFunction)(
|
|
cl_event event, cl_int command_exec_status, void *user_data);
|
|
|
|
struct CallBackEntry : public HeapObject
|
|
{
|
|
struct CallBackEntry* next_; //!< the next entry in the callback list.
|
|
|
|
std::atomic<CallBackFunction> callback_; //!< callback function pointer.
|
|
void* data_; //!< user data passed to the callback function.
|
|
cl_int status_; //!< execution status triggering the callback.
|
|
|
|
CallBackEntry(cl_int status, CallBackFunction callback, void* data) :
|
|
callback_(callback), data_(data), status_(status)
|
|
{ }
|
|
};
|
|
|
|
public:
|
|
|
|
typedef std::vector<Event*> EventWaitList;
|
|
|
|
private:
|
|
|
|
Monitor lock_;
|
|
|
|
std::atomic<CallBackEntry*> callbacks_; //!< linked list of callback entries.
|
|
volatile cl_int status_; //!< current execution status.
|
|
std::atomic_flag notified_; //!< Command queue was notified
|
|
|
|
protected:
|
|
|
|
static const EventWaitList nullWaitList;
|
|
|
|
struct ProfilingInfo
|
|
{
|
|
ProfilingInfo(bool enabled = false) : enabled_(enabled)
|
|
{
|
|
if (enabled) {
|
|
clear();
|
|
callback_ = NULL;
|
|
}
|
|
}
|
|
|
|
uint64_t queued_;
|
|
uint64_t submitted_;
|
|
uint64_t start_;
|
|
uint64_t end_;
|
|
bool enabled_;
|
|
ProfilingCallback *callback_;
|
|
void clear() {
|
|
queued_ = 0ULL;
|
|
submitted_ = 0ULL;
|
|
start_ = 0ULL;
|
|
end_ = 0ULL;
|
|
}
|
|
void setCallback(ProfilingCallback *callback) {
|
|
if (callback == NULL) {
|
|
return;
|
|
}
|
|
enabled_ = true;
|
|
clear();
|
|
callback_ = callback;
|
|
}
|
|
|
|
} profilingInfo_;
|
|
|
|
//! Construct a new event.
|
|
Event();
|
|
|
|
//! Construct a new event associated to the given command \a queue.
|
|
Event(HostQueue& queue);
|
|
|
|
//! Destroy the event.
|
|
virtual ~Event();
|
|
|
|
//! Release the resources associated with this event.
|
|
virtual void releaseResources() { }
|
|
|
|
//! Record the profiling info for the given change of \a status.
|
|
// If the given \a timeStamp is 0 and profiling is enabled,
|
|
// use the current host clock time instead.
|
|
uint64_t recordProfilingInfo(cl_int status, uint64_t timeStamp = 0);
|
|
|
|
//! Process the callbacks for the given \a status change.
|
|
void processCallbacks(cl_int status) const;
|
|
|
|
public:
|
|
//! Return the context for this event.
|
|
virtual const Context& context() const = 0;
|
|
|
|
//! Return the command this event is associated with.
|
|
inline Command& command();
|
|
inline const Command& command() const;
|
|
|
|
//! Return the profiling info.
|
|
const ProfilingInfo& profilingInfo() const { return profilingInfo_; }
|
|
|
|
//! Return this command's execution status.
|
|
cl_int status() const { return status_; }
|
|
|
|
//! Insert the given \a callback into the callback stack.
|
|
bool setCallback(cl_int status, CallBackFunction callback, void* data);
|
|
|
|
/*! \brief Set the event status.
|
|
*
|
|
* \details If the status becomes CL_COMPLETE, notify all threads
|
|
* awaiting this command's completion. If the given \a timeStamp is 0
|
|
* and profiling is enabled, use the current host clock time instead.
|
|
*
|
|
* \see amd::Event::awaitCompletion
|
|
*/
|
|
bool setStatus(cl_int status, uint64_t timeStamp = 0);
|
|
|
|
//! Signal all threads waiting on this event.
|
|
void signal()
|
|
{
|
|
ScopedLock lock(lock_);
|
|
lock_.notifyAll();
|
|
}
|
|
|
|
/*! \brief Suspend the current thread until the status of the Command
|
|
* associated with this event changes to CL_COMPLETE. Return true if the
|
|
* command successfully completed.
|
|
*/
|
|
virtual bool awaitCompletion();
|
|
|
|
/*! \brief Notifies current command queue about execution status
|
|
*/
|
|
bool notifyCmdQueue();
|
|
|
|
//! RTTI internal implementation
|
|
virtual ObjectType objectType() const {return ObjectTypeEvent;}
|
|
};
|
|
|
|
/*! \brief An operation that is submitted to a command queue.
|
|
*
|
|
* %Command is the abstract base type of all OpenCL operations
|
|
* submitted to a HostQueue for execution. Classes derived from
|
|
* %Command must implement the submit() function.
|
|
*
|
|
|
|
*/
|
|
class Command : public Event
|
|
{
|
|
|
|
private:
|
|
//! The command queue this command is enqueue into. NULL if not yet enqueue.
|
|
HostQueue* queue_;
|
|
//! Next GPU command in the queue list
|
|
Command* next_;
|
|
|
|
const cl_command_type type_; //!< This command's OpenCL type.
|
|
volatile cl_int exception_; //!< The first raised exception.
|
|
void* data_;
|
|
|
|
protected:
|
|
//! The Events that need to complete before this command is submitted.
|
|
EventWaitList eventWaitList_;
|
|
|
|
//! Construct a new command of the given OpenCL type.
|
|
Command(
|
|
HostQueue& queue,
|
|
cl_command_type type,
|
|
const EventWaitList& eventWaitList = nullWaitList);
|
|
|
|
//! Construct a new command of the given OpenCL type.
|
|
Command(cl_command_type type) :
|
|
Event(), queue_(NULL), next_(NULL), type_(type),
|
|
exception_(0), data_(NULL), eventWaitList_(nullWaitList)
|
|
{ }
|
|
|
|
bool terminate() {
|
|
if (Agent::shouldPostEventEvents() && type() != 0) {
|
|
Agent::postEventFree(as_cl(static_cast<Event*>(this)));
|
|
}
|
|
return true;
|
|
}
|
|
|
|
public:
|
|
//! Return the queue this command is enqueued into.
|
|
HostQueue* queue() const { return queue_; }
|
|
|
|
//! Enqueue this command into the associated command queue.
|
|
void enqueue();
|
|
|
|
//! Return the event encapsulating this command's status.
|
|
const Event& event() const { return *this; }
|
|
Event& event() { return *this; }
|
|
|
|
//! Return the list of events this command needs to wait on before dispatch
|
|
const EventWaitList& eventWaitList() const { return eventWaitList_; }
|
|
|
|
//! Return this command's OpenCL type.
|
|
cl_command_type type() const { return type_; }
|
|
|
|
//! Return the first raised exception or 0 if none.
|
|
cl_int exception() const { return exception_; }
|
|
|
|
//! Set the exception for this command.
|
|
void setException(cl_int exception) { exception_ = exception; }
|
|
|
|
//! Return the opaque, device specific data for this command.
|
|
void* data() const { return data_; }
|
|
|
|
//! Set the opaque, device specific data for this command.
|
|
void setData(void* data) { data_ = data; }
|
|
|
|
/*! \brief The execution engine for this command.
|
|
*
|
|
* \details All derived class must implement this virtual function.
|
|
*
|
|
* \note This function will execute in the command queue thread.
|
|
*/
|
|
virtual void submit(device::VirtualDevice& device) = 0;
|
|
|
|
//! Release the resources associated with this event.
|
|
virtual void releaseResources();
|
|
|
|
//! Set the next GPU command
|
|
void setNext(Command* next) { next_ = next; }
|
|
|
|
//! Get the next GPU command
|
|
Command* getNext() const { return next_; }
|
|
|
|
//! Return the context for this event.
|
|
virtual const Context& context() const;
|
|
};
|
|
|
|
class UserEvent : public Command
|
|
{
|
|
const Context& context_;
|
|
|
|
public:
|
|
UserEvent(Context& context) : Command(CL_COMMAND_USER), context_(context) {
|
|
setStatus(CL_SUBMITTED);
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
ShouldNotCallThis();
|
|
}
|
|
|
|
virtual const Context& context() const { return context_; }
|
|
};
|
|
|
|
class ClGlEvent : public Command
|
|
{
|
|
private:
|
|
const Context& context_;
|
|
bool waitForFence();
|
|
|
|
public:
|
|
ClGlEvent(Context& context)
|
|
: Command(CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR)
|
|
, context_(context) {
|
|
setStatus(CL_SUBMITTED);
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
ShouldNotCallThis();
|
|
}
|
|
|
|
bool awaitCompletion() {
|
|
return waitForFence();
|
|
}
|
|
|
|
virtual const Context& context() const { return context_; }
|
|
};
|
|
|
|
//! View this event as the Command it is (unchecked downcast).
inline Command&
Event::command()
{
    Command* const self = static_cast<Command*>(this);
    return *self;
}
|
|
|
|
//! View this event as the Command it is (unchecked downcast), const flavour.
inline const Command&
Event::command() const
{
    const Command* const self = static_cast<const Command*>(this);
    return *self;
}
|
|
|
|
class Kernel;
|
|
class NDRangeContainer;
|
|
|
|
//! A memory command that holds a single memory object reference.
|
|
//
|
|
class OneMemoryArgCommand : public Command
|
|
{
|
|
protected:
|
|
Memory* memory_;
|
|
|
|
public:
|
|
OneMemoryArgCommand(
|
|
HostQueue& queue,
|
|
cl_command_type type,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& memory) :
|
|
Command(queue, type, eventWaitList), memory_(&memory) {
|
|
memory_->retain();
|
|
}
|
|
|
|
virtual void releaseResources() {
|
|
memory_->release();
|
|
DEBUG_ONLY(memory_ = NULL);
|
|
Command::releaseResources();
|
|
}
|
|
|
|
bool validateMemory();
|
|
};
|
|
|
|
//! A memory command that holds a single memory object reference.
|
|
//
|
|
class TwoMemoryArgsCommand : public Command
|
|
{
|
|
protected:
|
|
Memory* memory1_;
|
|
Memory* memory2_;
|
|
|
|
public:
|
|
TwoMemoryArgsCommand(
|
|
HostQueue& queue,
|
|
cl_command_type type,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& memory1, Memory& memory2) :
|
|
Command(queue, type, eventWaitList),
|
|
memory1_(&memory1), memory2_(&memory2) {
|
|
memory1_->retain();
|
|
memory2_->retain();
|
|
|
|
}
|
|
|
|
virtual void releaseResources() {
|
|
memory1_->release();
|
|
memory2_->release();
|
|
DEBUG_ONLY(memory1_ = memory2_ = NULL);
|
|
Command::releaseResources();
|
|
}
|
|
|
|
bool validateMemory();
|
|
};
|
|
|
|
/*! \brief A generic read memory command.
|
|
*
|
|
* \details Used for operations on both buffers and images. Backends
|
|
* are expected to handle any required translation. Buffers
|
|
* are treated as 1D structures so origin_[0] and size_[0]
|
|
* are equivalent to offset_ and count_ respectively.
|
|
*
|
|
* @todo Find a cleaner way of merging the row and slice pitch concepts at this level.
|
|
*
|
|
*/
|
|
|
|
class ReadMemoryCommand : public OneMemoryArgCommand
|
|
{
|
|
private:
|
|
Coord3D origin_; //!< Origin of the region to read.
|
|
Coord3D size_; //!< Size of the region to read.
|
|
void* hostPtr_; //!< The host pointer destination.
|
|
size_t rowPitch_; //!< Row pitch (for image operations)
|
|
size_t slicePitch_; //!< Slice pitch (for image operations)
|
|
|
|
BufferRect bufRect_; //!< Buffer rectangle information
|
|
BufferRect hostRect_; //!< Host memory rectangle information
|
|
|
|
public:
|
|
//! Construct a new ReadMemoryCommand
|
|
ReadMemoryCommand(
|
|
HostQueue& queue,
|
|
cl_command_type cmdType,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& memory, Coord3D origin,
|
|
Coord3D size, void* hostPtr,
|
|
size_t rowPitch = 0, size_t slicePitch = 0) :
|
|
OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
|
|
origin_(origin), size_(size), hostPtr_(hostPtr),
|
|
rowPitch_(rowPitch), slicePitch_(slicePitch)
|
|
{
|
|
// Sanity checks
|
|
assert(hostPtr != NULL && "hostPtr cannot be null");
|
|
assert(size.c[0] > 0 && "invalid");
|
|
}
|
|
|
|
//! Construct a new ReadMemoryCommand
|
|
ReadMemoryCommand(
|
|
HostQueue& queue,
|
|
cl_command_type cmdType,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& memory, Coord3D origin,
|
|
Coord3D size, void* hostPtr,
|
|
const BufferRect& bufRect,
|
|
const BufferRect& hostRect)
|
|
: OneMemoryArgCommand(queue, cmdType, eventWaitList, memory)
|
|
, origin_(origin)
|
|
, size_(size)
|
|
, hostPtr_(hostPtr)
|
|
, rowPitch_(0)
|
|
, slicePitch_(0)
|
|
, bufRect_(bufRect)
|
|
, hostRect_(hostRect)
|
|
{
|
|
// Sanity checks
|
|
assert(hostPtr != NULL && "hostPtr cannot be null");
|
|
assert(size.c[0] > 0 && "invalid");
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitReadMemory(*this);
|
|
}
|
|
|
|
//! Return the memory object to read from.
|
|
Memory& source() const { return *memory_; }
|
|
//! Return the host memory to write to
|
|
void* destination() const { return hostPtr_; }
|
|
|
|
//! Return the origin of the region to read
|
|
const Coord3D& origin() const { return origin_; }
|
|
//! Return the size of the region to read
|
|
const Coord3D& size() const { return size_; }
|
|
//! Return the row pitch
|
|
size_t rowPitch() const { return rowPitch_; }
|
|
//! Return the slice pitch
|
|
size_t slicePitch() const { return slicePitch_; }
|
|
|
|
//! Return the buffer rectangle information
|
|
const BufferRect& bufRect() const { return bufRect_; }
|
|
//! Return the host rectangle information
|
|
const BufferRect& hostRect() const { return hostRect_; }
|
|
|
|
//! Return true if the entire memory object is read.
|
|
bool isEntireMemory() const;
|
|
};
|
|
|
|
/*! \brief A generic write memory command.
|
|
*
|
|
* \details Used for operations on both buffers and images. Backends
|
|
* are expected to handle any required translations. Buffers
|
|
* are treated as 1D structures so origin_[0] and size_[0]
|
|
* are equivalent to offset_ and count_ respectively.
|
|
*/
|
|
|
|
class WriteMemoryCommand : public OneMemoryArgCommand
|
|
{
|
|
private:
|
|
Coord3D origin_; //!< Origin of the region to write to.
|
|
Coord3D size_; //!< Size of the region to write to.
|
|
const void* hostPtr_; //!< The host pointer source.
|
|
size_t rowPitch_; //!< Row pitch (for image operations)
|
|
size_t slicePitch_; //!< Slice pitch (for image operations)
|
|
|
|
BufferRect bufRect_; //!< Buffer rectangle information
|
|
BufferRect hostRect_; //!< Host memory rectangle information
|
|
|
|
public:
|
|
WriteMemoryCommand(
|
|
HostQueue& queue,
|
|
cl_command_type cmdType,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& memory, Coord3D origin,
|
|
Coord3D size, const void* hostPtr,
|
|
size_t rowPitch = 0, size_t slicePitch = 0) :
|
|
OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
|
|
origin_(origin), size_(size), hostPtr_(hostPtr),
|
|
rowPitch_(rowPitch), slicePitch_(slicePitch)
|
|
{
|
|
// Sanity checks
|
|
assert(hostPtr != NULL && "hostPtr cannot be null");
|
|
assert(size.c[0] > 0 && "invalid");
|
|
}
|
|
|
|
WriteMemoryCommand(
|
|
HostQueue& queue,
|
|
cl_command_type cmdType,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& memory, Coord3D origin,
|
|
Coord3D size, const void* hostPtr,
|
|
const BufferRect& bufRect,
|
|
const BufferRect& hostRect)
|
|
: OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
|
|
origin_(origin), size_(size), hostPtr_(hostPtr),
|
|
rowPitch_(0), slicePitch_(0),
|
|
bufRect_(bufRect), hostRect_(hostRect)
|
|
{
|
|
// Sanity checks
|
|
assert(hostPtr != NULL && "hostPtr cannot be null");
|
|
assert(size.c[0] > 0 && "invalid");
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitWriteMemory(*this);
|
|
}
|
|
|
|
//! Return the host memory to read from
|
|
const void* source() const { return hostPtr_; }
|
|
//! Return the memory object to write to.
|
|
Memory& destination() const { return *memory_; }
|
|
|
|
//! Return the region origin
|
|
const Coord3D& origin() const { return origin_; }
|
|
//! Return the region size
|
|
const Coord3D& size() const { return size_; }
|
|
//! Return the row pitch
|
|
size_t rowPitch() const { return rowPitch_; }
|
|
//! Return the slice pitch
|
|
size_t slicePitch() const { return slicePitch_; }
|
|
|
|
//! Return the buffer rectangle information
|
|
const BufferRect& bufRect() const { return bufRect_; }
|
|
//! Return the host rectangle information
|
|
const BufferRect& hostRect() const { return hostRect_; }
|
|
|
|
//! Return true if the entire memory object is written.
|
|
bool isEntireMemory() const;
|
|
};
|
|
|
|
/*! \brief A generic fill memory command.
|
|
*
|
|
* \details Used for operations on both buffers and images. Backends
|
|
* are expected to handle any required translations. Buffers
|
|
* are treated as 1D structures so origin_[0] and size_[0]
|
|
* are equivalent to offset_ and count_ respectively.
|
|
*/
|
|
|
|
class FillMemoryCommand : public OneMemoryArgCommand
|
|
{
|
|
public:
|
|
const static size_t MaxFillPatterSize = sizeof(cl_double16);
|
|
|
|
private:
|
|
Coord3D origin_; //!< Origin of the region to write to.
|
|
Coord3D size_; //!< Size of the region to write to.
|
|
char pattern_[MaxFillPatterSize]; //!< The fill pattern
|
|
size_t patternSize_; //!< Pattern size
|
|
|
|
public:
|
|
FillMemoryCommand(
|
|
HostQueue& queue,
|
|
cl_command_type cmdType,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& memory,
|
|
const void* pattern, size_t patternSize,
|
|
Coord3D origin, Coord3D size)
|
|
: OneMemoryArgCommand(queue, cmdType, eventWaitList, memory)
|
|
, origin_(origin)
|
|
, size_(size)
|
|
, patternSize_(patternSize)
|
|
{
|
|
// Sanity checks
|
|
assert(pattern != NULL && "pattern cannot be null");
|
|
assert(size.c[0] > 0 && "invalid");
|
|
memcpy(pattern_, pattern, patternSize);
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitFillMemory(*this);
|
|
}
|
|
|
|
//! Return the pattern memory to fill with
|
|
const void* pattern() const { return reinterpret_cast<const void*>(pattern_); }
|
|
//! Return the pattern size
|
|
const size_t patternSize() const { return patternSize_; }
|
|
//! Return the memory object to write to.
|
|
Memory& memory() const { return *memory_; }
|
|
|
|
//! Return the region origin
|
|
const Coord3D& origin() const { return origin_; }
|
|
//! Return the region size
|
|
const Coord3D& size() const { return size_; }
|
|
|
|
//! Return true if the entire memory object is written.
|
|
bool isEntireMemory() const;
|
|
};
|
|
|
|
/*! \brief A generic copy memory command
|
|
*
|
|
* \details Used for both buffers and images. Backends are expected
|
|
* to handle any required translation. Buffers are treated
|
|
* as 1D structures so origin_[0] and size_[0] are
|
|
* equivalent to offset_ and count_ respectively.
|
|
*/
|
|
|
|
class CopyMemoryCommand : public TwoMemoryArgsCommand
|
|
{
|
|
private:
|
|
Coord3D srcOrigin_; //!< Origin of the source region.
|
|
Coord3D dstOrigin_; //!< Origin of the destination region.
|
|
Coord3D size_; //!< Size of the region to copy.
|
|
|
|
BufferRect srcRect_; //!< Source buffer rectangle information
|
|
BufferRect dstRect_; //!< Destination buffer rectangle information
|
|
|
|
public:
|
|
CopyMemoryCommand(
|
|
HostQueue& queue,
|
|
cl_command_type cmdType,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& srcMemory, Memory& dstMemory,
|
|
Coord3D srcOrigin, Coord3D dstOrigin,
|
|
Coord3D size)
|
|
: TwoMemoryArgsCommand(
|
|
queue, cmdType, eventWaitList, srcMemory, dstMemory),
|
|
srcOrigin_(srcOrigin), dstOrigin_(dstOrigin), size_(size)
|
|
{
|
|
// Sanity checks
|
|
assert(size.c[0] > 0 && "invalid");
|
|
}
|
|
|
|
CopyMemoryCommand(
|
|
HostQueue& queue,
|
|
cl_command_type cmdType,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& srcMemory, Memory& dstMemory,
|
|
Coord3D srcOrigin, Coord3D dstOrigin,
|
|
Coord3D size,
|
|
const BufferRect& srcRect,
|
|
const BufferRect& dstRect)
|
|
: TwoMemoryArgsCommand(
|
|
queue, cmdType, eventWaitList, srcMemory, dstMemory),
|
|
srcOrigin_(srcOrigin), dstOrigin_(dstOrigin), size_(size),
|
|
srcRect_(srcRect), dstRect_(dstRect)
|
|
{
|
|
// Sanity checks
|
|
assert(size.c[0] > 0 && "invalid");
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitCopyMemory(*this);
|
|
}
|
|
|
|
//! Return the host memory to read from
|
|
Memory& source() const { return *memory1_; }
|
|
//! Return the memory object to write to.
|
|
Memory& destination() const { return *memory2_; }
|
|
|
|
//! Return the source origin
|
|
const Coord3D& srcOrigin() const { return srcOrigin_; }
|
|
//! Return the offset in bytes in the destination.
|
|
const Coord3D& dstOrigin() const { return dstOrigin_; }
|
|
//! Return the number of bytes to copy.
|
|
const Coord3D& size() const { return size_; }
|
|
|
|
//! Return the source buffer rectangle information
|
|
const BufferRect& srcRect() const { return srcRect_; }
|
|
//! Return the destination buffer rectangle information
|
|
const BufferRect& dstRect() const { return dstRect_; }
|
|
|
|
//! Return true if the both memories are is read/written in their entirety.
|
|
bool isEntireMemory() const;
|
|
};
|
|
|
|
/*! \brief A generic map memory command. Makes a memory object accessible to the host.
|
|
*
|
|
* @todo:dgladdin Need to think more about how the pitch parameters operate in
|
|
* the context of unified buffer/image commands.
|
|
*/
|
|
|
|
class MapMemoryCommand: public OneMemoryArgCommand
|
|
{
|
|
private:
|
|
cl_map_flags mapFlags_; //!< Flags controlling the map.
|
|
bool blocking_; //!< True for blocking maps
|
|
Coord3D origin_; //!< Origin of the region to map.
|
|
Coord3D size_; //!< Size of the region to map.
|
|
const void* mapPtr_; //!< Host-space pointer that the object is currently mapped at
|
|
|
|
public:
|
|
//! Construct a new MapMemoryCommand
|
|
MapMemoryCommand(
|
|
HostQueue& queue,
|
|
cl_command_type cmdType,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& memory, cl_map_flags mapFlags,
|
|
bool blocking,
|
|
Coord3D origin, Coord3D size,
|
|
size_t* imgRowPitch = nullptr,
|
|
size_t* imgSlicePitch = nullptr,
|
|
void* mapPtr = nullptr) :
|
|
OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
|
|
mapFlags_(mapFlags), blocking_(blocking),
|
|
origin_(origin), size_(size), mapPtr_(mapPtr)
|
|
{
|
|
// Sanity checks
|
|
assert(size.c[0] > 0 && "invalid");
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitMapMemory(*this);
|
|
}
|
|
|
|
//! Read the memory object
|
|
Memory& memory() const { return *memory_; }
|
|
//! Read the map control flags
|
|
cl_map_flags mapFlags() const { return mapFlags_; }
|
|
//! Read the origin
|
|
const Coord3D& origin() const { return origin_; }
|
|
//! Read the size
|
|
const Coord3D& size() const { return size_; }
|
|
//! Read the blocking flag
|
|
bool blocking() const { return blocking_; }
|
|
//! Returns true if the entire memory object is mapped
|
|
bool isEntireMemory() const;
|
|
//! Read the map pointer
|
|
const void* mapPtr() const { return mapPtr_; }
|
|
|
|
};
|
|
|
|
|
|
|
|
/*! \brief A generic unmap memory command.
|
|
*
|
|
* @todo:dgladdin Need to think more about how the pitch parameters operate in
|
|
* the context of unified buffer/image commands.
|
|
*/
|
|
|
|
class UnmapMemoryCommand: public OneMemoryArgCommand
|
|
{
|
|
private:
|
|
//! Host-space pointer that the object is currently mapped at
|
|
void* mapPtr_;
|
|
|
|
public:
|
|
//! Construct a new MapMemoryCommand
|
|
UnmapMemoryCommand(
|
|
HostQueue& queue,
|
|
cl_command_type cmdType,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& memory, void* mapPtr) :
|
|
OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
|
|
mapPtr_(mapPtr)
|
|
{ }
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitUnmapMemory(*this);
|
|
}
|
|
|
|
virtual void releaseResources();
|
|
|
|
//! Read the memory object
|
|
Memory& memory() const { return *memory_; }
|
|
//! Read the map pointer
|
|
void* mapPtr() const { return mapPtr_; }
|
|
};
|
|
|
|
/*! \brief Migrate memory objects command.
|
|
*
|
|
* \details Used for operations on both buffers and images. Backends
|
|
* are expected to handle any required translations.
|
|
*/
|
|
class MigrateMemObjectsCommand: public Command
|
|
{
|
|
private:
|
|
cl_mem_migration_flags migrationFlags_; //!< Migration flags
|
|
std::vector<amd::Memory*> memObjects_; //!< The list of memory objects
|
|
|
|
public:
|
|
//! Construct a new AcquireExtObjectsCommand
|
|
MigrateMemObjectsCommand(
|
|
HostQueue& queue,
|
|
cl_command_type type,
|
|
const EventWaitList& eventWaitList,
|
|
const std::vector<amd::Memory*>& memObjects,
|
|
cl_mem_migration_flags flags)
|
|
: Command(queue, type, eventWaitList)
|
|
, migrationFlags_(flags)
|
|
{
|
|
std::vector<amd::Memory*>::const_iterator itr;
|
|
for (itr = memObjects.begin(); itr != memObjects.end(); itr++) {
|
|
(*itr)->retain();
|
|
memObjects_.push_back(*itr);
|
|
}
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitMigrateMemObjects(*this);
|
|
}
|
|
|
|
//! Release all resources associated with this command
|
|
void releaseResources() {
|
|
std::vector<amd::Memory*>::const_iterator itr;
|
|
for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) {
|
|
(*itr)->release();
|
|
}
|
|
Command::releaseResources();
|
|
}
|
|
|
|
//! Returns the migration flags
|
|
cl_mem_migration_flags migrationFlags() const { return migrationFlags_; }
|
|
//! Returns the number of memory objects in the command
|
|
cl_uint numMemObjects() const { return (cl_uint) memObjects_.size(); }
|
|
//! Returns a pointer to the memory objects
|
|
const std::vector<amd::Memory*>& memObjects() const { return memObjects_; }
|
|
|
|
bool validateMemory();
|
|
};
|
|
|
|
//! To execute a kernel on a specific device.
|
|
class NDRangeKernelCommand : public Command
|
|
{
|
|
private:
|
|
Kernel& kernel_;
|
|
NDRangeContainer sizes_;
|
|
address parameters_;
|
|
|
|
public:
|
|
//! Construct an ExecuteKernel command
|
|
NDRangeKernelCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
Kernel& kernel,
|
|
const NDRangeContainer& sizes);
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitKernel(*this);
|
|
}
|
|
|
|
//! Release all resources associated with this command (
|
|
void releaseResources();
|
|
|
|
//! Return the kernel.
|
|
const Kernel& kernel() const { return kernel_; }
|
|
|
|
//! Return the parameters given to this kernel.
|
|
const_address parameters() const { return parameters_; }
|
|
|
|
//! Return the kernel NDRange.
|
|
const NDRangeContainer& sizes() const { return sizes_; }
|
|
|
|
//! Set the local work size.
|
|
void setLocalWorkSize(const NDRange& local) { sizes_.local() = local; }
|
|
|
|
cl_int validateMemory();
|
|
};
|
|
|
|
class NativeFnCommand : public Command
|
|
{
|
|
private:
|
|
void (CL_CALLBACK *nativeFn_)(void *);
|
|
|
|
char* args_;
|
|
size_t argsSize_;
|
|
|
|
std::vector<Memory*> memObjects_;
|
|
std::vector<size_t> memOffsets_;
|
|
|
|
public:
|
|
NativeFnCommand(
|
|
HostQueue& queue, const EventWaitList& eventWaitList,
|
|
void (CL_CALLBACK * nativeFn)(void*), const void* args, size_t argsSize,
|
|
size_t numMemObjs, const cl_mem* memObjs, const void** memLocs);
|
|
|
|
~NativeFnCommand() {
|
|
delete[] args_;
|
|
}
|
|
|
|
void releaseResources() {
|
|
std::for_each(memObjects_.begin(), memObjects_.end(),
|
|
std::mem_fun(&Memory::release));
|
|
Command::releaseResources();
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitNativeFn(*this);
|
|
}
|
|
|
|
cl_int invoke();
|
|
};
|
|
|
|
class Marker : public Command
|
|
{
|
|
public:
|
|
//! Create a new Marker
|
|
Marker(
|
|
HostQueue& queue, bool userVisible,
|
|
const EventWaitList& eventWaitList = nullWaitList, const Event* waitingEvent = NULL)
|
|
: Command(queue, userVisible ? CL_COMMAND_MARKER : 0, eventWaitList)
|
|
, waitingEvent_(waitingEvent)
|
|
{ }
|
|
|
|
//! The actual command implementation.
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitMarker(*this);
|
|
}
|
|
|
|
const Event* waitingEvent() const { return waitingEvent_; }
|
|
|
|
private:
|
|
const Event* waitingEvent_; //!< Waiting event associated with the marker
|
|
};
|
|
|
|
/*! \brief Maps CL objects created from external ones and syncs the contents (blocking).
|
|
*
|
|
*/
|
|
|
|
class ExtObjectsCommand: public Command
|
|
{
|
|
private:
|
|
std::vector<amd::Memory*> memObjects_; //!< The list of Memory based classes
|
|
|
|
public:
|
|
//! Construct a new AcquireExtObjectsCommand
|
|
ExtObjectsCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
cl_uint num_objects,
|
|
const std::vector<amd::Memory*>& memoryObjects,
|
|
cl_command_type type) :
|
|
Command(queue, type, eventWaitList)
|
|
{
|
|
for(std::vector<amd::Memory*>::const_iterator itr = memoryObjects.begin();
|
|
itr != memoryObjects.end(); itr++) {
|
|
(*itr)->retain();
|
|
memObjects_.push_back(*itr);
|
|
}
|
|
}
|
|
|
|
//! Release all resources associated with this command
|
|
void releaseResources() {
|
|
for(std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
|
|
itr != memObjects_.end(); itr++) {
|
|
(*itr)->release();
|
|
}
|
|
Command::releaseResources();
|
|
}
|
|
|
|
//! Get number of GL objects
|
|
cl_uint getNumObjects() {return (cl_uint) memObjects_.size();}
|
|
//! Get pointer to GL object list
|
|
const std::vector<amd::Memory*>& getMemList() const {return memObjects_;}
|
|
bool validateMemory();
|
|
virtual bool processGLResource(device::Memory * mem) = 0 ;
|
|
|
|
};
|
|
|
|
class AcquireExtObjectsCommand: public ExtObjectsCommand
|
|
{
|
|
public:
|
|
//! Construct a new AcquireExtObjectsCommand
|
|
AcquireExtObjectsCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
cl_uint num_objects,
|
|
const std::vector<amd::Memory*>& memoryObjects,
|
|
cl_command_type type) :
|
|
ExtObjectsCommand(queue, eventWaitList, num_objects,
|
|
memoryObjects, type)
|
|
{
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitAcquireExtObjects(*this);
|
|
}
|
|
|
|
virtual bool processGLResource(device::Memory * mem);
|
|
};
|
|
|
|
class ReleaseExtObjectsCommand: public ExtObjectsCommand
|
|
{
|
|
public:
|
|
//! Construct a new ReleaseExtObjectsCommand
|
|
ReleaseExtObjectsCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
cl_uint num_objects,
|
|
const std::vector<amd::Memory*>& memoryObjects,
|
|
cl_command_type type) :
|
|
ExtObjectsCommand(queue, eventWaitList, num_objects,
|
|
memoryObjects, type)
|
|
{
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitReleaseExtObjects(*this);
|
|
}
|
|
|
|
virtual bool processGLResource(device::Memory * mem);
|
|
};
|
|
|
|
class PerfCounterCommand : public Command
|
|
{
|
|
public:
|
|
typedef std::vector<PerfCounter*> PerfCounterList;
|
|
|
|
enum State
|
|
{
|
|
Begin = 0, //!< Issue a begin command
|
|
End = 1 //!< Issue an end command
|
|
};
|
|
|
|
//! Construct a new PerfCounterCommand
|
|
PerfCounterCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
const PerfCounterList& counterList,
|
|
State state)
|
|
: Command(queue, 0, eventWaitList)
|
|
, counterList_(counterList)
|
|
, state_(state)
|
|
{
|
|
for (uint i = 0; i < counterList_.size(); ++i) {
|
|
counterList_[i]->retain();
|
|
}
|
|
}
|
|
|
|
void releaseResources() {
|
|
for (uint i = 0; i < counterList_.size(); ++i) {
|
|
counterList_[i]->release();
|
|
}
|
|
Command::releaseResources();
|
|
}
|
|
|
|
//! Gets the number of PerfCounter objects
|
|
size_t getNumCounters() const { return counterList_.size(); }
|
|
|
|
//! Gets the list of all counters
|
|
const PerfCounterList& getCounters() const { return counterList_; }
|
|
|
|
//! Gets the performance counter state
|
|
State getState() const { return state_; }
|
|
|
|
//! Process the command on the device queue
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitPerfCounter(*this);
|
|
}
|
|
|
|
private:
|
|
PerfCounterList counterList_; //!< The list of performance counters
|
|
State state_; //!< State of the issued command
|
|
};
|
|
|
|
/*! \brief Thread Trace memory objects command.
|
|
*
|
|
* \details Used for bindig memory objects to therad trace mechanism.
|
|
*/
|
|
class ThreadTraceMemObjectsCommand: public Command
|
|
{
|
|
public:
|
|
//! Construct a new ThreadTraceMemObjectsCommand
|
|
ThreadTraceMemObjectsCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
size_t numMemoryObjects,
|
|
const cl_mem* memoryObjects,
|
|
size_t sizeMemoryObject,
|
|
ThreadTrace& threadTrace,
|
|
cl_command_type type) :
|
|
Command(queue, type, eventWaitList),
|
|
sizeMemObjects_(sizeMemoryObject),
|
|
threadTrace_(threadTrace)
|
|
{
|
|
memObjects_.resize(numMemoryObjects);
|
|
for (size_t i = 0; i < numMemoryObjects; ++i) {
|
|
Memory* obj = as_amd(memoryObjects[i]);
|
|
obj->retain();
|
|
memObjects_[i] = obj;
|
|
}
|
|
threadTrace_.retain();
|
|
}
|
|
//! Release all resources associated with this command
|
|
void releaseResources() {
|
|
threadTrace_.release();
|
|
for(std::vector<amd::Memory*>::const_iterator itr = memObjects_.begin();
|
|
itr != memObjects_.end(); itr++) {
|
|
(*itr)->release();
|
|
}
|
|
Command::releaseResources();
|
|
}
|
|
|
|
//! Get number of CL memory objects
|
|
cl_uint getNumObjects() {return (cl_uint) memObjects_.size();}
|
|
|
|
//! Get pointer to CL memory object list
|
|
const std::vector<amd::Memory*>& getMemList() const {return memObjects_;}
|
|
|
|
//! Submit command to bind memory object to the Thread Trace mechanism
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitThreadTraceMemObjects(*this);
|
|
}
|
|
|
|
//! Return the thread trace object.
|
|
ThreadTrace& getThreadTrace() const { return threadTrace_; }
|
|
|
|
//! Get memory object size
|
|
const size_t getMemoryObjectSize() const {return sizeMemObjects_;}
|
|
|
|
//! Validate memory bound to the thread thrace
|
|
bool validateMemory();
|
|
private:
|
|
std::vector<amd::Memory*> memObjects_; //!< The list of memory objects,bound to the thread trace
|
|
size_t sizeMemObjects_; //!< The size of each memory object from memObjects_ list (all memory objects have the smae size)
|
|
ThreadTrace& threadTrace_; //!< The Thread Trace object
|
|
};
|
|
|
|
/*! \brief Thread Trace command.
|
|
*
|
|
* \details Used for issue begin/end/pause/resume for therad trace object.
|
|
*/
|
|
class ThreadTraceCommand : public Command
|
|
{
|
|
private:
|
|
void *threadTraceConfig_;
|
|
public:
|
|
|
|
enum State
|
|
{
|
|
Begin = 0, //!< Issue a begin command
|
|
End = 1, //!< Issue an end command
|
|
Pause = 2, //!< Issue a pause command
|
|
Resume = 3 //!< Issue a resume command
|
|
};
|
|
|
|
//! Construct a new ThreadTraceCommand
|
|
ThreadTraceCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
const void *threadTraceConfig,
|
|
ThreadTrace& threadTrace,
|
|
State state,
|
|
cl_command_type type)
|
|
: Command(queue, type, eventWaitList)
|
|
, threadTrace_(threadTrace)
|
|
, state_(state)
|
|
{
|
|
const unsigned int size = *static_cast<const unsigned int*>(threadTraceConfig);
|
|
threadTraceConfig_ = static_cast<void*>(new char[size]);
|
|
if (threadTraceConfig_) {
|
|
memcpy(threadTraceConfig_, threadTraceConfig, size);
|
|
}
|
|
threadTrace_.retain();
|
|
}
|
|
|
|
//! Release all resources associated with this command
|
|
void releaseResources() {
|
|
threadTrace_.release();
|
|
Command::releaseResources();
|
|
}
|
|
|
|
//! Get the thread trace object
|
|
ThreadTrace& getThreadTrace() const { return threadTrace_; }
|
|
|
|
//! Get the thread trace command state
|
|
State getState() const { return state_; }
|
|
|
|
//! Process the command on the device queue
|
|
virtual void submit(device::VirtualDevice& device) {
|
|
device.submitThreadTrace(*this);
|
|
}
|
|
// Accessor methods
|
|
void* threadTraceConfig() const { return threadTraceConfig_; }
|
|
|
|
private:
|
|
ThreadTrace& threadTrace_; //!< The list of performance counters
|
|
State state_; //!< State of the issued command
|
|
};
|
|
|
|
class SignalCommand:public OneMemoryArgCommand
|
|
{
|
|
|
|
private:
|
|
cl_uint markerValue_;
|
|
cl_ulong markerOffset_;
|
|
|
|
public:
|
|
|
|
SignalCommand(
|
|
HostQueue& queue,
|
|
cl_command_type cmdType,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& memory,
|
|
cl_uint value,
|
|
cl_ulong offset = 0):
|
|
OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
|
|
markerValue_(value),
|
|
markerOffset_(offset)
|
|
{
|
|
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device)
|
|
{
|
|
device.submitSignal(*this);
|
|
}
|
|
|
|
const cl_uint markerValue() {return markerValue_;}
|
|
Memory& memory() {return *memory_;}
|
|
const cl_ulong markerOffset() {return markerOffset_;}
|
|
|
|
};
|
|
|
|
class MakeBuffersResidentCommand: public Command
|
|
{
|
|
private:
|
|
std::vector<amd::Memory*> memObjects_;
|
|
cl_bus_address_amd* busAddresses_;
|
|
|
|
public:
|
|
MakeBuffersResidentCommand(
|
|
HostQueue& queue,
|
|
cl_command_type type,
|
|
const EventWaitList& eventWaitList,
|
|
const std::vector<amd::Memory*>& memObjects,
|
|
cl_bus_address_amd* busAddr)
|
|
: Command(queue, type, eventWaitList),
|
|
busAddresses_(busAddr)
|
|
{
|
|
std::vector<amd::Memory*>::const_iterator itr;
|
|
for (itr = memObjects.begin(); itr != memObjects.end(); itr++) {
|
|
(*itr)->retain();
|
|
memObjects_.push_back(*itr);
|
|
}
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device)
|
|
{
|
|
device.submitMakeBuffersResident(*this);
|
|
}
|
|
|
|
void releaseResources()
|
|
{
|
|
std::vector<amd::Memory*>::const_iterator itr;
|
|
for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) {
|
|
(*itr)->release();
|
|
}
|
|
Command::releaseResources();
|
|
}
|
|
|
|
bool validateMemory();
|
|
const std::vector<amd::Memory*>& memObjects() const { return memObjects_; }
|
|
cl_bus_address_amd* busAddress() const {return busAddresses_;}
|
|
|
|
};
|
|
|
|
//! A deallocation command used to free SVM or system pointers.
|
|
class SvmFreeMemoryCommand : public Command
|
|
{
|
|
public:
|
|
typedef void (CL_CALLBACK *freeCallBack)
|
|
(cl_command_queue, cl_uint, void**, void*);
|
|
|
|
private:
|
|
std::vector<void*> svmPointers_; //!< List of pointers to deallocate
|
|
freeCallBack pfnFreeFunc_; //!< User-defined deallocation callback
|
|
void* userData_; //!< Data passed to user-defined callback
|
|
|
|
public:
|
|
SvmFreeMemoryCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
cl_uint numSvmPointers,
|
|
void** svmPointers,
|
|
freeCallBack pfnFreeFunc,
|
|
void* userData) :
|
|
Command(queue, CL_COMMAND_SVM_FREE, eventWaitList),
|
|
//! We copy svmPointers since it can be reused/deallocated after
|
|
// command creation
|
|
svmPointers_(svmPointers, svmPointers + numSvmPointers),
|
|
pfnFreeFunc_(pfnFreeFunc),
|
|
userData_(userData) { }
|
|
|
|
virtual void submit(device::VirtualDevice& device)
|
|
{
|
|
device.submitSvmFreeMemory(*this);
|
|
}
|
|
|
|
std::vector<void*>& svmPointers() { return svmPointers_; }
|
|
|
|
freeCallBack pfnFreeFunc() const { return pfnFreeFunc_; }
|
|
|
|
void* userData() const { return userData_; }
|
|
};
|
|
|
|
//! A copy command where the origin and destination memory locations are SVM
|
|
// pointers.
|
|
class SvmCopyMemoryCommand : public Command
|
|
{
|
|
private:
|
|
void* dst_; //!< Destination pointer
|
|
const void* src_; //!< Source pointer
|
|
size_t srcSize_; //!< Size (in bytes) of the source buffer
|
|
|
|
public:
|
|
SvmCopyMemoryCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
void* dst,
|
|
const void* src,
|
|
size_t srcSize) :
|
|
Command(queue, CL_COMMAND_SVM_MEMCPY, eventWaitList),
|
|
dst_(dst),
|
|
src_(src),
|
|
srcSize_(srcSize) { }
|
|
|
|
virtual void submit(device::VirtualDevice& device)
|
|
{
|
|
device.submitSvmCopyMemory(*this);
|
|
}
|
|
|
|
void* dst() const { return dst_; }
|
|
|
|
const void* src() const { return src_; }
|
|
|
|
size_t srcSize() const { return srcSize_; }
|
|
};
|
|
|
|
//! A fill command where the pattern and destination memory locations are SVM
|
|
// pointers.
|
|
class SvmFillMemoryCommand : public Command
|
|
{
|
|
private:
|
|
void* dst_; //!< Destination pointer
|
|
char pattern_[FillMemoryCommand::MaxFillPatterSize]; //!< The fill pattern
|
|
size_t patternSize_; //!< Pattern size
|
|
size_t times_; //!< Number of times to fill the
|
|
// destination buffer with the source buffer
|
|
|
|
public:
|
|
SvmFillMemoryCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
void* dst,
|
|
const void* pattern,
|
|
size_t patternSize,
|
|
size_t size) :
|
|
Command(queue, CL_COMMAND_SVM_MEMFILL, eventWaitList),
|
|
dst_(dst),
|
|
patternSize_(patternSize),
|
|
times_(size / patternSize)
|
|
{
|
|
assert(amd::isMultipleOf(size, patternSize));
|
|
//! We copy the pattern buffer since it can be reused/deallocated after
|
|
// command creation
|
|
memcpy(pattern_, pattern, patternSize);
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device)
|
|
{
|
|
device.submitSvmFillMemory(*this);
|
|
}
|
|
|
|
void* dst() const { return dst_; }
|
|
|
|
const char* pattern() const { return pattern_; }
|
|
|
|
size_t patternSize() const { return patternSize_; }
|
|
|
|
size_t times() const { return times_; }
|
|
};
|
|
|
|
/*! \brief A map memory command where the pointer to be mapped is a SVM shared
|
|
* buffer
|
|
*/
|
|
class SvmMapMemoryCommand : public Command
|
|
{
|
|
private:
|
|
Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped
|
|
Coord3D size_; //!< the map size
|
|
Coord3D origin_; //!< the origin of the mapped svm pointer shift from the beginning of svm space allocated
|
|
cl_map_flags flags_; //!< map flags
|
|
void* svmPtr_;
|
|
|
|
public:
|
|
SvmMapMemoryCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
Memory* svmMem,
|
|
const size_t size,
|
|
const size_t offset,
|
|
cl_map_flags flags,
|
|
void* svmPtr)
|
|
: Command(queue, CL_COMMAND_SVM_MAP, eventWaitList)
|
|
, svmMem_(svmMem)
|
|
, size_(size)
|
|
, origin_(offset)
|
|
, flags_(flags)
|
|
, svmPtr_(svmPtr)
|
|
{
|
|
}
|
|
|
|
virtual void submit(device::VirtualDevice& device)
|
|
{
|
|
device.submitSvmMapMemory(*this);
|
|
}
|
|
|
|
Memory* getSvmMem() const {return svmMem_;}
|
|
|
|
Coord3D size() const {return size_;}
|
|
|
|
cl_map_flags mapFlags() const {return flags_;}
|
|
|
|
Coord3D origin() const {return origin_;}
|
|
|
|
void* svmPtr() const { return svmPtr_; }
|
|
|
|
bool isEntireMemory() const;
|
|
};
|
|
|
|
/*! \brief An unmap memory command where the unmapped pointer is a SVM shared
|
|
* buffer
|
|
*/
|
|
class SvmUnmapMemoryCommand : public Command
|
|
{
|
|
private:
|
|
Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped
|
|
void* svmPtr_; //!< SVM pointer
|
|
|
|
public:
|
|
SvmUnmapMemoryCommand(
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
Memory* svmMem,
|
|
void* svmPtr)
|
|
: Command(queue, CL_COMMAND_SVM_UNMAP, eventWaitList)
|
|
, svmMem_(svmMem)
|
|
, svmPtr_(svmPtr)
|
|
{}
|
|
|
|
virtual void submit(device::VirtualDevice& device)
|
|
{
|
|
device.submitSvmUnmapMemory(*this);
|
|
}
|
|
|
|
Memory* getSvmMem() const { return svmMem_; }
|
|
|
|
void* svmPtr() const { return svmPtr_; }
|
|
};
|
|
|
|
/*! \brief A generic transfer memory from/to file command.
|
|
*
|
|
* \details Currently supports buffers only. Buffers
|
|
* are treated as 1D structures so origin_[0] and size_[0]
|
|
* are equivalent to offset_ and count_ respectively.
|
|
*/
|
|
class TransferBufferFileCommand : public OneMemoryArgCommand
|
|
{
|
|
public:
|
|
static const uint NumStagingBuffers = 2;
|
|
static const size_t StagingBufferSize = 4 * Mi;
|
|
static const uint StagingBufferMemType = CL_MEM_USE_PERSISTENT_MEM_AMD;
|
|
|
|
protected:
|
|
const Coord3D origin_; //!< Origin of the region to write to
|
|
const Coord3D size_; //!< Size of the region to write to
|
|
LiquidFlashFile* file_; //!< The file object for data read
|
|
size_t fileOffset_; //!< Offset in the file for data read
|
|
amd::Memory* staging_[NumStagingBuffers]; //!< Staging buffers for transfer
|
|
|
|
public:
|
|
TransferBufferFileCommand(
|
|
cl_command_type type,
|
|
HostQueue& queue,
|
|
const EventWaitList& eventWaitList,
|
|
Memory& memory, const Coord3D& origin,
|
|
const Coord3D& size, LiquidFlashFile* file, size_t fileOffset)
|
|
: OneMemoryArgCommand(queue, type,
|
|
eventWaitList, memory)
|
|
, origin_(origin)
|
|
, size_(size)
|
|
, file_(file)
|
|
, fileOffset_(fileOffset)
|
|
{
|
|
// Sanity checks
|
|
assert(size.c[0] > 0 && "invalid");
|
|
for (uint i = 0; i < NumStagingBuffers; ++i) {
|
|
staging_[i] = NULL;
|
|
}
|
|
}
|
|
|
|
virtual void releaseResources();
|
|
|
|
virtual void submit(device::VirtualDevice& device);
|
|
|
|
//! Return the memory object to write to
|
|
Memory& memory() const { return *memory_; }
|
|
|
|
//! Return the host memory to read from
|
|
LiquidFlashFile* file() const { return file_; }
|
|
|
|
//! Returns file offset
|
|
size_t fileOffset() const { return fileOffset_; }
|
|
|
|
//! Return the region origin
|
|
const Coord3D& origin() const { return origin_; }
|
|
//! Return the region size
|
|
const Coord3D& size() const { return size_; }
|
|
|
|
//! Return the staging buffer for transfer
|
|
Memory& staging(uint i) const { return *staging_[i]; }
|
|
|
|
bool validateMemory();
|
|
};
|
|
|
|
/*! @}
|
|
* @}
|
|
*/
|
|
|
|
} // namespace amd
|
|
|
|
#endif /*COMMAND_HPP_*/
|