// // Copyright 2010 Advanced Micro Devices, Inc. All rights reserved. // /*! \file command.hpp * \brief Declarations for Event, Command and HostQueue objects. * * \author Laurent Morichetti (laurent.morichetti@amd.com) * \date October 2008 */ #ifndef COMMAND_HPP_ #define COMMAND_HPP_ #include "top.hpp" #include "thread/monitor.hpp" #include "thread/thread.hpp" #include "platform/agent.hpp" #include "platform/object.hpp" #include "platform/context.hpp" #include "platform/ndrange.hpp" #include "platform/kernel.hpp" #include "device/device.hpp" #include "utils/concurrent.hpp" #include "platform/memory.hpp" #include "CL/cl_ext.h" #include #include #include #include namespace amd { /*! \addtogroup Runtime * @{ * * \addtogroup Commands Event, Commands and Command-Queue * @{ */ class Command; class HostQueue; /*! \brief Encapsulates the status of a command. * * \details An event object encapsulates the status of a Command * it is associated with and can be used to synchronize operations * in a Context. */ class Event : public RuntimeObject { typedef void (CL_CALLBACK * CallBackFunction)( cl_event event, cl_int command_exec_status, void *user_data); struct CallBackEntry : public HeapObject { struct CallBackEntry* next_; //!< the next entry in the callback list. std::atomic callback_; //!< callback function pointer. void* data_; //!< user data passed to the callback function. cl_int status_; //!< execution status triggering the callback. CallBackEntry(cl_int status, CallBackFunction callback, void* data) : callback_(callback), data_(data), status_(status) { } }; public: typedef std::vector EventWaitList; private: Monitor lock_; SharedReference context_; //!< context associated with this event. std::atomic callbacks_; //!< linked list of callback entries. volatile cl_int status_; //!< current execution status. std::atomic_flag notified_; //!< Command queue was notified protected: static const EventWaitList nullWaitList; struct ProfilingInfo { ProfilingInfo(bool enabled = false) : enabled_(enabled) { if (enabled) { queued_ = 0ULL; submitted_ = 0ULL; start_ = 0ULL; end_ = 0ULL; } } uint64_t queued_; uint64_t submitted_; uint64_t start_; uint64_t end_; const bool enabled_; } profilingInfo_; //! Construct a new event in the given \a context. Event(Context& context); //! Construct a new event associated to the given command \a queue. Event(HostQueue& queue); //! Destroy the event. virtual ~Event(); //! Release the resources associated with this event. virtual void releaseResources() { } //! Record the profiling info for the given change of \a status. // If the given \a timeStamp is 0 and profiling is enabled, // use the current host clock time instead. uint64_t recordProfilingInfo(cl_int status, uint64_t timeStamp = 0); //! Process the callbacks for the given \a status change. void processCallbacks(cl_int status) const; public: //! Return the context for this event. Context& context() { return context_(); } const Context& context() const { return context_(); } //! Return the command this event is associated with. inline Command& command(); inline const Command& command() const; //! Return the profiling info. const ProfilingInfo& profilingInfo() const { return profilingInfo_; } //! Return this command's execution status. cl_int status() const { return status_; } //! Insert the given \a callback into the callback stack. bool setCallback(cl_int status, CallBackFunction callback, void* data); /*! \brief Set the event status. * * \details If the status becomes CL_COMPLETE, notify all threads * awaiting this command's completion. If the given \a timeStamp is 0 * and profiling is enabled, use the current host clock time instead. * * \see amd::Event::awaitCompletion */ bool setStatus(cl_int status, uint64_t timeStamp = 0); //! Signal all threads waiting on this event. void signal() { ScopedLock lock(lock_); lock_.notifyAll(); } /*! \brief Suspend the current thread until the status of the Command * associated with this event changes to CL_COMPLETE. Return true if the * command successfully completed. */ virtual bool awaitCompletion(); /*! \brief Notifies current command queue about execution status */ bool notifyCmdQueue(); //! RTTI internal implementation virtual ObjectType objectType() const {return ObjectTypeEvent;} }; /*! \brief An operation that is submitted to a command queue. * * %Command is the abstract base type of all OpenCL operations * submitted to a HostQueue for execution. Classes derived from * %Command must implement the submit() function. * */ class Command : public Event { private: //! The command queue this command is enqueue into. NULL if not yet enqueue. HostQueue* queue_; //! Next GPU command in the queue list Command* next_; const cl_command_type type_; //!< This command's OpenCL type. volatile cl_int exception_; //!< The first raised exception. void* data_; protected: //! The Events that need to complete before this command is submitted. EventWaitList eventWaitList_; //! Construct a new command of the given OpenCL type. Command( HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList = nullWaitList); //! Construct a new command of the given OpenCL type. Command(Context& context, cl_command_type type) : Event(context), queue_(NULL), next_(NULL), type_(type), exception_(0), data_(NULL), eventWaitList_(nullWaitList) { } //! Destroy the command object. virtual ~Command(); bool terminate() { if (Agent::shouldPostEventEvents() && type() != 0) { Agent::postEventFree(as_cl(static_cast(this))); } return true; } public: //! Return the queue this command is enqueued into. HostQueue* queue() const { return queue_; } //! Enqueue this command into the associated command queue. void enqueue(); //! Return the event encapsulating this command's status. const Event& event() const { return *this; } Event& event() { return *this; } //! Return the list of events this command needs to wait on before dispatch const EventWaitList& eventWaitList() const { return eventWaitList_; } //! Return this command's OpenCL type. cl_command_type type() const { return type_; } //! Return the first raised exception or 0 if none. cl_int exception() const { return exception_; } //! Set the exception for this command. void setException(cl_int exception) { exception_ = exception; } //! Return the opaque, device specific data for this command. void* data() const { return data_; } //! Set the opaque, device specific data for this command. void setData(void* data) { data_ = data; } /*! \brief The execution engine for this command. * * \details All derived class must implement this virtual function. * * \note This function will execute in the command queue thread. */ virtual void submit(device::VirtualDevice& device) = 0; //! Release the resources associated with this event. virtual void releaseResources(); //! Set the next GPU command void setNext(Command* next) { next_ = next; } //! Get the next GPU command Command* getNext() const { return next_; } }; class UserEvent : public Command { public: UserEvent(Context& context) : Command(context, CL_COMMAND_USER) { setStatus(CL_SUBMITTED); } virtual void submit(device::VirtualDevice& device) { ShouldNotCallThis(); } }; class ClGlEvent : public Command { private: bool waitForFence(); public: ClGlEvent(Context& context) : Command(context, CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR) { setStatus(CL_SUBMITTED); } virtual void submit(device::VirtualDevice& device) { ShouldNotCallThis(); } bool awaitCompletion() { return waitForFence(); } }; inline Command& Event::command() { return *static_cast(this); } inline const Command& Event::command() const { return *static_cast(this); } class Kernel; class NDRangeContainer; //! A memory command that holds a single memory object reference. // class OneMemoryArgCommand : public Command { protected: Memory* memory_; public: OneMemoryArgCommand( HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList, Memory& memory) : Command(queue, type, eventWaitList), memory_(&memory) { memory_->retain(); } virtual void releaseResources() { memory_->release(); DEBUG_ONLY(memory_ = NULL); Command::releaseResources(); } bool validateMemory(); }; //! A memory command that holds a single memory object reference. // class TwoMemoryArgsCommand : public Command { protected: Memory* memory1_; Memory* memory2_; public: TwoMemoryArgsCommand( HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList, Memory& memory1, Memory& memory2) : Command(queue, type, eventWaitList), memory1_(&memory1), memory2_(&memory2) { memory1_->retain(); memory2_->retain(); } virtual void releaseResources() { memory1_->release(); memory2_->release(); DEBUG_ONLY(memory1_ = memory2_ = NULL); Command::releaseResources(); } bool validateMemory(); }; /*! \brief A generic read memory command. * * \details Used for operations on both buffers and images. Backends * are expected to handle any required translation. Buffers * are treated as 1D structures so origin_[0] and size_[0] * are equivalent to offset_ and count_ respectively. * * @todo Find a cleaner way of merging the row and slice pitch concepts at this level. * */ class ReadMemoryCommand : public OneMemoryArgCommand { private: Coord3D origin_; //!< Origin of the region to read. Coord3D size_; //!< Size of the region to read. void* hostPtr_; //!< The host pointer destination. size_t rowPitch_; //!< Row pitch (for image operations) size_t slicePitch_; //!< Slice pitch (for image operations) BufferRect bufRect_; //!< Buffer rectangle information BufferRect hostRect_; //!< Host memory rectangle information public: //! Construct a new ReadMemoryCommand ReadMemoryCommand( HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, Coord3D origin, Coord3D size, void* hostPtr, size_t rowPitch = 0, size_t slicePitch = 0) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), origin_(origin), size_(size), hostPtr_(hostPtr), rowPitch_(rowPitch), slicePitch_(slicePitch) { // Sanity checks assert(hostPtr != NULL && "hostPtr cannot be null"); assert(size.c[0] > 0 && "invalid"); } //! Construct a new ReadMemoryCommand ReadMemoryCommand( HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, Coord3D origin, Coord3D size, void* hostPtr, const BufferRect& bufRect, const BufferRect& hostRect) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory) , origin_(origin) , size_(size) , hostPtr_(hostPtr) , rowPitch_(0) , slicePitch_(0) , bufRect_(bufRect) , hostRect_(hostRect) { // Sanity checks assert(hostPtr != NULL && "hostPtr cannot be null"); assert(size.c[0] > 0 && "invalid"); } virtual void submit(device::VirtualDevice& device) { device.submitReadMemory(*this); } //! Return the memory object to read from. Memory& source() const { return *memory_; } //! Return the host memory to write to void* destination() const { return hostPtr_; } //! Return the origin of the region to read const Coord3D& origin() const { return origin_; } //! Return the size of the region to read const Coord3D& size() const { return size_; } //! Return the row pitch size_t rowPitch() const { return rowPitch_; } //! Return the slice pitch size_t slicePitch() const { return slicePitch_; } //! Return the buffer rectangle information const BufferRect& bufRect() const { return bufRect_; } //! Return the host rectangle information const BufferRect& hostRect() const { return hostRect_; } //! Return true if the entire memory object is read. bool isEntireMemory() const; }; /*! \brief A generic write memory command. * * \details Used for operations on both buffers and images. Backends * are expected to handle any required translations. Buffers * are treated as 1D structures so origin_[0] and size_[0] * are equivalent to offset_ and count_ respectively. */ class WriteMemoryCommand : public OneMemoryArgCommand { private: Coord3D origin_; //!< Origin of the region to write to. Coord3D size_; //!< Size of the region to write to. const void* hostPtr_; //!< The host pointer source. size_t rowPitch_; //!< Row pitch (for image operations) size_t slicePitch_; //!< Slice pitch (for image operations) BufferRect bufRect_; //!< Buffer rectangle information BufferRect hostRect_; //!< Host memory rectangle information public: WriteMemoryCommand( HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, Coord3D origin, Coord3D size, const void* hostPtr, size_t rowPitch = 0, size_t slicePitch = 0) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), origin_(origin), size_(size), hostPtr_(hostPtr), rowPitch_(rowPitch), slicePitch_(slicePitch) { // Sanity checks assert(hostPtr != NULL && "hostPtr cannot be null"); assert(size.c[0] > 0 && "invalid"); } WriteMemoryCommand( HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, Coord3D origin, Coord3D size, const void* hostPtr, const BufferRect& bufRect, const BufferRect& hostRect) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), origin_(origin), size_(size), hostPtr_(hostPtr), rowPitch_(0), slicePitch_(0), bufRect_(bufRect), hostRect_(hostRect) { // Sanity checks assert(hostPtr != NULL && "hostPtr cannot be null"); assert(size.c[0] > 0 && "invalid"); } virtual void submit(device::VirtualDevice& device) { device.submitWriteMemory(*this); } //! Return the host memory to read from const void* source() const { return hostPtr_; } //! Return the memory object to write to. Memory& destination() const { return *memory_; } //! Return the region origin const Coord3D& origin() const { return origin_; } //! Return the region size const Coord3D& size() const { return size_; } //! Return the row pitch size_t rowPitch() const { return rowPitch_; } //! Return the slice pitch size_t slicePitch() const { return slicePitch_; } //! Return the buffer rectangle information const BufferRect& bufRect() const { return bufRect_; } //! Return the host rectangle information const BufferRect& hostRect() const { return hostRect_; } //! Return true if the entire memory object is written. bool isEntireMemory() const; }; /*! \brief A generic fill memory command. * * \details Used for operations on both buffers and images. Backends * are expected to handle any required translations. Buffers * are treated as 1D structures so origin_[0] and size_[0] * are equivalent to offset_ and count_ respectively. */ class FillMemoryCommand : public OneMemoryArgCommand { public: const static size_t MaxFillPatterSize = sizeof(cl_double16); private: Coord3D origin_; //!< Origin of the region to write to. Coord3D size_; //!< Size of the region to write to. char pattern_[MaxFillPatterSize]; //!< The fill pattern size_t patternSize_; //!< Pattern size public: FillMemoryCommand( HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, const void* pattern, size_t patternSize, Coord3D origin, Coord3D size) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory) , origin_(origin) , size_(size) , patternSize_(patternSize) { // Sanity checks assert(pattern != NULL && "pattern cannot be null"); assert(size.c[0] > 0 && "invalid"); memcpy(pattern_, pattern, patternSize); } virtual void submit(device::VirtualDevice& device) { device.submitFillMemory(*this); } //! Return the pattern memory to fill with const void* pattern() const { return reinterpret_cast(pattern_); } //! Return the pattern size const size_t patternSize() const { return patternSize_; } //! Return the memory object to write to. Memory& memory() const { return *memory_; } //! Return the region origin const Coord3D& origin() const { return origin_; } //! Return the region size const Coord3D& size() const { return size_; } //! Return true if the entire memory object is written. bool isEntireMemory() const; }; /*! \brief A generic copy memory command * * \details Used for both buffers and images. Backends are expected * to handle any required translation. Buffers are treated * as 1D structures so origin_[0] and size_[0] are * equivalent to offset_ and count_ respectively. */ class CopyMemoryCommand : public TwoMemoryArgsCommand { private: Coord3D srcOrigin_; //!< Origin of the source region. Coord3D dstOrigin_; //!< Origin of the destination region. Coord3D size_; //!< Size of the region to copy. BufferRect srcRect_; //!< Source buffer rectangle information BufferRect dstRect_; //!< Destination buffer rectangle information public: CopyMemoryCommand( HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& srcMemory, Memory& dstMemory, Coord3D srcOrigin, Coord3D dstOrigin, Coord3D size) : TwoMemoryArgsCommand( queue, cmdType, eventWaitList, srcMemory, dstMemory), srcOrigin_(srcOrigin), dstOrigin_(dstOrigin), size_(size) { // Sanity checks assert(size.c[0] > 0 && "invalid"); } CopyMemoryCommand( HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& srcMemory, Memory& dstMemory, Coord3D srcOrigin, Coord3D dstOrigin, Coord3D size, const BufferRect& srcRect, const BufferRect& dstRect) : TwoMemoryArgsCommand( queue, cmdType, eventWaitList, srcMemory, dstMemory), srcOrigin_(srcOrigin), dstOrigin_(dstOrigin), size_(size), srcRect_(srcRect), dstRect_(dstRect) { // Sanity checks assert(size.c[0] > 0 && "invalid"); } virtual void submit(device::VirtualDevice& device) { device.submitCopyMemory(*this); } //! Return the host memory to read from Memory& source() const { return *memory1_; } //! Return the memory object to write to. Memory& destination() const { return *memory2_; } //! Return the source origin const Coord3D& srcOrigin() const { return srcOrigin_; } //! Return the offset in bytes in the destination. const Coord3D& dstOrigin() const { return dstOrigin_; } //! Return the number of bytes to copy. const Coord3D& size() const { return size_; } //! Return the source buffer rectangle information const BufferRect& srcRect() const { return srcRect_; } //! Return the destination buffer rectangle information const BufferRect& dstRect() const { return dstRect_; } //! Return true if the both memories are is read/written in their entirety. bool isEntireMemory() const; }; /*! \brief A generic map memory command. Makes a memory object accessible to the host. * * @todo:dgladdin Need to think more about how the pitch parameters operate in * the context of unified buffer/image commands. */ class MapMemoryCommand: public OneMemoryArgCommand { private: cl_map_flags mapFlags_; //!< Flags controlling the map. bool blocking_; //!< True for blocking maps Coord3D origin_; //!< Origin of the region to map. Coord3D size_; //!< Size of the region to map. public: //! Construct a new MapMemoryCommand MapMemoryCommand( HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, cl_map_flags mapFlags, bool blocking, Coord3D origin, Coord3D size, size_t* imgRowPitch = NULL, size_t* imgSlicePitch = NULL) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), mapFlags_(mapFlags), blocking_(blocking), origin_(origin), size_(size) { // Sanity checks assert(size.c[0] > 0 && "invalid"); } virtual void submit(device::VirtualDevice& device) { device.submitMapMemory(*this); } //! Read the memory object Memory& memory() const { return *memory_; } //! Read the map control flags cl_map_flags mapFlags() const { return mapFlags_; } //! Read the origin const Coord3D& origin() const { return origin_; } //! Read the size const Coord3D& size() const { return size_; } //! Read the blocking flag bool blocking() const { return blocking_; } //! Returns true if the entire memory object is mapped bool isEntireMemory() const; }; /*! \brief A generic unmap memory command. * * @todo:dgladdin Need to think more about how the pitch parameters operate in * the context of unified buffer/image commands. */ class UnmapMemoryCommand: public OneMemoryArgCommand { private: //! Host-space pointer that the object is currently mapped at void* mapPtr_; public: //! Construct a new MapMemoryCommand UnmapMemoryCommand( HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, void* mapPtr) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), mapPtr_(mapPtr) { } virtual void submit(device::VirtualDevice& device) { device.submitUnmapMemory(*this); } virtual void releaseResources(); //! Read the memory object Memory& memory() const { return *memory_; } //! Read the map pointer void* mapPtr() const { return mapPtr_; } }; /*! \brief Migrate memory objects command. * * \details Used for operations on both buffers and images. Backends * are expected to handle any required translations. */ class MigrateMemObjectsCommand: public Command { private: cl_mem_migration_flags migrationFlags_; //!< Migration flags std::vector memObjects_; //!< The list of memory objects public: //! Construct a new AcquireExtObjectsCommand MigrateMemObjectsCommand( HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList, const std::vector& memObjects, cl_mem_migration_flags flags) : Command(queue, type, eventWaitList) , migrationFlags_(flags) { std::vector::const_iterator itr; for (itr = memObjects.begin(); itr != memObjects.end(); itr++) { (*itr)->retain(); memObjects_.push_back(*itr); } } virtual void submit(device::VirtualDevice& device) { device.submitMigrateMemObjects(*this); } //! Release all resources associated with this command void releaseResources() { std::vector::const_iterator itr; for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) { (*itr)->release(); } Command::releaseResources(); } //! Returns the migration flags cl_mem_migration_flags migrationFlags() const { return migrationFlags_; } //! Returns the number of memory objects in the command cl_uint numMemObjects() const { return (cl_uint) memObjects_.size(); } //! Returns a pointer to the memory objects const std::vector& memObjects() const { return memObjects_; } bool validateMemory(); }; //! To execute a kernel on a specific device. class NDRangeKernelCommand : public Command { private: Kernel& kernel_; NDRangeContainer sizes_; address parameters_; public: //! Construct an ExecuteKernel command NDRangeKernelCommand( HostQueue& queue, const EventWaitList& eventWaitList, Kernel& kernel, const NDRangeContainer& sizes); virtual void submit(device::VirtualDevice& device) { device.submitKernel(*this); } //! Release all resources associated with this command ( void releaseResources(); //! Return the kernel. const Kernel& kernel() const { return kernel_; } //! Return the parameters given to this kernel. const_address parameters() const { return parameters_; } //! Return the kernel NDRange. const NDRangeContainer& sizes() const { return sizes_; } //! Set the local work size. void setLocalWorkSize(const NDRange& local) { sizes_.local() = local; } cl_int validateMemory(); }; class NativeFnCommand : public Command { private: void (CL_CALLBACK *nativeFn_)(void *); char* args_; size_t argsSize_; std::vector memObjects_; std::vector memOffsets_; public: NativeFnCommand( HostQueue& queue, const EventWaitList& eventWaitList, void (CL_CALLBACK * nativeFn)(void*), const void* args, size_t argsSize, size_t numMemObjs, const cl_mem* memObjs, const void** memLocs); ~NativeFnCommand() { delete[] args_; } void releaseResources() { std::for_each(memObjects_.begin(), memObjects_.end(), std::mem_fun(&Memory::release)); Command::releaseResources(); } virtual void submit(device::VirtualDevice& device) { device.submitNativeFn(*this); } cl_int invoke(); }; class Marker : public Command { public: //! Create a new Marker Marker( HostQueue& queue, bool userVisible, const EventWaitList& eventWaitList = nullWaitList, const Event* waitingEvent = NULL) : Command(queue, userVisible ? CL_COMMAND_MARKER : 0, eventWaitList) , waitingEvent_(waitingEvent) { } //! The actual command implementation. virtual void submit(device::VirtualDevice& device) { device.submitMarker(*this); } const Event* waitingEvent() const { return waitingEvent_; } private: const Event* waitingEvent_; //!< Waiting event associated with the marker }; /*! \brief Maps CL objects created from external ones and syncs the contents (blocking). * */ class ExtObjectsCommand: public Command { private: std::vector memObjects_; //!< The list of Memory based classes public: //! Construct a new AcquireExtObjectsCommand ExtObjectsCommand( HostQueue& queue, const EventWaitList& eventWaitList, cl_uint num_objects, const std::vector& memoryObjects, cl_command_type type) : Command(queue, type, eventWaitList) { for(std::vector::const_iterator itr = memoryObjects.begin(); itr != memoryObjects.end(); itr++) { (*itr)->retain(); memObjects_.push_back(*itr); } } //! Release all resources associated with this command void releaseResources() { for(std::vector::const_iterator itr = memObjects_.begin(); itr != memObjects_.end(); itr++) { (*itr)->release(); } Command::releaseResources(); } //! Get number of GL objects cl_uint getNumObjects() {return (cl_uint) memObjects_.size();} //! Get pointer to GL object list const std::vector& getMemList() const {return memObjects_;} bool validateMemory(); virtual bool processGLResource(device::Memory * mem) = 0 ; }; class AcquireExtObjectsCommand: public ExtObjectsCommand { public: //! Construct a new AcquireExtObjectsCommand AcquireExtObjectsCommand( HostQueue& queue, const EventWaitList& eventWaitList, cl_uint num_objects, const std::vector& memoryObjects, cl_command_type type) : ExtObjectsCommand(queue, eventWaitList, num_objects, memoryObjects, type) { } virtual void submit(device::VirtualDevice& device) { device.submitAcquireExtObjects(*this); } virtual bool processGLResource(device::Memory * mem); }; class ReleaseExtObjectsCommand: public ExtObjectsCommand { public: //! Construct a new ReleaseExtObjectsCommand ReleaseExtObjectsCommand( HostQueue& queue, const EventWaitList& eventWaitList, cl_uint num_objects, const std::vector& memoryObjects, cl_command_type type) : ExtObjectsCommand(queue, eventWaitList, num_objects, memoryObjects, type) { } virtual void submit(device::VirtualDevice& device) { device.submitReleaseExtObjects(*this); } virtual bool processGLResource(device::Memory * mem); }; class PerfCounterCommand : public Command { public: typedef std::vector PerfCounterList; enum State { Begin = 0, //!< Issue a begin command End = 1 //!< Issue an end command }; //! Construct a new PerfCounterCommand PerfCounterCommand( HostQueue& queue, const EventWaitList& eventWaitList, const PerfCounterList& counterList, State state) : Command(queue, 0, eventWaitList) , counterList_(counterList) , state_(state) { } //! Gets the number of PerfCounter objects size_t getNumCounters() const { return counterList_.size(); } //! Gets the list of all counters const PerfCounterList& getCounters() const { return counterList_; } //! Gets the performance counter state State getState() const { return state_; } //! Process the command on the device queue virtual void submit(device::VirtualDevice& device) { device.submitPerfCounter(*this); } private: PerfCounterList counterList_; //!< The list of performance counters State state_; //!< State of the issued command }; /*! \brief Thread Trace memory objects command. * * \details Used for bindig memory objects to therad trace mechanism. */ class ThreadTraceMemObjectsCommand: public Command { public: //! Construct a new ThreadTraceMemObjectsCommand ThreadTraceMemObjectsCommand( HostQueue& queue, const EventWaitList& eventWaitList, size_t numMemoryObjects, const cl_mem* memoryObjects, size_t sizeMemoryObject, ThreadTrace& threadTrace, cl_command_type type) : Command(queue, type, eventWaitList), sizeMemObjects_(sizeMemoryObject), threadTrace_(threadTrace) { memObjects_.resize(numMemoryObjects); for (size_t i = 0; i < numMemoryObjects; ++i) { Memory* obj = as_amd(memoryObjects[i]); obj->retain(); memObjects_[i] = obj; } } //! Release all resources associated with this command void releaseResources() { for(std::vector::const_iterator itr = memObjects_.begin(); itr != memObjects_.end(); itr++) { (*itr)->release(); } Command::releaseResources(); } //! Get number of CL memory objects cl_uint getNumObjects() {return (cl_uint) memObjects_.size();} //! Get pointer to CL memory object list const std::vector& getMemList() const {return memObjects_;} //! Submit command to bind memory object to the Thread Trace mechanism virtual void submit(device::VirtualDevice& device) { device.submitThreadTraceMemObjects(*this); } //! Return the thread trace object. ThreadTrace& getThreadTrace() const { return threadTrace_; } //! Get memory object size const size_t getMemoryObjectSize() const {return sizeMemObjects_;} //! Validate memory bound to the thread thrace bool validateMemory(); private: std::vector memObjects_; //!< The list of memory objects,bound to the thread trace size_t sizeMemObjects_; //!< The size of each memory object from memObjects_ list (all memory objects have the smae size) ThreadTrace& threadTrace_; //!< The Thread Trace object }; /*! \brief Thread Trace command. * * \details Used for issue begin/end/pause/resume for therad trace object. */ class ThreadTraceCommand : public Command { private: void *threadTraceConfig_; public: enum State { Begin = 0, //!< Issue a begin command End = 1, //!< Issue an end command Pause = 2, //!< Issue a pause command Resume = 3 //!< Issue a resume command }; //! Construct a new PerfCounterCommand ThreadTraceCommand( HostQueue& queue, const EventWaitList& eventWaitList, const void *threadTraceConfig, ThreadTrace& threadTrace, State state, cl_command_type type) : Command(queue, type, eventWaitList) , threadTrace_(threadTrace) , state_(state) { const unsigned int size = *static_cast(threadTraceConfig); threadTraceConfig_ = static_cast(new char[size]); if (threadTraceConfig_) { memcpy(threadTraceConfig_, threadTraceConfig, size); } } //! Get the thread trace object ThreadTrace& getThreadTrace() const { return threadTrace_; } //! Get the thread trace command state State getState() const { return state_; } //! Process the command on the device queue virtual void submit(device::VirtualDevice& device) { device.submitThreadTrace(*this); } // Accessor methods void* threadTraceConfig() const { return threadTraceConfig_; } private: ThreadTrace& threadTrace_; //!< The list of performance counters State state_; //!< State of the issued command }; #if cl_amd_open_video class RunVideoProgramCommand: public OneMemoryArgCommand { private: //! Store pointer to video data structure void* videoData_; public: //! Construct a new AcquireExtObjectsCommand RunVideoProgramCommand( HostQueue& queue, const EventWaitList& eventWaitList, void* videoData, Memory& memory, cl_command_type cmdType) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory) , videoData_(NULL) { unsigned int size = *static_cast(videoData); videoData_ = static_cast(new char[size]); if (videoData_) { memcpy(videoData_, videoData, size); } } virtual ~RunVideoProgramCommand() { if (videoData_) delete[] static_cast(videoData_); } virtual void submit(device::VirtualDevice& device) { device.submitRunVideoProgram(*this); } // Accessor methods void* videoData() const { return videoData_; } //! Read the memory object Memory& memory() const { return *memory_; } }; class SetVideoSessionCommand: public Command { public: enum Operation { CloseSession, ConfigTypePictureControl, ConfigTypeRateControl, ConfigTypeMotionEstimation, ConfigTypeRDO, SendEncodeConfig, GetDeviceCapVCE, EncodeQueryTaskDescription, ReleaseOutputResource }; //! Construct a new SetVideoSessionCommand SetVideoSessionCommand( HostQueue& queue, const EventWaitList& eventWaitList, Operation oper, void* paramValue = 0 ) : Command(queue, 0, eventWaitList) , oper_(oper) , paramValue_(paramValue) , numBuffers_(0) , paramValue2_(NULL) { } //! Construct a new SetVideoSessionCommand SetVideoSessionCommand( HostQueue& queue, const EventWaitList& eventWaitList, Operation oper, void* paramValue, uint numBuffers ) : Command(queue, 0, eventWaitList) , oper_(oper) , paramValue_(paramValue) , numBuffers_(numBuffers) , paramValue2_(NULL) { } //! Construct a new SetVideoSessionCommand SetVideoSessionCommand( HostQueue& queue, const EventWaitList& eventWaitList, Operation oper, uint numBuffers, void* paramValue, uint* paramValue2 ) : Command(queue, 0, eventWaitList) , oper_(oper) , paramValue_(paramValue) , numBuffers_(numBuffers) , paramValue2_(paramValue2) { } virtual ~SetVideoSessionCommand() { } virtual void submit(device::VirtualDevice& device) { device.submitSetVideoSession(*this); } // Accessor methods Operation operation() const { return oper_; } void* paramValue() const { return paramValue_; } uint* paramValue2() const { return paramValue2_; } uint numBuffers() const { return numBuffers_; } private: Operation oper_; //!< Video session operation void* paramValue_; //!< Store pointer to parameter data uint numBuffers_; //!< uint* paramValue2_; //!< Store pointer to parameter data }; #endif // cl_amd_open_video class SignalCommand:public OneMemoryArgCommand { private: cl_uint markerValue_; cl_ulong markerOffset_; public: SignalCommand( HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, cl_uint value, cl_ulong offset = 0): OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), markerValue_(value), markerOffset_(offset) { } virtual void submit(device::VirtualDevice& device) { device.submitSignal(*this); } const cl_uint markerValue() {return markerValue_;} Memory& memory() {return *memory_;} const cl_ulong markerOffset() {return markerOffset_;} }; class MakeBuffersResidentCommand: public Command { private: std::vector memObjects_; cl_bus_address_amd* busAddresses_; public: MakeBuffersResidentCommand( HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList, const std::vector& memObjects, cl_bus_address_amd* busAddr) : Command(queue, type, eventWaitList), busAddresses_(busAddr) { std::vector::const_iterator itr; for (itr = memObjects.begin(); itr != memObjects.end(); itr++) { (*itr)->retain(); memObjects_.push_back(*itr); } } virtual void submit(device::VirtualDevice& device) { device.submitMakeBuffersResident(*this); } void releaseResources() { std::vector::const_iterator itr; for (itr = memObjects_.begin(); itr != memObjects_.end(); itr++) { (*itr)->release(); } Command::releaseResources(); } bool validateMemory(); const std::vector& memObjects() const { return memObjects_; } cl_bus_address_amd* busAddress() const {return busAddresses_;} }; //! A deallocation command used to free SVM or system pointers. class SvmFreeMemoryCommand : public Command { public: typedef void (CL_CALLBACK *freeCallBack) (cl_command_queue, cl_uint, void**, void*); private: std::vector svmPointers_; //!< List of pointers to deallocate freeCallBack pfnFreeFunc_; //!< User-defined deallocation callback void* userData_; //!< Data passed to user-defined callback public: SvmFreeMemoryCommand( HostQueue& queue, const EventWaitList& eventWaitList, cl_uint numSvmPointers, void** svmPointers, freeCallBack pfnFreeFunc, void* userData) : Command(queue, CL_COMMAND_SVM_FREE, eventWaitList), //! We copy svmPointers since it can be reused/deallocated after // command creation svmPointers_(svmPointers, svmPointers + numSvmPointers), pfnFreeFunc_(pfnFreeFunc), userData_(userData) { } virtual void releaseResources() { Command::releaseResources(); } virtual void submit(device::VirtualDevice& device) { device.submitSvmFreeMemory(*this); } std::vector& svmPointers() { return svmPointers_; } freeCallBack pfnFreeFunc() const { return pfnFreeFunc_; } void* userData() const { return userData_; } }; //! A copy command where the origin and destination memory locations are SVM // pointers. class SvmCopyMemoryCommand : public Command { private: void* dst_; //!< Destination pointer const void* src_; //!< Source pointer size_t srcSize_; //!< Size (in bytes) of the source buffer public: SvmCopyMemoryCommand( HostQueue& queue, const EventWaitList& eventWaitList, void* dst, const void* src, size_t srcSize) : Command(queue, CL_COMMAND_SVM_MEMCPY, eventWaitList), dst_(dst), src_(src), srcSize_(srcSize) { } virtual void releaseResources() { Command::releaseResources(); } virtual void submit(device::VirtualDevice& device) { device.submitSvmCopyMemory(*this); } void* dst() const { return dst_; } const void* src() const { return src_; } size_t srcSize() const { return srcSize_; } }; //! A fill command where the pattern and destination memory locations are SVM // pointers. class SvmFillMemoryCommand : public Command { private: void* dst_; //!< Destination pointer char pattern_[FillMemoryCommand::MaxFillPatterSize]; //!< The fill pattern size_t patternSize_; //!< Pattern size size_t times_; //!< Number of times to fill the // destination buffer with the source buffer public: SvmFillMemoryCommand( HostQueue& queue, const EventWaitList& eventWaitList, void* dst, const void* pattern, size_t patternSize, size_t size) : Command(queue, CL_COMMAND_SVM_MEMFILL, eventWaitList), dst_(dst), patternSize_(patternSize), times_(size / patternSize) { assert(amd::isMultipleOf(size, patternSize)); //! We copy the pattern buffer since it can be reused/deallocated after // command creation memcpy(pattern_, pattern, patternSize); } virtual void releaseResources() { Command::releaseResources(); } virtual void submit(device::VirtualDevice& device) { device.submitSvmFillMemory(*this); } void* dst() const { return dst_; } const char* pattern() const { return pattern_; } size_t patternSize() const { return patternSize_; } size_t times() const { return times_; } }; /*! \brief A map memory command where the pointer to be mapped is a SVM shared * buffer */ class SvmMapMemoryCommand : public Command { private: Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped Coord3D size_; //!< the map size Coord3D origin_; //!< the origin of the mapped svm pointer shift from the beginning of svm space allocated cl_map_flags flags_; //!< map flags public: SvmMapMemoryCommand( HostQueue& queue, const EventWaitList& eventWaitList, Memory* svmMem, const size_t size, const size_t offset, cl_map_flags flags) : Command(queue, CL_COMMAND_SVM_MAP, eventWaitList), svmMem_(svmMem), size_(size), origin_(offset), flags_(flags) { } virtual void releaseResources() { Command::releaseResources(); } virtual void submit(device::VirtualDevice& device) { device.submitSvmMapMemory(*this); } Memory* getSvmMem() const {return svmMem_;} Coord3D size() const {return size_;} cl_map_flags mapFlags() const {return flags_;} Coord3D origin() const {return origin_;} bool isEntireMemory() const; }; /*! \brief An unmap memory command where the unmapped pointer is a SVM shared * buffer */ class SvmUnmapMemoryCommand : public Command { private: Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped public: SvmUnmapMemoryCommand( HostQueue& queue, const EventWaitList& eventWaitList, Memory* svmMem) : Command(queue, CL_COMMAND_SVM_UNMAP, eventWaitList), svmMem_(svmMem) {} virtual void releaseResources() { Command::releaseResources(); } virtual void submit(device::VirtualDevice& device) { device.submitSvmUnmapMemory(*this); } Memory* getSvmMem() const {return svmMem_;} }; /*! @} * @} */ } // namespace amd #endif /*COMMAND_HPP_*/