// // Copyright 2010 Advanced Micro Devices, Inc. All rights reserved. // /*! \file command.hpp * \brief Declarations for Event, Command and HostQueue objects. * * \author Laurent Morichetti (laurent.morichetti@amd.com) * \date October 2008 */ #ifndef COMMAND_HPP_ #define COMMAND_HPP_ #include "top.hpp" #include "thread/monitor.hpp" #include "thread/thread.hpp" #include "platform/agent.hpp" #include "platform/object.hpp" #include "platform/context.hpp" #include "platform/ndrange.hpp" #include "platform/kernel.hpp" #include "device/device.hpp" #include "utils/concurrent.hpp" #include "platform/memory.hpp" #include "platform/perfctr.hpp" #include "platform/threadtrace.hpp" #include "CL/cl_ext.h" #include #include #include #include namespace amd { /*! \addtogroup Runtime * @{ * * \addtogroup Commands Event, Commands and Command-Queue * @{ */ class Command; class HostQueue; /*! \brief Encapsulates the status of a command. * * \details An event object encapsulates the status of a Command * it is associated with and can be used to synchronize operations * in a Context. */ class Event : public RuntimeObject { typedef void(CL_CALLBACK* CallBackFunction)(cl_event event, cl_int command_exec_status, void* user_data); struct CallBackEntry : public HeapObject { struct CallBackEntry* next_; //!< the next entry in the callback list. std::atomic callback_; //!< callback function pointer. void* data_; //!< user data passed to the callback function. cl_int status_; //!< execution status triggering the callback. CallBackEntry(cl_int status, CallBackFunction callback, void* data) : callback_(callback), data_(data), status_(status) {} }; public: typedef std::vector EventWaitList; private: Monitor lock_; std::atomic callbacks_; //!< linked list of callback entries. volatile cl_int status_; //!< current execution status. std::atomic_flag notified_; //!< Command queue was notified protected: static const EventWaitList nullWaitList; struct ProfilingInfo { ProfilingInfo(bool enabled = false) : enabled_(enabled), waves_(0) { if (enabled) { clear(); callback_ = NULL; } } uint64_t queued_; uint64_t submitted_; uint64_t start_; uint64_t end_; bool enabled_; //!< Profiling enabled for the wave limiter uint32_t waves_; //!< The number of waves used in a dispatch ProfilingCallback* callback_; void clear() { queued_ = 0ULL; submitted_ = 0ULL; start_ = 0ULL; end_ = 0ULL; } void setCallback(ProfilingCallback* callback, uint32_t waves) { if (callback == NULL) { return; } enabled_ = true; waves_ = waves; clear(); callback_ = callback; } } profilingInfo_; //! Construct a new event. Event(); //! Construct a new event associated to the given command \a queue. Event(HostQueue& queue); //! Destroy the event. virtual ~Event(); //! Release the resources associated with this event. virtual void releaseResources() {} //! Record the profiling info for the given change of \a status. // If the given \a timeStamp is 0 and profiling is enabled, // use the current host clock time instead. uint64_t recordProfilingInfo(cl_int status, uint64_t timeStamp = 0); //! Process the callbacks for the given \a status change. void processCallbacks(cl_int status) const; public: //! Return the context for this event. virtual const Context& context() const = 0; //! Return the command this event is associated with. inline Command& command(); inline const Command& command() const; //! Return the profiling info. const ProfilingInfo& profilingInfo() const { return profilingInfo_; } //! Return this command's execution status. cl_int status() const { return status_; } //! Insert the given \a callback into the callback stack. bool setCallback(cl_int status, CallBackFunction callback, void* data); /*! \brief Set the event status. * * \details If the status becomes CL_COMPLETE, notify all threads * awaiting this command's completion. If the given \a timeStamp is 0 * and profiling is enabled, use the current host clock time instead. * * \see amd::Event::awaitCompletion */ bool setStatus(cl_int status, uint64_t timeStamp = 0); //! Signal all threads waiting on this event. void signal() { ScopedLock lock(lock_); lock_.notifyAll(); } /*! \brief Suspend the current thread until the status of the Command * associated with this event changes to CL_COMPLETE. Return true if the * command successfully completed. */ virtual bool awaitCompletion(); /*! \brief Notifies current command queue about execution status */ bool notifyCmdQueue(); //! RTTI internal implementation virtual ObjectType objectType() const { return ObjectTypeEvent; } }; /*! \brief An operation that is submitted to a command queue. * * %Command is the abstract base type of all OpenCL operations * submitted to a HostQueue for execution. Classes derived from * %Command must implement the submit() function. * */ class Command : public Event { private: //! The command queue this command is enqueue into. NULL if not yet enqueue. HostQueue* queue_; //! Next GPU command in the queue list Command* next_; const cl_command_type type_; //!< This command's OpenCL type. volatile cl_int exception_; //!< The first raised exception. void* data_; protected: //! The Events that need to complete before this command is submitted. EventWaitList eventWaitList_; //! Construct a new command of the given OpenCL type. Command(HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList = nullWaitList); //! Construct a new command of the given OpenCL type. Command(cl_command_type type) : Event(), queue_(NULL), next_(NULL), type_(type), exception_(0), data_(NULL), eventWaitList_(nullWaitList) {} bool terminate() { if (Agent::shouldPostEventEvents() && type() != 0) { Agent::postEventFree(as_cl(static_cast(this))); } return true; } public: //! Return the queue this command is enqueued into. HostQueue* queue() const { return queue_; } //! Enqueue this command into the associated command queue. void enqueue(); //! Return the event encapsulating this command's status. const Event& event() const { return *this; } Event& event() { return *this; } //! Return the list of events this command needs to wait on before dispatch const EventWaitList& eventWaitList() const { return eventWaitList_; } //! Return this command's OpenCL type. cl_command_type type() const { return type_; } //! Return the first raised exception or 0 if none. cl_int exception() const { return exception_; } //! Set the exception for this command. void setException(cl_int exception) { exception_ = exception; } //! Return the opaque, device specific data for this command. void* data() const { return data_; } //! Set the opaque, device specific data for this command. void setData(void* data) { data_ = data; } /*! \brief The execution engine for this command. * * \details All derived class must implement this virtual function. * * \note This function will execute in the command queue thread. */ virtual void submit(device::VirtualDevice& device) = 0; //! Release the resources associated with this event. virtual void releaseResources(); //! Set the next GPU command void setNext(Command* next) { next_ = next; } //! Get the next GPU command Command* getNext() const { return next_; } //! Return the context for this event. virtual const Context& context() const; }; class UserEvent : public Command { const Context& context_; public: UserEvent(Context& context) : Command(CL_COMMAND_USER), context_(context) { setStatus(CL_SUBMITTED); } virtual void submit(device::VirtualDevice& device) { ShouldNotCallThis(); } virtual const Context& context() const { return context_; } }; class ClGlEvent : public Command { private: const Context& context_; bool waitForFence(); public: ClGlEvent(Context& context) : Command(CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR), context_(context) { setStatus(CL_SUBMITTED); } virtual void submit(device::VirtualDevice& device) { ShouldNotCallThis(); } bool awaitCompletion() { return waitForFence(); } virtual const Context& context() const { return context_; } }; inline Command& Event::command() { return *static_cast(this); } inline const Command& Event::command() const { return *static_cast(this); } class Kernel; class NDRangeContainer; //! A memory command that holds a single memory object reference. // class OneMemoryArgCommand : public Command { protected: Memory* memory_; public: OneMemoryArgCommand(HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList, Memory& memory) : Command(queue, type, eventWaitList), memory_(&memory) { memory_->retain(); } virtual void releaseResources() { memory_->release(); DEBUG_ONLY(memory_ = NULL); Command::releaseResources(); } bool validateMemory(); }; //! A memory command that holds a single memory object reference. // class TwoMemoryArgsCommand : public Command { protected: Memory* memory1_; Memory* memory2_; public: TwoMemoryArgsCommand(HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList, Memory& memory1, Memory& memory2) : Command(queue, type, eventWaitList), memory1_(&memory1), memory2_(&memory2) { memory1_->retain(); memory2_->retain(); } virtual void releaseResources() { memory1_->release(); memory2_->release(); DEBUG_ONLY(memory1_ = memory2_ = NULL); Command::releaseResources(); } bool validateMemory(); }; /*! \brief A generic read memory command. * * \details Used for operations on both buffers and images. Backends * are expected to handle any required translation. Buffers * are treated as 1D structures so origin_[0] and size_[0] * are equivalent to offset_ and count_ respectively. * * @todo Find a cleaner way of merging the row and slice pitch concepts at this level. * */ class ReadMemoryCommand : public OneMemoryArgCommand { private: Coord3D origin_; //!< Origin of the region to read. Coord3D size_; //!< Size of the region to read. void* hostPtr_; //!< The host pointer destination. size_t rowPitch_; //!< Row pitch (for image operations) size_t slicePitch_; //!< Slice pitch (for image operations) BufferRect bufRect_; //!< Buffer rectangle information BufferRect hostRect_; //!< Host memory rectangle information public: //! Construct a new ReadMemoryCommand ReadMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, Coord3D origin, Coord3D size, void* hostPtr, size_t rowPitch = 0, size_t slicePitch = 0) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), origin_(origin), size_(size), hostPtr_(hostPtr), rowPitch_(rowPitch), slicePitch_(slicePitch) { // Sanity checks assert(hostPtr != NULL && "hostPtr cannot be null"); assert(size.c[0] > 0 && "invalid"); } //! Construct a new ReadMemoryCommand ReadMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, Coord3D origin, Coord3D size, void* hostPtr, const BufferRect& bufRect, const BufferRect& hostRect) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), origin_(origin), size_(size), hostPtr_(hostPtr), rowPitch_(0), slicePitch_(0), bufRect_(bufRect), hostRect_(hostRect) { // Sanity checks assert(hostPtr != NULL && "hostPtr cannot be null"); assert(size.c[0] > 0 && "invalid"); } virtual void submit(device::VirtualDevice& device) { device.submitReadMemory(*this); } //! Return the memory object to read from. Memory& source() const { return *memory_; } //! Return the host memory to write to void* destination() const { return hostPtr_; } //! Return the origin of the region to read const Coord3D& origin() const { return origin_; } //! Return the size of the region to read const Coord3D& size() const { return size_; } //! Return the row pitch size_t rowPitch() const { return rowPitch_; } //! Return the slice pitch size_t slicePitch() const { return slicePitch_; } //! Return the buffer rectangle information const BufferRect& bufRect() const { return bufRect_; } //! Return the host rectangle information const BufferRect& hostRect() const { return hostRect_; } //! Return true if the entire memory object is read. bool isEntireMemory() const; }; /*! \brief A generic write memory command. * * \details Used for operations on both buffers and images. Backends * are expected to handle any required translations. Buffers * are treated as 1D structures so origin_[0] and size_[0] * are equivalent to offset_ and count_ respectively. */ class WriteMemoryCommand : public OneMemoryArgCommand { private: Coord3D origin_; //!< Origin of the region to write to. Coord3D size_; //!< Size of the region to write to. const void* hostPtr_; //!< The host pointer source. size_t rowPitch_; //!< Row pitch (for image operations) size_t slicePitch_; //!< Slice pitch (for image operations) BufferRect bufRect_; //!< Buffer rectangle information BufferRect hostRect_; //!< Host memory rectangle information public: WriteMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, Coord3D origin, Coord3D size, const void* hostPtr, size_t rowPitch = 0, size_t slicePitch = 0) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), origin_(origin), size_(size), hostPtr_(hostPtr), rowPitch_(rowPitch), slicePitch_(slicePitch) { // Sanity checks assert(hostPtr != NULL && "hostPtr cannot be null"); assert(size.c[0] > 0 && "invalid"); } WriteMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, Coord3D origin, Coord3D size, const void* hostPtr, const BufferRect& bufRect, const BufferRect& hostRect) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), origin_(origin), size_(size), hostPtr_(hostPtr), rowPitch_(0), slicePitch_(0), bufRect_(bufRect), hostRect_(hostRect) { // Sanity checks assert(hostPtr != NULL && "hostPtr cannot be null"); assert(size.c[0] > 0 && "invalid"); } virtual void submit(device::VirtualDevice& device) { device.submitWriteMemory(*this); } //! Return the host memory to read from const void* source() const { return hostPtr_; } //! Return the memory object to write to. Memory& destination() const { return *memory_; } //! Return the region origin const Coord3D& origin() const { return origin_; } //! Return the region size const Coord3D& size() const { return size_; } //! Return the row pitch size_t rowPitch() const { return rowPitch_; } //! Return the slice pitch size_t slicePitch() const { return slicePitch_; } //! Return the buffer rectangle information const BufferRect& bufRect() const { return bufRect_; } //! Return the host rectangle information const BufferRect& hostRect() const { return hostRect_; } //! Return true if the entire memory object is written. bool isEntireMemory() const; }; /*! \brief A generic fill memory command. * * \details Used for operations on both buffers and images. Backends * are expected to handle any required translations. Buffers * are treated as 1D structures so origin_[0] and size_[0] * are equivalent to offset_ and count_ respectively. */ class FillMemoryCommand : public OneMemoryArgCommand { public: const static size_t MaxFillPatterSize = sizeof(cl_double16); private: Coord3D origin_; //!< Origin of the region to write to. Coord3D size_; //!< Size of the region to write to. char pattern_[MaxFillPatterSize]; //!< The fill pattern size_t patternSize_; //!< Pattern size public: FillMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, const void* pattern, size_t patternSize, Coord3D origin, Coord3D size) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), origin_(origin), size_(size), patternSize_(patternSize) { // Sanity checks assert(pattern != NULL && "pattern cannot be null"); assert(size.c[0] > 0 && "invalid"); memcpy(pattern_, pattern, patternSize); } virtual void submit(device::VirtualDevice& device) { device.submitFillMemory(*this); } //! Return the pattern memory to fill with const void* pattern() const { return reinterpret_cast(pattern_); } //! Return the pattern size const size_t patternSize() const { return patternSize_; } //! Return the memory object to write to. Memory& memory() const { return *memory_; } //! Return the region origin const Coord3D& origin() const { return origin_; } //! Return the region size const Coord3D& size() const { return size_; } //! Return true if the entire memory object is written. bool isEntireMemory() const; }; /*! \brief A generic copy memory command * * \details Used for both buffers and images. Backends are expected * to handle any required translation. Buffers are treated * as 1D structures so origin_[0] and size_[0] are * equivalent to offset_ and count_ respectively. */ class CopyMemoryCommand : public TwoMemoryArgsCommand { private: Coord3D srcOrigin_; //!< Origin of the source region. Coord3D dstOrigin_; //!< Origin of the destination region. Coord3D size_; //!< Size of the region to copy. BufferRect srcRect_; //!< Source buffer rectangle information BufferRect dstRect_; //!< Destination buffer rectangle information public: CopyMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& srcMemory, Memory& dstMemory, Coord3D srcOrigin, Coord3D dstOrigin, Coord3D size) : TwoMemoryArgsCommand(queue, cmdType, eventWaitList, srcMemory, dstMemory), srcOrigin_(srcOrigin), dstOrigin_(dstOrigin), size_(size) { // Sanity checks assert(size.c[0] > 0 && "invalid"); } CopyMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& srcMemory, Memory& dstMemory, Coord3D srcOrigin, Coord3D dstOrigin, Coord3D size, const BufferRect& srcRect, const BufferRect& dstRect) : TwoMemoryArgsCommand(queue, cmdType, eventWaitList, srcMemory, dstMemory), srcOrigin_(srcOrigin), dstOrigin_(dstOrigin), size_(size), srcRect_(srcRect), dstRect_(dstRect) { // Sanity checks assert(size.c[0] > 0 && "invalid"); } virtual void submit(device::VirtualDevice& device) { device.submitCopyMemory(*this); } //! Return the host memory to read from Memory& source() const { return *memory1_; } //! Return the memory object to write to. Memory& destination() const { return *memory2_; } //! Return the source origin const Coord3D& srcOrigin() const { return srcOrigin_; } //! Return the offset in bytes in the destination. const Coord3D& dstOrigin() const { return dstOrigin_; } //! Return the number of bytes to copy. const Coord3D& size() const { return size_; } //! Return the source buffer rectangle information const BufferRect& srcRect() const { return srcRect_; } //! Return the destination buffer rectangle information const BufferRect& dstRect() const { return dstRect_; } //! Return true if the both memories are is read/written in their entirety. bool isEntireMemory() const; }; /*! \brief A generic map memory command. Makes a memory object accessible to the host. * * @todo:dgladdin Need to think more about how the pitch parameters operate in * the context of unified buffer/image commands. */ class MapMemoryCommand : public OneMemoryArgCommand { private: cl_map_flags mapFlags_; //!< Flags controlling the map. bool blocking_; //!< True for blocking maps Coord3D origin_; //!< Origin of the region to map. Coord3D size_; //!< Size of the region to map. const void* mapPtr_; //!< Host-space pointer that the object is currently mapped at public: //! Construct a new MapMemoryCommand MapMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, cl_map_flags mapFlags, bool blocking, Coord3D origin, Coord3D size, size_t* imgRowPitch = nullptr, size_t* imgSlicePitch = nullptr, void* mapPtr = nullptr) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), mapFlags_(mapFlags), blocking_(blocking), origin_(origin), size_(size), mapPtr_(mapPtr) { // Sanity checks assert(size.c[0] > 0 && "invalid"); } virtual void submit(device::VirtualDevice& device) { device.submitMapMemory(*this); } //! Read the memory object Memory& memory() const { return *memory_; } //! Read the map control flags cl_map_flags mapFlags() const { return mapFlags_; } //! Read the origin const Coord3D& origin() const { return origin_; } //! Read the size const Coord3D& size() const { return size_; } //! Read the blocking flag bool blocking() const { return blocking_; } //! Returns true if the entire memory object is mapped bool isEntireMemory() const; //! Read the map pointer const void* mapPtr() const { return mapPtr_; } }; /*! \brief A generic unmap memory command. * * @todo:dgladdin Need to think more about how the pitch parameters operate in * the context of unified buffer/image commands. */ class UnmapMemoryCommand : public OneMemoryArgCommand { private: //! Host-space pointer that the object is currently mapped at void* mapPtr_; public: //! Construct a new MapMemoryCommand UnmapMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, void* mapPtr) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), mapPtr_(mapPtr) {} virtual void submit(device::VirtualDevice& device) { device.submitUnmapMemory(*this); } virtual void releaseResources(); //! Read the memory object Memory& memory() const { return *memory_; } //! Read the map pointer void* mapPtr() const { return mapPtr_; } }; /*! \brief Migrate memory objects command. * * \details Used for operations on both buffers and images. Backends * are expected to handle any required translations. */ class MigrateMemObjectsCommand : public Command { private: cl_mem_migration_flags migrationFlags_; //!< Migration flags std::vector memObjects_; //!< The list of memory objects public: //! Construct a new AcquireExtObjectsCommand MigrateMemObjectsCommand(HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList, const std::vector& memObjects, cl_mem_migration_flags flags) : Command(queue, type, eventWaitList), migrationFlags_(flags) { for (const auto& it : memObjects) { it->retain(); memObjects_.push_back(it); } } virtual void submit(device::VirtualDevice& device) { device.submitMigrateMemObjects(*this); } //! Release all resources associated with this command void releaseResources() { for (const auto& it : memObjects_) { it->release(); } Command::releaseResources(); } //! Returns the migration flags cl_mem_migration_flags migrationFlags() const { return migrationFlags_; } //! Returns the number of memory objects in the command cl_uint numMemObjects() const { return (cl_uint)memObjects_.size(); } //! Returns a pointer to the memory objects const std::vector& memObjects() const { return memObjects_; } bool validateMemory(); }; //! To execute a kernel on a specific device. class NDRangeKernelCommand : public Command { private: Kernel& kernel_; NDRangeContainer sizes_; address parameters_; uint32_t sharedMemBytes_; public: //! Construct an ExecuteKernel command NDRangeKernelCommand(HostQueue& queue, const EventWaitList& eventWaitList, Kernel& kernel, const NDRangeContainer& sizes, uint32_t sharedMemBytes = 0); virtual void submit(device::VirtualDevice& device) { device.submitKernel(*this); } //! Release all resources associated with this command ( void releaseResources(); //! Return the kernel. const Kernel& kernel() const { return kernel_; } //! Return the parameters given to this kernel. const_address parameters() const { return parameters_; } //! Return the kernel NDRange. const NDRangeContainer& sizes() const { return sizes_; } //! Return the shared memory size uint32_t sharedMemBytes() const { return sharedMemBytes_; } //! Set the local work size. void setLocalWorkSize(const NDRange& local) { sizes_.local() = local; } cl_int captureAndValidate(); }; class NativeFnCommand : public Command { private: void(CL_CALLBACK* nativeFn_)(void*); char* args_; size_t argsSize_; std::vector memObjects_; std::vector memOffsets_; public: NativeFnCommand(HostQueue& queue, const EventWaitList& eventWaitList, void(CL_CALLBACK* nativeFn)(void*), const void* args, size_t argsSize, size_t numMemObjs, const cl_mem* memObjs, const void** memLocs); ~NativeFnCommand() { delete[] args_; } void releaseResources() { std::for_each(memObjects_.begin(), memObjects_.end(), std::mem_fun(&Memory::release)); Command::releaseResources(); } virtual void submit(device::VirtualDevice& device) { device.submitNativeFn(*this); } cl_int invoke(); }; class Marker : public Command { public: //! Create a new Marker Marker(HostQueue& queue, bool userVisible, const EventWaitList& eventWaitList = nullWaitList, const Event* waitingEvent = NULL) : Command(queue, userVisible ? CL_COMMAND_MARKER : 0, eventWaitList), waitingEvent_(waitingEvent) {} //! The actual command implementation. virtual void submit(device::VirtualDevice& device) { device.submitMarker(*this); } const Event* waitingEvent() const { return waitingEvent_; } private: const Event* waitingEvent_; //!< Waiting event associated with the marker }; /*! \brief Maps CL objects created from external ones and syncs the contents (blocking). * */ class ExtObjectsCommand : public Command { private: std::vector memObjects_; //!< The list of Memory based classes public: //! Construct a new AcquireExtObjectsCommand ExtObjectsCommand(HostQueue& queue, const EventWaitList& eventWaitList, cl_uint num_objects, const std::vector& memoryObjects, cl_command_type type) : Command(queue, type, eventWaitList) { for (const auto& it : memoryObjects) { it->retain(); memObjects_.push_back(it); } } //! Release all resources associated with this command void releaseResources() { for (const auto& it : memObjects_) { it->release(); } Command::releaseResources(); } //! Get number of GL objects cl_uint getNumObjects() { return (cl_uint)memObjects_.size(); } //! Get pointer to GL object list const std::vector& getMemList() const { return memObjects_; } bool validateMemory(); virtual bool processGLResource(device::Memory* mem) = 0; }; class AcquireExtObjectsCommand : public ExtObjectsCommand { public: //! Construct a new AcquireExtObjectsCommand AcquireExtObjectsCommand(HostQueue& queue, const EventWaitList& eventWaitList, cl_uint num_objects, const std::vector& memoryObjects, cl_command_type type) : ExtObjectsCommand(queue, eventWaitList, num_objects, memoryObjects, type) {} virtual void submit(device::VirtualDevice& device) { device.submitAcquireExtObjects(*this); } virtual bool processGLResource(device::Memory* mem); }; class ReleaseExtObjectsCommand : public ExtObjectsCommand { public: //! Construct a new ReleaseExtObjectsCommand ReleaseExtObjectsCommand(HostQueue& queue, const EventWaitList& eventWaitList, cl_uint num_objects, const std::vector& memoryObjects, cl_command_type type) : ExtObjectsCommand(queue, eventWaitList, num_objects, memoryObjects, type) {} virtual void submit(device::VirtualDevice& device) { device.submitReleaseExtObjects(*this); } virtual bool processGLResource(device::Memory* mem); }; class PerfCounterCommand : public Command { public: typedef std::vector PerfCounterList; enum State { Begin = 0, //!< Issue a begin command End = 1 //!< Issue an end command }; //! Construct a new PerfCounterCommand PerfCounterCommand(HostQueue& queue, const EventWaitList& eventWaitList, const PerfCounterList& counterList, State state) : Command(queue, 1, eventWaitList), counterList_(counterList), state_(state) { for (uint i = 0; i < counterList_.size(); ++i) { counterList_[i]->retain(); } } void releaseResources() { for (uint i = 0; i < counterList_.size(); ++i) { counterList_[i]->release(); } Command::releaseResources(); } //! Gets the number of PerfCounter objects size_t getNumCounters() const { return counterList_.size(); } //! Gets the list of all counters const PerfCounterList& getCounters() const { return counterList_; } //! Gets the performance counter state State getState() const { return state_; } //! Process the command on the device queue virtual void submit(device::VirtualDevice& device) { device.submitPerfCounter(*this); } private: PerfCounterList counterList_; //!< The list of performance counters State state_; //!< State of the issued command }; /*! \brief Thread Trace memory objects command. * * \details Used for bindig memory objects to therad trace mechanism. */ class ThreadTraceMemObjectsCommand : public Command { public: //! Construct a new ThreadTraceMemObjectsCommand ThreadTraceMemObjectsCommand(HostQueue& queue, const EventWaitList& eventWaitList, size_t numMemoryObjects, const cl_mem* memoryObjects, size_t sizeMemoryObject, ThreadTrace& threadTrace, cl_command_type type) : Command(queue, type, eventWaitList), sizeMemObjects_(sizeMemoryObject), threadTrace_(threadTrace) { memObjects_.resize(numMemoryObjects); for (size_t i = 0; i < numMemoryObjects; ++i) { Memory* obj = as_amd(memoryObjects[i]); obj->retain(); memObjects_[i] = obj; } threadTrace_.retain(); } //! Release all resources associated with this command void releaseResources() { threadTrace_.release(); for (const auto& itr : memObjects_) { itr->release(); } Command::releaseResources(); } //! Get number of CL memory objects cl_uint getNumObjects() { return (cl_uint)memObjects_.size(); } //! Get pointer to CL memory object list const std::vector& getMemList() const { return memObjects_; } //! Submit command to bind memory object to the Thread Trace mechanism virtual void submit(device::VirtualDevice& device) { device.submitThreadTraceMemObjects(*this); } //! Return the thread trace object. ThreadTrace& getThreadTrace() const { return threadTrace_; } //! Get memory object size const size_t getMemoryObjectSize() const { return sizeMemObjects_; } //! Validate memory bound to the thread thrace bool validateMemory(); private: std::vector memObjects_; //!< The list of memory objects,bound to the thread trace size_t sizeMemObjects_; //!< The size of each memory object from memObjects_ list (all memory //!objects have the smae size) ThreadTrace& threadTrace_; //!< The Thread Trace object }; /*! \brief Thread Trace command. * * \details Used for issue begin/end/pause/resume for therad trace object. */ class ThreadTraceCommand : public Command { private: void* threadTraceConfig_; public: enum State { Begin = 0, //!< Issue a begin command End = 1, //!< Issue an end command Pause = 2, //!< Issue a pause command Resume = 3 //!< Issue a resume command }; //! Construct a new ThreadTraceCommand ThreadTraceCommand(HostQueue& queue, const EventWaitList& eventWaitList, const void* threadTraceConfig, ThreadTrace& threadTrace, State state, cl_command_type type) : Command(queue, type, eventWaitList), threadTrace_(threadTrace), state_(state) { const unsigned int size = *static_cast(threadTraceConfig); threadTraceConfig_ = static_cast(new char[size]); if (threadTraceConfig_) { memcpy(threadTraceConfig_, threadTraceConfig, size); } threadTrace_.retain(); } //! Release all resources associated with this command void releaseResources() { threadTrace_.release(); Command::releaseResources(); } //! Get the thread trace object ThreadTrace& getThreadTrace() const { return threadTrace_; } //! Get the thread trace command state State getState() const { return state_; } //! Process the command on the device queue virtual void submit(device::VirtualDevice& device) { device.submitThreadTrace(*this); } // Accessor methods void* threadTraceConfig() const { return threadTraceConfig_; } private: ThreadTrace& threadTrace_; //!< The list of performance counters State state_; //!< State of the issued command }; class SignalCommand : public OneMemoryArgCommand { private: cl_uint markerValue_; cl_ulong markerOffset_; public: SignalCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, cl_uint value, cl_ulong offset = 0) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), markerValue_(value), markerOffset_(offset) {} virtual void submit(device::VirtualDevice& device) { device.submitSignal(*this); } const cl_uint markerValue() { return markerValue_; } Memory& memory() { return *memory_; } const cl_ulong markerOffset() { return markerOffset_; } }; class MakeBuffersResidentCommand : public Command { private: std::vector memObjects_; cl_bus_address_amd* busAddresses_; public: MakeBuffersResidentCommand(HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList, const std::vector& memObjects, cl_bus_address_amd* busAddr) : Command(queue, type, eventWaitList), busAddresses_(busAddr) { for (const auto& it : memObjects) { it->retain(); memObjects_.push_back(it); } } virtual void submit(device::VirtualDevice& device) { device.submitMakeBuffersResident(*this); } void releaseResources() { for (const auto& it : memObjects_) { it->release(); } Command::releaseResources(); } bool validateMemory(); const std::vector& memObjects() const { return memObjects_; } cl_bus_address_amd* busAddress() const { return busAddresses_; } }; //! A deallocation command used to free SVM or system pointers. class SvmFreeMemoryCommand : public Command { public: typedef void(CL_CALLBACK* freeCallBack)(cl_command_queue, cl_uint, void**, void*); private: std::vector svmPointers_; //!< List of pointers to deallocate freeCallBack pfnFreeFunc_; //!< User-defined deallocation callback void* userData_; //!< Data passed to user-defined callback public: SvmFreeMemoryCommand(HostQueue& queue, const EventWaitList& eventWaitList, cl_uint numSvmPointers, void** svmPointers, freeCallBack pfnFreeFunc, void* userData) : Command(queue, CL_COMMAND_SVM_FREE, eventWaitList), //! We copy svmPointers since it can be reused/deallocated after // command creation svmPointers_(svmPointers, svmPointers + numSvmPointers), pfnFreeFunc_(pfnFreeFunc), userData_(userData) {} virtual void submit(device::VirtualDevice& device) { device.submitSvmFreeMemory(*this); } std::vector& svmPointers() { return svmPointers_; } freeCallBack pfnFreeFunc() const { return pfnFreeFunc_; } void* userData() const { return userData_; } }; //! A copy command where the origin and destination memory locations are SVM // pointers. class SvmCopyMemoryCommand : public Command { private: void* dst_; //!< Destination pointer const void* src_; //!< Source pointer size_t srcSize_; //!< Size (in bytes) of the source buffer public: SvmCopyMemoryCommand(HostQueue& queue, const EventWaitList& eventWaitList, void* dst, const void* src, size_t srcSize) : Command(queue, CL_COMMAND_SVM_MEMCPY, eventWaitList), dst_(dst), src_(src), srcSize_(srcSize) {} virtual void submit(device::VirtualDevice& device) { device.submitSvmCopyMemory(*this); } void* dst() const { return dst_; } const void* src() const { return src_; } size_t srcSize() const { return srcSize_; } }; //! A fill command where the pattern and destination memory locations are SVM // pointers. class SvmFillMemoryCommand : public Command { private: void* dst_; //!< Destination pointer char pattern_[FillMemoryCommand::MaxFillPatterSize]; //!< The fill pattern size_t patternSize_; //!< Pattern size size_t times_; //!< Number of times to fill the // destination buffer with the source buffer public: SvmFillMemoryCommand(HostQueue& queue, const EventWaitList& eventWaitList, void* dst, const void* pattern, size_t patternSize, size_t size) : Command(queue, CL_COMMAND_SVM_MEMFILL, eventWaitList), dst_(dst), patternSize_(patternSize), times_(size / patternSize) { assert(amd::isMultipleOf(size, patternSize)); //! We copy the pattern buffer since it can be reused/deallocated after // command creation memcpy(pattern_, pattern, patternSize); } virtual void submit(device::VirtualDevice& device) { device.submitSvmFillMemory(*this); } void* dst() const { return dst_; } const char* pattern() const { return pattern_; } size_t patternSize() const { return patternSize_; } size_t times() const { return times_; } }; /*! \brief A map memory command where the pointer to be mapped is a SVM shared * buffer */ class SvmMapMemoryCommand : public Command { private: Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped Coord3D size_; //!< the map size Coord3D origin_; //!< the origin of the mapped svm pointer shift from the beginning of svm space //!allocated cl_map_flags flags_; //!< map flags void* svmPtr_; public: SvmMapMemoryCommand(HostQueue& queue, const EventWaitList& eventWaitList, Memory* svmMem, const size_t size, const size_t offset, cl_map_flags flags, void* svmPtr) : Command(queue, CL_COMMAND_SVM_MAP, eventWaitList), svmMem_(svmMem), size_(size), origin_(offset), flags_(flags), svmPtr_(svmPtr) {} virtual void submit(device::VirtualDevice& device) { device.submitSvmMapMemory(*this); } Memory* getSvmMem() const { return svmMem_; } Coord3D size() const { return size_; } cl_map_flags mapFlags() const { return flags_; } Coord3D origin() const { return origin_; } void* svmPtr() const { return svmPtr_; } bool isEntireMemory() const; }; /*! \brief An unmap memory command where the unmapped pointer is a SVM shared * buffer */ class SvmUnmapMemoryCommand : public Command { private: Memory* svmMem_; //!< the pointer to the amd::Memory object corresponding the svm pointer mapped void* svmPtr_; //!< SVM pointer public: SvmUnmapMemoryCommand(HostQueue& queue, const EventWaitList& eventWaitList, Memory* svmMem, void* svmPtr) : Command(queue, CL_COMMAND_SVM_UNMAP, eventWaitList), svmMem_(svmMem), svmPtr_(svmPtr) {} virtual void submit(device::VirtualDevice& device) { device.submitSvmUnmapMemory(*this); } Memory* getSvmMem() const { return svmMem_; } void* svmPtr() const { return svmPtr_; } }; /*! \brief A generic transfer memory from/to file command. * * \details Currently supports buffers only. Buffers * are treated as 1D structures so origin_[0] and size_[0] * are equivalent to offset_ and count_ respectively. */ class TransferBufferFileCommand : public OneMemoryArgCommand { public: static const uint NumStagingBuffers = 2; static const size_t StagingBufferSize = 4 * Mi; static const uint StagingBufferMemType = CL_MEM_USE_PERSISTENT_MEM_AMD; protected: const Coord3D origin_; //!< Origin of the region to write to const Coord3D size_; //!< Size of the region to write to LiquidFlashFile* file_; //!< The file object for data read size_t fileOffset_; //!< Offset in the file for data read amd::Memory* staging_[NumStagingBuffers]; //!< Staging buffers for transfer public: TransferBufferFileCommand(cl_command_type type, HostQueue& queue, const EventWaitList& eventWaitList, Memory& memory, const Coord3D& origin, const Coord3D& size, LiquidFlashFile* file, size_t fileOffset) : OneMemoryArgCommand(queue, type, eventWaitList, memory), origin_(origin), size_(size), file_(file), fileOffset_(fileOffset) { // Sanity checks assert(size.c[0] > 0 && "invalid"); for (uint i = 0; i < NumStagingBuffers; ++i) { staging_[i] = NULL; } } virtual void releaseResources(); virtual void submit(device::VirtualDevice& device); //! Return the memory object to write to Memory& memory() const { return *memory_; } //! Return the host memory to read from LiquidFlashFile* file() const { return file_; } //! Returns file offset size_t fileOffset() const { return fileOffset_; } //! Return the region origin const Coord3D& origin() const { return origin_; } //! Return the region size const Coord3D& size() const { return size_; } //! Return the staging buffer for transfer Memory& staging(uint i) const { return *staging_[i]; } bool validateMemory(); }; /*! \brief A P2P copy memory command * * \details Used for buffers only. Backends are expected * to handle any required translation. Buffers are treated * as 1D structures so origin_[0] and size_[0] are * equivalent to offset_ and count_ respectively. */ class CopyMemoryP2PCommand : public CopyMemoryCommand { public: CopyMemoryP2PCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& srcMemory, Memory& dstMemory, Coord3D srcOrigin, Coord3D dstOrigin, Coord3D size) : CopyMemoryCommand(queue, cmdType, eventWaitList, srcMemory, dstMemory, srcOrigin, dstOrigin, size) { } virtual void submit(device::VirtualDevice& device) { device.submitCopyMemoryP2P(*this); } bool validateMemory(); }; /*! @} * @} */ } // namespace amd #endif /*COMMAND_HPP_*/