// // Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved. // #ifndef KERNEL_HPP_ #define KERNEL_HPP_ #include "top.hpp" #include "platform/object.hpp" #include "amdocl/cl_kernel.h" #include #include // for malloc #include #include "device/device.hpp" enum FGSStatus { FGS_DEFAULT, //!< The default kernel fine-grained system pointer support FGS_NO, //!< no support of kernel fine-grained system pointer FGS_YES //!< have support of kernel fine-grained system pointer }; namespace amd { class Symbol; class Program; /*! \addtogroup Runtime * @{ * * \addtogroup Program Programs and Kernel functions * @{ */ class KernelSignature : public HeapObject { private: std::vector params_; std::string attributes_; //!< The kernel attributes uint32_t numParameters_; //!< Number of OCL arguments in the kernel uint32_t paramsSize_; //!< The size of all arguments uint32_t numMemories_; //!< The number of memory objects used in the kernel uint32_t numSamplers_; //!< The number of sampler objects used in the kernel uint32_t numQueues_; //!< The number of queue objects used in the kernel uint32_t version_; //!< The ABI version public: enum { ABIVersion_0 = 0, //! ABI constructed based on the OCL semantics ABIVersion_1 = 1, //! ABI constructed based on the HW ABI returned from HSAIL ABIVersion_2 = 2 //! ABI constructed based on the HW ABI returned from LC }; //! Default constructor KernelSignature(): numParameters_(0), paramsSize_(0), numMemories_(0), numSamplers_(0), numQueues_(0), version_(ABIVersion_0) {} //! Construct a new signature. KernelSignature(const std::vector& params, const std::string& attrib, uint32_t numParameters, uint32_t version); //! Return the number of parameters uint32_t numParameters() const { return numParameters_; } //! Return the total number of parameters, including hidden uint32_t numParametersAll() const { return params_.size(); } //! Return the parameter descriptor at the given index. const KernelParameterDescriptor& at(size_t index) const { assert(index < params_.size() && "index is out of bounds"); return params_[index]; } std::vector& params() { return params_; } //! Return the size in bytes required for the arguments on the stack. uint32_t paramsSize() const { return paramsSize_; } //! Returns the number of memory objects. uint32_t numMemories() const { return numMemories_; } //! Returns the number of sampler objects. uint32_t numSamplers() const { return numSamplers_; } //! Returns the number of queue objects. uint32_t numQueues() const { return numQueues_; } //! Returns the signature version uint32_t version() const { return version_; } //! Return the kernel attributes const std::string& attributes() const { return attributes_; } const std::vector& parameters() const { return params_; } }; // @todo: look into a copy-on-write model instead of copy-on-read. // class KernelParameters : protected HeapObject { private: //! The signature describing these parameters. KernelSignature& signature_; address values_; //!< pointer to the base of the values stack. uint32_t execInfoOffset_; //!< The offset of execInfo std::vector execSvmPtr_; //!< The non argument svm pointers for kernel FGSStatus svmSystemPointersSupport_; //!< The flag for the status of the kernel // support of fine-grain system sharing. uint32_t memoryObjOffset_; //!< The number of memory objects uint32_t samplerObjOffset_; //!< The number of sampler objects uint32_t queueObjOffset_; //!< The number of queue objects amd::Memory** memoryObjects_; //!< Memory objects, associated with the kernel amd::Sampler** samplerObjects_; //!< Sampler objects, associated with the kernel amd::DeviceQueue** queueObjects_; //!< Queue objects, associated with the kernel uint32_t totalSize_; //!< The total size of all captured parameters struct { uint32_t validated_ : 1; //!< True if all parameters are defined. uint32_t execNewVcop_ : 1; //!< special new VCOP for kernel execution uint32_t execPfpaVcop_ : 1; //!< special PFPA VCOP for kernel execution uint32_t unused : 29; //!< unused }; public: //! Construct a new instance of parameters for the given signature. KernelParameters(KernelSignature& signature) : signature_(signature), execInfoOffset_(0), svmSystemPointersSupport_(FGS_DEFAULT), memoryObjects_(nullptr), samplerObjects_(nullptr), queueObjects_(nullptr), validated_(0), execNewVcop_(0), execPfpaVcop_(0) { totalSize_ = signature.paramsSize() + (signature.numMemories() + signature.numSamplers() + signature.numQueues()) * sizeof(void*); values_ = reinterpret_cast
(this) + alignUp(sizeof(KernelParameters), 16); memoryObjOffset_ = signature_.paramsSize(); memoryObjects_ = reinterpret_cast(values_ + memoryObjOffset_); samplerObjOffset_ = memoryObjOffset_ + signature_.numMemories() * sizeof(amd::Memory*); samplerObjects_ = reinterpret_cast(values_ + samplerObjOffset_); queueObjOffset_ = samplerObjOffset_ + signature_.numSamplers() * sizeof(amd::Sampler*); queueObjects_ = reinterpret_cast(values_ + queueObjOffset_); address limit = reinterpret_cast
(&queueObjects_[signature_.numQueues()]); ::memset(values_, '\0', limit - values_); } explicit KernelParameters(const KernelParameters& rhs) : signature_(rhs.signature_), execInfoOffset_(rhs.execInfoOffset_), execSvmPtr_(rhs.execSvmPtr_), svmSystemPointersSupport_(rhs.svmSystemPointersSupport_), memoryObjects_(nullptr), samplerObjects_(nullptr), queueObjects_(nullptr), totalSize_(rhs.totalSize_), validated_(rhs.validated_), execNewVcop_(rhs.execNewVcop_), execPfpaVcop_(rhs.execPfpaVcop_) { values_ = reinterpret_cast
(this) + alignUp(sizeof(KernelParameters), 16); memoryObjOffset_ = signature_.paramsSize(); memoryObjects_ = reinterpret_cast(values_ + memoryObjOffset_); samplerObjOffset_ = memoryObjOffset_ + signature_.numMemories() * sizeof(amd::Memory*); samplerObjects_ = reinterpret_cast(values_ + samplerObjOffset_); queueObjOffset_ = samplerObjOffset_ + signature_.numSamplers() * sizeof(amd::Sampler*); queueObjects_ = reinterpret_cast(values_ + queueObjOffset_); address limit = reinterpret_cast
(&queueObjects_[signature_.numQueues()]); ::memcpy(values_, rhs.values_, limit - values_); } //! Reset the parameter at the given \a index (becomes undefined). void reset(size_t index) { signature_.params()[index].info_.defined_ = false; validated_ = 0; } //! Set the parameter at the given \a index to the value pointed by \a value // \a svmBound indicates that \a value is a SVM pointer. void set(size_t index, size_t size, const void* value, bool svmBound = false); //! Return true if the parameter at the given \a index is defined. bool test(size_t index) const { return signature_.at(index).info_.defined_; } //! Return true if all the parameters have been defined. bool check(); //! The amount of memory required for local memory needed size_t localMemSize(size_t minDataTypeAlignment) const; //! Capture the state of the parameters and return the stack base pointer. address capture(const Device& device, cl_ulong lclMemSize, cl_int* error); //! Release the captured state of the parameters. void release(address parameters, const amd::Device& device) const; //! Allocate memory for this instance as well as the required storage for // the values_, defined_, and rawPointer_ arrays. void* operator new(size_t size, const KernelSignature& signature) { size_t requiredSize = alignUp(size, 16) + signature.paramsSize() + (signature.numMemories() + signature.numSamplers() + signature.numQueues()) * sizeof(void*); return AlignedMemory::allocate(requiredSize, PARAMETERS_MIN_ALIGNMENT); } //! Deallocate the memory reserved for this instance. void operator delete(void* ptr) { AlignedMemory::deallocate(ptr); } //! Deallocate the memory reserved for this instance, // matching overloaded operator new. void operator delete(void* ptr, const KernelSignature& signature) { AlignedMemory::deallocate(ptr); } //! Returns raw kernel parameters without capture address values() const { return values_; } //! Return true if the captured parameter at the given \a index is bound to // SVM pointer. bool boundToSvmPointer(const Device& device, const_address capturedAddress, size_t index) const; //! add the svmPtr execInfo into container void addSvmPtr(void* const* execInfoArray, size_t count) { execSvmPtr_.clear(); for (size_t i = 0; i < count; i++) { execSvmPtr_.push_back(execInfoArray[i]); } } //! get the number of svmPtr in the execInfo container size_t getNumberOfSvmPtr() const { return execSvmPtr_.size(); } //! get the offset of svmPtr in the parameters uint32_t getExecInfoOffset() const { return execInfoOffset_; } //! get the offset of memory objects in the parameters uint32_t memoryObjOffset() const { return memoryObjOffset_; } //! get the offset of sampler objects in the parameters uint32_t samplerObjOffset() const { return samplerObjOffset_; } //! get the offset of memory objects in the parameters uint32_t queueObjOffset() const { return queueObjOffset_; } //! set the status of kernel support fine-grained SVM system pointer sharing void setSvmSystemPointersSupport(FGSStatus svmSystemSupport) { svmSystemPointersSupport_ = svmSystemSupport; } //! return the status of kernel support fine-grained SVM system pointer sharing FGSStatus getSvmSystemPointersSupport() const { return svmSystemPointersSupport_; } //! set the new VCOP in the execInfo container void setExecNewVcop(const bool newVcop) { execNewVcop_ = (newVcop == true); } //! set the PFPA VCOP in the execInfo container void setExecPfpaVcop(const bool pfpaVcop) { execPfpaVcop_ = (pfpaVcop == true); } //! get the new VCOP in the execInfo container bool getExecNewVcop() const { return (execNewVcop_ == 1); } //! get the PFPA VCOP in the execInfo container bool getExecPfpaVcop() const { return (execPfpaVcop_ == 1); } }; /*! \brief Encapsulates a __kernel function and the argument values * to be used when invoking this function. */ class Kernel : public RuntimeObject { private: //! The program where this kernel is defined. SharedReference program_; const Symbol& symbol_; //!< The symbol for this kernel. std::string name_; //!< The kernel's name. KernelParameters* parameters_; //!< The parameters. protected: //! Destroy this kernel ~Kernel(); public: /*! \brief Construct a kernel object from the __kernel function * \a kernelName in the given \a program. */ Kernel(Program& program, const Symbol& symbol, const std::string& name); //! Construct a new kernel object from an existing one. Used by CloneKernel. explicit Kernel(const Kernel& rhs); //! Return the program containing this kernel. Program& program() const { return program_(); } //! Return this kernel's signature. const KernelSignature& signature() const; //! Return the kernel entry point for the given device. const device::Kernel* getDeviceKernel(const Device& device //!< Device object ) const; //! Return the parameters. KernelParameters& parameters() const { return *parameters_; } //! Return the kernel's name. const std::string& name() const { return name_; } virtual ObjectType objectType() const { return ObjectTypeKernel; } }; /*! @} * @} */ } // namespace amd #endif /*KERNEL_HPP_*/