/* Copyright (c) 2008-present Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef GPUKERNEL_HPP_ #define GPUKERNEL_HPP_ #include "device/device.hpp" #include "utils/macros.hpp" #include "platform/command.hpp" #include "platform/program.hpp" #include "platform/kernel.hpp" #include "platform/sampler.hpp" #include "device/gpu/gpudevice.hpp" #include "device/gpu/gpuvirtual.hpp" #include "amd_hsa_kernel_code.h" #include "device/gpu/gpuprintf.hpp" #include "device/devwavelimiter.hpp" #include "hsa.h" namespace amd { namespace hsa { namespace loader { class Symbol; } // loader } // hsa } // amd //! \namespace gpu GPU Device Implementation namespace gpu { class VirtualGPU; class Device; class NullDevice; class HSAILProgram; struct HWSHADER_Helper { template static T Get(S base, T offset) { return reinterpret_cast(reinterpret_cast(base) + reinterpret_cast(offset)); } }; #define HWSHADER_Get(shader, field) HWSHADER_Helper::Get((shader), (shader)->field) template static void CalcPtr(D& dst, const S src, size_t structSize, size_t size) { dst = reinterpret_cast(reinterpret_cast(src) + structSize * size); } /*! \addtogroup GPU GPU Device Implementation * @{ */ /*! \brief Helper function for the std::string processing. * Finds the name in the std::string * * \return True if we found the entry of the symbols */ bool expect(const std::string& str, //!< The original std::string size_t* pos, //!< Position to start const std::string& sym //!< The sympols to expect ); /*! \brief Helper function for the std::string processing. * Gets a word from the std::string * * \return True if we successfully received a word */ bool getword(const std::string& str, //!< The original std::string size_t* pos, //!< Position to start std::string& sym //!< Returned word ); /*! \brief Helper function for the std::string processing. * Loads numbers from the metadata * * \return True if we loaded a number */ bool getuint(const std::string& str, //!< The original std::string size_t* pos, //!< Position to start uint* val //!< Returned number ); /*! \brief Helper function for the std::string processing. * Loads numbers from the metadata in HEX format * * \return True if we loaded a number */ bool getuintHex(const std::string& str, //!< The original std::string size_t* pos, //!< Position to start uint* val //!< Returned number ); /*! \brief Helper function for the std::string processing. * Loads numbers from the metadata in HEX format * * \return True if we loaded a number */ bool getuint64Hex(const std::string& str, //!< The original std::string size_t* pos, //!< Position to start uint64_t* val //!< Returned number ); /*! \brief Helper function for the std::string processing. * Converts unsigned integer to string * * \return None */ void intToStr(size_t value, //!< Value for conversion char* str, //!< Pointer to the converted string size_t size //!< String size ); //! Image constant data from ABI specification struct ImageConstants : public amd::EmbeddedObject { uint32_t width_; //!< Image surface width uint32_t height_; //!< Image surface height uint32_t depth_; //!< Image surface depth (1 for 2D images) uint32_t dataType_; //!< Image surface data type float widthFloat_; //!< Image surface width float heightFloat_; //!< Image surface height float depthFloat_; //!< Image surface depth (1 for 2D images) uint32_t channelOrder_; //!< Image surface texels channel order }; //! Kernel arguments struct KernelArg : public amd::HeapObject { public: //! \enum Kernel argument type enum ArgumentType { NoType = 0, PointerGlobal, Value, Image, PointerLocal, PointerHwLocal, PointerPrivate, PointerHwPrivate, PointerConst, PointerHwConst, Float, Double, Half, Char, UChar, Short, UShort, Int, UInt, Long, ULong, Struct, Union, Opaque, Event, Image1D, //!< first image Image2D, Image1DB, Image1DA, Image2DA, Image3D, //!< last image Counter, Sampler, PrivateSize, LocalSize, HwPrivateSize, HwLocalSize, Grouping, WrkgrpSize, Wavefront, PrivateFixed, ErrorMessage, WarningMessage, PrintfFormatStr, MetadataVersion, UavId, ABI64Bit, GWS, SWGWS, Reflection, ConstArg, ConstBufId, PrintfBufId, GroupingHint, VecTypeHint, WavesPerSimdHint, TotalTypes }; // The compiler metadata fields std::string name_; //!< parameters name ArgumentType type_; //!< type of argument union { uint size_; //!< number of arguments (for values and pointers only) uint location_; //!< sampler's location (for samplers only) }; uint cbIdx_; //!< constant buffer index uint cbPos_; //!< dword address in CB for the argument std::string buf_; //!< buffer tag uint index_; //!< buffer/image/sampler index uint alignment_; //!< the required argument's alignment ArgumentType dataType_; //!< data type of the argument union { struct { uint uavBuf_ : 1; //!< UAV memory, no global heap uint realloc_ : 1; //!< argument has to be reallocatedin the global heap uint readOnly_ : 1; //!< Read only memory object uint writeOnly_ : 1; //!< Write only memory object uint readWrite_ : 1; //!< Read/Write memory object }; uint value_; } memory_; std::string typeName_; //!< argument's type name uint typeQualifier_; //!< argument's type qualifier //! Default constructor for the kernel argument KernelArg(); //! Copy constructor for the kernel argument KernelArg(const KernelArg& data); //! Overloads operator= KernelArg& operator=(const KernelArg& data); //! Destructor of the kernel argument ~KernelArg() { name_.clear(); } /*! \brief Checks if this arguments requires a place in constant buffer * * \return True if we need CB */ bool isCbNeeded() const; /*! \brief Retrieves the argument's size * * \return Size of the current argument */ size_t size(bool gpuLayer //!< True if we want the argument's size for the GPU layer ) const; /*! \brief Retrieves the argument's type for the abstraction layer * * \return The argument's type in the abstraction layer format */ clk_value_type_t type() const; /*! \brief Retrieves the argument's address qualifier for the abstraction layer * * \return The argument's address qualifier in the abstraction layer format */ cl_kernel_arg_address_qualifier addressQualifier() const; /*! \brief Retrieves the argument's access qualifier for the abstraction layer * * \return The argument's access qualifier in the abstraction layer format */ cl_kernel_arg_access_qualifier accessQualifier() const; /*! \brief Retrieves the argument's type name for the abstraction layer * * \return The argument's type name */ const char* typeName() const { return typeName_.c_str(); } /*! \brief Retrieves the argument's type qualifier for the abstraction layer * * \return The argument's type qualifier */ cl_kernel_arg_type_qualifier typeQualifier() const { switch (type_) { case PointerConst: case PointerHwConst: return static_cast(typeQualifier_ | CL_KERNEL_ARG_TYPE_CONST); default: return static_cast(typeQualifier_); } } //! Special case for vectors with component size <= 16bit const static uint VectorSizeLimit = 4; size_t specialVector() const; }; struct DataTypeConst { const char* tagName_; //!< data type's name KernelArg::ArgumentType type_; //!< data type }; //! Metadata description for parsing struct MetaDataConst { const char* typeName_; //!< parameters name KernelArg::ArgumentType type_; //!< type of argument struct { uint size_ : 1; //!< number of arguments uint name_ : 1; //!< argument's name uint resType_ : 1; //!< argument's type uint cbIdx_ : 1; //!< resource index CB, sampler or image uint cbPos_ : 1; //!< dword address in CB for the argument uint buf_ : 1; //!< buffer tag uint reserved : 26; //!< reserved }; }; const uint DescTotal = 15; const uint BasicTypeTotal = 15; const uint ArgStateTotal = DescTotal + BasicTypeTotal; //! The constant array that describes different metadata properties extern const MetaDataConst ArgState[ArgStateTotal]; extern const DataTypeConst DataType[]; extern const uint DataTypeTotal; // Forward declaration class Program; class NullProgram; class CalImageReference : public amd::ReferenceCountedObject { public: //! Default constructor CalImageReference(CALimage calImage) : image_(calImage) {} //! Get CAL image CALimage calImage() const { return image_; } protected: //! Default destructor ~CalImageReference(); private: //! Disable copy constructor CalImageReference(const CalImageReference&); //! Disable operator= CalImageReference& operator=(const CalImageReference&); CALimage image_; //!< CAL kernel image }; //! \class GPU NullKernel - Kernel for offline device class NullKernel : public device::Kernel { public: typedef std::vector arguments_t; const static uint UavIdUndefined = 0xffff; enum Flags { LimitWorkgroup = 1 << 0, //!< Limits the workgroup size PrintfOutput = 1 << 1, //!< Kernel has printf output PrivateFixed = 1 << 2, //!< Kernel has printf output ABI64bit = 1 << 3, //!< Kernel has 64 bit ABI Unused0 = 1 << 4, //!< Unused Unused1 = 1 << 5, //!< Unused ImageEnable = 1 << 6, //!< Kernel uses images ImageWrite = 1 << 7, //!< Kernel writes images }; //! \enum Resource type for binding enum ResourceType { Undefined = 0x00000000, //!< resource type will be detected ConstantBuffer = 0x00000001, //!< resource is a constant buffer GlobalBuffer = 0x00000002, //!< resource is a global buffer ArgumentHeapBuffer = 0x00000004, //!< resource is an argument buffer ArgumentBuffer = 0x00000005, //!< resource is an argument buffer ArgumentImageRead = 0x00000006, //!< resource is an argument image read ArgumentImageWrite = 0x00000007, //!< resource is an argument image write ArgumentConstBuffer = 0x00000008, //!< resource is an argument const buffer ArgumentCounter = 0x00000009, //!< resource is a global counter ArgumentUavID = 0x0000000a, //!< resource is a dummy ID read ArgumentCbID = 0x0000000b, //!< resource is a constant buffer ArgumentPrintfID = 0x0000000c, //!< resource is a printf buffer }; //! GPU kernel constructor NullKernel(const std::string& name, //!< The kernel's name const NullDevice& gpuNullDev, //!< GPU device object const NullProgram& nullProg //!< Reference to the program ); virtual ~NullKernel(); /*! \brief Creates a GPU kernel in CAL * * \return True if we successfully created a kernel in CAL */ bool create(const std::string& code, //!< IL source code const std::string& metadata, //!< the kernel metadata structure const void* binaryCode = NULL, //!< binary machine code for CAL size_t binarySize = 0 //!< the machine code size ); //! Returns CAL function descriptor CALimage calImage() const { return calRef_->calImage(); } //! Returns TRUE if we successfully retrieved the binary from CAL bool getCalBinary(void* binary, //!< ISA binary code size_t size //!< ISA binary size ) const; //! Returns CAL image size size_t getCalBinarySize() const; //! Returns GPU device object, associated with this kernel const NullDevice& nullDev() const { return gpuDev_; } //! Returns GPU device object, associated with this kernel const NullProgram& nullProg() const { return reinterpret_cast(prog_); } //! Returns the kernel's build error const int32_t buildError() const { return buildError_; } //! Returns the kernel's flags uint flags() const { return flags_; } //! Returns TRUE if ABI is for 64 bits bool abi64Bit() const { return (flags_ & ABI64bit) ? true : false; } //! Returns the total number of all arguments size_t argSize() const { return arguments_.size(); } //! Returns instruction count of the current kernel uint instructionCnt() const { return instructionCnt_; } protected: /*! \brief Parses the metadata structure for the kernel, * provided by the OpenCL compiler * * \return True if we succefully parsed all arguments */ bool parseArguments(const std::string& metaData, //!< the program for parsing uint* uavRefCount //!< an array of reference counters for used UAVs ); //! Returns the argument for the specified index const KernelArg* argument(uint idx) const { return arguments_[idx]; } //! Adds the kernel argument into the list void addArgument(KernelArg* arg) { arguments_.push_back(arg); } //! Returns the argument for the specified sampler's index const KernelArg* sampler(uint idx) const { return intSamplers_[idx]; } //! Returns the total number of all internal samplers size_t samplerSize() const { return intSamplers_.size(); } //! Adds the kernel sampler into the sampler's list void addSampler(KernelArg* arg) { intSamplers_.push_back(arg); } //! Returns UAV raw index for this kernel uint uavRaw() const { return uavRaw_; } int32_t buildError_; //!< Kernel's build error std::string ilSource_; //!< IL source code of this kernel const NullDevice& gpuDev_; //!< GPU device object CalImageReference* calRef_; //!< CAL image reference for this kernel bool internal_; //!< Runtime internal ker uint flags_; //!< kernel object flags arguments_t arguments_; //!< kernel arguments for the execution arguments_t intSamplers_; //!< predefined intenal kernel samplers size_t* cbSizes_; //!< real constant buffer sizes for this kernel uint numCb_; //!< total number of constant buffers uint uavRaw_; //!< UAV used for RAW access bool rwAttributes_; //!< backend provides RW attributes for arguments uint instructionCnt_; //!< Instruction count uint cbId_; //!< UAV used for constant buffer access uint printfId_; //!< UAV used for printf buffer access private: //! Disable copy constructor NullKernel(const NullKernel&); //! Disable operator= NullKernel& operator=(const NullKernel&); //! Creates a filename for ISA/IL dumps std::string mkDumpName(const char* extension //!< File extension to append ) const; bool createMultiBinary(uint* imageSize, //!< Multibinary image size void** image, //!< Multibinary image const void* isa //!< Kernel HW info ); //! SI HW specific setup for kernels bool siCreateHwInfo(const void* shader, //!< HW info shader AMUabiAddEncoding& encoding //!< ABI encoding structure ); //! r800 HW specific setup for kernels bool r800CreateHwInfo(const void* shader, //!< HW info shader AMUabiAddEncoding& encoding //!< ABI encoding structure ); }; //! \class GPU kernel class Kernel : public NullKernel { public: struct InitData { uint privateSize_; //!< Private ring initial size uint localSize_; //!< Local ring initial size uint hwPrivateSize_; //!< HW private ring initial size uint hwLocalSize_; //!< HW local ring initial size uint flags_; //!< Kernel initialization flags }; //! GPU kernel constructor Kernel(const std::string& name, //!< The kernel's name const Device& gpuDev, //!< GPU device object const Program& prog, //!< Reference to the program const InitData* initData_ //!< Initialization data ); //! GPU kernel destructor virtual ~Kernel(); /*! \brief Creates a GPU kernel in CAL * * \return True if we successfully created a kernel in CAL */ bool create(const std::string& code, //!< IL source code const std::string& metadata, //!< the kernel metadata structure const void* binaryCode = NULL, //!< binary machine code for CAL size_t binarySize = 0 //!< the machine code size ); //! Initializes the CAL program grid for the kernel execution void setupProgramGrid(VirtualGPU& gpu, //!< virtual GPU device object size_t workDim, //!< work dimension const amd::NDRange& glbWorkOffset, //!< global work offset const amd::NDRange& gblWorkSize, //!< global work size amd::NDRange& lclWorkSize, //!< local work size const amd::NDRange& groupOffset, //!< group offsets const amd::NDRange& glbWorkOffsetOrg, const amd::NDRange& glbWorkSizeOrg //!< original global work size ) const; /*! \brief Detects if runtime has to disable cache optimization and * recompiles the kernel * * \return True if aliases were detected in the kernel arguments */ void processMemObjects(VirtualGPU& gpu, //!< Virtual GPU objects - queue const amd::Kernel& kernel, //!< AMD kernel object for execution const_address params, //!< pointer to the param's store bool nativeMem //!< Native memory objects ) const; /*! \brief Loads all kernel arguments, so we could run the kernel in HW. * This includes CB update and resource binding * * \return True if we succefully loaded the arguments */ bool loadParameters(VirtualGPU& gpu, //!< virtual GPU device object const amd::Kernel& kernel, //!< AMD kernel object for execution const_address params, //!< pointer to the param's store bool nativeMem //!< Native memory objects ) const; //! Binds the constant buffers associated with the kernel bool bindConstantBuffers(VirtualGPU& gpu) const; /*! \brief Runs the kernel on HW * * \return True if we succefully executed the kernel */ bool run(VirtualGPU& gpu, //!< virtual GPU device object GpuEvent* gpuEvent, //!< Pointer to the GPU event bool lastRun, //!< Last run in the split execution bool lastDoppCmd, //!< for last dopp submission kernel dispatch bool pfpaDoppCmd //!< for PFPA dopp submission kernel dispatch ) const; //! Help function to debug the kernel output void debug(VirtualGPU& gpu //!< virtual GPU device object ) const; //! Programs internal samplers defined inside the kernel bool setInternalSamplers(VirtualGPU& gpu //!< Virtual GPU device object ) const; //! Returns TRUE if we successfully retrieved the binary from CAL bool getCalBinary(void* binary, //!< ISA binary code size_t size //!< ISA binary size ) const; //! Returns CAL image size size_t getCalBinarySize() const; //! Returns GPU device object, associated with this kernel const Device& dev() const; //! Returns GPU device object, associated with this kernel const Program& prog() const; //! Binds global HW constant buffers bool bindGlobalHwCb(VirtualGPU& gpu, //!< Virtual GPU device object VirtualGPU::GslKernelDesc* desc //!< Kernel descriptor ) const; protected: //! Initializes the kernel parameters for the abstraction layer bool initParameters(); /*! \brief Creates constant buffer resources, associated with the kernel * * \return TRUE if we succefully created constant buffers */ bool initConstBuffers(); private: //! Disable copy constructor Kernel(const Kernel&); //! Disable operator= Kernel& operator=(const Kernel&); //! \enum Fixed Metadata offsets enum MetadataOffsets { GlobalWorkitemOffset = 0, LocalWorkitemOffset = 1, GroupsOffset = 2, PrivateRingOffset = 3, LocalRingOffset = 4, MathLibOffset = 5, GlobalWorkOffsetOffset = 6, GroupWorkOffsetOffset = 7, GlobalDataStoreOffset = 8, DebugOffset = 8, NDRangeGlobalWorkOffsetOffset = 9, // The total number of constants reserved for ABI TotalABIVectors }; /*! \brief Sets the kernel argument * * \return True if we succefully updated the arguments */ bool setArgument(VirtualGPU& gpu, //!< Virtual GPU device object const amd::Kernel& kernel, //!< AMD kernel object uint idx, //!< the argument index const_address params,//!< the arguments data const amd::KernelParameterDescriptor& desc, //!< Argument's descriptor bool nativeMem //!< Native memory objects ) const; /*! \brief Initializes local and private buffer ranges * * \return True if we succefully initialized the ranges */ bool initLocalPrivateRanges(VirtualGPU& gpu //!< Virtual GPU device object ) const; //! Sets local and private buffer ranges void setLocalPrivateRanges(VirtualGPU& gpu //!< Virtual GPU device object ) const; //! Sets the sampler's parameters for the image look-up void setSampler(VirtualGPU& gpu, //!< virtual GPU device object uint32_t state, //!< sampler state uint physUnit //!< sampler's number ) const; /*! \brief Binds resource * * \return True if we succefully created constant buffers */ bool bindResource(VirtualGPU& gpu, //!< virtual GPU device object const Memory& memory, //!< memory for binding uint paramIdx, //!< index of the parameter ResourceType type, //!< resource type uint physUnit, //!< PhysUnit size_t offset = 0) const; //! Unbinds all resources for the kernel void unbindResources(VirtualGPU& gpu, //!< virtual GPU device object GpuEvent gpuEvent, //!< GPU event that will be associated with the resources bool lastRun //!< last run in the split execution ) const; //! Copies image constants to the constant buffer void copyImageConstants(const amd::Image* amdImage, //!< Abstraction layer image object ImageConstants* imageData //!< Pointer in CB to the image constants ) const; //! Finds local workgroup size void findLocalWorkSize(size_t workDim, //!< Work dimension const amd::NDRange& gblWorkSize, //!< Global work size amd::NDRange& lclWorkSize //!< Local work size ) const; uint hwPrivateSize_; //!< initial HW private size uint hwLocalSize_; //!< initial HW local size }; enum HSAIL_ADDRESS_QUALIFIER { HSAIL_ADDRESS_ERROR = 0, HSAIL_ADDRESS_GLOBAL, HSAIL_ADDRESS_LOCAL, HSAIL_MAX_ADDRESS_QUALIFIERS }; enum HSAIL_ARG_TYPE { HSAIL_ARGTYPE_ERROR = 0, HSAIL_ARGTYPE_POINTER, HSAIL_ARGTYPE_VALUE, HSAIL_ARGTYPE_IMAGE, HSAIL_ARGTYPE_SAMPLER, HSAIL_ARGTYPE_QUEUE, HSAIL_ARGMAX_ARG_TYPES }; enum HSAIL_DATA_TYPE { HSAIL_DATATYPE_ERROR = 0, HSAIL_DATATYPE_B1, HSAIL_DATATYPE_B8, HSAIL_DATATYPE_B16, HSAIL_DATATYPE_B32, HSAIL_DATATYPE_B64, HSAIL_DATATYPE_S8, HSAIL_DATATYPE_S16, HSAIL_DATATYPE_S32, HSAIL_DATATYPE_S64, HSAIL_DATATYPE_U8, HSAIL_DATATYPE_U16, HSAIL_DATATYPE_U32, HSAIL_DATATYPE_U64, HSAIL_DATATYPE_F16, HSAIL_DATATYPE_F32, HSAIL_DATATYPE_F64, HSAIL_DATATYPE_STRUCT, HSAIL_DATATYPE_OPAQUE, HSAIL_DATATYPE_MAX_TYPES }; enum HSAIL_ACCESS_TYPE { HSAIL_ACCESS_TYPE_NONE = 0, HSAIL_ACCESS_TYPE_RO, HSAIL_ACCESS_TYPE_WO, HSAIL_ACCESS_TYPE_RW }; class HSAILKernel : public device::Kernel { public: struct Argument { std::string name_; //!< Argument's name std::string typeName_; //!< Argument's type name uint size_; //!< Size in bytes uint offset_; //!< Argument's offset uint alignment_; //!< Argument's alignment HSAIL_ARG_TYPE type_; //!< Type of the argument HSAIL_ADDRESS_QUALIFIER addrQual_; //!< Address qualifier of the argument HSAIL_DATA_TYPE dataType_; //!< The type of data uint numElem_; //!< Number of elements HSAIL_ACCESS_TYPE access_; //!< Access type for the argument }; // Max number of possible extra (hidden) kernel arguments static const uint MaxExtraArgumentsNum = 6; HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions, uint extraArgsNum); virtual ~HSAILKernel(); //! Initializes the metadata required for this kernel, //! finalizes the kernel if needed bool init(amd::hsa::loader::Symbol* sym, bool finalize = false); //! Returns a pointer to the hsail argument const Argument* argument(size_t i) const { return arguments_[i]; } //! Returns the number of hsail arguments size_t numArguments() const { return arguments_.size(); } //! Returns GPU device object, associated with this kernel const Device& dev() const; //! Returns HSA program associated with this kernel const HSAILProgram& prog() const; //! Returns LDS size used in this kernel uint32_t ldsSize() const { return cpuAqlCode_->workgroup_group_segment_byte_size; } //! Returns pointer on CPU to AQL code info const void* cpuAqlCode() const { return cpuAqlCode_; } //! Returns memory object with AQL code gpu::Memory* gpuAqlCode() const { return code_; } //! Returns size of AQL code size_t aqlCodeSize() const { return codeSize_; } //! Returns the size of argument buffer size_t argsBufferSize() const { return cpuAqlCode_->kernarg_segment_byte_size; } //! Returns spill reg size per workitem int spillSegSize() const { return cpuAqlCode_->workitem_private_segment_byte_size; } //! Returns AQL packet in CPU memory //! if the kerenl arguments were successfully loaded, otherwise NULL hsa_kernel_dispatch_packet_t* loadArguments( VirtualGPU& gpu, //!< Running GPU context const amd::Kernel& kernel, //!< AMD kernel object const amd::NDRangeContainer& sizes, //!< NDrange container const_address parameters, //!< Application arguments for the kernel bool nativeMem, //!< Native memory objectes are passed uint64_t vmDefQueue, //!< GPU VM default queue pointer uint64_t* vmParentWrap, //!< GPU VM parent aql wrap object std::vector& memList //!< Memory list for GSL/VidMM handles ) const; //! Returns the kernel index in the program uint index() const { return index_; } //! Returns kernel's extra argument count uint extraArgumentsNum() const { return extraArgumentsNum_; } private: //! Disable copy constructor HSAILKernel(const HSAILKernel&); //! Disable operator= HSAILKernel& operator=(const HSAILKernel&); //! Creates AQL kernel HW info bool aqlCreateHWInfo(amd::hsa::loader::Symbol* sym); //! Initializes arguments_ and the abstraction layer kernel parameters void initArgList(const aclArgData* aclArg //!< List of ACL arguments ); //! Initializes Hsail Argument metadata and info void initHsailArgs(const aclArgData* aclArg //!< List of ACL arguments ); std::vector arguments_; //!< Vector list of HSAIL Arguments std::string compileOptions_; //!< compile used for finalizing this kernel amd_kernel_code_t* cpuAqlCode_; //!< AQL kernel code on CPU uint index_; //!< Kernel index in the program gpu::Memory* code_; //!< Memory object with ISA code size_t codeSize_; //!< Size of ISA code char* hwMetaData_; //!< SI metadata uint extraArgumentsNum_; //! Number of extra (hidden) kernel arguments }; /*@}*/} // namespace gpu #endif /*GPUKERNEL_HPP_*/