Files
rocm-systems/rocclr/runtime/device/gpu/gpukernel.hpp
T
foreman 6cc75de90f P4 to Git Change 1599699 by gandryey@gera-w8 on 2018/08/29 18:43:02
SWDEV-79445 - OCL generic changes and code clean-up
	- Move WaveLimiter logic to the abstract layer. PAL version was taken as the base, thus performance of GSL path can be affected by this change

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#315 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devwavelimiter.cpp#1 move/add
... //depot/stg/opencl/drivers/opencl/runtime/device/devwavelimiter.hpp#1 move/add
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#598 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#331 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#133 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.cpp#15 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.hpp#11 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#107 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#64 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.cpp#8 move/delete
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.hpp#8 move/delete
2018-08-29 18:54:19 -04:00

864 строки
29 KiB
C++

//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#ifndef GPUKERNEL_HPP_
#define GPUKERNEL_HPP_
#include "device/device.hpp"
#include "utils/macros.hpp"
#include "platform/command.hpp"
#include "platform/program.hpp"
#include "platform/kernel.hpp"
#include "platform/sampler.hpp"
#include "device/gpu/gpudevice.hpp"
#include "device/gpu/gpuvirtual.hpp"
#include "amd_hsa_kernel_code.h"
#include "device/gpu/gpuprintf.hpp"
#include "device/devwavelimiter.hpp"
#include "hsa.h"
//! Forward declaration of amd::hsa::loader::Symbol.
//! This header only refers to the type through a pointer
//! (see HSAILKernel::init and HSAILKernel::aqlCreateHWInfo).
namespace amd {
namespace hsa {
namespace loader {
class Symbol;
} // loader
} // hsa
} // amd
//! \namespace gpu GPU Device Implementation
namespace gpu {
// Forward declarations of GPU backend classes that are referenced by
// pointer or reference in the declarations below
class VirtualGPU;
class Device;
class NullDevice;
class HSAILProgram;
//! Helper that rebases an offset stored in a pointer-typed value
struct HWSHADER_Helper {
  /*! \brief Adds the numeric value of \a offset to the address held in \a base
   *
   *  Both arguments are reinterpreted as integers, summed, and the sum is
   *  reinterpreted back as type T
   *
   *  \return The rebased value, typed as T
   */
  template <typename S, typename T> static T Get(S base, T offset) {
    const intptr_t baseAddress = reinterpret_cast<intptr_t>(base);
    const size_t fieldOffset = reinterpret_cast<size_t>(offset);
    return reinterpret_cast<T>(baseAddress + fieldOffset);
  }
};
//! Rebases the value stored in (shader)->field onto the address of shader
#define HWSHADER_Get(shader, field) HWSHADER_Helper::Get((shader), (shader)->field)
/*! \brief Computes the address located structSize * size bytes past \a src
 *
 *  The source is reinterpreted as an integer address, the byte offset is
 *  added, and the result is stored into \a dst reinterpreted as type D
 */
template <typename D, typename S>
static void CalcPtr(D& dst, const S src, size_t structSize, size_t size) {
  const size_t byteOffset = structSize * size;
  const intptr_t srcAddress = reinterpret_cast<intptr_t>(src);
  dst = reinterpret_cast<D>(srcAddress + byteOffset);
}
/*! \addtogroup GPU GPU Device Implementation
* @{
*/
/*! \brief Helper function for the std::string processing.
* Finds the name in the std::string
*
* NOTE(review): pos is passed by pointer, presumably so it can be advanced
* past the matched text - confirm against the implementation.
*
* \return True if we found the entry of the symbols
*/
bool expect(const std::string& str, //!< The original std::string
size_t* pos, //!< Position to start
const std::string& sym //!< The symbols to expect
);
/*! \brief Helper function for the std::string processing.
* Gets a word from the std::string
*
* NOTE(review): pos is passed by pointer, presumably so it can be advanced
* past the extracted word - confirm against the implementation.
*
* \return True if we successfully received a word
*/
bool getword(const std::string& str, //!< The original std::string
size_t* pos, //!< Position to start
std::string& sym //!< Returned word
);
/*! \brief Helper function for the std::string processing.
* Loads an unsigned number from the metadata
*
* NOTE(review): the expected numeric base is not visible in this header;
* presumably decimal, since separate *Hex variants exist - confirm.
*
* \return True if we loaded a number
*/
bool getuint(const std::string& str, //!< The original std::string
size_t* pos, //!< Position to start
uint* val //!< Returned number
);
/*! \brief Helper function for the std::string processing.
* Loads an unsigned number from the metadata in HEX format
*
* \return True if we loaded a number
*/
bool getuintHex(const std::string& str, //!< The original std::string
size_t* pos, //!< Position to start
uint* val //!< Returned number
);
/*! \brief Helper function for the std::string processing.
* Loads a 64-bit unsigned number from the metadata in HEX format
*
* \return True if we loaded a number
*/
bool getuint64Hex(const std::string& str, //!< The original std::string
size_t* pos, //!< Position to start
uint64_t* val //!< Returned 64-bit number
);
/*! \brief Helper function for the std::string processing.
* Converts an unsigned integer to a string
*
* NOTE(review): whether the output is always NUL-terminated and what happens
* when the value does not fit into \a size characters is not visible here -
* confirm against the implementation.
*
* \return None
*/
void intToStr(size_t value, //!< Value for conversion
char* str, //!< Pointer to the converted string
size_t size //!< String size
);
//! Image constant data from ABI specification.
//! Width, height and depth are stored twice: as 32-bit unsigned integers and
//! as floats. The member order defines the structure layout.
struct ImageConstants : public amd::EmbeddedObject {
uint32_t width_; //!< Image surface width
uint32_t height_; //!< Image surface height
uint32_t depth_; //!< Image surface depth (1 for 2D images)
uint32_t dataType_; //!< Image surface data type
float widthFloat_; //!< Image surface width as a float
float heightFloat_; //!< Image surface height as a float
float depthFloat_; //!< Image surface depth as a float (1 for 2D images)
uint32_t channelOrder_; //!< Image surface texels channel order
};
//! Kernel argument description, built from the compiler metadata fields
struct KernelArg : public amd::HeapObject {
public:
//! \enum Kernel argument type
//! The same enumeration is used for the argument kind (type_) and for the
//! data type of the argument (dataType_). TotalTypes is the enumerator count.
enum ArgumentType {
NoType = 0,
PointerGlobal,
Value,
Image,
PointerLocal,
PointerHwLocal,
PointerPrivate,
PointerHwPrivate,
PointerConst,
PointerHwConst,
Float,
Double,
Half,
Char,
UChar,
Short,
UShort,
Int,
UInt,
Long,
ULong,
Struct,
Union,
Opaque,
Event,
Image1D, //!< first image
Image2D,
Image1DB,
Image1DA,
Image2DA,
Image3D, //!< last image
Counter,
Sampler,
PrivateSize,
LocalSize,
HwPrivateSize,
HwLocalSize,
Grouping,
WrkgrpSize,
Wavefront,
PrivateFixed,
ErrorMessage,
WarningMessage,
PrintfFormatStr,
MetadataVersion,
UavId,
ABI64Bit,
GWS,
SWGWS,
Reflection,
ConstArg,
ConstBufId,
PrintfBufId,
GroupingHint,
VecTypeHint,
WavesPerSimdHint,
TotalTypes
};
// The compiler metadata fields
std::string name_; //!< parameter's name
ArgumentType type_; //!< type of argument
// size_ and location_ share the same storage; which one is meaningful
// depends on the argument type
union {
uint size_; //!< number of arguments (for values and pointers only)
uint location_; //!< sampler's location (for samplers only)
};
uint cbIdx_; //!< constant buffer index
uint cbPos_; //!< dword address in CB for the argument
std::string buf_; //!< buffer tag
uint index_; //!< buffer/image/sampler index
uint alignment_; //!< the required argument's alignment
ArgumentType dataType_; //!< data type of the argument
// Memory attributes; value_ aliases all of the bit flags as one integer
union {
struct {
uint uavBuf_ : 1; //!< UAV memory, no global heap
uint realloc_ : 1; //!< argument has to be reallocated in the global heap
uint readOnly_ : 1; //!< Read only memory object
uint writeOnly_ : 1; //!< Write only memory object
uint readWrite_ : 1; //!< Read/Write memory object
};
uint value_; //!< all memory attribute bits as a single value
} memory_;
std::string typeName_; //!< argument's type name
uint typeQualifier_; //!< argument's type qualifier
//! Default constructor for the kernel argument
KernelArg();
//! Copy constructor for the kernel argument
KernelArg(const KernelArg& data);
//! Overloads operator=
KernelArg& operator=(const KernelArg& data);
//! Destructor of the kernel argument.
//! The explicit clear() is redundant: std::string members release their
//! storage in their own destructors.
~KernelArg() { name_.clear(); }
/*! \brief Checks if this argument requires a place in constant buffer
*
* \return True if we need CB
*/
bool isCbNeeded() const;
/*! \brief Retrieves the argument's size
*
* \return Size of the current argument
*/
size_t size(bool gpuLayer //!< True if we want the argument's size for the GPU layer
) const;
/*! \brief Retrieves the argument's type for the abstraction layer
*
* \return The argument's type in the abstraction layer format
*/
clk_value_type_t type() const;
/*! \brief Retrieves the argument's address qualifier for the abstraction layer
*
* \return The argument's address qualifier in the abstraction layer format
*/
cl_kernel_arg_address_qualifier addressQualifier() const;
/*! \brief Retrieves the argument's access qualifier for the abstraction layer
*
* \return The argument's access qualifier in the abstraction layer format
*/
cl_kernel_arg_access_qualifier accessQualifier() const;
/*! \brief Retrieves the argument's type name for the abstraction layer
*
* \return The argument's type name; the pointer refers to typeName_'s
* internal buffer
*/
const char* typeName() const { return typeName_.c_str(); }
/*! \brief Retrieves the argument's type qualifier for the abstraction layer
*
* \return The stored type qualifier; for PointerConst and PointerHwConst
* arguments CL_KERNEL_ARG_TYPE_CONST is always OR-ed in
*/
cl_kernel_arg_type_qualifier typeQualifier() const {
switch (type_) {
case PointerConst:
case PointerHwConst:
// Constant pointers report the const qualifier even if metadata omitted it
return static_cast<cl_kernel_arg_type_qualifier>(typeQualifier_ | CL_KERNEL_ARG_TYPE_CONST);
default:
return static_cast<cl_kernel_arg_type_qualifier>(typeQualifier_);
}
}
//! Special case for vectors with component size <= 16bit
const static uint VectorSizeLimit = 4;
//! NOTE(review): meaning of the returned value is defined in the
//! implementation file - presumably related to VectorSizeLimit; confirm
size_t specialVector() const;
};
//! Associates a data type tag name with its KernelArg::ArgumentType value
struct DataTypeConst {
const char* tagName_; //!< data type's name
KernelArg::ArgumentType type_; //!< data type
};
//! Metadata description for parsing.
//! The anonymous bit-field structure occupies 32 bits in total
//! (six 1-bit flags plus 26 reserved bits).
struct MetaDataConst {
const char* typeName_; //!< parameter's name
KernelArg::ArgumentType type_; //!< type of argument
struct {
uint size_ : 1; //!< number of arguments
uint name_ : 1; //!< argument's name
uint resType_ : 1; //!< argument's type
uint cbIdx_ : 1; //!< resource index CB, sampler or image
uint cbPos_ : 1; //!< dword address in CB for the argument
uint buf_ : 1; //!< buffer tag
uint reserved : 26; //!< reserved
};
};
// ArgStateTotal is the sum of DescTotal and BasicTypeTotal and fixes the
// size of the ArgState array declared below
const uint DescTotal = 15;
const uint BasicTypeTotal = 15;
const uint ArgStateTotal = DescTotal + BasicTypeTotal;
//! The constant array that describes different metadata properties
extern const MetaDataConst ArgState[ArgStateTotal];
//! Data type table; its element count is provided by DataTypeTotal
extern const DataTypeConst DataType[];
extern const uint DataTypeTotal;
// Forward declarations of the program classes referenced by the kernels below
class Program;
class NullProgram;
//! Reference counted holder of a CAL kernel image.
//! Copying is disabled by declaring the copy operations private.
class CalImageReference : public amd::ReferenceCountedObject {
public:
//! Constructs the holder around the provided CAL image
CalImageReference(CALimage calImage) : image_(calImage) {}
//! Get CAL image
CALimage calImage() const { return image_; }
protected:
//! Destructor is protected, so objects can't be destroyed directly from
//! outside the class hierarchy.
//! NOTE(review): presumably destruction goes through the reference counting
//! of amd::ReferenceCountedObject - confirm.
~CalImageReference();
private:
//! Disable copy constructor
CalImageReference(const CalImageReference&);
//! Disable operator=
CalImageReference& operator=(const CalImageReference&);
CALimage image_; //!< CAL kernel image
};
//! \class GPU NullKernel - Kernel for offline device
class NullKernel : public device::Kernel {
public:
typedef std::vector<KernelArg*> arguments_t;
const static uint UavIdUndefined = 0xffff;
//! \enum Kernel flags, each enumerator is a distinct bit
enum Flags {
LimitWorkgroup = 1 << 0, //!< Limits the workgroup size
PrintfOutput = 1 << 1, //!< Kernel has printf output
PrivateFixed = 1 << 2, //!< NOTE(review): presumably a fixed private size - confirm
ABI64bit = 1 << 3, //!< Kernel has 64 bit ABI
Unused0 = 1 << 4, //!< Unused
Unused1 = 1 << 5, //!< Unused
ImageEnable = 1 << 6, //!< Kernel uses images
ImageWrite = 1 << 7, //!< Kernel writes images
};
//! \enum Resource type for binding
enum ResourceType {
Undefined = 0x00000000, //!< resource type will be detected
ConstantBuffer = 0x00000001, //!< resource is a constant buffer
GlobalBuffer = 0x00000002, //!< resource is a global buffer
ArgumentHeapBuffer = 0x00000004, //!< resource is an argument heap buffer
ArgumentBuffer = 0x00000005, //!< resource is an argument buffer
ArgumentImageRead = 0x00000006, //!< resource is an argument image read
ArgumentImageWrite = 0x00000007, //!< resource is an argument image write
ArgumentConstBuffer = 0x00000008, //!< resource is an argument const buffer
ArgumentCounter = 0x00000009, //!< resource is a global counter
ArgumentUavID = 0x0000000a, //!< resource is a dummy ID read
ArgumentCbID = 0x0000000b, //!< resource is a constant buffer
ArgumentPrintfID = 0x0000000c, //!< resource is a printf buffer
};
//! GPU kernel constructor
NullKernel(const std::string& name, //!< The kernel's name
const NullDevice& gpuNullDev, //!< GPU device object
const NullProgram& nullProg //!< Reference to the program
);
virtual ~NullKernel();
/*! \brief Creates a GPU kernel in CAL
*
* \return True if we successfully created a kernel in CAL
*/
bool create(const std::string& code, //!< IL source code
const std::string& metadata, //!< the kernel metadata structure
const void* binaryCode = NULL, //!< binary machine code for CAL
size_t binarySize = 0 //!< the machine code size
);
//! Returns the CAL image held by the CAL image reference.
//! calRef_ is dereferenced without a check.
CALimage calImage() const { return calRef_->calImage(); }
//! Returns TRUE if we successfully retrieved the binary from CAL
bool getCalBinary(void* binary, //!< ISA binary code
size_t size //!< ISA binary size
) const;
//! Returns CAL image size
size_t getCalBinarySize() const;
//! Returns GPU device object, associated with this kernel
const NullDevice& nullDev() const { return gpuDev_; }
//! Returns the parent program object, associated with this kernel
const NullProgram& nullProg() const { return prog_; }
//! Returns the kernel's build error
const cl_int buildError() const { return buildError_; }
//! Returns the kernel's flags
uint flags() const { return flags_; }
//! Returns TRUE if ABI is for 64 bits
bool abi64Bit() const { return (flags_ & ABI64bit) ? true : false; }
//! Returns the total number of all arguments
size_t argSize() const { return arguments_.size(); }
//! Returns instruction count of the current kernel
uint instructionCnt() const { return instructionCnt_; }
protected:
/*! \brief Parses the metadata structure for the kernel,
* provided by the OpenCL compiler
*
* \return True if we successfully parsed all arguments
*/
bool parseArguments(const std::string& metaData, //!< the program for parsing
uint* uavRefCount //!< an array of reference counters for used UAVs
);
//! Returns the argument for the specified index (idx is not range checked)
const KernelArg* argument(uint idx) const { return arguments_[idx]; }
//! Adds the kernel argument into the list
void addArgument(KernelArg* arg) { arguments_.push_back(arg); }
//! Returns the argument for the specified sampler's index (idx is not range checked)
const KernelArg* sampler(uint idx) const { return intSamplers_[idx]; }
//! Returns the total number of all internal samplers
size_t samplerSize() const { return intSamplers_.size(); }
//! Adds the kernel sampler into the sampler's list
void addSampler(KernelArg* arg) { intSamplers_.push_back(arg); }
//! Returns UAV raw index for this kernel
uint uavRaw() const { return uavRaw_; }
cl_int buildError_; //!< Kernel's build error
std::string ilSource_; //!< IL source code of this kernel
const NullDevice& gpuDev_; //!< GPU device object
const NullProgram& prog_; //!< Reference to the parent program
CalImageReference* calRef_; //!< CAL image reference for this kernel
bool internal_; //!< Runtime internal kernel
uint flags_; //!< kernel object flags
arguments_t arguments_; //!< kernel arguments for the execution
arguments_t intSamplers_; //!< predefined internal kernel samplers
size_t* cbSizes_; //!< real constant buffer sizes for this kernel
uint numCb_; //!< total number of constant buffers
uint uavRaw_; //!< UAV used for RAW access
bool rwAttributes_; //!< backend provides RW attributes for arguments
uint instructionCnt_; //!< Instruction count
uint cbId_; //!< UAV used for constant buffer access
uint printfId_; //!< UAV used for printf buffer access
private:
//! Disable copy constructor
NullKernel(const NullKernel&);
//! Disable operator=
NullKernel& operator=(const NullKernel&);
//! Creates a filename for ISA/IL dumps
std::string mkDumpName(const char* extension //!< File extension to append
) const;
//! Creates the multibinary image for the kernel.
//! NOTE(review): ownership of the returned image is not visible here - confirm
bool createMultiBinary(uint* imageSize, //!< Multibinary image size
void** image, //!< Multibinary image
const void* isa //!< Kernel HW info
);
//! SI HW specific setup for kernels
bool siCreateHwInfo(const void* shader, //!< HW info shader
AMUabiAddEncoding& encoding //!< ABI encoding structure
);
//! r800 HW specific setup for kernels
bool r800CreateHwInfo(const void* shader, //!< HW info shader
AMUabiAddEncoding& encoding //!< ABI encoding structure
);
};
//! \class GPU kernel
class Kernel : public NullKernel {
public:
//! Initial values, passed into the kernel constructor
struct InitData {
uint privateSize_; //!< Private ring initial size
uint localSize_; //!< Local ring initial size
uint hwPrivateSize_; //!< HW private ring initial size
uint hwLocalSize_; //!< HW local ring initial size
uint flags_; //!< Kernel initialization flags
};
//! GPU kernel constructor
Kernel(const std::string& name, //!< The kernel's name
const Device& gpuDev, //!< GPU device object
const Program& prog, //!< Reference to the program
const InitData* initData_ //!< Initialization data
);
//! GPU kernel destructor
virtual ~Kernel();
/*! \brief Creates a GPU kernel in CAL
*
* \return True if we successfully created a kernel in CAL
*/
bool create(const std::string& code, //!< IL source code
const std::string& metadata, //!< the kernel metadata structure
const void* binaryCode = NULL, //!< binary machine code for CAL
size_t binarySize = 0 //!< the machine code size
);
//! Initializes the CAL program grid for the kernel execution
void setupProgramGrid(VirtualGPU& gpu, //!< virtual GPU device object
size_t workDim, //!< work dimension
const amd::NDRange& glbWorkOffset, //!< global work offset
const amd::NDRange& gblWorkSize, //!< global work size
amd::NDRange& lclWorkSize, //!< local work size
const amd::NDRange& groupOffset, //!< group offsets
const amd::NDRange& glbWorkOffsetOrg, //!< original global work offset
const amd::NDRange& glbWorkSizeOrg //!< original global work size
) const;
/*! \brief Detects if runtime has to disable cache optimization and
* recompiles the kernel
*
* \note The function is declared void, so no status is reported to the caller
*/
void processMemObjects(VirtualGPU& gpu, //!< Virtual GPU objects - queue
const amd::Kernel& kernel, //!< AMD kernel object for execution
const_address params, //!< pointer to the param's store
bool nativeMem //!< Native memory objects
) const;
/*! \brief Loads all kernel arguments, so we could run the kernel in HW.
* This includes CB update and resource binding
*
* \return True if we successfully loaded the arguments
*/
bool loadParameters(VirtualGPU& gpu, //!< virtual GPU device object
const amd::Kernel& kernel, //!< AMD kernel object for execution
const_address params, //!< pointer to the param's store
bool nativeMem //!< Native memory objects
) const;
//! Binds the constant buffers associated with the kernel
bool bindConstantBuffers(VirtualGPU& gpu) const;
/*! \brief Runs the kernel on HW
*
* \return True if we successfully executed the kernel
*/
bool run(VirtualGPU& gpu, //!< virtual GPU device object
GpuEvent* gpuEvent, //!< Pointer to the GPU event
bool lastRun, //!< Last run in the split execution
bool lastDoppCmd, //!< for last dopp submission kernel dispatch
bool pfpaDoppCmd //!< for PFPA dopp submission kernel dispatch
) const;
//! Help function to debug the kernel output
void debug(VirtualGPU& gpu //!< virtual GPU device object
) const;
//! Programs internal samplers defined inside the kernel
bool setInternalSamplers(VirtualGPU& gpu //!< Virtual GPU device object
) const;
//! Returns TRUE if we successfully retrieved the binary from CAL
bool getCalBinary(void* binary, //!< ISA binary code
size_t size //!< ISA binary size
) const;
//! Returns CAL image size
size_t getCalBinarySize() const;
//! Returns GPU device object, associated with this kernel
const Device& dev() const;
//! Returns the program object, associated with this kernel
const Program& prog() const;
//! Binds global HW constant buffers
bool bindGlobalHwCb(VirtualGPU& gpu, //!< Virtual GPU device object
VirtualGPU::GslKernelDesc* desc //!< Kernel descriptor
) const;
protected:
//! Initializes the kernel parameters for the abstraction layer
bool initParameters();
/*! \brief Creates constant buffer resources, associated with the kernel
*
* \return TRUE if we successfully created constant buffers
*/
bool initConstBuffers();
private:
//! Disable copy constructor
Kernel(const Kernel&);
//! Disable operator=
Kernel& operator=(const Kernel&);
//! \enum Fixed Metadata offsets
//! GlobalDataStoreOffset and DebugOffset intentionally or accidentally share
//! the value 8 (NOTE(review): confirm which); TotalABIVectors evaluates to 10.
enum MetadataOffsets {
GlobalWorkitemOffset = 0,
LocalWorkitemOffset = 1,
GroupsOffset = 2,
PrivateRingOffset = 3,
LocalRingOffset = 4,
MathLibOffset = 5,
GlobalWorkOffsetOffset = 6,
GroupWorkOffsetOffset = 7,
GlobalDataStoreOffset = 8,
DebugOffset = 8,
NDRangeGlobalWorkOffsetOffset = 9,
// The total number of constants reserved for ABI
TotalABIVectors
};
/*! \brief Sets the kernel argument
*
* \return True if we successfully updated the arguments
*/
bool setArgument(VirtualGPU& gpu, //!< Virtual GPU device object
const amd::Kernel& kernel, //!< AMD kernel object
uint idx, //!< the argument index
const_address params,//!< the arguments data
const amd::KernelParameterDescriptor& desc, //!< Argument's descriptor
bool nativeMem //!< Native memory objects
) const;
/*! \brief Initializes local and private buffer ranges
*
* \return True if we successfully initialized the ranges
*/
bool initLocalPrivateRanges(VirtualGPU& gpu //!< Virtual GPU device object
) const;
//! Sets local and private buffer ranges
void setLocalPrivateRanges(VirtualGPU& gpu //!< Virtual GPU device object
) const;
//! Sets the sampler's parameters for the image look-up
void setSampler(VirtualGPU& gpu, //!< virtual GPU device object
uint32_t state, //!< sampler state
uint physUnit //!< sampler's number
) const;
/*! \brief Binds resource
*
* The trailing offset parameter defaults to 0.
*
* \return Presumably true if the resource was successfully bound -
* NOTE(review): confirm against the implementation
*/
bool bindResource(VirtualGPU& gpu, //!< virtual GPU device object
const Memory& memory, //!< memory for binding
uint paramIdx, //!< index of the parameter
ResourceType type, //!< resource type
uint physUnit, //!< PhysUnit
size_t offset = 0) const;
//! Unbinds all resources for the kernel
void unbindResources(VirtualGPU& gpu, //!< virtual GPU device object
GpuEvent gpuEvent, //!< GPU event that will be associated with the resources
bool lastRun //!< last run in the split execution
) const;
//! Copies image constants to the constant buffer
void copyImageConstants(const amd::Image* amdImage, //!< Abstraction layer image object
ImageConstants* imageData //!< Pointer in CB to the image constants
) const;
//! Finds local workgroup size
void findLocalWorkSize(size_t workDim, //!< Work dimension
const amd::NDRange& gblWorkSize, //!< Global work size
amd::NDRange& lclWorkSize //!< Local work size
) const;
uint hwPrivateSize_; //!< initial HW private size
uint hwLocalSize_; //!< initial HW local size
};
//! Address qualifier of an HSAIL kernel argument
enum HSAIL_ADDRESS_QUALIFIER {
  HSAIL_ADDRESS_ERROR = 0,
  HSAIL_ADDRESS_GLOBAL = 1,
  HSAIL_ADDRESS_LOCAL = 2,
  HSAIL_MAX_ADDRESS_QUALIFIERS = 3  //!< One past the last qualifier
};
//! Kind of an HSAIL kernel argument
enum HSAIL_ARG_TYPE {
  HSAIL_ARGTYPE_ERROR = 0,
  HSAIL_ARGTYPE_POINTER = 1,
  HSAIL_ARGTYPE_VALUE = 2,
  HSAIL_ARGTYPE_IMAGE = 3,
  HSAIL_ARGTYPE_SAMPLER = 4,
  HSAIL_ARGTYPE_QUEUE = 5,
  HSAIL_ARGMAX_ARG_TYPES = 6  //!< One past the last argument type
};
//! Data type of an HSAIL kernel argument
enum HSAIL_DATA_TYPE {
  HSAIL_DATATYPE_ERROR = 0,
  HSAIL_DATATYPE_B1 = 1,
  HSAIL_DATATYPE_B8 = 2,
  HSAIL_DATATYPE_B16 = 3,
  HSAIL_DATATYPE_B32 = 4,
  HSAIL_DATATYPE_B64 = 5,
  HSAIL_DATATYPE_S8 = 6,
  HSAIL_DATATYPE_S16 = 7,
  HSAIL_DATATYPE_S32 = 8,
  HSAIL_DATATYPE_S64 = 9,
  HSAIL_DATATYPE_U8 = 10,
  HSAIL_DATATYPE_U16 = 11,
  HSAIL_DATATYPE_U32 = 12,
  HSAIL_DATATYPE_U64 = 13,
  HSAIL_DATATYPE_F16 = 14,
  HSAIL_DATATYPE_F32 = 15,
  HSAIL_DATATYPE_F64 = 16,
  HSAIL_DATATYPE_STRUCT = 17,
  HSAIL_DATATYPE_OPAQUE = 18,
  HSAIL_DATATYPE_MAX_TYPES = 19  //!< One past the last data type
};
//! Access type of an HSAIL kernel argument
enum HSAIL_ACCESS_TYPE {
  HSAIL_ACCESS_TYPE_NONE = 0,
  HSAIL_ACCESS_TYPE_RO = 1,
  HSAIL_ACCESS_TYPE_WO = 2,
  HSAIL_ACCESS_TYPE_RW = 3
};
//! \class HSAIL kernel
//! Several inline accessors below read fields of cpuAqlCode_ without a null
//! check. NOTE(review): presumably they are valid only after a successful
//! init() - confirm against the implementation.
class HSAILKernel : public device::Kernel {
public:
//! Description of a single HSAIL kernel argument
struct Argument {
std::string name_; //!< Argument's name
std::string typeName_; //!< Argument's type name
uint size_; //!< Size in bytes
uint offset_; //!< Argument's offset
uint alignment_; //!< Argument's alignment
HSAIL_ARG_TYPE type_; //!< Type of the argument
HSAIL_ADDRESS_QUALIFIER addrQual_; //!< Address qualifier of the argument
HSAIL_DATA_TYPE dataType_; //!< The type of data
uint numElem_; //!< Number of elements
HSAIL_ACCESS_TYPE access_; //!< Access type for the argument
};
// Max number of possible extra (hidden) kernel arguments
static const uint MaxExtraArgumentsNum = 6;
//! HSAIL kernel constructor; name and compileOptions are taken by value
HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions, uint extraArgsNum);
virtual ~HSAILKernel();
//! Initializes the metadata required for this kernel,
//! finalizes the kernel if needed
bool init(amd::hsa::loader::Symbol* sym, bool finalize = false);
//! Returns a pointer to the hsail argument (i is not range checked)
const Argument* argument(size_t i) const { return arguments_[i]; }
//! Returns the number of hsail arguments
size_t numArguments() const { return arguments_.size(); }
//! Returns GPU device object, associated with this kernel
const Device& dev() const;
//! Returns HSA program associated with this kernel
const HSAILProgram& prog() const;
//! Returns LDS size used in this kernel
uint32_t ldsSize() const { return cpuAqlCode_->workgroup_group_segment_byte_size; }
//! Returns pointer on CPU to AQL code info
const void* cpuAqlCode() const { return cpuAqlCode_; }
//! Returns memory object with AQL code
gpu::Memory* gpuAqlCode() const { return code_; }
//! Returns size of AQL code
size_t aqlCodeSize() const { return codeSize_; }
//! Returns the size of argument buffer
size_t argsBufferSize() const { return cpuAqlCode_->kernarg_segment_byte_size; }
//! Returns spill reg size per workitem (the stored value is converted to int)
int spillSegSize() const { return cpuAqlCode_->workitem_private_segment_byte_size; }
//! Returns AQL packet in CPU memory
//! if the kernel arguments were successfully loaded, otherwise NULL
hsa_kernel_dispatch_packet_t* loadArguments(
VirtualGPU& gpu, //!< Running GPU context
const amd::Kernel& kernel, //!< AMD kernel object
const amd::NDRangeContainer& sizes, //!< NDrange container
const_address parameters, //!< Application arguments for the kernel
bool nativeMem, //!< Native memory objects are passed
uint64_t vmDefQueue, //!< GPU VM default queue pointer
uint64_t* vmParentWrap, //!< GPU VM parent aql wrap object
std::vector<const Memory*>& memList //!< Memory list for GSL/VidMM handles
) const;
//! Returns the kernel index in the program
uint index() const { return index_; }
//! Returns kernel's extra argument count
uint extraArgumentsNum() const { return extraArgumentsNum_; }
private:
//! Disable copy constructor
HSAILKernel(const HSAILKernel&);
//! Disable operator=
HSAILKernel& operator=(const HSAILKernel&);
//! Creates AQL kernel HW info
bool aqlCreateHWInfo(amd::hsa::loader::Symbol* sym);
//! Initializes arguments_ and the abstraction layer kernel parameters
void initArgList(const aclArgData* aclArg //!< List of ACL arguments
);
//! Initializes Hsail Argument metadata and info
void initHsailArgs(const aclArgData* aclArg //!< List of ACL arguments
);
std::vector<Argument*> arguments_; //!< Vector list of HSAIL Arguments
std::string compileOptions_; //!< compile options used for finalizing this kernel
amd_kernel_code_t* cpuAqlCode_; //!< AQL kernel code on CPU
const HSAILProgram& prog_; //!< Reference to the parent program
uint index_; //!< Kernel index in the program
gpu::Memory* code_; //!< Memory object with ISA code
size_t codeSize_; //!< Size of ISA code
char* hwMetaData_; //!< SI metadata
uint extraArgumentsNum_; //!< Number of extra (hidden) kernel arguments
};
/*@}*/} // namespace gpu
#endif /*GPUKERNEL_HPP_*/