647aba6ed2
ECR #399840 - re-checkin of CL1109955 with the fix of OpenCL sanity check timeout (hw debug flag initialization) Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.h#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#174 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#238 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#3 add ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#3 add ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.hpp#3 add ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#490 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#137 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#275 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#106 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#200 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuscsi.cpp#30 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#297 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#346 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#124 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLContext.cpp#69 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLContext.h#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.cpp#3 add ... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#223 edit
963 строки
32 KiB
C++
963 строки
32 KiB
C++
//
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#ifndef GPUKERNEL_HPP_
|
|
#define GPUKERNEL_HPP_
|
|
|
|
#include "device/device.hpp"
|
|
#include "utils/macros.hpp"
|
|
#include "platform/command.hpp"
|
|
#include "platform/program.hpp"
|
|
#include "platform/kernel.hpp"
|
|
#include "platform/sampler.hpp"
|
|
#include "device/gpu/gpudevice.hpp"
|
|
#include "device/gpu/gpuvirtual.hpp"
|
|
#include "sc-hsa/Interface/SCHSAInterface.h"
|
|
#include "device/gpu/gpuprintf.hpp"
|
|
#include "hsa.h"
|
|
//! \namespace gpu GPU Device Implementation
|
|
namespace gpu {
|
|
|
|
class VirtualGPU;
|
|
class Device;
|
|
class NullDevice;
|
|
class HSAILProgram;
|
|
|
|
//! Helper that turns an offset stored in a pointer-typed value into
//! an address relative to a base pointer
struct HWSHADER_Helper
{
    /*! \brief Adds the integer value of \p offset to the integer value of \p base
     *
     *  Both arguments go through reinterpret_cast to an integer type, so
     *  in practice \p S and \p T are pointer types.
     *
     *  \return The sum, reinterpreted as \p T
     */
    template <typename S, typename T>
    static T Get(S base, T offset) {
        return reinterpret_cast<T>(reinterpret_cast<intptr_t>(base)
            + reinterpret_cast<size_t>(offset));
    }
};

//! Resolves (shader)->field, treated as an offset, against the address
//! held in (shader). Note that \a shader is evaluated twice.
#define HWSHADER_Get(shader, field) \
    HWSHADER_Helper::Get((shader), (shader)->field)
|
|
|
|
/*! \brief Computes the address \p structSize * \p size past \p src
 *
 *  \p src is converted to an integer, the product is added and the
 *  result is reinterpreted as \p D and stored in \p dst.
 */
template <typename D, typename S>
static void CalcPtr(D& dst, const S src, size_t structSize, size_t size) {
    dst = reinterpret_cast<D>(reinterpret_cast<const intptr_t>(src)
        + structSize * size);
}
|
|
|
|
/*! \addtogroup GPU GPU Device Implementation
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Helper function for the std::string processing.
 *  Finds the name in the std::string
 *
 *  \return True if we found the entry of the symbols
 */
bool expect(
    const std::string&  str,    //!< The original std::string
    size_t*             pos,    //!< Position to start
    const std::string&  sym     //!< The symbols to expect
    );
|
|
|
|
/*! \brief Helper function for the std::string processing.
 *  Gets a word from the std::string
 *
 *  \return True if we successfully received a word
 */
bool getword(
    const std::string&  str,    //!< The original std::string
    size_t*             pos,    //!< Position to start
    char*               sym     //!< Returned word
    );
|
|
|
|
/*! \brief Helper function for the std::string processing.
 *  Loads numbers from the metadata
 *
 *  \return True if we loaded a number
 */
bool getuint(
    const std::string&  str,    //!< The original std::string
    size_t*             pos,    //!< Position to start
    uint*               val     //!< Returned number
    );
|
|
|
|
/*! \brief Helper function for the std::string processing.
 *  Loads numbers from the metadata in HEX format
 *
 *  \return True if we loaded a number
 */
bool getuintHex(
    const std::string&  str,    //!< The original std::string
    size_t*             pos,    //!< Position to start
    uint*               val     //!< Returned number
    );
|
|
|
|
/*! \brief Helper function for the std::string processing.
 *  Loads 64-bit numbers from the metadata in HEX format
 *
 *  \return True if we loaded a number
 */
bool getuint64Hex(
    const std::string&  str,    //!< The original std::string
    size_t*             pos,    //!< Position to start
    uint64_t*           val     //!< Returned 64-bit number
    );
|
|
|
|
/*! \brief Helper function for the std::string processing.
 *  Converts unsigned integer to string
 *
 *  \return None
 */
void intToStr(
    size_t  value,  //!< Value for conversion
    char*   str,    //!< Pointer to the converted string
    size_t  size    //!< String size
    );
|
|
|
|
//! Image constant data from ABI specification
|
|
struct ImageConstants : public amd::EmbeddedObject
|
|
{
|
|
uint32_t width_; //!< Image surface width
|
|
uint32_t height_; //!< Image surface height
|
|
uint32_t depth_; //!< Image surface depth (1 for 2D images)
|
|
uint32_t dataType_; //!< Image surface data type
|
|
float widthFloat_; //!< Image surface width
|
|
float heightFloat_; //!< Image surface height
|
|
float depthFloat_; //!< Image surface depth (1 for 2D images)
|
|
uint32_t channelOrder_; //!< Image surface texels channel order
|
|
};
|
|
|
|
//! Kernel arguments
|
|
struct KernelArg : public amd::HeapObject
|
|
{
|
|
public:
|
|
//! \enum Kernel argument type
|
|
enum ArgumentType
|
|
{
|
|
None = 0,
|
|
PointerGlobal,
|
|
Value,
|
|
Image,
|
|
PointerLocal,
|
|
PointerHwLocal,
|
|
PointerPrivate,
|
|
PointerHwPrivate,
|
|
PointerConst,
|
|
PointerHwConst,
|
|
Float,
|
|
Double,
|
|
Half,
|
|
Char,
|
|
UChar,
|
|
Short,
|
|
UShort,
|
|
Int,
|
|
UInt,
|
|
Long,
|
|
ULong,
|
|
Struct,
|
|
Union,
|
|
Opaque,
|
|
Event,
|
|
Image1D, //!< first image
|
|
Image2D,
|
|
Image1DB,
|
|
Image1DA,
|
|
Image2DA,
|
|
Image3D, //!< last image
|
|
Counter,
|
|
Sampler,
|
|
PrivateSize,
|
|
LocalSize,
|
|
HwPrivateSize,
|
|
HwLocalSize,
|
|
Grouping,
|
|
WrkgrpSize,
|
|
Wavefront,
|
|
PrivateFixed,
|
|
ErrorMessage,
|
|
WarningMessage,
|
|
PrintfFormatStr,
|
|
MetadataVersion,
|
|
UavId,
|
|
ABI64Bit,
|
|
GWS,
|
|
SWGWS,
|
|
Reflection,
|
|
ConstArg,
|
|
ConstBufId,
|
|
PrintfBufId,
|
|
GroupingHint,
|
|
VecTypeHint,
|
|
TotalTypes
|
|
};
|
|
|
|
// The compiler metadata fields
|
|
std::string name_; //!< parameters name
|
|
ArgumentType type_; //!< type of argument
|
|
union {
|
|
uint size_; //!< number of arguments (for values and pointers only)
|
|
uint location_; //!< sampler's location (for samplers only)
|
|
};
|
|
uint cbIdx_; //!< constant buffer index
|
|
uint cbPos_; //!< dword address in CB for the argument
|
|
std::string buf_; //!< buffer tag
|
|
uint index_; //!< buffer/image/sampler index
|
|
uint alignment_; //!< the required argument's alignment
|
|
ArgumentType dataType_; //!< data type of the argument
|
|
union {
|
|
struct {
|
|
uint uavBuf_ : 1; //!< UAV memory, no global heap
|
|
uint realloc_ : 1; //!< argument has to be reallocatedin the global heap
|
|
uint readOnly_ : 1; //!< Read only memory object
|
|
uint writeOnly_ : 1; //!< Write only memory object
|
|
uint readWrite_ : 1; //!< Read/Write memory object
|
|
};
|
|
uint value_;
|
|
} memory_;
|
|
|
|
std::string typeName_; //!< argument's type name
|
|
uint typeQualifier_; //!< argument's type qualifier
|
|
|
|
//! Default constructor for the kernel argument
|
|
KernelArg();
|
|
|
|
//! Copy constructor for the kernel argument
|
|
KernelArg(const KernelArg& data);
|
|
|
|
//! Overloads operator=
|
|
KernelArg& operator=(const KernelArg& data);
|
|
|
|
//! Destructor of the kernel argument
|
|
~KernelArg() { name_.clear(); }
|
|
|
|
/*! \brief Checks if this arguments requires a place in constant buffer
|
|
*
|
|
* \return True if we need CB
|
|
*/
|
|
bool isCbNeeded() const;
|
|
|
|
/*! \brief Retrieves the argument's size
|
|
*
|
|
* \return Size of the current argument
|
|
*/
|
|
size_t size(
|
|
bool gpuLayer //!< True if we want the argument's size for the GPU layer
|
|
) const;
|
|
|
|
/*! \brief Retrieves the argument's type for the abstraction layer
|
|
*
|
|
* \return The argument's type in the abstraction layer format
|
|
*/
|
|
clk_value_type_t type() const;
|
|
|
|
/*! \brief Retrieves the argument's address qualifier for the abstraction layer
|
|
*
|
|
* \return The argument's address qualifier in the abstraction layer format
|
|
*/
|
|
cl_kernel_arg_address_qualifier addressQualifier() const;
|
|
|
|
/*! \brief Retrieves the argument's access qualifier for the abstraction layer
|
|
*
|
|
* \return The argument's access qualifier in the abstraction layer format
|
|
*/
|
|
cl_kernel_arg_access_qualifier accessQualifier() const;
|
|
|
|
/*! \brief Retrieves the argument's type name for the abstraction layer
|
|
*
|
|
* \return The argument's type name
|
|
*/
|
|
const char* typeName() const { return typeName_.c_str(); }
|
|
|
|
/*! \brief Retrieves the argument's type qualifier for the abstraction layer
|
|
*
|
|
* \return The argument's type qualifier
|
|
*/
|
|
cl_kernel_arg_type_qualifier typeQualifier() const
|
|
{
|
|
switch (type_) {
|
|
case PointerConst:
|
|
case PointerHwConst:
|
|
return static_cast<cl_kernel_arg_type_qualifier>(typeQualifier_ |
|
|
CL_KERNEL_ARG_TYPE_CONST);
|
|
default:
|
|
return static_cast<cl_kernel_arg_type_qualifier>(typeQualifier_);
|
|
}
|
|
}
|
|
|
|
//! Special case for vectors with component size <= 16bit
|
|
const static uint VectorSizeLimit = 4;
|
|
size_t specialVector() const;
|
|
};
|
|
|
|
struct DataTypeConst
|
|
{
|
|
const char* tagName_; //!< data type's name
|
|
KernelArg::ArgumentType type_; //!< data type
|
|
};
|
|
|
|
//! Metadata description for parsing
|
|
struct MetaDataConst
|
|
{
|
|
const char* typeName_; //!< parameters name
|
|
KernelArg::ArgumentType type_; //!< type of argument
|
|
struct
|
|
{
|
|
uint size_ : 1; //!< number of arguments
|
|
uint name_ : 1; //!< argument's name
|
|
uint resType_: 1; //!< argument's type
|
|
uint cbIdx_ : 1; //!< resource index CB, sampler or image
|
|
uint cbPos_ : 1; //!< dword address in CB for the argument
|
|
uint buf_ : 1; //!< buffer tag
|
|
uint reserved: 26; //!< reserved
|
|
};
|
|
};
|
|
|
|
const uint DescTotal = 15;
|
|
const uint BasicTypeTotal = 14;
|
|
const uint ArgStateTotal = DescTotal + BasicTypeTotal;
|
|
|
|
//! The constant array that describes different metadata properties
|
|
extern const MetaDataConst ArgState[ArgStateTotal];
|
|
|
|
extern const DataTypeConst DataType[];
|
|
|
|
extern const uint DataTypeTotal;
|
|
|
|
// Forward declaration
|
|
class Program;
|
|
class NullProgram;
|
|
|
|
class CalImageReference : public amd::ReferenceCountedObject
|
|
{
|
|
public:
|
|
//! Default constructor
|
|
CalImageReference(CALimage calImage): image_(calImage) {}
|
|
|
|
//! Get CAL image
|
|
CALimage calImage() const { return image_; }
|
|
|
|
protected:
|
|
//! Default destructor
|
|
~CalImageReference();
|
|
|
|
private:
|
|
//! Disable copy constructor
|
|
CalImageReference(const CalImageReference&);
|
|
|
|
//! Disable operator=
|
|
CalImageReference& operator=(const CalImageReference&);
|
|
|
|
CALimage image_; //!< CAL kernel image
|
|
};
|
|
|
|
//! \class GPU NullKernel - Kernel for offline device
|
|
class NullKernel : public device::Kernel
|
|
{
|
|
public:
|
|
typedef std::vector<KernelArg*> arguments_t;
|
|
|
|
const static uint UavIdUndefined = 0xffff;
|
|
|
|
enum Flags {
|
|
LimitWorkgroup = 1 << 0, //!< Limits the workgroup size
|
|
PrintfOutput = 1 << 1, //!< Kernel has printf output
|
|
PrivateFixed = 1 << 2, //!< Kernel has printf output
|
|
ABI64bit = 1 << 3, //!< Kernel has 64 bit ABI
|
|
Unused0 = 1 << 4, //!< Unused
|
|
Unused1 = 1 << 5, //!< Unused
|
|
ImageEnable = 1 << 6, //!< Kernel uses images
|
|
ImageWrite = 1 << 7, //!< Kernel writes images
|
|
};
|
|
|
|
//! \enum Resource type for binding
|
|
enum ResourceType
|
|
{
|
|
Undefined = 0x00000000, //!< resource type will be detected
|
|
ConstantBuffer = 0x00000001, //!< resource is a constant buffer
|
|
GlobalBuffer = 0x00000002, //!< resource is a global buffer
|
|
GlobalBufferArena = 0x00000003, //!< resource is a global buffer
|
|
ArgumentHeapBuffer = 0x00000004, //!< resource is an argument buffer
|
|
ArgumentBuffer = 0x00000005, //!< resource is an argument buffer
|
|
ArgumentImageRead = 0x00000006, //!< resource is an argument image read
|
|
ArgumentImageWrite = 0x00000007, //!< resource is an argument image write
|
|
ArgumentConstBuffer = 0x00000008, //!< resource is an argument const buffer
|
|
ArgumentCounter = 0x00000009, //!< resource is a global counter
|
|
ArgumentUavID = 0x0000000a, //!< resource is a dummy ID read
|
|
ArgumentCbID = 0x0000000b, //!< resource is a constant buffer
|
|
ArgumentPrintfID = 0x0000000c, //!< resource is a printf buffer
|
|
};
|
|
|
|
//! GPU kernel constructor
|
|
NullKernel(
|
|
const std::string& name, //!< The kernel's name
|
|
const NullDevice& gpuNullDev, //!< GPU device object
|
|
const NullProgram& nullProg //!< Reference to the program
|
|
);
|
|
|
|
virtual ~NullKernel();
|
|
|
|
/*! \brief Creates a GPU kernel in CAL
|
|
*
|
|
* \return True if we successfully created a kernel in CAL
|
|
*/
|
|
bool create(
|
|
const std::string& code, //!< IL source code
|
|
const std::string& metadata, //!< the kernel metadata structure
|
|
const void* binaryCode = NULL, //!< binary machine code for CAL
|
|
size_t binarySize = 0 //!< the machine code size
|
|
);
|
|
|
|
//! Returns CAL function descriptor
|
|
CALimage calImage() const { return calRef_->calImage(); }
|
|
|
|
//! Returns TRUE if we successfully retrieved the binary from CAL
|
|
bool getCalBinary(
|
|
void* binary, //!< ISA binary code
|
|
size_t size //!< ISA binary size
|
|
) const;
|
|
|
|
//! Returns CAL image size
|
|
size_t getCalBinarySize() const;
|
|
|
|
//! Returns GPU device object, associated with this kernel
|
|
const NullDevice& nullDev() const { return gpuDev_; }
|
|
|
|
//! Returns GPU device object, associated with this kernel
|
|
const NullProgram& nullProg() const { return prog_; }
|
|
|
|
//! Returns the kernel's build error
|
|
const cl_int buildError() const { return buildError_; }
|
|
|
|
//! Returns the kernel's flags
|
|
uint flags() const { return flags_; }
|
|
|
|
//! Returns TRUE if ABI is for 64 bits
|
|
bool abi64Bit() const { return (flags_ & ABI64bit) ? true : false; }
|
|
|
|
//! Returns the total number of all arguments
|
|
size_t argSize() const { return arguments_.size(); }
|
|
|
|
//! Returns instruction count of the current kernel
|
|
uint instructionCnt() const { return instructionCnt_; }
|
|
|
|
protected:
|
|
//! Returns TRUE if memory should be reallocated, returns FALSE always for NullDevice
|
|
virtual bool isRealloc() const { return false; }
|
|
|
|
/*! \brief Parses the metadata structure for the kernel,
|
|
* provided by the OpenCL compiler
|
|
*
|
|
* \return True if we succefully parsed all arguments
|
|
*/
|
|
bool parseArguments(
|
|
const std::string& metaData, //!< the program for parsing
|
|
uint* uavRefCount //!< an array of reference counters for used UAVs
|
|
);
|
|
|
|
//! Returns the argument for the specified index
|
|
const KernelArg* argument(uint idx) const { return arguments_[idx]; }
|
|
|
|
//! Adds the kernel argument into the list
|
|
void addArgument(KernelArg* arg) { arguments_.push_back(arg); }
|
|
|
|
//! Returns the argument for the specified sampler's index
|
|
const KernelArg* sampler(uint idx) const { return intSamplers_[idx]; }
|
|
|
|
//! Returns the total number of all internal samplers
|
|
size_t samplerSize() const { return intSamplers_.size(); }
|
|
|
|
//! Adds the kernel sampler into the sampler's list
|
|
void addSampler(KernelArg* arg) { intSamplers_.push_back(arg); }
|
|
|
|
//! Returns UAV raw index for this kernel
|
|
uint uavRaw() const { return uavRaw_; }
|
|
|
|
//! Returns UAV arena index for this kernel
|
|
uint uavArena() const { return uavArena_; }
|
|
|
|
cl_int buildError_; //!< Kernel's build error
|
|
std::string ilSource_; //!< IL source code of this kernel
|
|
|
|
const NullDevice& gpuDev_; //!< GPU device object
|
|
const NullProgram& prog_; //!< Reference to the parent program
|
|
|
|
CalImageReference* calRef_; //!< CAL image reference for this kernel
|
|
bool internal_; //!< Runtime internal ker
|
|
|
|
uint flags_; //!< kernel object flags
|
|
arguments_t arguments_; //!< kernel arguments for the execution
|
|
arguments_t intSamplers_; //!< predefined intenal kernel samplers
|
|
|
|
size_t* cbSizes_; //!< real constant buffer sizes for this kernel
|
|
uint numCb_; //!< total number of constant buffers
|
|
|
|
uint uavRaw_; //!< UAV used for RAW access
|
|
uint uavArena_; //!< UAV used for arena access
|
|
|
|
bool rwAttributes_; //!< backend provides RW attributes for arguments
|
|
|
|
uint instructionCnt_;//!< Instruction count
|
|
|
|
uint cbId_; //!< UAV used for constant buffer access
|
|
uint printfId_; //!< UAV used for printf buffer access
|
|
|
|
private:
|
|
//! Disable copy constructor
|
|
NullKernel(const NullKernel&);
|
|
|
|
//! Disable operator=
|
|
NullKernel& operator=(const NullKernel&);
|
|
|
|
//! Creates a filename for ISA/IL dumps
|
|
std::string mkDumpName(
|
|
const char* extension //!< File extension to append
|
|
) const;
|
|
|
|
bool createMultiBinary(
|
|
uint* imageSize, //!< Multibinary image size
|
|
void** image, //!< Multibinary image
|
|
const void* isa //!< Kernel HW info
|
|
);
|
|
|
|
//! SI HW specific setup for kernels
|
|
bool siCreateHwInfo(
|
|
const void* shader, //!< HW info shader
|
|
AMUabiAddEncoding& encoding //!< ABI encoding structure
|
|
);
|
|
|
|
//! r800 HW specific setup for kernels
|
|
bool r800CreateHwInfo(
|
|
const void* shader, //!< HW info shader
|
|
AMUabiAddEncoding& encoding //!< ABI encoding structure
|
|
);
|
|
};
|
|
|
|
//! \class GPU kernel
|
|
class Kernel : public NullKernel
|
|
{
|
|
public:
|
|
struct InitData {
|
|
uint privateSize_; //!< Private ring initial size
|
|
uint localSize_; //!< Local ring initial size
|
|
uint hwPrivateSize_; //!< HW private ring initial size
|
|
uint hwLocalSize_; //!< HW local ring initial size
|
|
uint flags_; //!< Kernel initialization flags
|
|
};
|
|
|
|
//! GPU kernel constructor
|
|
Kernel(
|
|
const std::string& name, //!< The kernel's name
|
|
const Device& gpuDev, //!< GPU device object
|
|
const Program& prog, //!< Reference to the program
|
|
const InitData* initData_ //!< Initialization data
|
|
);
|
|
|
|
//! GPU kernel destructor
|
|
virtual ~Kernel();
|
|
|
|
/*! \brief Creates a GPU kernel in CAL
|
|
*
|
|
* \return True if we successfully created a kernel in CAL
|
|
*/
|
|
bool create(
|
|
const std::string& code, //!< IL source code
|
|
const std::string& metadata, //!< the kernel metadata structure
|
|
const void* binaryCode = NULL, //!< binary machine code for CAL
|
|
size_t binarySize = 0 //!< the machine code size
|
|
);
|
|
|
|
//! Validates memory argument
|
|
virtual bool validateMemory(
|
|
uint idx, //!< Argument's index
|
|
amd::Memory* amdMem //!< AMD memory object for validation
|
|
) const ;
|
|
|
|
//! Initializes the CAL program grid for the kernel execution
|
|
void setupProgramGrid(
|
|
VirtualGPU& gpu, //!< virtual GPU device object
|
|
size_t workDim, //!< work dimension
|
|
const amd::NDRange& glbWorkOffset, //!< global work offset
|
|
const amd::NDRange& gblWorkSize, //!< global work size
|
|
amd::NDRange& lclWorkSize, //!< local work size
|
|
const amd::NDRange& groupOffset, //!< group offsets
|
|
const amd::NDRange& glbWorkOffsetOrg,
|
|
const amd::NDRange& glbWorkSizeOrg //!< original global work size
|
|
) const;
|
|
|
|
/*! \brief Detects if runtime has to disable cache optimization and
|
|
* recompiles the kernel
|
|
*
|
|
* \return True if aliases were detected in the kernel arguments
|
|
*/
|
|
bool processMemObjects(
|
|
VirtualGPU& gpu, //!< Virtual GPU objects - queue
|
|
const amd::Kernel& kernel, //!< AMD kernel object for execution
|
|
const_address params, //!< pointer to the param's store
|
|
bool nativeMem //!< Native memory objects
|
|
) const;
|
|
|
|
/*! \brief Loads all kernel arguments, so we could run the kernel in HW.
|
|
* This includes CB update and resource binding
|
|
*
|
|
* \return True if we succefully loaded the arguments
|
|
*/
|
|
bool loadParameters(
|
|
VirtualGPU& gpu, //!< virtual GPU device object
|
|
const amd::Kernel& kernel, //!< AMD kernel object for execution
|
|
const_address params, //!< pointer to the param's store
|
|
bool nativeMem //!< Native memory objects
|
|
) const;
|
|
|
|
//! Binds the constant buffers associated with the kernel
|
|
bool bindConstantBuffers(VirtualGPU& gpu) const;
|
|
|
|
/*! \brief Runs the kernel on HW
|
|
*
|
|
* \return True if we succefully executed the kernel
|
|
*/
|
|
bool run(
|
|
VirtualGPU& gpu, //!< virtual GPU device object
|
|
GpuEvent* gpuEvent, //!< Pointer to the GPU event
|
|
bool lastRun //!< Last run in the split execution
|
|
) const;
|
|
|
|
//! Help function to debug the kernel output
|
|
void debug(
|
|
VirtualGPU& gpu //!< virtual GPU device object
|
|
) const;
|
|
|
|
//! Programs internal samplers defined inside the kernel
|
|
bool setInternalSamplers(
|
|
VirtualGPU& gpu //!< Virtual GPU device object
|
|
) const;
|
|
|
|
//! Returns TRUE if we successfully retrieved the binary from CAL
|
|
bool getCalBinary(
|
|
void* binary, //!< ISA binary code
|
|
size_t size //!< ISA binary size
|
|
) const;
|
|
|
|
//! Returns CAL image size
|
|
size_t getCalBinarySize() const;
|
|
|
|
//! Returns GPU device object, associated with this kernel
|
|
const Device& dev() const;
|
|
|
|
//! Returns GPU device object, associated with this kernel
|
|
const Program& prog() const;
|
|
|
|
//! Binds global HW constant buffers
|
|
bool bindGlobalHwCb(
|
|
VirtualGPU& gpu, //!< Virtual GPU device object
|
|
VirtualGPU::GslKernelDesc* desc //!< Kernel descriptor
|
|
) const;
|
|
|
|
protected:
|
|
//! Initializes the kernel parameters for the abstraction layer
|
|
bool initParameters();
|
|
|
|
/*! \brief Creates constant buffer resources, associated with the kernel
|
|
*
|
|
* \return TRUE if we succefully created constant buffers
|
|
*/
|
|
bool initConstBuffers();
|
|
|
|
//! Returns TRUE if memory should be reallocated, returns FALSE always for NullDevice
|
|
virtual bool isRealloc() const { return !dev().heap()->isVirtual(); }
|
|
|
|
private:
|
|
//! Disable copy constructor
|
|
Kernel(const Kernel&);
|
|
|
|
//! Disable operator=
|
|
Kernel& operator=(const Kernel&);
|
|
|
|
//! \enum Fixed Metadata offsets
|
|
enum MetadataOffsets
|
|
{
|
|
GlobalWorkitemOffset = 0,
|
|
LocalWorkitemOffset = 1,
|
|
GroupsOffset = 2,
|
|
PrivateRingOffset = 3,
|
|
LocalRingOffset = 4,
|
|
MathLibOffset = 5,
|
|
GlobalWorkOffsetOffset = 6,
|
|
GroupWorkOffsetOffset = 7,
|
|
GlobalDataStoreOffset = 8,
|
|
DebugOffset = 8,
|
|
NDRangeGlobalWorkOffsetOffset = 9,
|
|
|
|
// The total number of constants reserved for ABI
|
|
TotalABIVectors
|
|
};
|
|
|
|
/*! \brief Sets the kernel argument
|
|
*
|
|
* \return True if we succefully updated the arguments
|
|
*/
|
|
bool setArgument(
|
|
VirtualGPU& gpu, //!< Virtual GPU device object
|
|
uint idx, //!< the argument index
|
|
const void* param, //!< the arguments data
|
|
size_t size, //!< size of the provided data
|
|
bool nativeMem //!< Native memory objects
|
|
) const;
|
|
|
|
/*! \brief Initializes local and private buffer ranges
|
|
*
|
|
* \return True if we succefully initialized the ranges
|
|
*/
|
|
bool initLocalPrivateRanges(
|
|
VirtualGPU& gpu //!< Virtual GPU device object
|
|
) const;
|
|
|
|
//! Sets local and private buffer ranges
|
|
void setLocalPrivateRanges(
|
|
VirtualGPU& gpu //!< Virtual GPU device object
|
|
) const;
|
|
|
|
//! Sets the sampler's parameters for the image look-up
|
|
void setSampler(
|
|
VirtualGPU& gpu, //!< virtual GPU device object
|
|
uint32_t state, //!< sampler state
|
|
uint physUnit //!< sampler's number
|
|
) const;
|
|
|
|
/*! \brief Binds resource
|
|
*
|
|
* \return True if we succefully created constant buffers
|
|
*/
|
|
bool bindResource(
|
|
VirtualGPU& gpu, //!< virtual GPU device object
|
|
const Resource& resource, //!< resource for binding
|
|
uint paramIdx, //!< index of the parameter
|
|
ResourceType type, //!< resource type
|
|
uint physUnit, //!< PhysUnit
|
|
Memory* memory = NULL, //!< GPU layer memory object
|
|
size_t offset = 0
|
|
) const;
|
|
|
|
//! Unbinds all resources for the kernel
|
|
void unbindResources(
|
|
VirtualGPU& gpu, //!< virtual GPU device object
|
|
GpuEvent gpuEvent, //!< GPU event that will be associated with the resources
|
|
bool lastRun //!< last run in the split execution
|
|
) const;
|
|
|
|
//! Returns true if arena setup was successful
|
|
bool setupArenaAliases(
|
|
VirtualGPU& gpu, //!< Virtual GPU device object
|
|
const Resource& resource //!< Resource for aliases setup
|
|
) const;
|
|
|
|
//! Copies image constants to the constant buffer
|
|
void copyImageConstants(
|
|
const amd::Image* amdImage, //!< Abstraction layer image object
|
|
ImageConstants* imageData //!< Pointer in CB to the image constants
|
|
) const;
|
|
|
|
//! Finds local workgroup size
|
|
void findLocalWorkSize(
|
|
size_t workDim, //!< Work dimension
|
|
const amd::NDRange& gblWorkSize,//!< Global work size
|
|
amd::NDRange& lclWorkSize //!< Local work size
|
|
) const;
|
|
|
|
uint hwPrivateSize_; //!< initial HW private size
|
|
uint hwLocalSize_; //!< initial HW local size
|
|
|
|
//! @todo remove the blit kernel hack
|
|
bool blitKernelHack_; //!< No VM hack for kernel blit
|
|
};
|
|
|
|
//! Address qualifier of an HSAIL kernel argument
enum HSAIL_ADDRESS_QUALIFIER {
    HSAIL_ADDRESS_ERROR = 0,
    HSAIL_ADDRESS_GLOBAL,
    HSAIL_ADDRESS_LOCAL,
    HSAIL_MAX_ADDRESS_QUALIFIERS    //!< number of enumerators above
};
|
|
|
|
//! Kind of an HSAIL kernel argument
enum HSAIL_ARG_TYPE {
    HSAIL_ARGTYPE_ERROR = 0,
    HSAIL_ARGTYPE_POINTER,
    HSAIL_ARGTYPE_VALUE,
    HSAIL_ARGTYPE_IMAGE,
    HSAIL_ARGTYPE_SAMPLER,
    HSAIL_ARGTYPE_QUEUE,
    HSAIL_ARGMAX_ARG_TYPES  //!< number of enumerators above
};
|
|
|
|
//! Data type of an HSAIL kernel argument
enum HSAIL_DATA_TYPE {
    HSAIL_DATATYPE_ERROR = 0,
    HSAIL_DATATYPE_B1,
    HSAIL_DATATYPE_B8,
    HSAIL_DATATYPE_B16,
    HSAIL_DATATYPE_B32,
    HSAIL_DATATYPE_B64,
    HSAIL_DATATYPE_S8,
    HSAIL_DATATYPE_S16,
    HSAIL_DATATYPE_S32,
    HSAIL_DATATYPE_S64,
    HSAIL_DATATYPE_U8,
    HSAIL_DATATYPE_U16,
    HSAIL_DATATYPE_U32,
    HSAIL_DATATYPE_U64,
    HSAIL_DATATYPE_F16,
    HSAIL_DATATYPE_F32,
    HSAIL_DATATYPE_F64,
    HSAIL_DATATYPE_STRUCT,
    HSAIL_DATATYPE_OPAQUE,
    HSAIL_DATATYPE_MAX_TYPES    //!< number of enumerators above
};
|
|
|
|
|
|
class HSAILKernel : public device::Kernel
|
|
{
|
|
public:
|
|
struct Argument
|
|
{
|
|
std::string name_; //!< Argument's name
|
|
std::string typeName_; //!< Argument's type name
|
|
uint size_; //!< Size in bytes
|
|
uint offset_; //!< Argument's offset
|
|
uint alignment_; //!< Argument's alignment
|
|
HSAIL_ARG_TYPE type_; //!< Type of the argument
|
|
HSAIL_ADDRESS_QUALIFIER addrQual_; //!< Address qualifier of the argument
|
|
HSAIL_DATA_TYPE dataType_; //!< The type of data
|
|
uint numElem_; //!< Number of elements
|
|
};
|
|
|
|
// Global offsets located in the first 3 elements
|
|
static const uint ExtraArguments = 6;
|
|
|
|
HSAILKernel(std::string name,
|
|
HSAILProgram* prog,
|
|
std::string compileOptions);
|
|
|
|
virtual ~HSAILKernel();
|
|
|
|
//! Initializes the metadata required for this kernel,
|
|
//! finalizes the kernel if needed
|
|
bool init(bool finalize = true);
|
|
|
|
//! Returns true if memory is valid for execution
|
|
virtual bool validateMemory(uint idx, amd::Memory* amdMem) const;
|
|
|
|
//! Returns a pointer to the hsail argument
|
|
const Argument* argument(size_t i) const { return arguments_[i]; }
|
|
|
|
//! Returns the number of hsail arguments
|
|
size_t numArguments() const { return arguments_.size(); }
|
|
|
|
//! Returns GPU device object, associated with this kernel
|
|
const Device& dev() const;
|
|
|
|
//! Returns HSA program associated with this kernel
|
|
const HSAILProgram& prog() const;
|
|
|
|
//! Returns LDS size used in this kernel
|
|
uint32_t ldsSize() const
|
|
{ return cpuAqlCode_->workgroup_group_segment_byte_size; }
|
|
|
|
//! Returns pointer on CPU to AQL code info
|
|
const void* cpuAqlCode() const { return cpuAqlCode_; }
|
|
|
|
//! Returns memory object with AQL code
|
|
gpu::Memory* gpuAqlCode() const { return code_; }
|
|
|
|
//! Returns size of AQL code
|
|
size_t aqlCodeSize() const { return codeSize_; }
|
|
|
|
//! Returns the size of argument buffer
|
|
size_t argsBufferSize() const
|
|
{ return cpuAqlCode_->kernarg_segment_byte_size; }
|
|
|
|
//! Returns spill reg size per workitem
|
|
int spillSegSize() const
|
|
{ return cpuAqlCode_->workitem_private_segment_byte_size; }
|
|
|
|
//! Returns TRUE if kernel uses dynamic parallelism
|
|
bool dynamicParallelism() const
|
|
{ return (flags_.dynamicParallelism_) ? true : false; }
|
|
|
|
//! Finds local workgroup size
|
|
void findLocalWorkSize(
|
|
size_t workDim, //!< Work dimension
|
|
const amd::NDRange& gblWorkSize,//!< Global work size
|
|
amd::NDRange& lclWorkSize //!< Local work size
|
|
) const;
|
|
|
|
//! Returns AQL packet in CPU memory
|
|
//! if the kerenl arguments were successfully loaded, otherwise NULL
|
|
hsa_kernel_dispatch_packet_t* loadArguments(
|
|
VirtualGPU& gpu, //!< Running GPU context
|
|
const amd::Kernel& kernel, //!< AMD kernel object
|
|
const amd::NDRangeContainer& sizes, //!< NDrange container
|
|
const_address parameters, //!< Application arguments for the kernel
|
|
bool nativeMem, //!< Native memory objectes are passed
|
|
uint64_t vmDefQueue, //!< GPU VM default queue pointer
|
|
uint64_t* vmParentWrap, //!< GPU VM parent aql wrap object
|
|
std::vector<const Resource*>& memList //!< Memory list for GSL/VidMM handles
|
|
) const;
|
|
|
|
//! Returns pritnf info array
|
|
const std::vector<PrintfInfo>& printfInfo() const { return printf_; }
|
|
|
|
//! Returns the kernel index in the program
|
|
uint index() const { return index_; }
|
|
|
|
private:
|
|
//! Disable copy constructor
|
|
HSAILKernel(const HSAILKernel&);
|
|
|
|
//! Disable operator=
|
|
HSAILKernel& operator=(const HSAILKernel&);
|
|
|
|
//! Creates AQL kernel HW info
|
|
bool aqlCreateHWInfo(
|
|
const void* kernel, //!< Kernel's packed binary info and code
|
|
size_t kernelSize //!< Size of the kernel's packed binary
|
|
);
|
|
|
|
//! Initializes arguments_ and the abstraction layer kernel parameters
|
|
void initArgList(
|
|
const aclArgData* aclArg //!< List of ACL arguments
|
|
);
|
|
|
|
//! Initializes Hsail Argument metadata and info
|
|
void initHsailArgs(
|
|
const aclArgData* aclArg //!< List of ACL arguments
|
|
);
|
|
|
|
//! Initializes Hsail Printf metadata and info
|
|
void initPrintf(
|
|
const aclPrintfFmt* aclPrintf //!< List of ACL printfs
|
|
);
|
|
|
|
std::vector<Argument*> arguments_; //!< Vector list of HSAIL Arguments
|
|
std::string compileOptions_; //!< compile used for finalizing this kernel
|
|
amd_kernel_code_t* cpuAqlCode_; //!< AQL kernel code on CPU
|
|
const NullDevice& dev_; //!< GPU device object
|
|
const HSAILProgram& prog_; //!< Reference to the parent program
|
|
std::vector<PrintfInfo> printf_; //!< Format strings for GPU printf support
|
|
uint index_; //!< Kernel index in the program
|
|
|
|
gpu::Memory* code_; //!< Memory object with ISA code
|
|
size_t codeSize_; //!< Size of ISA code
|
|
|
|
char* hwMetaData_; //!< SI metadata
|
|
|
|
union Flags {
|
|
struct {
|
|
uint imageEna_: 1; //!< Kernel uses images
|
|
uint imageWriteEna_: 1; //!< Kernel uses image writes
|
|
uint dynamicParallelism_: 1; //!< Dynamic parallelism enabled
|
|
};
|
|
uint value_;
|
|
Flags(): value_(0) {}
|
|
} flags_;
|
|
};
|
|
|
|
/*@}*/} // namespace gpu
|
|
|
|
#endif /*GPUKERNEL_HPP_*/
|