Files
rocm-systems/rocclr/runtime/device/gpu/gpukernel.hpp
T
foreman 8cc3f47661 P4 to Git Change 1170297 by nhaustov@nhaustov_hsa on 2015/07/14 05:36:10
ECR #333753 - ORCA RT/Compiler Lib: HSA Code Object/RT independent loader introducing/integration into OpenCL.

	Changes by Evgeniy Mankov.

	       Purpose:
	       Use the same Finalizer & loader for both HSA & ORCA RT.
	       AMDIL path is not affected.

	       Changes:
	       1. The whole BRIG is finalized now instead of per kernel finalization (both in gpuprogram & hsail_be).
	       2. HSALoader is changed in order to work with CodeObject and new HSA Loader's API - Context. Now it is in ORCA's gpuprogram instead of Compiler Lib.
	       3. brig_loader.cpp is removed from compiler lib, as well as __aclHSALoader function exports from the whole stack.
	       4. BIF .text section now contains the whole finalized HSA CodeObject instead of separate symbols for finalized kernels.
	       5. ORCA RT now works directly with amd_kernel_code_t and doesn't need any SC metadata anymore.
	       6. aoc2 is supplemented with fake offline loader correspondingly.
	       7. amdocl/complib make system changes.
	       8. test_driver.pl update.

	       ToDo:
	       1. Implement disassemble() & BuildLog() functions to support ISA dumping & SC error handling (Konstantin).
	       2. Global variables initialization by pragma reference (Konstantin). Test to verify: test_basic progvar_prog_scope_init.
	       3. Code Object without kernels support (Nikolay - ready). Test to verify: test_generic_address_space.exe library_function

	       testing: windows smoke, pre check-in, ocl conformance 2.0, ocl SDK 2.9

	       Reviewers: Nikolay Haustov, German Andryeyev

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/amdocl.def.in#13 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/amdocl.map.in#15 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#116 edit
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/amdoclcl.def.in#2 edit
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/amdoclcl.map.in#2 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/amdoclcl.def.in#12 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/amdoclcl.map.in#11 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/v0_8/if_acl.cpp#70 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/build/Makefile.gpu#32 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/hsail_be.cpp#44 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/build/Makefile.complib#85 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.h#18 edit
... //depot/stg/opencl/drivers/opencl/compiler/tools/aoc2/aoc2.cpp#70 edit
... //depot/stg/opencl/drivers/opencl/compiler/tools/aoc2/build/Makefile.aoc2#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#248 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudefs.hpp#121 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#288 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#112 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#194 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#59 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuscsi.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#368 edit
... //depot/stg/opencl/drivers/opencl/tests/hsa/bin/test_driver.pl#12 edit
2015-07-14 17:08:54 -04:00

984 خطوط
32 KiB
C++

//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#ifndef GPUKERNEL_HPP_
#define GPUKERNEL_HPP_
#include "device/device.hpp"
#include "utils/macros.hpp"
#include "platform/command.hpp"
#include "platform/program.hpp"
#include "platform/kernel.hpp"
#include "platform/sampler.hpp"
#include "device/gpu/gpudevice.hpp"
#include "device/gpu/gpuvirtual.hpp"
#include "amd_hsa_kernel_code.h"
#include "device/gpu/gpuprintf.hpp"
#include "device/gpu/gpuwavelimiter.hpp"
#include "hsa.h"
namespace amd {
namespace hsa {
namespace loader {
class Symbol;
} // loader
} // hsa
} // amd
//! \namespace gpu GPU Device Implementation
namespace gpu {
class VirtualGPU;
class Device;
class NullDevice;
class HSAILProgram;
//! Helper that resolves an offset-encoded value relative to a base address
struct HWSHADER_Helper
{
    /*! \brief Adds the integer value of \p offset to the address \p base
     *
     * \return The sum, reinterpreted as the type of \p offset
     */
    template <typename S, typename T>
    static T Get(S base, T offset) {
        const intptr_t origin = reinterpret_cast<intptr_t>(base);
        const size_t delta = reinterpret_cast<size_t>(offset);
        return reinterpret_cast<T>(origin + delta);
    }
};

//! Resolves (shader)->field as an offset relative to the address held in shader
#define HWSHADER_Get(shader, field) \
    HWSHADER_Helper::Get((shader), (shader)->field)
/*! \brief Computes the address located \p structSize * \p size bytes past
 * \p src and stores it in \p dst, reinterpreted as type D
 */
template <typename D, typename S>
static void CalcPtr(D& dst, const S src, size_t structSize, size_t size) {
    const size_t byteOffset = structSize * size;
    const intptr_t origin = reinterpret_cast<const intptr_t>(src);
    dst = reinterpret_cast<D>(origin + byteOffset);
}
/*! \addtogroup GPU GPU Device Implementation
* @{
*/
/*! \brief Helper function for std::string processing.
* Looks for the expected symbols in the string.
*
* NOTE(review): \p pos is passed by pointer, so it is presumably advanced
* past the match on success - confirm against the definition.
*
* \return True if we found the entry of the symbols
*/
bool expect(
const std::string& str, //!< The original std::string
size_t* pos, //!< Position to start
const std::string& sym //!< The symbols to expect
);
/*! \brief Helper function for std::string processing.
* Gets a word from the std::string.
*
* NOTE(review): \p sym is a raw char buffer with no size parameter; the
* capacity the definition expects is not visible here - confirm at the
* call sites.
*
* \return True if we successfully received a word
*/
bool getword(
const std::string& str, //!< The original std::string
size_t* pos, //!< Position to start
char* sym //!< Returned word
);
/*! \brief Helper function for std::string processing.
* Loads a number from the metadata.
*
* \return True if we loaded a number
*/
bool getuint(
const std::string& str, //!< The original std::string
size_t* pos, //!< Position to start
uint* val //!< Returned number
);
/*! \brief Helper function for std::string processing.
* Loads a number from the metadata in HEX format.
*
* \return True if we loaded a number
*/
bool getuintHex(
const std::string& str, //!< The original std::string
size_t* pos, //!< Position to start
uint* val //!< Returned number
);
/*! \brief Helper function for std::string processing.
* Loads a number from the metadata in HEX format; the result is returned
* through a uint64_t.
*
* \return True if we loaded a number
*/
bool getuint64Hex(
const std::string& str, //!< The original std::string
size_t* pos, //!< Position to start
uint64_t* val //!< Returned number
);
/*! \brief Helper function for std::string processing.
* Converts an unsigned integer to a string.
*
* NOTE(review): behaviour when the converted value does not fit into
* \p size characters is not visible here - confirm against the definition.
*
* \return None
*/
void intToStr(
size_t value, //!< Value for conversion
char* str, //!< Pointer to the converted string
size_t size //!< String size
);
//! Image constant data from ABI specification.
//! The dimensions are stored twice: once as 32-bit unsigned integers and
//! once as floats.
struct ImageConstants : public amd::EmbeddedObject
{
uint32_t width_; //!< Image surface width
uint32_t height_; //!< Image surface height
uint32_t depth_; //!< Image surface depth (1 for 2D images)
uint32_t dataType_; //!< Image surface data type
float widthFloat_; //!< Image surface width, as a float
float heightFloat_; //!< Image surface height, as a float
float depthFloat_; //!< Image surface depth, as a float (1 for 2D images)
uint32_t channelOrder_; //!< Image surface texels channel order
};
//! Kernel argument description, filled from the compiler metadata
struct KernelArg : public amd::HeapObject
{
public:
    //! \enum Kernel argument type
    enum ArgumentType
    {
        None = 0,
        PointerGlobal,
        Value,
        Image,
        PointerLocal,
        PointerHwLocal,
        PointerPrivate,
        PointerHwPrivate,
        PointerConst,
        PointerHwConst,
        Float,
        Double,
        Half,
        Char,
        UChar,
        Short,
        UShort,
        Int,
        UInt,
        Long,
        ULong,
        Struct,
        Union,
        Opaque,
        Event,
        Image1D,            //!< first image
        Image2D,
        Image1DB,
        Image1DA,
        Image2DA,
        Image3D,            //!< last image
        Counter,
        Sampler,
        PrivateSize,
        LocalSize,
        HwPrivateSize,
        HwLocalSize,
        Grouping,
        WrkgrpSize,
        Wavefront,
        PrivateFixed,
        ErrorMessage,
        WarningMessage,
        PrintfFormatStr,
        MetadataVersion,
        UavId,
        ABI64Bit,
        GWS,
        SWGWS,
        Reflection,
        ConstArg,
        ConstBufId,
        PrintfBufId,
        GroupingHint,
        VecTypeHint,
        LimitWave,
        TotalTypes
    };

    // Fields coming from the compiler metadata
    std::string     name_;          //!< name of the parameter
    ArgumentType    type_;          //!< argument's type
    union {
        uint    size_;              //!< number of arguments (values and pointers only)
        uint    location_;          //!< location of the sampler (samplers only)
    };
    uint            cbIdx_;         //!< index of the constant buffer
    uint            cbPos_;         //!< dword address of the argument in the CB
    std::string     buf_;           //!< buffer tag
    uint            index_;         //!< index of the buffer/image/sampler
    uint            alignment_;     //!< alignment required for the argument
    ArgumentType    dataType_;      //!< argument's data type
    union {
        struct {
            uint    uavBuf_     : 1;    //!< UAV memory, no global heap
            uint    realloc_    : 1;    //!< argument has to be reallocated in the global heap
            uint    readOnly_   : 1;    //!< memory object is read only
            uint    writeOnly_  : 1;    //!< memory object is write only
            uint    readWrite_  : 1;    //!< memory object is read/write
        };
        uint    value_;
    } memory_;
    std::string     typeName_;      //!< type name of the argument
    uint            typeQualifier_; //!< type qualifier of the argument

    //! Default constructor for the kernel argument
    KernelArg();

    //! Copy constructor for the kernel argument
    KernelArg(const KernelArg& data);

    //! Assignment operator for the kernel argument
    KernelArg& operator=(const KernelArg& data);

    //! Destructor of the kernel argument; clears the stored name
    ~KernelArg()
    {
        name_.clear();
    }

    /*! \brief Checks if this argument requires a place in a constant buffer
     *
     * \return True if we need CB
     */
    bool isCbNeeded() const;

    /*! \brief Retrieves the size of the argument
     *
     * \return Size of the current argument
     */
    size_t size(
        bool gpuLayer   //!< True if the size for the GPU layer is requested
        ) const;

    /*! \brief Retrieves the type of the argument for the abstraction layer
     *
     * \return The argument's type in the abstraction layer format
     */
    clk_value_type_t type() const;

    /*! \brief Retrieves the address qualifier of the argument
     *  for the abstraction layer
     *
     * \return The argument's address qualifier in the abstraction layer format
     */
    cl_kernel_arg_address_qualifier addressQualifier() const;

    /*! \brief Retrieves the access qualifier of the argument
     *  for the abstraction layer
     *
     * \return The argument's access qualifier in the abstraction layer format
     */
    cl_kernel_arg_access_qualifier accessQualifier() const;

    /*! \brief Retrieves the type name of the argument for the abstraction layer
     *
     * \return The argument's type name
     */
    const char* typeName() const
    {
        return typeName_.c_str();
    }

    /*! \brief Retrieves the type qualifier of the argument
     *  for the abstraction layer
     *
     * \return The stored qualifier; CL_KERNEL_ARG_TYPE_CONST is OR-ed in
     *  for PointerConst and PointerHwConst arguments
     */
    cl_kernel_arg_type_qualifier typeQualifier() const
    {
        const bool constPointer =
            (type_ == PointerConst) || (type_ == PointerHwConst);
        if (constPointer) {
            return static_cast<cl_kernel_arg_type_qualifier>(
                typeQualifier_ | CL_KERNEL_ARG_TYPE_CONST);
        }
        return static_cast<cl_kernel_arg_type_qualifier>(typeQualifier_);
    }

    //! Special case for vectors with component size <= 16bit
    const static uint VectorSizeLimit = 4;
    size_t specialVector() const;
};
//! Pairs a data type's tag name with the matching KernelArg::ArgumentType
struct DataTypeConst
{
const char* tagName_; //!< data type's name
KernelArg::ArgumentType type_; //!< data type
};
//! Metadata description for parsing.
//! The one-bit fields below are flags, one per KernelArg property.
//! NOTE(review): presumably each flag says whether that property is present
//! for the given metadata entry - confirm against the ArgState table.
struct MetaDataConst
{
const char* typeName_; //!< parameters name
KernelArg::ArgumentType type_; //!< type of argument
struct
{
uint size_ : 1; //!< number of arguments
uint name_ : 1; //!< argument's name
uint resType_: 1; //!< argument's type
uint cbIdx_ : 1; //!< resource index CB, sampler or image
uint cbPos_ : 1; //!< dword address in CB for the argument
uint buf_ : 1; //!< buffer tag
uint reserved: 26; //!< reserved
};
};
//! Sizes of the two groups of entries that together make up ArgState.
//! NOTE(review): presumably descriptor entries and basic type entries -
//! confirm against the ArgState definition.
const uint DescTotal = 15;
const uint BasicTypeTotal = 15;
//! Total number of entries in ArgState
const uint ArgStateTotal = DescTotal + BasicTypeTotal;
//! The constant array that describes different metadata properties
extern const MetaDataConst ArgState[ArgStateTotal];
//! Table of data type tag names; its length is DataTypeTotal
//! (NOTE(review): inferred from the names - confirm against the definition)
extern const DataTypeConst DataType[];
extern const uint DataTypeTotal;
// Forward declaration
class Program;
class NullProgram;
//! Holder of a CAL image, derived from amd::ReferenceCountedObject
class CalImageReference : public amd::ReferenceCountedObject
{
public:
    //! Stores the provided CAL image
    CalImageReference(CALimage calImage)
        : image_(calImage)
    {
    }

    //! Returns the stored CAL image
    CALimage calImage() const
    {
        return image_;
    }

protected:
    //! Destructor is protected and defined out of line
    ~CalImageReference();

private:
    //! Copy construction is disabled
    CalImageReference(const CalImageReference&);

    //! Assignment is disabled
    CalImageReference& operator=(const CalImageReference&);

    CALimage    image_;     //!< CAL kernel image
};
//! \class GPU NullKernel - Kernel for offline device
class NullKernel : public device::Kernel
{
public:
    typedef std::vector<KernelArg*> arguments_t;

    const static uint UavIdUndefined = 0xffff;

    enum Flags {
        LimitWorkgroup  = 1 << 0,   //!< Limits the workgroup size
        PrintfOutput    = 1 << 1,   //!< Kernel has printf output
        PrivateFixed    = 1 << 2,   //!< Private-fixed flag (NOTE(review): the
                                    //!< original text duplicated PrintfOutput's
                                    //!< description - confirm the meaning)
        ABI64bit        = 1 << 3,   //!< Kernel has 64 bit ABI
        Unused0         = 1 << 4,   //!< Unused
        Unused1         = 1 << 5,   //!< Unused
        ImageEnable     = 1 << 6,   //!< Kernel uses images
        ImageWrite      = 1 << 7,   //!< Kernel writes images
    };

    //! \enum Resource type for binding
    enum ResourceType
    {
        Undefined           = 0x00000000,   //!< resource type will be detected
        ConstantBuffer      = 0x00000001,   //!< resource is a constant buffer
        GlobalBuffer        = 0x00000002,   //!< resource is a global buffer
        GlobalBufferArena   = 0x00000003,   //!< resource is a global buffer (arena variant)
        ArgumentHeapBuffer  = 0x00000004,   //!< resource is an argument heap buffer
        ArgumentBuffer      = 0x00000005,   //!< resource is an argument buffer
        ArgumentImageRead   = 0x00000006,   //!< resource is an argument image read
        ArgumentImageWrite  = 0x00000007,   //!< resource is an argument image write
        ArgumentConstBuffer = 0x00000008,   //!< resource is an argument const buffer
        ArgumentCounter     = 0x00000009,   //!< resource is a global counter
        ArgumentUavID       = 0x0000000a,   //!< resource is a dummy ID read
        ArgumentCbID        = 0x0000000b,   //!< resource is a constant buffer
        ArgumentPrintfID    = 0x0000000c,   //!< resource is a printf buffer
    };

    //! GPU kernel constructor
    NullKernel(
        const std::string&  name,       //!< The kernel's name
        const NullDevice&   gpuNullDev, //!< GPU device object
        const NullProgram&  nullProg    //!< Reference to the program
        );

    virtual ~NullKernel();

    /*! \brief Creates a GPU kernel in CAL
     *
     * \return True if we successfully created a kernel in CAL
     */
    bool create(
        const std::string&  code,               //!< IL source code
        const std::string&  metadata,           //!< the kernel metadata structure
        const void*         binaryCode = NULL,  //!< binary machine code for CAL
        size_t              binarySize = 0      //!< the machine code size
        );

    //! Returns CAL function descriptor (dereferences calRef_ unconditionally)
    CALimage calImage() const { return calRef_->calImage(); }

    //! Returns TRUE if we successfully retrieved the binary from CAL
    bool getCalBinary(
        void*   binary,     //!< ISA binary code
        size_t  size        //!< ISA binary size
        ) const;

    //! Returns CAL image size
    size_t getCalBinarySize() const;

    //! Returns GPU device object, associated with this kernel
    const NullDevice& nullDev() const { return gpuDev_; }

    //! Returns the program object, associated with this kernel
    const NullProgram& nullProg() const { return prog_; }

    //! Returns the kernel's build error.
    //! The result is returned by value, so a top-level const on the return
    //! type would be ignored by the compiler.
    cl_int buildError() const { return buildError_; }

    //! Returns the kernel's flags
    uint flags() const { return flags_; }

    //! Returns TRUE if ABI is for 64 bits
    bool abi64Bit() const { return (flags_ & ABI64bit) ? true : false; }

    //! Returns the total number of all arguments
    size_t argSize() const { return arguments_.size(); }

    //! Returns instruction count of the current kernel
    uint instructionCnt() const { return instructionCnt_; }

protected:
    //! Returns TRUE if memory should be reallocated, returns FALSE always for NullDevice
    virtual bool isRealloc() const { return false; }

    /*! \brief Parses the metadata structure for the kernel,
     *  provided by the OpenCL compiler
     *
     * \return True if we successfully parsed all arguments
     */
    bool parseArguments(
        const std::string&  metaData,   //!< the program for parsing
        uint*               uavRefCount //!< an array of reference counters for used UAVs
        );

    //! Returns the argument for the specified index (no bounds check)
    const KernelArg* argument(uint idx) const { return arguments_[idx]; }

    //! Adds the kernel argument into the list
    void addArgument(KernelArg* arg) { arguments_.push_back(arg); }

    //! Returns the argument for the specified sampler's index (no bounds check)
    const KernelArg* sampler(uint idx) const { return intSamplers_[idx]; }

    //! Returns the total number of all internal samplers
    size_t samplerSize() const { return intSamplers_.size(); }

    //! Adds the kernel sampler into the sampler's list
    void addSampler(KernelArg* arg) { intSamplers_.push_back(arg); }

    //! Returns UAV raw index for this kernel
    uint uavRaw() const { return uavRaw_; }

    //! Returns UAV arena index for this kernel
    uint uavArena() const { return uavArena_; }

    cl_int              buildError_;    //!< Kernel's build error
    std::string         ilSource_;      //!< IL source code of this kernel
    const NullDevice&   gpuDev_;        //!< GPU device object
    const NullProgram&  prog_;          //!< Reference to the parent program
    CalImageReference*  calRef_;        //!< CAL image reference for this kernel
    bool                internal_;      //!< Runtime internal kernel
    uint                flags_;         //!< kernel object flags
    arguments_t         arguments_;     //!< kernel arguments for the execution
    arguments_t         intSamplers_;   //!< predefined internal kernel samplers
    size_t*             cbSizes_;       //!< real constant buffer sizes for this kernel
    uint                numCb_;         //!< total number of constant buffers
    uint                uavRaw_;        //!< UAV used for RAW access
    uint                uavArena_;      //!< UAV used for arena access
    bool                rwAttributes_;  //!< backend provides RW attributes for arguments
    uint                instructionCnt_;//!< Instruction count
    uint                cbId_;          //!< UAV used for constant buffer access
    uint                printfId_;      //!< UAV used for printf buffer access

private:
    //! Disable copy constructor
    NullKernel(const NullKernel&);

    //! Disable operator=
    NullKernel& operator=(const NullKernel&);

    //! Creates a filename for ISA/IL dumps
    std::string mkDumpName(
        const char* extension   //!< File extension to append
        ) const;

    //! Creates a multibinary image.
    //! NOTE(review): the meaning of the bool result is not documented here;
    //! presumably true on success - confirm against the definition.
    bool createMultiBinary(
        uint*       imageSize,  //!< Multibinary image size
        void**      image,      //!< Multibinary image
        const void* isa         //!< Kernel HW info
        );

    //! SI HW specific setup for kernels
    bool siCreateHwInfo(
        const void*         shader,     //!< HW info shader
        AMUabiAddEncoding&  encoding    //!< ABI encoding structure
        );

    //! r800 HW specific setup for kernels
    bool r800CreateHwInfo(
        const void*         shader,     //!< HW info shader
        AMUabiAddEncoding&  encoding    //!< ABI encoding structure
        );
};
//! \class GPU kernel
//! Extends NullKernel with the parts that need a real Device/VirtualGPU:
//! argument loading, resource binding and execution.
class Kernel : public NullKernel
{
public:
//! Initial values handed to the Kernel constructor
struct InitData {
uint privateSize_; //!< Private ring initial size
uint localSize_; //!< Local ring initial size
uint hwPrivateSize_; //!< HW private ring initial size
uint hwLocalSize_; //!< HW local ring initial size
uint flags_; //!< Kernel initialization flags
};
//! GPU kernel constructor
Kernel(
const std::string& name, //!< The kernel's name
const Device& gpuDev, //!< GPU device object
const Program& prog, //!< Reference to the program
const InitData* initData_ //!< Initialization data
);
//! GPU kernel destructor
virtual ~Kernel();
/*! \brief Creates a GPU kernel in CAL
*
* \return True if we successfully created a kernel in CAL
*/
bool create(
const std::string& code, //!< IL source code
const std::string& metadata, //!< the kernel metadata structure
const void* binaryCode = NULL, //!< binary machine code for CAL
size_t binarySize = 0 //!< the machine code size
);
//! Validates memory argument
virtual bool validateMemory(
uint idx, //!< Argument's index
amd::Memory* amdMem //!< AMD memory object for validation
) const ;
//! Initializes the CAL program grid for the kernel execution
void setupProgramGrid(
VirtualGPU& gpu, //!< virtual GPU device object
size_t workDim, //!< work dimension
const amd::NDRange& glbWorkOffset, //!< global work offset
const amd::NDRange& gblWorkSize, //!< global work size
amd::NDRange& lclWorkSize, //!< local work size
const amd::NDRange& groupOffset, //!< group offsets
const amd::NDRange& glbWorkOffsetOrg, //!< presumably the original global work offset - confirm
const amd::NDRange& glbWorkSizeOrg //!< original global work size
) const;
/*! \brief Detects if runtime has to disable cache optimization and
* recompiles the kernel
*
* \return True if aliases were detected in the kernel arguments
*/
bool processMemObjects(
VirtualGPU& gpu, //!< Virtual GPU objects - queue
const amd::Kernel& kernel, //!< AMD kernel object for execution
const_address params, //!< pointer to the param's store
bool nativeMem //!< Native memory objects
) const;
/*! \brief Loads all kernel arguments, so we could run the kernel in HW.
* This includes CB update and resource binding
*
* \return True if we successfully loaded the arguments
*/
bool loadParameters(
VirtualGPU& gpu, //!< virtual GPU device object
const amd::Kernel& kernel, //!< AMD kernel object for execution
const_address params, //!< pointer to the param's store
bool nativeMem //!< Native memory objects
) const;
//! Binds the constant buffers associated with the kernel
bool bindConstantBuffers(VirtualGPU& gpu) const;
/*! \brief Runs the kernel on HW
*
* \return True if we successfully executed the kernel
*/
bool run(
VirtualGPU& gpu, //!< virtual GPU device object
GpuEvent* gpuEvent, //!< Pointer to the GPU event
bool lastRun //!< Last run in the split execution
) const;
//! Help function to debug the kernel output
void debug(
VirtualGPU& gpu //!< virtual GPU device object
) const;
//! Programs internal samplers defined inside the kernel
bool setInternalSamplers(
VirtualGPU& gpu //!< Virtual GPU device object
) const;
//! Returns TRUE if we successfully retrieved the binary from CAL
bool getCalBinary(
void* binary, //!< ISA binary code
size_t size //!< ISA binary size
) const;
//! Returns CAL image size
size_t getCalBinarySize() const;
//! Returns GPU device object, associated with this kernel
const Device& dev() const;
//! Returns the program object, associated with this kernel
const Program& prog() const;
//! Binds global HW constant buffers
bool bindGlobalHwCb(
VirtualGPU& gpu, //!< Virtual GPU device object
VirtualGPU::GslKernelDesc* desc //!< Kernel descriptor
) const;
//! Get profiling callback object; forwards the request to waveLimiter_
virtual amd::ProfilingCallback* getProfilingCallback(
const device::VirtualDevice *vdev){
return waveLimiter_.getProfilingCallback(vdev);
}
protected:
//! Initializes the kernel parameters for the abstraction layer
bool initParameters();
/*! \brief Creates constant buffer resources, associated with the kernel
*
* \return TRUE if we successfully created constant buffers
*/
bool initConstBuffers();
//! Returns TRUE if memory should be reallocated, i.e. when the device heap
//! is not virtual (NullKernel's version always returns FALSE)
virtual bool isRealloc() const { return !dev().heap()->isVirtual(); }
private:
//! Disable copy constructor
Kernel(const Kernel&);
//! Disable operator=
Kernel& operator=(const Kernel&);
//! \enum Fixed Metadata offsets
//! NOTE(review): GlobalDataStoreOffset and DebugOffset share the value 8;
//! looks intentional, but confirm against the users of these offsets.
enum MetadataOffsets
{
GlobalWorkitemOffset = 0,
LocalWorkitemOffset = 1,
GroupsOffset = 2,
PrivateRingOffset = 3,
LocalRingOffset = 4,
MathLibOffset = 5,
GlobalWorkOffsetOffset = 6,
GroupWorkOffsetOffset = 7,
GlobalDataStoreOffset = 8,
DebugOffset = 8,
NDRangeGlobalWorkOffsetOffset = 9,
// The total number of constants reserved for ABI
TotalABIVectors
};
/*! \brief Sets the kernel argument
*
* \return True if we successfully updated the arguments
*/
bool setArgument(
VirtualGPU& gpu, //!< Virtual GPU device object
uint idx, //!< the argument index
const void* param, //!< the arguments data
size_t size, //!< size of the provided data
bool nativeMem //!< Native memory objects
) const;
/*! \brief Initializes local and private buffer ranges
*
* \return True if we successfully initialized the ranges
*/
bool initLocalPrivateRanges(
VirtualGPU& gpu //!< Virtual GPU device object
) const;
//! Sets local and private buffer ranges
void setLocalPrivateRanges(
VirtualGPU& gpu //!< Virtual GPU device object
) const;
//! Sets the sampler's parameters for the image look-up
void setSampler(
VirtualGPU& gpu, //!< virtual GPU device object
uint32_t state, //!< sampler state
uint physUnit //!< sampler's number
) const;
/*! \brief Binds resource
*
* \return True on success (NOTE(review): the original text said
* "created constant buffers", which looks copy-pasted from
* initConstBuffers - confirm against the definition)
*/
bool bindResource(
VirtualGPU& gpu, //!< virtual GPU device object
const Resource& resource, //!< resource for binding
uint paramIdx, //!< index of the parameter
ResourceType type, //!< resource type
uint physUnit, //!< PhysUnit
Memory* memory = NULL, //!< GPU layer memory object
size_t offset = 0 //!< presumably an offset into the resource - confirm
) const;
//! Unbinds all resources for the kernel
void unbindResources(
VirtualGPU& gpu, //!< virtual GPU device object
GpuEvent gpuEvent, //!< GPU event that will be associated with the resources
bool lastRun //!< last run in the split execution
) const;
//! Returns true if arena setup was successful
bool setupArenaAliases(
VirtualGPU& gpu, //!< Virtual GPU device object
const Resource& resource //!< Resource for aliases setup
) const;
//! Copies image constants to the constant buffer
void copyImageConstants(
const amd::Image* amdImage, //!< Abstraction layer image object
ImageConstants* imageData //!< Pointer in CB to the image constants
) const;
//! Finds local workgroup size
void findLocalWorkSize(
size_t workDim, //!< Work dimension
const amd::NDRange& gblWorkSize,//!< Global work size
amd::NDRange& lclWorkSize //!< Local work size
) const;
uint hwPrivateSize_; //!< initial HW private size
uint hwLocalSize_; //!< initial HW local size
//! @todo remove the blit kernel hack
bool blitKernelHack_; //!< No VM hack for kernel blit
WaveLimiterManager waveLimiter_; //!< adaptively control number of waves
};
//! Address qualifier of an HSAIL kernel argument
enum HSAIL_ADDRESS_QUALIFIER {
    HSAIL_ADDRESS_ERROR             = 0,
    HSAIL_ADDRESS_GLOBAL            = 1,
    HSAIL_ADDRESS_LOCAL             = 2,
    HSAIL_MAX_ADDRESS_QUALIFIERS    = 3     //!< number of values above
};
//! Type of an HSAIL kernel argument
enum HSAIL_ARG_TYPE {
    HSAIL_ARGTYPE_ERROR     = 0,
    HSAIL_ARGTYPE_POINTER   = 1,
    HSAIL_ARGTYPE_VALUE     = 2,
    HSAIL_ARGTYPE_IMAGE     = 3,
    HSAIL_ARGTYPE_SAMPLER   = 4,
    HSAIL_ARGTYPE_QUEUE     = 5,
    HSAIL_ARGMAX_ARG_TYPES  = 6     //!< number of values above
};
//! Data type of an HSAIL kernel argument
enum HSAIL_DATA_TYPE {
    HSAIL_DATATYPE_ERROR        = 0,
    HSAIL_DATATYPE_B1           = 1,
    HSAIL_DATATYPE_B8           = 2,
    HSAIL_DATATYPE_B16          = 3,
    HSAIL_DATATYPE_B32          = 4,
    HSAIL_DATATYPE_B64          = 5,
    HSAIL_DATATYPE_S8           = 6,
    HSAIL_DATATYPE_S16          = 7,
    HSAIL_DATATYPE_S32          = 8,
    HSAIL_DATATYPE_S64          = 9,
    HSAIL_DATATYPE_U8           = 10,
    HSAIL_DATATYPE_U16          = 11,
    HSAIL_DATATYPE_U32          = 12,
    HSAIL_DATATYPE_U64          = 13,
    HSAIL_DATATYPE_F16          = 14,
    HSAIL_DATATYPE_F32          = 15,
    HSAIL_DATATYPE_F64          = 16,
    HSAIL_DATATYPE_STRUCT       = 17,
    HSAIL_DATATYPE_OPAQUE       = 18,
    HSAIL_DATATYPE_MAX_TYPES    = 19    //!< number of values above
};
class HSAILKernel : public device::Kernel
{
public:
struct Argument
{
std::string name_; //!< Argument's name
std::string typeName_; //!< Argument's type name
uint size_; //!< Size in bytes
uint offset_; //!< Argument's offset
uint alignment_; //!< Argument's alignment
HSAIL_ARG_TYPE type_; //!< Type of the argument
HSAIL_ADDRESS_QUALIFIER addrQual_; //!< Address qualifier of the argument
HSAIL_DATA_TYPE dataType_; //!< The type of data
uint numElem_; //!< Number of elements
};
// Global offsets located in the first 3 elements
static const uint ExtraArguments = 6;
HSAILKernel(std::string name,
HSAILProgram* prog,
std::string compileOptions);
virtual ~HSAILKernel();
//! Initializes the metadata required for this kernel,
//! finalizes the kernel if needed
bool init(amd::hsa::loader::Symbol *sym, bool finalize = false);
//! Returns true if memory is valid for execution
virtual bool validateMemory(uint idx, amd::Memory* amdMem) const;
//! Returns a pointer to the hsail argument
const Argument* argument(size_t i) const { return arguments_[i]; }
//! Returns the number of hsail arguments
size_t numArguments() const { return arguments_.size(); }
//! Returns GPU device object, associated with this kernel
const Device& dev() const;
//! Returns HSA program associated with this kernel
const HSAILProgram& prog() const;
//! Returns LDS size used in this kernel
uint32_t ldsSize() const
{ return cpuAqlCode_->workgroup_group_segment_byte_size; }
//! Returns pointer on CPU to AQL code info
const void* cpuAqlCode() const { return cpuAqlCode_; }
//! Returns memory object with AQL code
gpu::Memory* gpuAqlCode() const { return code_; }
//! Returns size of AQL code
size_t aqlCodeSize() const { return codeSize_; }
//! Returns the size of argument buffer
size_t argsBufferSize() const
{ return cpuAqlCode_->kernarg_segment_byte_size; }
//! Returns spill reg size per workitem
int spillSegSize() const
{ return cpuAqlCode_->workitem_private_segment_byte_size; }
//! Returns TRUE if kernel uses dynamic parallelism
bool dynamicParallelism() const
{ return (flags_.dynamicParallelism_) ? true : false; }
//! Returns TRUE if kernel is internal kernel
bool isInternalKernel() const
{ return (flags_.internalKernel_) ? true : false; }
//! Finds local workgroup size
void findLocalWorkSize(
size_t workDim, //!< Work dimension
const amd::NDRange& gblWorkSize,//!< Global work size
amd::NDRange& lclWorkSize //!< Local work size
) const;
//! Returns AQL packet in CPU memory
//! if the kerenl arguments were successfully loaded, otherwise NULL
hsa_kernel_dispatch_packet_t* loadArguments(
VirtualGPU& gpu, //!< Running GPU context
const amd::Kernel& kernel, //!< AMD kernel object
const amd::NDRangeContainer& sizes, //!< NDrange container
const_address parameters, //!< Application arguments for the kernel
bool nativeMem, //!< Native memory objectes are passed
uint64_t vmDefQueue, //!< GPU VM default queue pointer
uint64_t* vmParentWrap, //!< GPU VM parent aql wrap object
std::vector<const Resource*>& memList //!< Memory list for GSL/VidMM handles
) const;
//! Returns pritnf info array
const std::vector<PrintfInfo>& printfInfo() const { return printf_; }
//! Returns the kernel index in the program
uint index() const { return index_; }
private:
//! Disable copy constructor
HSAILKernel(const HSAILKernel&);
//! Disable operator=
HSAILKernel& operator=(const HSAILKernel&);
//! Creates AQL kernel HW info
bool aqlCreateHWInfo(amd::hsa::loader::Symbol *sym);
//! Initializes arguments_ and the abstraction layer kernel parameters
void initArgList(
const aclArgData* aclArg //!< List of ACL arguments
);
//! Initializes Hsail Argument metadata and info
void initHsailArgs(
const aclArgData* aclArg //!< List of ACL arguments
);
//! Initializes Hsail Printf metadata and info
void initPrintf(
const aclPrintfFmt* aclPrintf //!< List of ACL printfs
);
std::vector<Argument*> arguments_; //!< Vector list of HSAIL Arguments
std::string compileOptions_; //!< compile used for finalizing this kernel
amd_kernel_code_t* cpuAqlCode_; //!< AQL kernel code on CPU
const NullDevice& dev_; //!< GPU device object
const HSAILProgram& prog_; //!< Reference to the parent program
std::vector<PrintfInfo> printf_; //!< Format strings for GPU printf support
uint index_; //!< Kernel index in the program
gpu::Memory* code_; //!< Memory object with ISA code
size_t codeSize_; //!< Size of ISA code
char* hwMetaData_; //!< SI metadata
union Flags {
struct {
uint imageEna_: 1; //!< Kernel uses images
uint imageWriteEna_: 1; //!< Kernel uses image writes
uint dynamicParallelism_: 1; //!< Dynamic parallelism enabled
uint internalKernel_: 1; //!< True: internal kernel
};
uint value_;
Flags(): value_(0) {}
} flags_;
};
/*@}*/} // namespace gpu
#endif /*GPUKERNEL_HPP_*/