c99d679c9e
SWDEV-86035 - Add OCL backend for PAL - PAL backend build is disabled by default. "BUILD_PAL_DEVICE = yes" enables the build. You also have to update the client workspace with PAL mapping: //depot/stg/pal/... //<your_opencl_location>/runtime/device/pal/palbe/... Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#130 edit ... //depot/stg/opencl/drivers/opencl/compiler/sclibdefs.opencl#8 edit ... //depot/stg/opencl/drivers/opencl/opencldefs#166 edit ... //depot/stg/opencl/drivers/opencl/openclrules#91 edit ... //depot/stg/opencl/drivers/opencl/runtime/Makefile#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#192 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/Makefile#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/build/Makefile#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/build/Makefile.pal#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugger.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d10.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d11.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d9.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevicegl.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsched.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palschedcl.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltrap.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/runtimedefs#36 edit ... //depot/stg/opencl/drivers/opencl/runtime/top.hpp#23 edit
264 строки
8.2 KiB
C++
264 строки
8.2 KiB
C++
//
|
|
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
#ifndef GPUKERNEL_HPP_
|
|
#define GPUKERNEL_HPP_
|
|
|
|
#include "device/device.hpp"
|
|
#include "utils/macros.hpp"
|
|
#include "platform/command.hpp"
|
|
#include "platform/program.hpp"
|
|
#include "platform/kernel.hpp"
|
|
#include "platform/sampler.hpp"
|
|
#include "device/pal/paldevice.hpp"
|
|
#include "device/pal/palvirtual.hpp"
|
|
#include "amd_hsa_kernel_code.h"
|
|
#include "device/pal/palprintf.hpp"
|
|
#include "device/pal/palwavelimiter.hpp"
|
|
#include "hsa.h"
|
|
|
|
namespace amd {
|
|
namespace hsa {
|
|
namespace loader {
|
|
class Symbol;
|
|
} // loader
|
|
} // hsa
|
|
} // amd
|
|
|
|
//! \namespace pal PAL Device Implementation
|
|
namespace pal {
|
|
|
|
class VirtualGPU;
|
|
class Device;
|
|
class NullDevice;
|
|
class HSAILProgram;
|
|
|
|
struct HWSHADER_Helper
|
|
{
|
|
template <typename S, typename T>
|
|
static T Get(S base, T offset) {
|
|
return reinterpret_cast<T>(reinterpret_cast<intptr_t>(base)
|
|
+ reinterpret_cast<size_t>(offset));
|
|
}
|
|
};
|
|
|
|
#define HWSHADER_Get(shader, field) \
|
|
HWSHADER_Helper::Get((shader), (shader)->field)
|
|
|
|
template <typename D, typename S>
|
|
static void CalcPtr(D& dst, const S src, size_t structSize, size_t size) {
|
|
dst = reinterpret_cast<D>(reinterpret_cast<const intptr_t>(src)
|
|
+ structSize * size);
|
|
}
|
|
|
|
/*! \addtogroup pal PAL Device Implementation
|
|
* @{
|
|
*/
|
|
|
|
enum HSAIL_ADDRESS_QUALIFIER{
|
|
HSAIL_ADDRESS_ERROR = 0,
|
|
HSAIL_ADDRESS_GLOBAL,
|
|
HSAIL_ADDRESS_LOCAL,
|
|
HSAIL_MAX_ADDRESS_QUALIFIERS
|
|
} ;
|
|
|
|
enum HSAIL_ARG_TYPE{
|
|
HSAIL_ARGTYPE_ERROR = 0,
|
|
HSAIL_ARGTYPE_POINTER,
|
|
HSAIL_ARGTYPE_VALUE,
|
|
HSAIL_ARGTYPE_IMAGE,
|
|
HSAIL_ARGTYPE_SAMPLER,
|
|
HSAIL_ARGTYPE_QUEUE,
|
|
HSAIL_ARGMAX_ARG_TYPES
|
|
};
|
|
|
|
enum HSAIL_DATA_TYPE{
|
|
HSAIL_DATATYPE_ERROR = 0,
|
|
HSAIL_DATATYPE_B1,
|
|
HSAIL_DATATYPE_B8,
|
|
HSAIL_DATATYPE_B16,
|
|
HSAIL_DATATYPE_B32,
|
|
HSAIL_DATATYPE_B64,
|
|
HSAIL_DATATYPE_S8,
|
|
HSAIL_DATATYPE_S16,
|
|
HSAIL_DATATYPE_S32,
|
|
HSAIL_DATATYPE_S64,
|
|
HSAIL_DATATYPE_U8,
|
|
HSAIL_DATATYPE_U16,
|
|
HSAIL_DATATYPE_U32,
|
|
HSAIL_DATATYPE_U64,
|
|
HSAIL_DATATYPE_F16,
|
|
HSAIL_DATATYPE_F32,
|
|
HSAIL_DATATYPE_F64,
|
|
HSAIL_DATATYPE_STRUCT,
|
|
HSAIL_DATATYPE_OPAQUE,
|
|
HSAIL_DATATYPE_MAX_TYPES
|
|
};
|
|
|
|
enum HSAIL_ACCESS_TYPE {
|
|
HSAIL_ACCESS_TYPE_NONE = 0,
|
|
HSAIL_ACCESS_TYPE_RO,
|
|
HSAIL_ACCESS_TYPE_WO,
|
|
HSAIL_ACCESS_TYPE_RW
|
|
};
|
|
|
|
class HSAILKernel : public device::Kernel
|
|
{
|
|
public:
|
|
struct Argument
|
|
{
|
|
std::string name_; //!< Argument's name
|
|
std::string typeName_; //!< Argument's type name
|
|
uint size_; //!< Size in bytes
|
|
uint offset_; //!< Argument's offset
|
|
uint alignment_; //!< Argument's alignment
|
|
HSAIL_ARG_TYPE type_; //!< Type of the argument
|
|
HSAIL_ADDRESS_QUALIFIER addrQual_; //!< Address qualifier of the argument
|
|
HSAIL_DATA_TYPE dataType_; //!< The type of data
|
|
uint numElem_; //!< Number of elements
|
|
HSAIL_ACCESS_TYPE access_; //!< Access type for the argument
|
|
};
|
|
|
|
// Max number of possible extra (hidden) kernel arguments
|
|
static const uint MaxExtraArgumentsNum = 6;
|
|
|
|
HSAILKernel(std::string name,
|
|
HSAILProgram* prog,
|
|
std::string compileOptions,
|
|
uint extraArgsNum);
|
|
|
|
virtual ~HSAILKernel();
|
|
|
|
//! Initializes the metadata required for this kernel,
|
|
//! finalizes the kernel if needed
|
|
bool init(amd::hsa::loader::Symbol *sym, bool finalize = false);
|
|
|
|
//! Returns true if memory is valid for execution
|
|
virtual bool validateMemory(uint idx, amd::Memory* amdMem) const;
|
|
|
|
//! Returns a pointer to the hsail argument
|
|
const Argument* argument(size_t i) const { return arguments_[i]; }
|
|
|
|
//! Returns the number of hsail arguments
|
|
size_t numArguments() const { return arguments_.size(); }
|
|
|
|
//! Returns GPU device object, associated with this kernel
|
|
const Device& dev() const;
|
|
|
|
//! Returns HSA program associated with this kernel
|
|
const HSAILProgram& prog() const;
|
|
|
|
//! Returns LDS size used in this kernel
|
|
uint32_t ldsSize() const
|
|
{ return cpuAqlCode_->workgroup_group_segment_byte_size; }
|
|
|
|
//! Returns pointer on CPU to AQL code info
|
|
const void* cpuAqlCode() const { return cpuAqlCode_; }
|
|
|
|
//! Returns memory object with AQL code
|
|
pal::Memory* gpuAqlCode() const { return code_; }
|
|
|
|
//! Returns size of AQL code
|
|
size_t aqlCodeSize() const { return codeSize_; }
|
|
|
|
//! Returns the size of argument buffer
|
|
size_t argsBufferSize() const
|
|
{ return cpuAqlCode_->kernarg_segment_byte_size; }
|
|
|
|
//! Returns spill reg size per workitem
|
|
int spillSegSize() const
|
|
{ return cpuAqlCode_->workitem_private_segment_byte_size; }
|
|
|
|
//! Returns TRUE if kernel uses dynamic parallelism
|
|
bool dynamicParallelism() const
|
|
{ return (flags_.dynamicParallelism_) ? true : false; }
|
|
|
|
//! Returns TRUE if kernel is internal kernel
|
|
bool isInternalKernel() const
|
|
{ return (flags_.internalKernel_) ? true : false; }
|
|
|
|
//! Finds local workgroup size
|
|
void findLocalWorkSize(
|
|
size_t workDim, //!< Work dimension
|
|
const amd::NDRange& gblWorkSize,//!< Global work size
|
|
amd::NDRange& lclWorkSize //!< Local work size
|
|
) const;
|
|
|
|
//! Returns AQL packet in CPU memory
|
|
//! if the kerenl arguments were successfully loaded, otherwise NULL
|
|
hsa_kernel_dispatch_packet_t* loadArguments(
|
|
VirtualGPU& gpu, //!< Running GPU context
|
|
const amd::Kernel& kernel, //!< AMD kernel object
|
|
const amd::NDRangeContainer& sizes, //!< NDrange container
|
|
const_address parameters, //!< Application arguments for the kernel
|
|
bool nativeMem, //!< Native memory objectes are passed
|
|
uint64_t vmDefQueue, //!< GPU VM default queue pointer
|
|
uint64_t* vmParentWrap, //!< GPU VM parent aql wrap object
|
|
std::vector<const Memory*>& memList //!< Memory list for GSL/VidMM handles
|
|
) const;
|
|
|
|
//! Returns pritnf info array
|
|
const std::vector<PrintfInfo>& printfInfo() const { return printf_; }
|
|
|
|
//! Returns the kernel index in the program
|
|
uint index() const { return index_; }
|
|
|
|
//! Returns kernel's extra argument count
|
|
uint extraArgumentsNum() const { return extraArgumentsNum_; }
|
|
|
|
private:
|
|
//! Disable copy constructor
|
|
HSAILKernel(const HSAILKernel&);
|
|
|
|
//! Disable operator=
|
|
HSAILKernel& operator=(const HSAILKernel&);
|
|
|
|
//! Creates AQL kernel HW info
|
|
bool aqlCreateHWInfo(amd::hsa::loader::Symbol *sym);
|
|
|
|
//! Initializes arguments_ and the abstraction layer kernel parameters
|
|
void initArgList(
|
|
const aclArgData* aclArg //!< List of ACL arguments
|
|
);
|
|
|
|
//! Initializes Hsail Argument metadata and info
|
|
void initHsailArgs(
|
|
const aclArgData* aclArg //!< List of ACL arguments
|
|
);
|
|
|
|
//! Initializes Hsail Printf metadata and info
|
|
void initPrintf(
|
|
const aclPrintfFmt* aclPrintf //!< List of ACL printfs
|
|
);
|
|
|
|
std::vector<Argument*> arguments_; //!< Vector list of HSAIL Arguments
|
|
std::string compileOptions_; //!< compile used for finalizing this kernel
|
|
amd_kernel_code_t* cpuAqlCode_; //!< AQL kernel code on CPU
|
|
const NullDevice& dev_; //!< GPU device object
|
|
const HSAILProgram& prog_; //!< Reference to the parent program
|
|
std::vector<PrintfInfo> printf_; //!< Format strings for GPU printf support
|
|
uint index_; //!< Kernel index in the program
|
|
|
|
pal::Memory* code_; //!< Memory object with ISA code
|
|
size_t codeSize_; //!< Size of ISA code
|
|
|
|
char* hwMetaData_; //!< SI metadata
|
|
|
|
uint extraArgumentsNum_; //! Number of extra (hidden) kernel arguments
|
|
|
|
union Flags {
|
|
struct {
|
|
uint imageEna_: 1; //!< Kernel uses images
|
|
uint imageWriteEna_: 1; //!< Kernel uses image writes
|
|
uint dynamicParallelism_: 1; //!< Dynamic parallelism enabled
|
|
uint internalKernel_: 1; //!< True: internal kernel
|
|
};
|
|
uint value_;
|
|
Flags(): value_(0) {}
|
|
} flags_;
|
|
};
|
|
|
|
/*@}*/} // namespace pal
|
|
|
|
#endif /*PALKERNEL_HPP_*/
|