P4 to Git Change 1177220 by emankov@em-hsa-amd on 2015/08/05 06:08:39
ECR #333753 - ORCA RT/Compiler Lib/aoc2: AMD HSA Code Object Import feature (part II) - arbitrary hidden (extra) kernargs support. Only the HSAIL path is affected; blit kernels are not affected. To use offline via aoc2: aoc2 -hsacodeobject=<importing_code_object_filename> -numhiddenkernargs=<num> -cl-std=CL2.0 -march=hsail(-64) -mdevice=Bonaire <source_cl_filename>. To use online, set the environment variable AMD_DEBUG_HSA_NUM_HIDDEN_KERNARGS=<num>, where num >= 0. If num == 0, no additional arguments are added by the RT for any kernel. The default value is unchanged and equal to 6 for now. Misc: + get rid of the PRE & POST defines in Compiler Lib, as they started to conflict with the identically named defines in the ugl\gl\gs\hwl\ headers; + minor copy/paste eliminations & typo fixes; + ocltst complib tests update. Testing: pre check-in, and manually, based on the OCL SDK MatrixMultiplication. Reviewers: Brian Sumner, German Andryeyev, Nikolay Haustov, Artem Tamazov. Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/v0_8/if_acl.cpp#72 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/hsail_be.cpp#49 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/metadata.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclDefs.h#5 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclEnums.h#19 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclStructs.h#17 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/bif_section_labels.hpp#21 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.h#20 edit ... //depot/stg/opencl/drivers/opencl/compiler/tools/aoc2/aoc2.cpp#74 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#181 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#249 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#291 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#113 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#199 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#369 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsaprogram.cpp#38 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsakernel.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsakernel.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsaprogram.cpp#19 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#43 edit ... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/complib/CLAssumptionCheck.cpp#43 edit ... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/complib/CLEnumCheck.cpp#44 edit
Этот коммит содержится в:
@@ -2641,6 +2641,17 @@ if_aclQueryInfo(aclCompiler *cl,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case RT_NUM_KERNEL_HIDDEN_ARGS: {
|
||||
size_t hidden_kernargs_size = sizeof(md->numHiddenKernelArgs);
|
||||
if (!ptr) {
|
||||
*size = hidden_kernargs_size;
|
||||
success = true;
|
||||
} else if (*size >= hidden_kernargs_size) {
|
||||
memcpy(ptr, &md->numHiddenKernelArgs, hidden_kernargs_size);
|
||||
success = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (success) ? ACL_SUCCESS : ACL_ERROR;
|
||||
}
|
||||
|
||||
@@ -28,4 +28,10 @@
|
||||
#define BIF_API_3_0
|
||||
#endif
|
||||
|
||||
#ifndef MAX_HIDDEN_KERNARGS_NUM
|
||||
#define MAX_HIDDEN_KERNARGS_NUM 6
|
||||
#else
|
||||
#error "MAX_HIDDEN_KERNARGS_NUM is already defined"
|
||||
#endif
|
||||
|
||||
#endif // _ACL_DEFS_0_8_H_
|
||||
|
||||
@@ -188,29 +188,30 @@ typedef enum _bif_sections_enum_0_8 {
|
||||
|
||||
//! An enumeration that defines what are valid queries for aclQueryInfo.
|
||||
typedef enum _rt_query_types_enum_0_8 {
|
||||
RT_ABI_VERSION = 0,
|
||||
RT_DEVICE_NAME = 1,
|
||||
RT_MEM_SIZES = 2,
|
||||
RT_GPU_FUNC_CAPS = 3,
|
||||
RT_GPU_FUNC_ID = 4,
|
||||
RT_GPU_DEFAULT_ID = 5,
|
||||
RT_WORK_GROUP_SIZE = 6,
|
||||
RT_WORK_REGION_SIZE = 7,
|
||||
RT_ARGUMENT_ARRAY = 8,
|
||||
RT_GPU_PRINTF_ARRAY = 9,
|
||||
RT_CPU_BARRIER_NAMES = 10,
|
||||
RT_DEVICE_ENQUEUE = 11,
|
||||
RT_KERNEL_INDEX = 12,
|
||||
RT_KERNEL_NAME = 13,
|
||||
RT_KERNEL_NAMES = 14,
|
||||
RT_CONTAINS_LLVMIR = 15,
|
||||
RT_CONTAINS_OPTIONS = 16,
|
||||
RT_CONTAINS_BRIG = 17,
|
||||
RT_CONTAINS_HSAIL = 18,
|
||||
RT_CONTAINS_ISA = 19,
|
||||
RT_CONTAINS_LOADER_MAP = 20,
|
||||
RT_CONTAINS_SPIR = 21,
|
||||
RT_LAST_TYPE = 22
|
||||
RT_ABI_VERSION = 0,
|
||||
RT_DEVICE_NAME = 1,
|
||||
RT_MEM_SIZES = 2,
|
||||
RT_GPU_FUNC_CAPS = 3,
|
||||
RT_GPU_FUNC_ID = 4,
|
||||
RT_GPU_DEFAULT_ID = 5,
|
||||
RT_WORK_GROUP_SIZE = 6,
|
||||
RT_WORK_REGION_SIZE = 7,
|
||||
RT_ARGUMENT_ARRAY = 8,
|
||||
RT_GPU_PRINTF_ARRAY = 9,
|
||||
RT_CPU_BARRIER_NAMES = 10,
|
||||
RT_DEVICE_ENQUEUE = 11,
|
||||
RT_KERNEL_INDEX = 12,
|
||||
RT_KERNEL_NAME = 13,
|
||||
RT_KERNEL_NAMES = 14,
|
||||
RT_CONTAINS_LLVMIR = 15,
|
||||
RT_CONTAINS_OPTIONS = 16,
|
||||
RT_CONTAINS_BRIG = 17,
|
||||
RT_CONTAINS_HSAIL = 18,
|
||||
RT_CONTAINS_ISA = 19,
|
||||
RT_CONTAINS_LOADER_MAP = 20,
|
||||
RT_CONTAINS_SPIR = 21,
|
||||
RT_NUM_KERNEL_HIDDEN_ARGS = 22,
|
||||
RT_LAST_TYPE = 23
|
||||
} aclQueryType_0_8;
|
||||
|
||||
//! An enumeration for the various GPU capabilities
|
||||
|
||||
@@ -119,6 +119,7 @@ typedef struct _acl_metadata_0_8 {
|
||||
const char *deviceName; // RT_DEVICE_NAME
|
||||
bool enqueue_kernel; // RT_DEVICE_ENQUEUE
|
||||
uint32_t kernel_index; // RT_KERNEL_INDEX
|
||||
uint32_t numHiddenKernelArgs; // RT_NUM_KERNEL_HIDDEN_ARGS
|
||||
} aclMetadata_0_8;
|
||||
|
||||
//! An structure that holds information on the capabilities of the bif device.
|
||||
|
||||
@@ -6,8 +6,10 @@
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#define PRE 0
|
||||
#define POST 1
|
||||
namespace bif {
|
||||
const unsigned PRE = 0;
|
||||
const unsigned POST = 1;
|
||||
}
|
||||
|
||||
typedef enum {
|
||||
symOpenclCompilerOptions,
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
//
|
||||
// Copyright (c) 2011 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#include "acl.h"
|
||||
#include "aclTypes.h"
|
||||
|
||||
#include "api/v0_8/aclValidation.h"
|
||||
#include "libUtils.h"
|
||||
#include "bif/bifbase.hpp"
|
||||
@@ -10,7 +9,6 @@
|
||||
#include "utils/versions.hpp"
|
||||
#include "utils/options.hpp"
|
||||
#include "backends/gpu/scwrapper/devState.h"
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include "bif/bif.hpp"
|
||||
extern aclBinary* constructBinary(size_t struct_version,
|
||||
|
||||
@@ -3,12 +3,16 @@
|
||||
//
|
||||
#ifndef _CL_LIB_UTILS_0_8_H_
|
||||
#define _CL_LIB_UTILS_0_8_H_
|
||||
#include "v0_8/aclTypes.h"
|
||||
#include "acl.h"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <iterator>
|
||||
#include <cstdlib>
|
||||
#include <cassert>
|
||||
#include "library.hpp"
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
using namespace bif;
|
||||
|
||||
// Utility function to set a flag in option structure
|
||||
// of the aclDevCaps.
|
||||
void
|
||||
@@ -150,6 +154,83 @@ aclutAlloc(const aclCompilerOptions *bin);
|
||||
FreeFunc
|
||||
aclutFree(const aclCompilerOptions *bin);
|
||||
|
||||
// Splits a whitespace-separated C string into its individual tokens.
//
// str - NUL-terminated string to split; a null pointer is treated as an
//       empty string.
//
// Returns the tokens in their original order. Runs of whitespace never
// produce empty tokens, so an empty or all-whitespace input yields an empty
// vector.
inline std::vector<std::string> splitSpaceSeparatedString(char *str)
{
  std::vector<std::string> tokens;
  // Constructing a std::string from a null pointer is undefined behaviour,
  // so bail out before touching the input.
  if (str == NULL) {
    return tokens;
  }
  std::stringstream stream((std::string(str)));
  // Stream extraction skips any amount of leading whitespace between tokens.
  std::istream_iterator<std::string> first(stream), last;
  tokens.assign(first, last);
  return tokens;
}
|
||||
|
||||
// Helper function that returns OpenCL mangled kernel name.
|
||||
inline std::string
|
||||
aclutOpenclMangledKernelName(const std::string& kernel_name)
|
||||
{
|
||||
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclKernel);
|
||||
assert(sym && "symbol not found");
|
||||
return std::string("&") + sym->str[PRE] + kernel_name + sym->str[POST];
|
||||
}
|
||||
|
||||
// Helper function that returns OpenCL mangled kernel metadata symbol name.
|
||||
inline std::string
|
||||
aclutOpenclMangledKernelMetadataName(const std::string& kernel_name)
|
||||
{
|
||||
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta);
|
||||
assert(sym && "symbol not found");
|
||||
return sym->str[PRE] + aclutOpenclMangledKernelName(kernel_name) + sym->str[POST];
|
||||
}
|
||||
|
||||
#ifdef WITH_TARGET_HSAIL
|
||||
// Helper function that updates metadata for all the kernels in binary;
|
||||
// the updated attribute is the number of hidden kernel arguments.
|
||||
inline acl_error
|
||||
aclutUpdateMetadataWithHiddenKernargsNum(aclCompiler* cl, aclBinary* bin, uint32_t num) {
|
||||
if (num == MAX_HIDDEN_KERNARGS_NUM) {
|
||||
return ACL_SUCCESS;
|
||||
}
|
||||
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta);
|
||||
assert(sym && "symbol not found");
|
||||
aclSections secID = sym->sections[0];
|
||||
size_t kernelNamesSize = 0;
|
||||
acl_error error_code = aclQueryInfo(cl, bin, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
return error_code;
|
||||
}
|
||||
char* kernelNames = new char[kernelNamesSize];
|
||||
error_code = aclQueryInfo(cl, bin, RT_KERNEL_NAMES, NULL, kernelNames, &kernelNamesSize);
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
delete kernelNames;
|
||||
return error_code;
|
||||
}
|
||||
std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames);
|
||||
delete kernelNames;
|
||||
size_t roSize = 0;
|
||||
for (auto it = vKernels.begin(); it != vKernels.end(); ++it) {
|
||||
std::string symbol = aclutOpenclMangledKernelMetadataName(*it);
|
||||
void* roSec = const_cast<void*>(aclExtractSymbol(cl, bin, &roSize, secID, symbol.c_str(), &error_code));
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
return error_code;
|
||||
}
|
||||
if (!roSec || roSize == 0) {
|
||||
error_code = ACL_ELF_ERROR;
|
||||
return error_code;
|
||||
}
|
||||
aclMetadata *md = reinterpret_cast<aclMetadata*>(roSec);
|
||||
md->numHiddenKernelArgs = num;
|
||||
error_code = aclRemoveSymbol(cl, bin, secID, symbol.c_str());
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
return error_code;
|
||||
}
|
||||
error_code = aclInsertSymbol(cl, bin, md, roSize, secID, symbol.c_str());
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
return error_code;
|
||||
}
|
||||
}
|
||||
return error_code;
|
||||
}
|
||||
#endif
|
||||
|
||||
inline bool is64BitTarget(const aclTargetInfo& target)
|
||||
{
|
||||
return (target.arch_id == aclX64 ||
|
||||
@@ -184,15 +265,6 @@ enum scId {
|
||||
SC_LAST,
|
||||
};
|
||||
|
||||
// Splits a whitespace-separated C string into its individual tokens.
//
// str - NUL-terminated string to split; a null pointer is treated as an
//       empty string.
//
// Returns the tokens in their original order. Runs of whitespace never
// produce empty tokens, so an empty or all-whitespace input yields an empty
// vector.
inline std::vector<std::string> splitSpaceSeparatedString(char *str)
{
  std::vector<std::string> tokens;
  // Constructing a std::string from a null pointer is undefined behaviour,
  // so bail out before touching the input.
  if (str == NULL) {
    return tokens;
  }
  std::stringstream stream((std::string(str)));
  // Stream extraction skips any amount of leading whitespace between tokens.
  std::istream_iterator<std::string> first(stream), last;
  tokens.assign(first, last);
  return tokens;
}
|
||||
|
||||
// Helper function that allocates an aligned memory.
|
||||
inline void*
|
||||
alignedMalloc(size_t size, size_t alignment)
|
||||
|
||||
@@ -604,6 +604,14 @@ Kernel::~Kernel()
|
||||
delete signature_;
|
||||
}
|
||||
|
||||
std::string
|
||||
Kernel::openclMangledName(const std::string& name)
|
||||
{
|
||||
const oclBIFSymbolStruct* bifSym = findBIF30SymStruct(symOpenclKernel);
|
||||
assert(bifSym && "symbol not found");
|
||||
return std::string("&") + bifSym->str[bif::PRE] + name + bifSym->str[bif::POST];
|
||||
}
|
||||
|
||||
void
|
||||
Memory::saveMapInfo(
|
||||
const amd::Coord3D origin,
|
||||
@@ -1246,7 +1254,7 @@ ClBinary::getBIFSymbol(unsigned int symbolID) const
|
||||
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF20, nSymbols, symID);
|
||||
assert(symb && "BIF20 symbol with symbolID not found");
|
||||
if (symb) {
|
||||
return std::string(symb->str[PRE]) + std::string(symb->str[POST]);
|
||||
return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1255,7 +1263,7 @@ ClBinary::getBIFSymbol(unsigned int symbolID) const
|
||||
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF30, nSymbols, symID);
|
||||
assert(symb && "BIF30 symbol with symbolID not found");
|
||||
if (symb) {
|
||||
return std::string(symb->str[PRE]) + std::string(symb->str[POST]);
|
||||
return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -940,7 +940,7 @@ public:
|
||||
//! Return the build log
|
||||
const std::string& buildLog() const { return buildLog_; }
|
||||
|
||||
static std::string openclMangledName(const std::string& name) { return "&__OpenCL_" + name + "_kernel"; }
|
||||
static std::string openclMangledName(const std::string& name);
|
||||
|
||||
protected:
|
||||
std::string name_; //!< kernel name
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#include "device/gpu/gpusched.hpp"
|
||||
#include "platform/commandqueue.hpp"
|
||||
#include "utils/options.hpp"
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
|
||||
#include "acl.h"
|
||||
#include "SCShadersR678XXCommon.h"
|
||||
@@ -3432,7 +3431,7 @@ HSAILKernel::initArgList(const aclArgData* aclArg)
|
||||
size_t offset = 0;
|
||||
|
||||
// Reserved arguments for HSAIL launch
|
||||
aclArg += ExtraArguments;
|
||||
aclArg += MaxExtraArgumentsNum;
|
||||
for (uint i = 0; aclArg->struct_size != 0; i++, aclArg++) {
|
||||
desc.name_ = arguments_[i]->name_.c_str();
|
||||
desc.type_ = GetOclType(aclArg);
|
||||
@@ -3479,7 +3478,7 @@ HSAILKernel::initHsailArgs(const aclArgData* aclArg)
|
||||
int offset = 0;
|
||||
|
||||
// Reserved arguments for HSAIL launch
|
||||
aclArg += ExtraArguments;
|
||||
aclArg += MaxExtraArgumentsNum;
|
||||
|
||||
// Iterate through the each kernel argument
|
||||
for (; aclArg->struct_size != 0; aclArg++) {
|
||||
@@ -3569,7 +3568,8 @@ HSAILKernel::initPrintf(const aclPrintfFmt* aclPrintf)
|
||||
|
||||
HSAILKernel::HSAILKernel(std::string name,
|
||||
HSAILProgram* prog,
|
||||
std::string compileOptions)
|
||||
std::string compileOptions,
|
||||
uint extraArgsNum)
|
||||
: device::Kernel(name)
|
||||
, compileOptions_(compileOptions)
|
||||
, dev_(prog->dev())
|
||||
@@ -3578,6 +3578,7 @@ HSAILKernel::HSAILKernel(std::string name,
|
||||
, code_(NULL)
|
||||
, codeSize_(0)
|
||||
, hwMetaData_(NULL)
|
||||
, extraArgumentsNum_(extraArgsNum)
|
||||
{
|
||||
hsa_ = true;
|
||||
}
|
||||
@@ -3598,14 +3599,16 @@ HSAILKernel::~HSAILKernel()
|
||||
bool
|
||||
HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize)
|
||||
{
|
||||
acl_error error;
|
||||
const oclBIFSymbolStruct* bifSym = findBIF30SymStruct(symOpenclKernel);
|
||||
assert(bifSym && "symbol not found");
|
||||
std::string openClKernelName(std::string("&") + bifSym->str[PRE] + name() + bifSym->str[POST]);
|
||||
if (extraArgumentsNum_ > MaxExtraArgumentsNum) {
|
||||
LogError("Failed to initialize kernel: extra arguments number is bigger than is supported");
|
||||
return false;
|
||||
}
|
||||
acl_error error = ACL_SUCCESS;
|
||||
std::string openClKernelName = openclMangledName(name());
|
||||
//compile kernel down to ISA
|
||||
if (finalize) {
|
||||
std::string options(compileOptions_.c_str());
|
||||
flags_.internalKernel_ = (compileOptions_.find("-cl-internal-kernel") !=
|
||||
flags_.internalKernel_ = (compileOptions_.find("-cl-internal-kernel") !=
|
||||
std::string::npos) ? true: false;
|
||||
options.append(" -just-kernel=");
|
||||
options.append(openClKernelName.c_str());
|
||||
@@ -3618,7 +3621,7 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize)
|
||||
options.c_str(), ACL_TYPE_CG, ACL_TYPE_ISA, NULL);
|
||||
buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
|
||||
if (error != ACL_SUCCESS) {
|
||||
LogError("Failed to finalize");
|
||||
LogError("Failed to finalize kernel");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -3900,35 +3903,37 @@ HSAILKernel::loadArguments(
|
||||
address aqlStruct = gpu.cb(1)->sysMemCopy();
|
||||
bool srdResource = false;
|
||||
|
||||
// The HLC generates 3 additional arguments for the global offsets
|
||||
//and fourth argument is the printf_buffer pointer
|
||||
size_t offsetSize[HSAILKernel::ExtraArguments] = { 0, 0, 0, 0, 0, 0 };
|
||||
for (uint i = 0; i < sizes.dimensions(); ++i) {
|
||||
offsetSize[i] = sizes.offset()[i];
|
||||
if (extraArgumentsNum_ > 0) {
|
||||
assert(MaxExtraArgumentsNum >= 6 && "MaxExtraArgumentsNum has changed, the below algorithm should be changed accordingly");
|
||||
size_t extraArgs[MaxExtraArgumentsNum] = { 0, 0, 0, 0, 0, 0 };
|
||||
// The HLC generates up to 3 additional arguments for the global offsets
|
||||
for (uint i = 0; i < sizes.dimensions(); ++i) {
|
||||
extraArgs[i] = sizes.offset()[i];
|
||||
}
|
||||
// Check if the kernel may have printf output
|
||||
if ((printfInfo().size() > 0) &&
|
||||
// and printf buffer was allocated
|
||||
(gpu.printfDbgHSA().dbgBuffer() != NULL)) {
|
||||
// and set the fourth argument as the printf_buffer pointer
|
||||
extraArgs[3] = static_cast<size_t>(gpu.printfDbgHSA().dbgBuffer()->vmAddress());
|
||||
memList.push_back(gpu.printfDbgHSA().dbgBuffer());
|
||||
}
|
||||
if (dynamicParallelism()) {
|
||||
// Provide the host parent AQL wrap object to the kernel
|
||||
AmdAqlWrap* wrap = reinterpret_cast<AmdAqlWrap*>(aqlStruct);
|
||||
memset(wrap, 0, sizeof(AmdAqlWrap));
|
||||
wrap->state = AQL_WRAP_BUSY;
|
||||
ConstBuffer* cb = gpu.constBufs_[1];
|
||||
cb->uploadDataToHw(sizeof(AmdAqlWrap));
|
||||
*vmParentWrap = cb->vmAddress() + cb->wrtOffset();
|
||||
// and set 5th & 6th arguments
|
||||
extraArgs[4] = vmDefQueue;
|
||||
extraArgs[5] = *vmParentWrap;
|
||||
memList.push_back(cb);
|
||||
}
|
||||
WriteAqlArg(&aqlArgBuf, extraArgs, sizeof(size_t)*extraArgumentsNum_, sizeof(size_t));
|
||||
}
|
||||
|
||||
if (dynamicParallelism()) {
|
||||
// Provide the host parent AQL wrap object to the kernel
|
||||
AmdAqlWrap* wrap = reinterpret_cast<AmdAqlWrap*>(aqlStruct);
|
||||
memset(wrap, 0, sizeof(AmdAqlWrap));
|
||||
wrap->state = AQL_WRAP_BUSY;
|
||||
ConstBuffer* cb = gpu.constBufs_[1];
|
||||
cb->uploadDataToHw(sizeof(AmdAqlWrap));
|
||||
*vmParentWrap = cb->vmAddress() + cb->wrtOffset();
|
||||
offsetSize[4] = vmDefQueue;
|
||||
offsetSize[5] = *vmParentWrap;
|
||||
memList.push_back(cb);
|
||||
}
|
||||
|
||||
// Check if the kernel may have printf output
|
||||
if ((printfInfo().size() > 0) &&
|
||||
// and printf buffer was allocated
|
||||
(gpu.printfDbgHSA().dbgBuffer() != NULL)) {
|
||||
offsetSize[3] = static_cast<size_t>(gpu.printfDbgHSA().dbgBuffer()->vmAddress());
|
||||
memList.push_back(gpu.printfDbgHSA().dbgBuffer());
|
||||
}
|
||||
WriteAqlArg(&aqlArgBuf, offsetSize, sizeof(offsetSize), sizeof(size_t));
|
||||
|
||||
const amd::KernelSignature& signature = kernel.signature();
|
||||
const amd::KernelParameters& kernelParams = kernel.parameters();
|
||||
|
||||
|
||||
@@ -845,12 +845,13 @@ public:
|
||||
uint numElem_; //!< Number of elements
|
||||
};
|
||||
|
||||
// Global offsets located in the first 3 elements
|
||||
static const uint ExtraArguments = 6;
|
||||
// Max number of possible extra (hidden) kernel arguments
|
||||
static const uint MaxExtraArgumentsNum = 6;
|
||||
|
||||
HSAILKernel(std::string name,
|
||||
HSAILProgram* prog,
|
||||
std::string compileOptions);
|
||||
std::string compileOptions,
|
||||
uint extraArgsNum);
|
||||
|
||||
virtual ~HSAILKernel();
|
||||
|
||||
@@ -928,6 +929,9 @@ public:
|
||||
//! Returns the kernel index in the program
|
||||
uint index() const { return index_; }
|
||||
|
||||
//! Returns kernel's extra argument count
|
||||
uint extraArgumentsNum() const { return extraArgumentsNum_; }
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
HSAILKernel(const HSAILKernel&);
|
||||
@@ -966,6 +970,8 @@ private:
|
||||
|
||||
char* hwMetaData_; //!< SI metadata
|
||||
|
||||
uint extraArgumentsNum_; //! Number of extra (hidden) kernel arguments
|
||||
|
||||
union Flags {
|
||||
struct {
|
||||
uint imageEna_: 1; //!< Kernel uses images
|
||||
|
||||
@@ -1974,7 +1974,7 @@ HSAILProgram::getNextCompilationStageFromBinary(amd::option::Options* options) {
|
||||
break;
|
||||
const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions);
|
||||
assert(symbol && "symbol not found");
|
||||
std::string symName = std::string(symbol->str[PRE]) + std::string(symbol->str[POST]);
|
||||
std::string symName = std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]);
|
||||
size_t symSize = 0;
|
||||
const void *opts = aclExtractSymbol(dev().hsaCompiler(),
|
||||
binaryElf_, &symSize, aclCOMMENT, symName.c_str(), &errorCode);
|
||||
@@ -2095,21 +2095,21 @@ HSAILProgram::linkImpl(amd::option::Options* options)
|
||||
}
|
||||
hsa_status_t status = executable_->LoadCodeObject(agent, code_object, NULL);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog_ += "Error while HSA Loader phase: loading HSA Code Object \n";
|
||||
buildLog_ += "Error while HSA Loader phase: loading HSA Code Object\n";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
size_t kernelNamesSize = 0;
|
||||
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error while Finalization phase: kernel names query from the ELF failed\n";
|
||||
buildLog_ += "Error while Finalization phase: Kernel names size querying from the ELF failed\n";
|
||||
return false;
|
||||
}
|
||||
if (!isNull() && kernelNamesSize > 0) {
|
||||
char* kernelNames = new char[kernelNamesSize];
|
||||
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, kernelNames, &kernelNamesSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error while Finalization phase: kernel's Metadata is corrupted in the ELF\n";
|
||||
buildLog_ += "Error while Finalization phase: Kernel names querying from the ELF failed\n";
|
||||
delete kernelNames;
|
||||
return false;
|
||||
}
|
||||
@@ -2117,11 +2117,22 @@ HSAILProgram::linkImpl(amd::option::Options* options)
|
||||
delete kernelNames;
|
||||
std::vector<std::string>::iterator it = vKernels.begin();
|
||||
bool dynamicParallelism = false;
|
||||
aclMetadata md;
|
||||
md.numHiddenKernelArgs = 0;
|
||||
size_t sizeOfnumHiddenKernelArgs = sizeof(md.numHiddenKernelArgs);
|
||||
for (it; it != vKernels.end(); ++it) {
|
||||
std::string kernelName = *it;
|
||||
HSAILKernel *aKernel = new HSAILKernel(kernelName, this, options->origOptionStr + hsailOptions());
|
||||
std::string kernelName(*it);
|
||||
std::string openclKernelName = Kernel::openclMangledName(kernelName);
|
||||
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
|
||||
openclKernelName.c_str(), &md.numHiddenKernelArgs, &sizeOfnumHiddenKernelArgs);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error while Finalization phase: Kernel extra arguments count querying from the ELF failed\n";
|
||||
return false;
|
||||
}
|
||||
HSAILKernel *aKernel = new HSAILKernel(kernelName, this, options->origOptionStr + hsailOptions(),
|
||||
md.numHiddenKernelArgs);
|
||||
kernels()[kernelName] = aKernel;
|
||||
amd::hsa::loader::Symbol *sym = executable_->GetSymbol("", Kernel::openclMangledName(kernelName).c_str(), agent, 0);
|
||||
amd::hsa::loader::Symbol *sym = executable_->GetSymbol("", openclKernelName.c_str(), agent, 0);
|
||||
if (!sym) {
|
||||
LogError("Failed to get kernel ISA code");
|
||||
return false;
|
||||
|
||||
@@ -1872,9 +1872,9 @@ VirtualGPU::submitKernelInternalHSA(
|
||||
gpuDefQueue->virtualQueue_->vmAddress();
|
||||
address argum = gpuDefQueue->virtualQueue_->data() + offsArg;
|
||||
print << "Kernel: " << child->name() << "\n";
|
||||
static const char* Names[HSAILKernel::ExtraArguments] = {
|
||||
static const char* Names[HSAILKernel::MaxExtraArgumentsNum] = {
|
||||
"Offset0: ", "Offset1: ","Offset2: ","PrintfBuf: ", "VqueuePtr: ", "AqlWrap: "};
|
||||
for (j = 0; j < HSAILKernel::ExtraArguments; ++j) {
|
||||
for (j = 0; j < child->extraArgumentsNum(); ++j) {
|
||||
print << "\t" << Names[j] << *(size_t*)argum;
|
||||
print << "\n";
|
||||
argum += sizeof(size_t);
|
||||
|
||||
@@ -295,14 +295,14 @@ namespace oclhsa {
|
||||
std::string openClKernelName("&__OpenCL_" + kernelName + "_kernel");
|
||||
const oclBIFSymbolStruct* isaSymbolStruct = findBIF30SymStruct(symISABinary);
|
||||
assert(isaSymbolStruct && "symbol not found");
|
||||
std::string kernelIsaSymbol = isaSymbolStruct->str[PRE] +
|
||||
openClKernelName + isaSymbolStruct->str[POST];
|
||||
std::string kernelIsaSymbol = isaSymbolStruct->str[bif::PRE] +
|
||||
openClKernelName + isaSymbolStruct->str[bif::POST];
|
||||
|
||||
const oclBIFSymbolStruct* debugSymbolStruct = findBIF30SymStruct(symDebugInfo);
|
||||
assert(debugSymbolStruct && "symbol not found");
|
||||
//For debug symbols, the PRE is used for BRIG debug and the POST is used for
|
||||
//ISA debug
|
||||
std::string kernelIsaDebugSymbol = debugSymbolStruct->str[POST] + openClKernelName;
|
||||
std::string kernelIsaDebugSymbol = debugSymbolStruct->str[bif::POST] + openClKernelName;
|
||||
|
||||
//Extract the ISA section
|
||||
size_t symbolSize;
|
||||
|
||||
Ссылка в новой задаче
Block a user