P4 to Git Change 1177220 by emankov@em-hsa-amd on 2015/08/05 06:08:39

ECR #333753 - ORCA RT/Compiler Lib/aoc2: AMD HSA Code Object Import feature (part II) - arbitrary hidden (extra) kernargs support

	Only HSAIL path is affected. It doesn't affect blit kernels.

	To use offline by aoc2:
	aoc2 -hsacodeobject=<importing_code_object_filename> -numhiddenkernargs=<num> -cl-std=CL2.0 -march=hsail(-64) -mdevice=Bonaire <source_cl_filename>

	To use online by setting env:
	AMD_DEBUG_HSA_NUM_HIDDEN_KERNARGS=<num>

	where num >= 0. If num == 0, the runtime adds no additional (hidden) arguments to any kernel. The default value is unchanged and is currently 6.

	Misc:
	+ get rid of PRE & POST defines in Compiler Lib, as they started to conflict with ugl\gl\gs\hwl\ headers with the same defines.
	+ minor copy/paste eliminations & typo fixes
	+ ocltst complib tests update

	Testing: pre check-in, manually based on ocl sdk MatrixMultiplication

	Reviewers: Brian Sumner, German Andryeyev, Nikolay Haustov, Artem Tamazov

Affected files ...

... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/v0_8/if_acl.cpp#72 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/hsail_be.cpp#49 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/metadata.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclDefs.h#5 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclEnums.h#19 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclStructs.h#17 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/bif_section_labels.hpp#21 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.h#20 edit
... //depot/stg/opencl/drivers/opencl/compiler/tools/aoc2/aoc2.cpp#74 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#181 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#249 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#291 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#113 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#199 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#369 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsaprogram.cpp#38 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsakernel.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsakernel.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsaprogram.cpp#19 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#43 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/complib/CLAssumptionCheck.cpp#43 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/complib/CLEnumCheck.cpp#44 edit
Этот коммит содержится в:
foreman
2015-08-05 06:18:33 -04:00
родитель 881eeab3b8
Коммит 81b331f4c5
14 изменённых файлов: 214 добавлений и 93 удалений
+11
Просмотреть файл
@@ -2641,6 +2641,17 @@ if_aclQueryInfo(aclCompiler *cl,
}
break;
}
case RT_NUM_KERNEL_HIDDEN_ARGS: {
size_t hidden_kernargs_size = sizeof(md->numHiddenKernelArgs);
if (!ptr) {
*size = hidden_kernargs_size;
success = true;
} else if (*size >= hidden_kernargs_size) {
memcpy(ptr, &md->numHiddenKernelArgs, hidden_kernargs_size);
success = true;
}
break;
}
}
return (success) ? ACL_SUCCESS : ACL_ERROR;
}
+6
Просмотреть файл
@@ -28,4 +28,10 @@
#define BIF_API_3_0
#endif
// Hidden (extra) kernel argument count used by the compiler library.
// A definition that already exists when this header is processed is treated
// as a hard error, so a conflicting value can never be picked up silently.
#ifndef MAX_HIDDEN_KERNARGS_NUM
#define MAX_HIDDEN_KERNARGS_NUM 6
#else
#error "MAX_HIDDEN_KERNARGS_NUM is already defined"
#endif
#endif // _ACL_DEFS_0_8_H_
+24 -23
Просмотреть файл
@@ -188,29 +188,30 @@ typedef enum _bif_sections_enum_0_8 {
//! An enumeration that defines the valid query types for aclQueryInfo.
typedef enum _rt_query_types_enum_0_8 {
RT_ABI_VERSION = 0,
RT_DEVICE_NAME = 1,
RT_MEM_SIZES = 2,
RT_GPU_FUNC_CAPS = 3,
RT_GPU_FUNC_ID = 4,
RT_GPU_DEFAULT_ID = 5,
RT_WORK_GROUP_SIZE = 6,
RT_WORK_REGION_SIZE = 7,
RT_ARGUMENT_ARRAY = 8,
RT_GPU_PRINTF_ARRAY = 9,
RT_CPU_BARRIER_NAMES = 10,
RT_DEVICE_ENQUEUE = 11,
RT_KERNEL_INDEX = 12,
RT_KERNEL_NAME = 13,
RT_KERNEL_NAMES = 14,
RT_CONTAINS_LLVMIR = 15,
RT_CONTAINS_OPTIONS = 16,
RT_CONTAINS_BRIG = 17,
RT_CONTAINS_HSAIL = 18,
RT_CONTAINS_ISA = 19,
RT_CONTAINS_LOADER_MAP = 20,
RT_CONTAINS_SPIR = 21,
RT_LAST_TYPE = 22
RT_ABI_VERSION = 0,
RT_DEVICE_NAME = 1,
RT_MEM_SIZES = 2,
RT_GPU_FUNC_CAPS = 3,
RT_GPU_FUNC_ID = 4,
RT_GPU_DEFAULT_ID = 5,
RT_WORK_GROUP_SIZE = 6,
RT_WORK_REGION_SIZE = 7,
RT_ARGUMENT_ARRAY = 8,
RT_GPU_PRINTF_ARRAY = 9,
RT_CPU_BARRIER_NAMES = 10,
RT_DEVICE_ENQUEUE = 11,
RT_KERNEL_INDEX = 12,
RT_KERNEL_NAME = 13,
RT_KERNEL_NAMES = 14,
RT_CONTAINS_LLVMIR = 15,
RT_CONTAINS_OPTIONS = 16,
RT_CONTAINS_BRIG = 17,
RT_CONTAINS_HSAIL = 18,
RT_CONTAINS_ISA = 19,
RT_CONTAINS_LOADER_MAP = 20,
RT_CONTAINS_SPIR = 21,
RT_NUM_KERNEL_HIDDEN_ARGS = 22, //!< Hidden kernel argument count; answered from aclMetadata::numHiddenKernelArgs
RT_LAST_TYPE = 23
} aclQueryType_0_8;
//! An enumeration for the various GPU capabilities
+1
Просмотреть файл
@@ -119,6 +119,7 @@ typedef struct _acl_metadata_0_8 {
const char *deviceName; // RT_DEVICE_NAME
bool enqueue_kernel; // RT_DEVICE_ENQUEUE
uint32_t kernel_index; // RT_KERNEL_INDEX
uint32_t numHiddenKernelArgs; // RT_NUM_KERNEL_HIDDEN_ARGS
} aclMetadata_0_8;
//! An structure that holds information on the capabilities of the bif device.
+4 -2
Просмотреть файл
@@ -6,8 +6,10 @@
#ifdef __cplusplus
extern "C" {
#endif
#define PRE 0
#define POST 1
namespace bif {
// Indices into the two-element str[] array of a BIF symbol descriptor:
// str[PRE] is placed in front of a name and str[POST] behind it.
const unsigned int PRE = 0u;
const unsigned int POST = 1u;
} // namespace bif
typedef enum {
symOpenclCompilerOptions,
+1 -3
Просмотреть файл
@@ -1,8 +1,7 @@
//
// Copyright (c) 2011 Advanced Micro Devices, Inc. All rights reserved.
//
#include "acl.h"
#include "aclTypes.h"
#include "api/v0_8/aclValidation.h"
#include "libUtils.h"
#include "bif/bifbase.hpp"
@@ -10,7 +9,6 @@
#include "utils/versions.hpp"
#include "utils/options.hpp"
#include "backends/gpu/scwrapper/devState.h"
#include <cassert>
#include <cstring>
#include "bif/bif.hpp"
extern aclBinary* constructBinary(size_t struct_version,
+82 -10
Просмотреть файл
@@ -3,12 +3,16 @@
//
#ifndef _CL_LIB_UTILS_0_8_H_
#define _CL_LIB_UTILS_0_8_H_
#include "v0_8/aclTypes.h"
#include "acl.h"
#include <string>
#include <sstream>
#include <iterator>
#include <cstdlib>
#include <cassert>
#include "library.hpp"
#include "utils/bif_section_labels.hpp"
using namespace bif;
// Utility function to set a flag in option structure
// of the aclDevCaps.
void
@@ -150,6 +154,83 @@ aclutAlloc(const aclCompilerOptions *bin);
FreeFunc
aclutFree(const aclCompilerOptions *bin);
// Splits a NUL-terminated C string into its whitespace-separated tokens.
//
// str - text to tokenize; may be null, in which case no tokens are produced
//       (constructing a std::string from a null pointer is undefined).
//
// Returns the tokens in their order of appearance. Runs of whitespace are
// collapsed, so an empty or all-blank string yields an empty vector.
inline std::vector<std::string> splitSpaceSeparatedString(char *str)
{
  if (!str) {
    return std::vector<std::string>();
  }
  std::stringstream ss(std::string(str).c_str());
  // operator>> on std::string skips leading whitespace, so iterating the
  // stream yields exactly one element per token.
  std::istream_iterator<std::string> beg(ss), end;
  return std::vector<std::string>(beg, end);
}
// Helper function that returns OpenCL mangled kernel name.
inline std::string
aclutOpenclMangledKernelName(const std::string& kernel_name)
{
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclKernel);
assert(sym && "symbol not found");
return std::string("&") + sym->str[PRE] + kernel_name + sym->str[POST];
}
// Helper function that returns OpenCL mangled kernel metadata symbol name.
inline std::string
aclutOpenclMangledKernelMetadataName(const std::string& kernel_name)
{
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta);
assert(sym && "symbol not found");
return sym->str[PRE] + aclutOpenclMangledKernelName(kernel_name) + sym->str[POST];
}
#ifdef WITH_TARGET_HSAIL
// Helper function that updates metadata for all the kernels in binary;
// the updated attribute is the number of hidden kernel arguments.
//
// cl  - compiler handle forwarded to the acl* calls
// bin - binary whose per-kernel metadata symbols are rewritten
// num - value stored into aclMetadata::numHiddenKernelArgs of every kernel
//
// Returns ACL_SUCCESS when nothing has to change (num equals
// MAX_HIDDEN_KERNARGS_NUM, or the binary reports no kernel names) or when
// every kernel was updated. Otherwise returns the error code of the first
// failing acl* call, or ACL_ELF_ERROR when a kernel's metadata symbol is
// missing or empty. Kernels processed before a failure stay updated.
inline acl_error
aclutUpdateMetadataWithHiddenKernargsNum(aclCompiler* cl, aclBinary* bin, uint32_t num) {
  if (num == MAX_HIDDEN_KERNARGS_NUM) {
    return ACL_SUCCESS;
  }
  const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta);
  assert(sym && "symbol not found");
  aclSections secID = sym->sections[0];

  // First query only asks for the size of the kernel-name list.
  size_t kernelNamesSize = 0;
  acl_error error_code = aclQueryInfo(cl, bin, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
  if (error_code != ACL_SUCCESS) {
    return error_code;
  }
  if (kernelNamesSize == 0) {
    // No kernels: nothing to update, and nothing that could be parsed.
    return ACL_SUCCESS;
  }

  // The buffer is owned by a vector so it is released correctly on every
  // path (the previous code paired new[] with scalar delete). One extra
  // zeroed byte guarantees NUL termination for the tokenizer below.
  std::vector<char> kernelNames(kernelNamesSize + 1, '\0');
  error_code = aclQueryInfo(cl, bin, RT_KERNEL_NAMES, NULL, &kernelNames[0], &kernelNamesSize);
  if (error_code != ACL_SUCCESS) {
    return error_code;
  }
  std::vector<std::string> vKernels = splitSpaceSeparatedString(&kernelNames[0]);

  size_t roSize = 0;
  for (auto it = vKernels.begin(); it != vKernels.end(); ++it) {
    std::string symbol = aclutOpenclMangledKernelMetadataName(*it);
    void* roSec = const_cast<void*>(aclExtractSymbol(cl, bin, &roSize, secID, symbol.c_str(), &error_code));
    if (error_code != ACL_SUCCESS) {
      return error_code;
    }
    if (!roSec || roSize == 0) {
      return ACL_ELF_ERROR;
    }
    // NOTE(review): the extracted symbol is patched in place and the same
    // pointer is re-inserted after aclRemoveSymbol; this assumes the storage
    // returned by aclExtractSymbol stays valid across the removal - confirm.
    aclMetadata *md = reinterpret_cast<aclMetadata*>(roSec);
    md->numHiddenKernelArgs = num;
    error_code = aclRemoveSymbol(cl, bin, secID, symbol.c_str());
    if (error_code != ACL_SUCCESS) {
      return error_code;
    }
    error_code = aclInsertSymbol(cl, bin, md, roSize, secID, symbol.c_str());
    if (error_code != ACL_SUCCESS) {
      return error_code;
    }
  }
  return error_code;
}
#endif
inline bool is64BitTarget(const aclTargetInfo& target)
{
return (target.arch_id == aclX64 ||
@@ -184,15 +265,6 @@ enum scId {
SC_LAST,
};
// Splits a NUL-terminated C string into its whitespace-separated tokens.
//
// str - text to tokenize; may be null, in which case no tokens are produced
//       (constructing a std::string from a null pointer is undefined).
//
// Returns the tokens in their order of appearance. Runs of whitespace are
// collapsed, so an empty or all-blank string yields an empty vector.
inline std::vector<std::string> splitSpaceSeparatedString(char *str)
{
  if (!str) {
    return std::vector<std::string>();
  }
  std::string s(str);
  std::stringstream ss(s);
  // operator>> on std::string skips leading whitespace, so iterating the
  // stream yields exactly one element per token.
  std::istream_iterator<std::string> beg(ss), end;
  return std::vector<std::string>(beg, end);
}
// Helper function that allocates an aligned memory.
inline void*
alignedMalloc(size_t size, size_t alignment)
+10 -2
Просмотреть файл
@@ -604,6 +604,14 @@ Kernel::~Kernel()
delete signature_;
}
std::string
Kernel::openclMangledName(const std::string& name)
{
const oclBIFSymbolStruct* bifSym = findBIF30SymStruct(symOpenclKernel);
assert(bifSym && "symbol not found");
return std::string("&") + bifSym->str[bif::PRE] + name + bifSym->str[bif::POST];
}
void
Memory::saveMapInfo(
const amd::Coord3D origin,
@@ -1246,7 +1254,7 @@ ClBinary::getBIFSymbol(unsigned int symbolID) const
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF20, nSymbols, symID);
assert(symb && "BIF20 symbol with symbolID not found");
if (symb) {
return std::string(symb->str[PRE]) + std::string(symb->str[POST]);
return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]);
}
break;
}
@@ -1255,7 +1263,7 @@ ClBinary::getBIFSymbol(unsigned int symbolID) const
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF30, nSymbols, symID);
assert(symb && "BIF30 symbol with symbolID not found");
if (symb) {
return std::string(symb->str[PRE]) + std::string(symb->str[POST]);
return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]);
}
break;
}
+1 -1
Просмотреть файл
@@ -940,7 +940,7 @@ public:
//! Gives read-only access to the accumulated build log text
const std::string& buildLog() const
{
    return buildLog_;
}
static std::string openclMangledName(const std::string& name) { return "&__OpenCL_" + name + "_kernel"; }
static std::string openclMangledName(const std::string& name);
protected:
std::string name_; //!< kernel name
+42 -37
Просмотреть файл
@@ -9,7 +9,6 @@
#include "device/gpu/gpusched.hpp"
#include "platform/commandqueue.hpp"
#include "utils/options.hpp"
#include "utils/bif_section_labels.hpp"
#include "acl.h"
#include "SCShadersR678XXCommon.h"
@@ -3432,7 +3431,7 @@ HSAILKernel::initArgList(const aclArgData* aclArg)
size_t offset = 0;
// Reserved arguments for HSAIL launch
aclArg += ExtraArguments;
aclArg += MaxExtraArgumentsNum;
for (uint i = 0; aclArg->struct_size != 0; i++, aclArg++) {
desc.name_ = arguments_[i]->name_.c_str();
desc.type_ = GetOclType(aclArg);
@@ -3479,7 +3478,7 @@ HSAILKernel::initHsailArgs(const aclArgData* aclArg)
int offset = 0;
// Reserved arguments for HSAIL launch
aclArg += ExtraArguments;
aclArg += MaxExtraArgumentsNum;
// Iterate through the each kernel argument
for (; aclArg->struct_size != 0; aclArg++) {
@@ -3569,7 +3568,8 @@ HSAILKernel::initPrintf(const aclPrintfFmt* aclPrintf)
HSAILKernel::HSAILKernel(std::string name,
HSAILProgram* prog,
std::string compileOptions)
std::string compileOptions,
uint extraArgsNum)
: device::Kernel(name)
, compileOptions_(compileOptions)
, dev_(prog->dev())
@@ -3578,6 +3578,7 @@ HSAILKernel::HSAILKernel(std::string name,
, code_(NULL)
, codeSize_(0)
, hwMetaData_(NULL)
, extraArgumentsNum_(extraArgsNum)
{
hsa_ = true;
}
@@ -3598,14 +3599,16 @@ HSAILKernel::~HSAILKernel()
bool
HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize)
{
acl_error error;
const oclBIFSymbolStruct* bifSym = findBIF30SymStruct(symOpenclKernel);
assert(bifSym && "symbol not found");
std::string openClKernelName(std::string("&") + bifSym->str[PRE] + name() + bifSym->str[POST]);
if (extraArgumentsNum_ > MaxExtraArgumentsNum) {
LogError("Failed to initialize kernel: extra arguments number is bigger than is supported");
return false;
}
acl_error error = ACL_SUCCESS;
std::string openClKernelName = openclMangledName(name());
//compile kernel down to ISA
if (finalize) {
std::string options(compileOptions_.c_str());
flags_.internalKernel_ = (compileOptions_.find("-cl-internal-kernel") !=
flags_.internalKernel_ = (compileOptions_.find("-cl-internal-kernel") !=
std::string::npos) ? true: false;
options.append(" -just-kernel=");
options.append(openClKernelName.c_str());
@@ -3618,7 +3621,7 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize)
options.c_str(), ACL_TYPE_CG, ACL_TYPE_ISA, NULL);
buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
if (error != ACL_SUCCESS) {
LogError("Failed to finalize");
LogError("Failed to finalize kernel");
return false;
}
}
@@ -3900,35 +3903,37 @@ HSAILKernel::loadArguments(
address aqlStruct = gpu.cb(1)->sysMemCopy();
bool srdResource = false;
// The HLC generates 3 additional arguments for the global offsets
//and fourth argument is the printf_buffer pointer
size_t offsetSize[HSAILKernel::ExtraArguments] = { 0, 0, 0, 0, 0, 0 };
for (uint i = 0; i < sizes.dimensions(); ++i) {
offsetSize[i] = sizes.offset()[i];
if (extraArgumentsNum_ > 0) {
assert(MaxExtraArgumentsNum >= 6 && "MaxExtraArgumentsNum has changed, the below algorithm should be changed accordingly");
size_t extraArgs[MaxExtraArgumentsNum] = { 0, 0, 0, 0, 0, 0 };
// The HLC generates up to 3 additional arguments for the global offsets
for (uint i = 0; i < sizes.dimensions(); ++i) {
extraArgs[i] = sizes.offset()[i];
}
// Check if the kernel may have printf output
if ((printfInfo().size() > 0) &&
// and printf buffer was allocated
(gpu.printfDbgHSA().dbgBuffer() != NULL)) {
// and set the fourth argument as the printf_buffer pointer
extraArgs[3] = static_cast<size_t>(gpu.printfDbgHSA().dbgBuffer()->vmAddress());
memList.push_back(gpu.printfDbgHSA().dbgBuffer());
}
if (dynamicParallelism()) {
// Provide the host parent AQL wrap object to the kernel
AmdAqlWrap* wrap = reinterpret_cast<AmdAqlWrap*>(aqlStruct);
memset(wrap, 0, sizeof(AmdAqlWrap));
wrap->state = AQL_WRAP_BUSY;
ConstBuffer* cb = gpu.constBufs_[1];
cb->uploadDataToHw(sizeof(AmdAqlWrap));
*vmParentWrap = cb->vmAddress() + cb->wrtOffset();
// and set 5th & 6th arguments
extraArgs[4] = vmDefQueue;
extraArgs[5] = *vmParentWrap;
memList.push_back(cb);
}
WriteAqlArg(&aqlArgBuf, extraArgs, sizeof(size_t)*extraArgumentsNum_, sizeof(size_t));
}
if (dynamicParallelism()) {
// Provide the host parent AQL wrap object to the kernel
AmdAqlWrap* wrap = reinterpret_cast<AmdAqlWrap*>(aqlStruct);
memset(wrap, 0, sizeof(AmdAqlWrap));
wrap->state = AQL_WRAP_BUSY;
ConstBuffer* cb = gpu.constBufs_[1];
cb->uploadDataToHw(sizeof(AmdAqlWrap));
*vmParentWrap = cb->vmAddress() + cb->wrtOffset();
offsetSize[4] = vmDefQueue;
offsetSize[5] = *vmParentWrap;
memList.push_back(cb);
}
// Check if the kernel may have printf output
if ((printfInfo().size() > 0) &&
// and printf buffer was allocated
(gpu.printfDbgHSA().dbgBuffer() != NULL)) {
offsetSize[3] = static_cast<size_t>(gpu.printfDbgHSA().dbgBuffer()->vmAddress());
memList.push_back(gpu.printfDbgHSA().dbgBuffer());
}
WriteAqlArg(&aqlArgBuf, offsetSize, sizeof(offsetSize), sizeof(size_t));
const amd::KernelSignature& signature = kernel.signature();
const amd::KernelParameters& kernelParams = kernel.parameters();
+9 -3
Просмотреть файл
@@ -845,12 +845,13 @@ public:
uint numElem_; //!< Number of elements
};
// Global offsets located in the first 3 elements
static const uint ExtraArguments = 6;
// Max number of possible extra (hidden) kernel arguments
static const uint MaxExtraArgumentsNum = 6;
HSAILKernel(std::string name,
HSAILProgram* prog,
std::string compileOptions);
std::string compileOptions,
uint extraArgsNum);
virtual ~HSAILKernel();
@@ -928,6 +929,9 @@ public:
//! Gives this kernel's index within its program
uint index() const
{
    return index_;
}
//! Gives the number of extra (hidden) arguments of this kernel
uint extraArgumentsNum() const
{
    return extraArgumentsNum_;
}
private:
//! Disable copy constructor
HSAILKernel(const HSAILKernel&);
@@ -966,6 +970,8 @@ private:
char* hwMetaData_; //!< SI metadata
uint extraArgumentsNum_; //!< Number of extra (hidden) kernel arguments
union Flags {
struct {
uint imageEna_: 1; //!< Kernel uses images
+18 -7
Просмотреть файл
@@ -1974,7 +1974,7 @@ HSAILProgram::getNextCompilationStageFromBinary(amd::option::Options* options) {
break;
const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions);
assert(symbol && "symbol not found");
std::string symName = std::string(symbol->str[PRE]) + std::string(symbol->str[POST]);
std::string symName = std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]);
size_t symSize = 0;
const void *opts = aclExtractSymbol(dev().hsaCompiler(),
binaryElf_, &symSize, aclCOMMENT, symName.c_str(), &errorCode);
@@ -2095,21 +2095,21 @@ HSAILProgram::linkImpl(amd::option::Options* options)
}
hsa_status_t status = executable_->LoadCodeObject(agent, code_object, NULL);
if (status != HSA_STATUS_SUCCESS) {
buildLog_ += "Error while HSA Loader phase: loading HSA Code Object \n";
buildLog_ += "Error while HSA Loader phase: loading HSA Code Object\n";
return false;
}
}
size_t kernelNamesSize = 0;
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error while Finalization phase: kernel names query from the ELF failed\n";
buildLog_ += "Error while Finalization phase: Kernel names size querying from the ELF failed\n";
return false;
}
if (!isNull() && kernelNamesSize > 0) {
char* kernelNames = new char[kernelNamesSize];
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_KERNEL_NAMES, NULL, kernelNames, &kernelNamesSize);
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error while Finalization phase: kernel's Metadata is corrupted in the ELF\n";
buildLog_ += "Error while Finalization phase: Kernel names querying from the ELF failed\n";
delete kernelNames;
return false;
}
@@ -2117,11 +2117,22 @@ HSAILProgram::linkImpl(amd::option::Options* options)
delete kernelNames;
std::vector<std::string>::iterator it = vKernels.begin();
bool dynamicParallelism = false;
aclMetadata md;
md.numHiddenKernelArgs = 0;
size_t sizeOfnumHiddenKernelArgs = sizeof(md.numHiddenKernelArgs);
for (it; it != vKernels.end(); ++it) {
std::string kernelName = *it;
HSAILKernel *aKernel = new HSAILKernel(kernelName, this, options->origOptionStr + hsailOptions());
std::string kernelName(*it);
std::string openclKernelName = Kernel::openclMangledName(kernelName);
errorCode = aclQueryInfo(dev().hsaCompiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
openclKernelName.c_str(), &md.numHiddenKernelArgs, &sizeOfnumHiddenKernelArgs);
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error while Finalization phase: Kernel extra arguments count querying from the ELF failed\n";
return false;
}
HSAILKernel *aKernel = new HSAILKernel(kernelName, this, options->origOptionStr + hsailOptions(),
md.numHiddenKernelArgs);
kernels()[kernelName] = aKernel;
amd::hsa::loader::Symbol *sym = executable_->GetSymbol("", Kernel::openclMangledName(kernelName).c_str(), agent, 0);
amd::hsa::loader::Symbol *sym = executable_->GetSymbol("", openclKernelName.c_str(), agent, 0);
if (!sym) {
LogError("Failed to get kernel ISA code");
return false;
+2 -2
Просмотреть файл
@@ -1872,9 +1872,9 @@ VirtualGPU::submitKernelInternalHSA(
gpuDefQueue->virtualQueue_->vmAddress();
address argum = gpuDefQueue->virtualQueue_->data() + offsArg;
print << "Kernel: " << child->name() << "\n";
static const char* Names[HSAILKernel::ExtraArguments] = {
static const char* Names[HSAILKernel::MaxExtraArgumentsNum] = {
"Offset0: ", "Offset1: ","Offset2: ","PrintfBuf: ", "VqueuePtr: ", "AqlWrap: "};
for (j = 0; j < HSAILKernel::ExtraArguments; ++j) {
for (j = 0; j < child->extraArgumentsNum(); ++j) {
print << "\t" << Names[j] << *(size_t*)argum;
print << "\n";
argum += sizeof(size_t);
+3 -3
Просмотреть файл
@@ -295,14 +295,14 @@ namespace oclhsa {
std::string openClKernelName("&__OpenCL_" + kernelName + "_kernel");
const oclBIFSymbolStruct* isaSymbolStruct = findBIF30SymStruct(symISABinary);
assert(isaSymbolStruct && "symbol not found");
std::string kernelIsaSymbol = isaSymbolStruct->str[PRE] +
openClKernelName + isaSymbolStruct->str[POST];
std::string kernelIsaSymbol = isaSymbolStruct->str[bif::PRE] +
openClKernelName + isaSymbolStruct->str[bif::POST];
const oclBIFSymbolStruct* debugSymbolStruct = findBIF30SymStruct(symDebugInfo);
assert(debugSymbolStruct && "symbol not found");
//For debug symbols, the PRE is used for BRIG debug and the POST is used for
//ISA debug
std::string kernelIsaDebugSymbol = debugSymbolStruct->str[POST] + openClKernelName;
std::string kernelIsaDebugSymbol = debugSymbolStruct->str[bif::POST] + openClKernelName;
//Extract the ISA section
size_t symbolSize;