SWDEV-277566 - Separate code object loading from building

Change-Id: I87b8178f55e8ef23762dfe11fab71665ba680f00
Этот коммит содержится в:
Jason Tang
2021-03-26 15:29:05 -04:00
коммит произвёл Jason Tang
родитель 509f528980
Коммит 211ba25b4e
13 изменённых файлов: 283 добавлений и 168 удалений
+4
Просмотреть файл
@@ -366,6 +366,10 @@ bool Device::BlitProgram::create(amd::Device* device, const char* extraKernels,
DevLogPrintfError("Build failed for Kernel: %s \n", kernels.c_str());
return false;
}
if (!program_->load()) {
DevLogPrintfError("Could not load the kernels: %s \n", kernels.c_str());
return false;
}
return true;
}
-4
Просмотреть файл
@@ -91,10 +91,6 @@ class Device;
struct KernelParameterDescriptor;
struct Coord3D;
namespace option {
class Options;
} // namespace option
//! @note: the defines match hip values
enum MemoryAdvice : uint32_t {
SetReadMostly = 1, ///< Data will mostly be read and only occassionally be written to
+63 -17
Просмотреть файл
@@ -1150,13 +1150,16 @@ bool Program::linkImplLC(amd::option::Options* options) {
case FILE_TYPE_ISA: {
amd::Comgr::destroy_data_set(inputs);
binary_t isaBinary = binary();
finfo_t isaFdesc = BinaryFd();
if (GPU_DUMP_CODE_OBJECT) {
dumpCodeObject(std::string{(const char*)isaBinary.first, isaBinary.second});
}
return setKernels(options, const_cast<void *>(isaBinary.first), isaBinary.second,
isaFdesc.first, isaFdesc.second, BinaryURI());
break;
if (!createKernels(const_cast<void *>(isaBinary.first), isaBinary.second,
options->oVariables->UniformWorkGroupSize, internal_)) {
buildLog_ += "Error: Cannot create kernels.\n";
return false;
}
return true;
}
default:
buildLog_ += "Error while Codegen phase: the binary is incomplete \n";
@@ -1284,8 +1287,9 @@ bool Program::linkImplLC(amd::option::Options* options) {
// Destroy original memory with executable after compilation
delete[] executable;
if (!setKernels(options, const_cast<void*>(clBinary()->data().first),
clBinary()->data().second)) {
if (!createKernels(const_cast<void*>(clBinary()->data().first), clBinary()->data().second,
options->oVariables->UniformWorkGroupSize, internal_)) {
buildLog_ += "Error: Cannot create kernels.\n";
return false;
}
@@ -1352,17 +1356,17 @@ bool Program::linkImplHSAIL(amd::option::Options* options) {
fin_options.append(" -sc-xnack-iommu");
}
if (device().settings().enableWave32Mode_) {
fin_options.append(" -force-wave-size-32");
}
if (device().settings().enableWave32Mode_) {
fin_options.append(" -force-wave-size-32");
}
if (device().settings().enableWgpMode_) {
fin_options.append(" -force-wgp-mode");
}
if (device().settings().enableWgpMode_) {
fin_options.append(" -force-wgp-mode");
}
if (device().settings().hsailExplicitXnack_) {
fin_options.append(" -xnack");
}
if (device().settings().hsailExplicitXnack_) {
fin_options.append(" -xnack");
}
errorCode = amd::Hsail::Compile(device().compiler(), binaryElf_, fin_options.c_str(), ACL_TYPE_CG,
ACL_TYPE_ISA, logFunction);
@@ -1382,8 +1386,8 @@ bool Program::linkImplHSAIL(amd::option::Options* options) {
}
// Call the device layer to setup all available kernels on the actual device
if (!setKernels(options, binary, binSize)) {
buildLog_ += "Error: Cannot set kernel \n";
if (!createKernels(binary, binSize, options->oVariables->UniformWorkGroupSize, internal_)) {
buildLog_ += "Error: Cannot create kernel.\n";
return false;
}
@@ -1772,6 +1776,48 @@ int32_t Program::build(const std::string& sourceCode, const char* origOptions,
return buildError();
}
// ================================================================================================
//! Load the device program with the HSAIL path.
//!
//! Extracts the aclTEXT section from binaryElf_ and passes it to setKernels().
//! Returns false if the extraction reports an error, or unconditionally when
//! the runtime is built without WITH_COMPILER_LIB.
bool Program::loadHSAIL() {
#if defined(WITH_COMPILER_LIB)
  acl_error status;
  size_t isaSize;
  const void* isa = amd::Hsail::ExtractSection(device().compiler(), binaryElf_, &isaSize,
                                               aclTEXT, &status);
  if (status == ACL_SUCCESS) {
    // Call the device layer to setup all available kernels on the actual device
    return setKernels(const_cast<void*>(isa), isaSize);
  }

  LogPrintfError("Error: cannot extract ISA from compiled binary.\n");
  return false;
#else
  return false;
#endif
}
// ================================================================================================
//! Load the device program with the LC (Lightning Compiler) path.
//!
//! Passes the program binary, its file descriptor/offset pair and its URI to
//! setKernels(). Always returns false when built without USE_COMGR_LIBRARY.
bool Program::loadLC() {
#if defined(USE_COMGR_LIBRARY)
  // Fetch each accessor once and pass its two halves on.
  const auto codeObject = binary();
  const auto fileInfo = BinaryFd();
  return setKernels(const_cast<void*>(codeObject.first), codeObject.second,
                    fileInfo.first, fileInfo.second, BinaryURI());
#else
  return false;
#endif
}
// ================================================================================================
bool Program::load() {
bool ret;
if (isLC()) {
ret = loadLC();
} else {
ret = loadHSAIL();
}
if (ret) {
coLoaded_ = 1;
}
return ret;
}
// ================================================================================================
std::vector<std::string> Program::ProcessOptions(amd::option::Options* options) {
std::vector<std::string> optionsVec;
+20 -5
Просмотреть файл
@@ -123,7 +123,8 @@ class Program : public amd::HeapObject {
uint32_t internal_ : 1; //!< Internal blit program
uint32_t isLC_ : 1; //!< LC was used for the program compilation
uint32_t hasGlobalStores_ : 1; //!< Program has writable program scope variables
uint32_t isHIP_ : 1; //!< Determine if the program is for HIP
uint32_t isHIP_ : 1; //!< Determine if the program is for HIP
uint32_t coLoaded_ : 1;       //!< Has the code object been loaded
};
uint32_t flags_; //!< Program flags
};
@@ -178,15 +179,18 @@ class Program : public amd::HeapObject {
const char** headerIncludeNames, const char* origOptions,
amd::option::Options* options);
//! Builds the device program.
//! Link the device program.
int32_t link(const std::vector<Program*>& inputPrograms, const char* origOptions,
amd::option::Options* options);
//! Builds the device program.
//! Build the device program.
int32_t build(const std::string& sourceCode, const char* origOptions,
amd::option::Options* options, const std::vector<std::string>& preCompiledHeaders);
//! Returns the device object, associated with this program.
//! Load the device program.
bool load();
//! Return the device object, associated with this program.
const amd::Device& device() const { return device_(); }
//! Return the compiler options used to build the program.
@@ -248,6 +252,9 @@ class Program : public amd::HeapObject {
//! Global variables are a part of the code segment
bool hasGlobalStores() const { return hasGlobalStores_; }
//! Return TRUE if the program has been loaded
bool isCodeObjectLoaded() const { return coLoaded_; }
#if defined(USE_COMGR_LIBRARY)
amd_comgr_metadata_node_t metadata() const { return metadata_; }
@@ -324,9 +331,11 @@ class Program : public amd::HeapObject {
//! return target info
virtual const aclTargetInfo& info() = 0;
#endif
virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) { return true; }
virtual bool setKernels(
amd::option::Options* options, void* binary, size_t binSize,
void* binary, size_t binSize,
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
std::string uri = std::string()) { return true; }
@@ -397,6 +406,12 @@ class Program : public amd::HeapObject {
//! Link the device program with HSAIL path
bool linkImplHSAIL(amd::option::Options* options);
//! Load the device program with LC path
bool loadLC();
//! Load the device program with HSAIL path
bool loadHSAIL();
#if defined(USE_COMGR_LIBRARY)
//! Dump the log data object to the build log, if a log data object is present
void extractBuildLog(amd_comgr_data_set_t dataSet);
+29 -36
Просмотреть файл
@@ -85,15 +85,26 @@ bool HSAILKernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t
return true;
}
bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) {
bool HSAILKernel::aqlCreateHWInfo() {
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
std::string openclKernelName = device::Kernel::openclMangledName(name());
amd::hsa::loader::Symbol* sym = prog().getSymbol(openclKernelName.c_str(), &agent);
if (!sym) {
LogPrintfError("Error: Getting kernel ISA code symbol %s from AMD HSA Code Object failed.\n",
openclKernelName.c_str());
return false;
}
amd_kernel_code_t* akc = &akc_;
if (!setKernelCode(sym, akc)) {
LogPrintfError("Error: setKernelCode() failed.\n");
return false;
}
if (!sym->GetInfo(HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE,
reinterpret_cast<void*>(&codeSize_))) {
LogPrintfError("Error: sym->GetInfo() failed.\n");
return false;
}
@@ -107,45 +118,30 @@ bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) {
return true;
}
HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions)
HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, bool internalKernel)
: device::Kernel(prog->device(), name, *prog),
compileOptions_(compileOptions),
index_(0),
code_(0),
codeSize_(0) {
flags_.hsa_ = true;
flags_.internalKernel_ = internalKernel;
}
HSAILKernel::~HSAILKernel() {}
bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
//! Setup after code object loading.
//!
//! With WITH_COMPILER_LIB this returns the result of aqlCreateHWInfo();
//! without it there is nothing to do and the call reports success.
bool HSAILKernel::postLoad() {
#if defined(WITH_COMPILER_LIB)
  return aqlCreateHWInfo();
#else
  return true;
#endif
}
bool HSAILKernel::init() {
#if defined(WITH_COMPILER_LIB)
acl_error error = ACL_SUCCESS;
std::string openClKernelName = openclMangledName(name());
flags_.internalKernel_ =
(compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
// compile kernel down to ISA
if (finalize) {
std::string options(compileOptions_.c_str());
options.append(" -just-kernel=");
options.append(openClKernelName.c_str());
// Append an option so that we can selectively enable a SCOption on CZ
// whenever IOMMUv2 is enabled.
if (palNullDevice().settings().svmFineGrainSystem_) {
options.append(" -sc-xnack-iommu");
}
error = amd::Hsail::Compile(palNullDevice().compiler(), prog().binaryElf(), options.c_str(), ACL_TYPE_CG,
ACL_TYPE_ISA, nullptr);
buildLog_ += amd::Hsail::GetCompilerLog(palNullDevice().compiler());
if (error != ACL_SUCCESS) {
LogError("Failed to finalize kernel");
return false;
}
}
if (!aqlCreateHWInfo(sym)) {
return false;
}
// Pull out metadata from the ELF
size_t sizeOfArgList;
@@ -437,13 +433,10 @@ const LightningProgram& LightningKernel::prog() const {
#if defined(USE_COMGR_LIBRARY)
bool LightningKernel::init() {
flags_.internalKernel_ =
(compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
if (!GetAttrCodePropMetadata()) {
return false;
}
return GetAttrCodePropMetadata();
}
bool LightningKernel::postLoad() {
if (codeObjectVer() == 2) {
symbolName_ = name();
}
@@ -451,7 +444,7 @@ bool LightningKernel::init() {
// Copy codeobject of this kernel from the program CPU segment
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
auto sym = prog().GetSymbol(symbolName().c_str(), &agent);
auto sym = prog().getSymbol(symbolName().c_str(), &agent);
if (!setKernelCode(sym, &akc_)) {
return false;
@@ -465,7 +458,7 @@ bool LightningKernel::init() {
amd::hsa::loader::Symbol* rth_symbol;
// Get the runtime handle symbol GPU address
rth_symbol = prog().GetSymbol(RuntimeHandle().c_str(), &agent);
rth_symbol = prog().getSymbol(RuntimeHandle().c_str(), &agent);
uint64_t symbol_address;
rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
+12 -11
Просмотреть файл
@@ -60,13 +60,15 @@ class LightningProgram;
*/
class HSAILKernel : public device::Kernel {
public:
HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions);
HSAILKernel(std::string name, HSAILProgram* prog, bool internalKernel);
virtual ~HSAILKernel();
//! Initializes the metadata required for this kernel,
//! finalizes the kernel if needed
bool init(amd::hsa::loader::Symbol* sym, bool finalize = false);
bool init();
//! Setup after code object loading
bool postLoad();
//! Returns PAL, possibly null, device object, associated with this kernel.
const NullDevice& palNullDevice() const { return reinterpret_cast<const NullDevice&>(dev_); }
@@ -122,7 +124,7 @@ class HSAILKernel : public device::Kernel {
protected:
//! Creates AQL kernel HW info
bool aqlCreateHWInfo(amd::hsa::loader::Symbol* sym);
bool aqlCreateHWInfo();
//! Get the kernel code and copy the code object from the program CPU segment
bool setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t* akc);
@@ -131,7 +133,6 @@ class HSAILKernel : public device::Kernel {
void setWorkGroupInfo(const uint32_t privateSegmentSize, const uint32_t groupSegmentSize,
const uint16_t numSGPRs, const uint16_t numVGPRs);
std::string compileOptions_; //!< compile used for finalizing this kernel
amd_kernel_code_t akc_; //!< AQL kernel code on CPU
uint index_; //!< Kernel index in the program
@@ -141,18 +142,18 @@ class HSAILKernel : public device::Kernel {
class LightningKernel : public HSAILKernel {
public:
LightningKernel(const std::string& name, HSAILProgram* prog, const std::string& compileOptions)
: HSAILKernel(name, prog, compileOptions) {}
LightningKernel(const std::string& name, HSAILProgram* prog, bool internalKernel)
: HSAILKernel(name, prog, internalKernel) {}
//! Returns Lightning program associated with this kernel
const LightningProgram& prog() const;
//! Initializes the metadata required for this kernel,
bool init(amd::hsa::loader::Symbol* symbol);
#if defined(USE_COMGR_LIBRARY)
//! Initializes the metadata required for this kernel,
//! Initializes the metadata required for this kernel
bool init();
//! Setup after code object loading
bool postLoad();
#endif
};
+87 -68
Просмотреть файл
@@ -243,7 +243,44 @@ inline static std::vector<std::string> splitSpaceSeparatedString(char* str) {
return vec;
}
bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize,
//! Create and initialize one HSAILKernel per kernel name found in binaryElf_.
//!
//! The kernel names are queried from the compiler library (RT_KERNEL_NAMES) as
//! a single space separated string. Each kernel is registered in kernels()
//! before init() runs, so a kernel whose init() fails stays in the program's
//! kernel map.
//!
//! @param binary                  Not used by this implementation.
//! @param binSize                 Not used by this implementation.
//! @param useUniformWorkGroupSize Forwarded to every kernel via setUniformWorkGroupSize().
//! @param internalKernel          Forwarded to the HSAILKernel constructor.
//! @return false if a name query or a kernel's init() fails (the reason is
//!         appended to buildLog_), true otherwise, including when the binary
//!         reports no kernel names.
bool HSAILProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
                                 bool internalKernel) {
  // First query only asks for the size of the name list.
  size_t kernelNamesSize = 0;
  acl_error errorCode = amd::Hsail::QueryInfo(palNullDevice().compiler(), binaryElf_,
                                              RT_KERNEL_NAMES, nullptr, nullptr, &kernelNamesSize);
  if (errorCode != ACL_SUCCESS) {
    buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
    return false;
  }

  if (kernelNamesSize == 0) {
    return true;
  }

  // The vector owns the buffer, so it is released on every exit path.
  std::vector<char> kernelNames(kernelNamesSize);
  errorCode = amd::Hsail::QueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES,
                                    nullptr, kernelNames.data(), &kernelNamesSize);
  if (errorCode != ACL_SUCCESS) {
    buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
    return false;
  }

  const std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames.data());

  for (const auto& kernelName : vKernels) {
    HSAILKernel* aKernel = new HSAILKernel(kernelName, this, internalKernel);
    kernels()[kernelName] = aKernel;

    if (!aKernel->init()) {
      buildLog_ += "Error: Kernel initialization failed.\n";
      return false;
    }
    aKernel->setUniformWorkGroupSize(useUniformWorkGroupSize);
  }
  return true;
}
bool HSAILProgram::setKernels(void* binary, size_t binSize,
amd::Os::FileDesc fdesc, size_t foffset, std::string uri) {
#if defined(WITH_COMPILER_LIB)
// Stop compilation if it is an offline device - PAL runtime does not
@@ -275,56 +312,23 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
return false;
}
size_t kernelNamesSize = 0;
acl_error errorCode = amd::Hsail::QueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr,
nullptr, &kernelNamesSize);
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
return false;
bool dynamicParallelism = false;
for (auto& kit : kernels()) {
HSAILKernel* aKernel = static_cast<HSAILKernel*>(kit.second);
if (!aKernel->postLoad()) {
return false;
}
dynamicParallelism |= aKernel->dynamicParallelism();
// Find max scratch regs used in the program. It's used for scratch buffer preallocation
// with dynamic parallelism, since runtime doesn't know which child kernel will be called
maxScratchRegs_ =
std::max(static_cast<uint>(aKernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
maxVgprs_ = std::max(static_cast<uint>(aKernel->workGroupInfo()->usedVGPRs_), maxVgprs_);
}
if (kernelNamesSize > 0) {
char* kernelNames = new char[kernelNamesSize];
errorCode = amd::Hsail::QueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr, kernelNames,
&kernelNamesSize);
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
delete[] kernelNames;
return false;
}
std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames);
delete[] kernelNames;
bool dynamicParallelism = false;
for (const auto& it : vKernels) {
std::string kernelName(it);
std::string openclKernelName = device::Kernel::openclMangledName(kernelName);
HSAILKernel* aKernel =
new HSAILKernel(kernelName, this, options->origOptionStr + ProcessOptionsFlattened(options));
kernels()[kernelName] = aKernel;
amd::hsa::loader::Symbol* sym = executable_->GetSymbol(openclKernelName.c_str(), &agent);
if (!sym) {
buildLog_ += "Error: Getting kernel ISA code symbol '" + openclKernelName +
"' from AMD HSA Code Object failed. Kernel initialization failed.\n";
return false;
}
if (!aKernel->init(sym, false)) {
buildLog_ += "Error: Kernel '" + openclKernelName + "' initialization failed.\n";
return false;
}
buildLog_ += aKernel->buildLog();
aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
dynamicParallelism |= aKernel->dynamicParallelism();
// Find max scratch regs used in the program. It's used for scratch buffer preallocation
// with dynamic parallelism, since runtime doesn't know which child kernel will be called
maxScratchRegs_ =
std::max(static_cast<uint>(aKernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
maxVgprs_ = std::max(static_cast<uint>(aKernel->workGroupInfo()->usedVGPRs_), maxVgprs_);
}
// Allocate kernel table for device enqueuing
if (!isNull() && dynamicParallelism && !allocKernelTable()) {
return false;
}
// Allocate kernel table for device enqueuing
if (!isNull() && dynamicParallelism && !allocKernelTable()) {
return false;
}
DestroySegmentCpuAccess();
@@ -731,7 +735,34 @@ bool LightningProgram::createBinary(amd::option::Options* options) {
return true;
}
bool LightningProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize,
//! Create and initialize one LightningKernel per entry of kernelMetadataMap_.
//!
//! A kernel is added to kernels() only after its init() succeeded. A kernel
//! whose init() fails is destroyed here, because nothing else holds a pointer
//! to it yet.
//!
//! @param binary                  Code object passed to FindGlobalVarSize().
//! @param binSize                 Size of the code object passed to FindGlobalVarSize().
//! @param useUniformWorkGroupSize Forwarded to every kernel via setUniformWorkGroupSize().
//! @param internalKernel          Forwarded to the LightningKernel constructor.
//! @return false if FindGlobalVarSize() or a kernel's init() fails (the reason
//!         is appended to buildLog_) or a kernel cannot be allocated; true
//!         otherwise. Always true when built without USE_COMGR_LIBRARY.
bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
                                     bool internalKernel) {
#if defined(USE_COMGR_LIBRARY)
  // Find the size of global variables from the binary
  if (!FindGlobalVarSize(binary, binSize)) {
    buildLog_ += "Error: Cannot Find Global Var Sizes\n";
    return false;
  }

  for (const auto& kernelMeta : kernelMetadataMap_) {
    const auto& kernelName = kernelMeta.first;
    auto kernel = new LightningKernel(kernelName, this, internalKernel);
    if (kernel == nullptr) {
      return false;
    }
    if (!kernel->init()) {
      buildLog_ += "[ROC][Kernel] Could not get Code Prop Meta Data \n";
      // Not registered in kernels() yet, so it must be released here.
      delete kernel;
      return false;
    }
    kernels()[kernelName] = kernel;
    kernel->setUniformWorkGroupSize(useUniformWorkGroupSize);
  }
#endif
  return true;
}
bool LightningProgram::setKernels(void* binary, size_t binSize,
amd::Os::FileDesc fdesc, size_t foffset, std::string uri) {
#if defined(USE_COMGR_LIBRARY)
// Stop compilation if it is an offline device - PAL runtime does not
@@ -742,7 +773,7 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr);
if (executable_ == nullptr) {
buildLog_ += "Error: Executable for AMD HSA Code Object isn't created.\n";
LogPrintfError("Error: Executable for AMD HSA Code Object isn't created.\n");
return false;
}
@@ -753,33 +784,21 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr);
if (status != HSA_STATUS_SUCCESS) {
buildLog_ += "Error: AMD HSA Code Object loading failed.\n";
LogPrintfError("Error: AMD HSA Code Object loading failed.\n");
return false;
}
status = executable_->Freeze(nullptr);
if (status != HSA_STATUS_SUCCESS) {
buildLog_ += "Error: Freezing the executable failed: ";
LogPrintfError("Error: Freezing the executable failed.\n");
return false;
}
// Find the size of global variables from the binary
if (!FindGlobalVarSize(binary, binSize)) {
return false;
}
for (const auto& kernelMeta : kernelMetadataMap_) {
auto kernelName = kernelMeta.first;
auto kernel =
new LightningKernel(kernelName, this, options->origOptionStr + ProcessOptionsFlattened(options));
kernels()[kernelName] = kernel;
if (!kernel->init()) {
for (auto& kit : kernels()) {
LightningKernel* kernel = static_cast<LightningKernel*>(kit.second);
if (!kernel->postLoad()) {
return false;
}
kernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
// Find max scratch regs used in the program. It's used for scratch buffer preallocation
// with dynamic parallelism, since runtime doesn't know which child kernel will be called
maxScratchRegs_ =
+8 -3
Просмотреть файл
@@ -196,7 +196,7 @@ class HSAILProgram : public device::Program {
}
//! Get symbol by name
amd::hsa::loader::Symbol* GetSymbol(const char* symbol_name, const hsa_agent_t* agent) const {
amd::hsa::loader::Symbol* getSymbol(const char* symbol_name, const hsa_agent_t* agent) const {
return executable_->GetSymbol(symbol_name, agent);
}
@@ -208,8 +208,10 @@ class HSAILProgram : public device::Program {
#if defined(WITH_COMPILER_LIB)
virtual const aclTargetInfo& info();
#endif
virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) override;
virtual bool setKernels(amd::option::Options* options, void* binary, size_t binSize,
virtual bool setKernels(void* binary, size_t binSize,
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
std::string uri = std::string()) override;
@@ -267,7 +269,10 @@ class LightningProgram : public HSAILProgram {
virtual ~LightningProgram() {}
protected:
virtual bool setKernels(amd::option::Options* options, void* binary, size_t binSize,
virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) override;
virtual bool setKernels(void* binary, size_t binSize,
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
std::string uri = std::string()) override;
+3 -4
Просмотреть файл
@@ -45,11 +45,10 @@ Kernel::Kernel(std::string name, Program* prog)
#if defined(USE_COMGR_LIBRARY)
bool LightningKernel::init() {
if (!GetAttrCodePropMetadata()) {
LogError("[ROC][Kernel] Could not get Code Prop Meta Data \n");
return false;
}
return GetAttrCodePropMetadata();
}
bool LightningKernel::postLoad() {
// Set the kernel symbol name and size/alignment based on the kernel metadata
// NOTE: kernel name is used to get the kernel code handle in V2,
// but kernel symbol name is used in V3
+3
Просмотреть файл
@@ -79,6 +79,9 @@ class LightningKernel : public roc::Kernel {
//! Initializes the metadata required for this kernel
virtual bool init() final;
//! Setup after code object loading
bool postLoad();
};
} // namespace roc
+26 -17
Просмотреть файл
@@ -219,7 +219,7 @@ bool HSAILProgram::saveBinaryAndSetType(type_t type) {
return true;
}
bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize,
bool HSAILProgram::setKernels(void* binary, size_t binSize,
amd::Os::FileDesc fdesc, size_t foffset, std::string uri) {
return true;
}
@@ -263,7 +263,28 @@ bool LightningProgram::saveBinaryAndSetType(type_t type, void* rawBinary, size_t
return true;
}
bool LightningProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize,
bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) {
// Find the size of global variables from the binary
if (!FindGlobalVarSize(binary, binSize)) {
buildLog_ += "Error: Cannot Find Global Var Sizes\n";
return false;
}
for (const auto &kernelMeta : kernelMetadataMap_) {
const std::string kernelName = kernelMeta.first;
Kernel* aKernel = new roc::LightningKernel(kernelName, this);
if (!aKernel->init()) {
return false;
}
aKernel->setUniformWorkGroupSize(useUniformWorkGroupSize);
aKernel->setInternalKernelFlag(internalKernel);
kernels()[kernelName] = aKernel;
}
return true;
}
bool LightningProgram::setKernels(void* binary, size_t binSize,
amd::Os::FileDesc fdesc, size_t foffset, std::string uri) {
#if defined(USE_COMGR_LIBRARY)
// Stop compilation if it is an offline device - HSA runtime does not
@@ -272,13 +293,6 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
return true;
}
// Find the size of global variables from the binary
if (!FindGlobalVarSize(binary, binSize)) {
buildLog_ += "Error: Cannot Global Var Sizes ";
buildLog_ += "\n";
return false;
}
hsa_agent_t agent = rocDevice().getBackendDevice();
hsa_status_t status;
@@ -320,16 +334,11 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
return false;
}
for (const auto &kernelMeta : kernelMetadataMap_) {
const std::string kernelName = kernelMeta.first;
Kernel* aKernel = new roc::LightningKernel(kernelName, this);
if (!aKernel->init()) {
for (auto& kit : kernels()) {
LightningKernel* kernel = static_cast<LightningKernel*>(kit.second);
if (!kernel->postLoad()) {
return false;
}
aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
aKernel->setInternalKernelFlag(compileOptions_.find("-cl-internal-kernel") !=
std::string::npos);
kernels()[kernelName] = aKernel;
}
#endif // defined(USE_COMGR_LIBRARY)
return true;
+6 -3
Просмотреть файл
@@ -94,7 +94,7 @@ class HSAILProgram : public roc::Program {
protected:
bool createBinary(amd::option::Options* options) override { return true; }
virtual bool setKernels(amd::option::Options* options, void* binary, size_t binSize,
virtual bool setKernels(void* binary, size_t binSize,
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
std::string uri = std::string()) override;
@@ -117,9 +117,12 @@ protected:
private:
bool saveBinaryAndSetType(type_t type, void* rawBinary, size_t size);
bool setKernels(amd::option::Options* options, void* binary, size_t binSize,
bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) override final;
bool setKernels(void* binary, size_t binSize,
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
std::string uri = std::string()) final;
std::string uri = std::string()) override final;
};
/*@}*/} // namespace roc
+22
Просмотреть файл
@@ -622,6 +622,28 @@ int32_t Program::build(const std::vector<Device*>& devices, const char* options,
}
//! Load the code object of this program on the given devices.
//!
//! Holds buildLock_ for the whole call. Device programs that already report
//! isCodeObjectLoaded() are left untouched, so each code object is loaded at
//! most once.
//!
//! @param devices Devices to load on. An empty list selects every device in
//!                devicePrograms_.
//! @return false as soon as one device program fails to load, true otherwise.
bool Program::load(const std::vector<Device*>& devices) {
  ScopedLock sl(buildLock_);

  for (const auto& it : devicePrograms_) {
    const Device& device = *(it.first);

    // If devices is specified, only load code object for those devices:
    // skip every device that is NOT in the requested list.
    if (!devices.empty() &&
        std::find(devices.begin(), devices.end(), &device) == devices.end()) {
      continue;
    }

    device::Program& devProgram = *(it.second);
    // Only load the code object once
    if (devProgram.isCodeObjectLoaded()) {
      continue;
    }

    if (!devProgram.load()) {
      return false;
    }
  }
  return true;
}