diff --git a/rocclr/device/devprogram.cpp b/rocclr/device/devprogram.cpp index 02e74b6e0a..3dfd455478 100644 --- a/rocclr/device/devprogram.cpp +++ b/rocclr/device/devprogram.cpp @@ -78,6 +78,8 @@ Program::Program(amd::Device& device, amd::Program& owner) : device_(device), owner_(owner), type_(TYPE_NONE), + initKernels_(), + finiKernels_(), flags_(0), clBinary_(nullptr), llvmBinary_(), @@ -119,6 +121,8 @@ Program::~Program() { // ================================================================================================ void Program::clear() { + initKernels_.clear(); + finiKernels_.clear(); // Destroy all device kernels for (const auto& it : kernels_) { delete it.second; @@ -2132,6 +2136,16 @@ bool Program::initClBinary(const char* binaryIn, size_t size, amd::Os::FileDesc return clBinary()->setBinary(bin, sz, (decryptedBin != nullptr), fdesc, foffset, uri); } +// ================================================================================================ +void Program::addKernel(Kernel* k) { + kernels_[k->name()] = k; + if (k->isInitKernel()) { + initKernels_.push_back(k); + } else if (k->isFiniKernel()) { + finiKernels_.push_back(k); + } +} + // ================================================================================================ bool Program::setBinary(const char* binaryIn, size_t size, const device::Program* same_dev_prog, amd::Os::FileDesc fdesc, size_t foffset, std::string uri) { @@ -2915,60 +2929,56 @@ bool Program::getGlobalVarFromCodeObj(std::vector* var_names) const // Init Fini Launch Lock amd::Monitor Program::initFiniLock_(true); -bool Program::runInitFiniKernel(kernel_kind_t kind) const { +bool Program::runInitFiniKernel(const std::vector& kernels) const { amd::HostQueue* queue = nullptr; - for (const auto& i : kernels_) { - const auto &kernel = i.second; - if ((kernel->isInitKernel() && kind == kernel_kind_t::InitKernel) || - (kernel->isFiniKernel() && kind == kernel_kind_t::FiniKernel)) { - amd::ScopedLock sl(initFiniLock_); + for (const auto& kernel: kernels) { + amd::ScopedLock sl(initFiniLock_); + if (queue == nullptr) { + queue = new amd::HostQueue(device_().context(), device_(), 0); if (queue == nullptr) { - queue = new amd::HostQueue(device_().context(), device_(), 0); - if (queue == nullptr) { - LogError("Unable to create queue"); - return false; - } - queue->create(); - } - - LogPrintfInfo("%s is marked init/fini", i.first.c_str()); - - size_t globalWorkOffset[3] = {0}; - size_t globalWorkSize[3] = {1, 1, 1}; - size_t localWorkSize[3] = {1, 1, 1}; - amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); - amd::Command::EventWaitList waitList; - - auto symbol = owner_.findSymbol(kernel->name().c_str()); - amd::Kernel* k = new amd::Kernel(owner_, *symbol, kernel->name().c_str()); - if (!k) { - queue->release(); - LogError("Unable to create kernel"); + LogError("Unable to create queue"); return false; } - - amd::NDRangeKernelCommand* kernelCommand = - new amd::NDRangeKernelCommand(*queue, waitList, *k, ndrange); - if (!kernelCommand) { - LogError("Unale to allocate memory to launch kernel"); - k->release(); - queue->release(); - return false; - } - if (CL_SUCCESS != kernelCommand->captureAndValidate()) { - LogError("Kernel Capture and Validate failed"); - kernelCommand->release(); - k->release(); - queue->release(); - return false; - } - kernelCommand->enqueue(); - queue->finish(); - k->release(); - kernelCommand->release(); + queue->create(); } + + LogPrintfInfo("%s is marked init/fini", kernel->name().c_str()); + + size_t globalWorkOffset[3] = {0}; + size_t globalWorkSize[3] = {1, 1, 1}; + size_t localWorkSize[3] = {1, 1, 1}; + amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); + amd::Command::EventWaitList waitList; + + auto symbol = owner_.findSymbol(kernel->name().c_str()); + amd::Kernel* k = new amd::Kernel(owner_, *symbol, kernel->name().c_str()); + if (!k) { + queue->release(); + LogError("Unable to create kernel"); + return false; + } + + amd::NDRangeKernelCommand* kernelCommand = + new amd::NDRangeKernelCommand(*queue, waitList, *k, ndrange); + if (!kernelCommand) { + LogError("Unale to allocate memory to launch kernel"); + k->release(); + queue->release(); + return false; + } + if (CL_SUCCESS != kernelCommand->captureAndValidate()) { + LogError("Kernel Capture and Validate failed"); + kernelCommand->release(); + k->release(); + queue->release(); + return false; + } + kernelCommand->enqueue(); + queue->finish(); + k->release(); + kernelCommand->release(); } if (queue != nullptr) { @@ -2977,7 +2987,7 @@ bool Program::runInitFiniKernel(kernel_kind_t kind) const { return true; } -bool Program::runInitKernels() { return runInitFiniKernel(kernel_kind_t::InitKernel); } +bool Program::runInitKernels() { return runInitFiniKernel(initKernels_); } -bool Program::runFiniKernels() { return runInitFiniKernel(kernel_kind_t::FiniKernel); } +bool Program::runFiniKernels() { return runInitFiniKernel(finiKernels_); } } /* namespace amd::device*/ diff --git a/rocclr/device/devprogram.hpp b/rocclr/device/devprogram.hpp index 81822ba9b6..437ce80c46 100644 --- a/rocclr/device/devprogram.hpp +++ b/rocclr/device/devprogram.hpp @@ -115,8 +115,10 @@ class Program : public amd::HeapObject { kernels_t kernels_; //!< The kernel entry points this binary. type_t type_; //!< type of this program - typedef enum { InitKernel = 0, FiniKernel } kernel_kind_t; //!< Kernel kind - bool runInitFiniKernel(kernel_kind_t) const; + std::vector initKernels_; //!< Init kernels + std::vector finiKernels_; //!< Fini kernels + + bool runInitFiniKernel(const std::vector& kernels) const; #if defined(WITH_COMPILER_LIB) static amd::Monitor buildLock_; //!< Global build lock for HSAIL which isn't thread-safe @@ -224,6 +226,9 @@ class Program : public amd::HeapObject { const kernels_t& kernels() const { return kernels_; } kernels_t& kernels() { return kernels_; } + //! Add kernel to the map and init fini kernel vector. + void addKernel(Kernel* k); + //! Return the binary image. inline const binary_t binary() const; inline binary_t binary(); diff --git a/rocclr/device/pal/palprogram.cpp b/rocclr/device/pal/palprogram.cpp index 30b505281a..f79b658f24 100644 --- a/rocclr/device/pal/palprogram.cpp +++ b/rocclr/device/pal/palprogram.cpp @@ -296,7 +296,7 @@ bool HSAILProgram::createKernels(void* binary, size_t binSize, bool useUniformWo std::string kernelName(it); HSAILKernel* aKernel = new HSAILKernel(kernelName, this, internalKernel); - kernels()[kernelName] = aKernel; + addKernel(aKernel); if (!aKernel->init()) { buildLog_ += "Error: Kernel initialization failed.\n"; @@ -764,7 +764,7 @@ bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUnifo buildLog_ += "[ROC][Kernel] Could not get Code Prop Meta Data \n"; return false; } - kernels()[kernelName] = kernel; + addKernel(kernel); if (codeObjectVer() < 5) { kernel->setUniformWorkGroupSize(useUniformWorkGroupSize); diff --git a/rocclr/device/rocm/rocprogram.cpp b/rocclr/device/rocm/rocprogram.cpp index fa62bf2908..faa90ea989 100644 --- a/rocclr/device/rocm/rocprogram.cpp +++ b/rocclr/device/rocm/rocprogram.cpp @@ -278,7 +278,7 @@ bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUnifo aKernel->setUniformWorkGroupSize(useUniformWorkGroupSize); } aKernel->setInternalKernelFlag(internalKernel); - kernels()[kernelName] = aKernel; + addKernel(aKernel); } return true; } diff --git a/rocclr/platform/program.cpp b/rocclr/platform/program.cpp index cfae3161f0..31561d2d52 100644 --- a/rocclr/platform/program.cpp +++ b/rocclr/platform/program.cpp @@ -93,6 +93,9 @@ Program::~Program() { void Program::unload() { for (const auto& it : devicePrograms_) { device::Program& devProgram = *(it.second); + if (!devProgram.isCodeObjectLoaded()) { + continue; + } if (!devProgram.runFiniKernels()) { LogError("Error running fini kernels for devprogram"); }