diff --git a/projects/clr/rocclr/device/devkernel.cpp b/projects/clr/rocclr/device/devkernel.cpp index 2fd4a9ed72..60effb96ed 100644 --- a/projects/clr/rocclr/device/devkernel.cpp +++ b/projects/clr/rocclr/device/devkernel.cpp @@ -573,6 +573,9 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k case KernelField::SymbolName: kernel->SetSymbolName(buf); break; + case KernelField::Kind: + kernel->SetKernelKind(buf); + break; default: return AMD_COMGR_STATUS_ERROR; } @@ -1634,5 +1637,4 @@ void Kernel::InitPrintf(const aclPrintfFmt* aclPrintf) { } } #endif // defined(WITH_COMPILER_LIB) - } diff --git a/projects/clr/rocclr/device/devkernel.hpp b/projects/clr/rocclr/device/devkernel.hpp index 7fa0e8ba61..511e2116b1 100644 --- a/projects/clr/rocclr/device/devkernel.hpp +++ b/projects/clr/rocclr/device/devkernel.hpp @@ -225,7 +225,8 @@ enum class KernelField : uint8_t { NumVGPRs = 11, MaxFlatWorkGroupSize = 12, NumSpilledSGPRs = 13, - NumSpilledVGPRs = 14 + NumSpilledVGPRs = 14, + Kind = 15 }; static const std::map ArgFieldMapV3 = @@ -296,7 +297,8 @@ static const std::map KernelFieldMapV3 = {".vgpr_count", KernelField::NumVGPRs}, {".max_flat_workgroup_size", KernelField::MaxFlatWorkGroupSize}, {".sgpr_spill_count", KernelField::NumSpilledSGPRs}, - {".vgpr_spill_count", KernelField::NumSpilledVGPRs} + {".vgpr_spill_count", KernelField::NumSpilledVGPRs}, + {".kind", KernelField::Kind} }; #endif // defined(USE_COMGR_LIBRARY) @@ -476,6 +478,14 @@ class Kernel : public amd::HeapObject { void SetSymbolName(const std::string& name) { symbolName_ = name; } + void SetKernelKind(const std::string& kind) { + kind_ = (kind == "init") ? Init : ((kind == "fini") ? Fini : Normal); + } + + bool isInitKernel() const { return kind_ == Init; } + + bool isFiniKernel() const { return kind_ == Fini; } + protected: //! Initializes the abstraction layer kernel parameters #if defined(USE_COMGR_LIBRARY) @@ -547,6 +557,14 @@ class Kernel : public amd::HeapObject { Kernel& operator=(const Kernel&); std::unordered_map patchReferences_; //!< Patch table for references + + enum KernelKind{ + Normal = 0, + Init = 1, + Fini = 2 + }; + + KernelKind kind_{Normal}; //!< Kernel kind, is normal unless specified otherwise }; #if defined(USE_COMGR_LIBRARY) diff --git a/projects/clr/rocclr/device/devprogram.cpp b/projects/clr/rocclr/device/devprogram.cpp index 14c2999415..3a40c03ae5 100644 --- a/projects/clr/rocclr/device/devprogram.cpp +++ b/projects/clr/rocclr/device/devprogram.cpp @@ -18,6 +18,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "platform/command.hpp" +#include "platform/commandqueue.hpp" #include "platform/runtime.hpp" #include "platform/program.hpp" #include "platform/ndrange.hpp" @@ -2952,4 +2954,73 @@ bool Program::getGlobalVarFromCodeObj(std::vector* var_names) const return true; #endif } + +amd::Monitor Program::initFiniLock_("Init Fini Launch Lock", true); + +bool Program::runInitFiniKernel(kernel_kind_t kind) const { + amd::HostQueue* queue = nullptr; + + for (const auto& i : kernels_) { + LogPrintfInfo("For Init/Fini: Kernel Name: %s", i.first.c_str()); + const auto &kernel = i.second; + if ((kernel->isInitKernel() && kind == kernel_kind_t::InitKernel) || + (kernel->isFiniKernel() && kind == kernel_kind_t::FiniKernel)) { + amd::ScopedLock sl(initFiniLock_); + + if (queue == nullptr) { + queue = new amd::HostQueue(device_().context(), device_(), 0); + if (queue == nullptr) { + LogError("Unable to create queue"); + return false; + } + queue->create(); + } + + LogPrintfInfo("%s is marked init/fini", i.first.c_str()); + + size_t globalWorkOffset[3] = {0}; + size_t globalWorkSize[3] = {1, 1, 1}; + size_t localWorkSize[3] = {1, 1, 1}; + amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); + amd::Command::EventWaitList waitList; + + auto symbol = owner_.findSymbol(kernel->name().c_str()); + amd::Kernel* k = new amd::Kernel(owner_, *symbol, kernel->name().c_str()); + if (!k) { + queue->release(); + LogError("Unable to create kernel"); + return false; + } + + amd::NDRangeKernelCommand* kernelCommand = + new amd::NDRangeKernelCommand(*queue, waitList, *k, ndrange); + if (!kernelCommand) { + LogError("Unale to allocate memory to launch kernel"); + k->release(); + queue->release(); + return false; + } + if (CL_SUCCESS != kernelCommand->captureAndValidate()) { + LogError("Kernel Capture and Validate failed"); + kernelCommand->release(); + k->release(); + queue->release(); + return false; + } + kernelCommand->enqueue(); + queue->finish(); + k->release(); + kernelCommand->release(); + } + } + + if (queue != nullptr) { + queue->release(); + } + return true; +} + +bool Program::runInitKernels() { return runInitFiniKernel(kernel_kind_t::InitKernel); } + +bool Program::runFiniKernels() { return runInitFiniKernel(kernel_kind_t::FiniKernel); } } /* namespace device*/ diff --git a/projects/clr/rocclr/device/devprogram.hpp b/projects/clr/rocclr/device/devprogram.hpp index 761ab5de82..f8db998f9a 100644 --- a/projects/clr/rocclr/device/devprogram.hpp +++ b/projects/clr/rocclr/device/devprogram.hpp @@ -116,6 +116,9 @@ class Program : public amd::HeapObject { kernels_t kernels_; //!< The kernel entry points this binary. type_t type_; //!< type of this program + typedef enum { InitKernel = 0, FiniKernel } kernel_kind_t; //!< Kernel kind + bool runInitFiniKernel(kernel_kind_t) const; + protected: union { struct { @@ -158,6 +161,8 @@ class Program : public amd::HeapObject { uint32_t codeObjectVer_; //!< version of code object std::map kernelMetadataMap_; //!< Map of kernel metadata #endif + //! Sanitizer lock - lock when launching init/fini kernels + static amd::Monitor initFiniLock_; public: //! Construct a section. @@ -290,6 +295,12 @@ class Program : public amd::HeapObject { return false; } + //! Run kernels marked with "init" kind metadata + bool runInitKernels(); + + //! Run kernels marked with "fini" kind metadata + bool runFiniKernels(); + protected: //! pre-compile setup bool initBuild(amd::option::Options* options); @@ -385,6 +396,7 @@ class Program : public amd::HeapObject { bool defineUndefinedVars(); private: + //! Compile the device program with LC path bool compileImplLC(const std::string& sourceCode, const std::vector& headers, const char** headerIncludeNames, amd::option::Options* options, diff --git a/projects/clr/rocclr/platform/program.cpp b/projects/clr/rocclr/platform/program.cpp index 663e4b6289..3e57c99bc5 100644 --- a/projects/clr/rocclr/platform/program.cpp +++ b/projects/clr/rocclr/platform/program.cpp @@ -89,6 +89,15 @@ Program::~Program() { //! @todo Make sure we have destroyed all CPU specific objects } +void Program::unload() { + for (const auto& it : devicePrograms_) { + device::Program& devProgram = *(it.second); + if (!devProgram.runFiniKernels()) { + LogError("Error running fini kernels for devprogram"); + } + } +} + const Symbol* Program::findSymbol(const char* kernelName) const { // avoid seg. fault if the program has not built yet if (symbolTable_ == NULL) { @@ -621,6 +630,11 @@ bool Program::load(const std::vector& devices) { if (!devProgram.load()) { return false; } + + // Run kernels marked with init + if (!devProgram.runInitKernels()) { + return false; + } } return true; diff --git a/projects/clr/rocclr/platform/program.hpp b/projects/clr/rocclr/platform/program.hpp index f32d403ddd..f6976d04c5 100644 --- a/projects/clr/rocclr/platform/program.hpp +++ b/projects/clr/rocclr/platform/program.hpp @@ -235,6 +235,9 @@ class Program : public RuntimeObject { void setVarInfoCallBack(VarInfoCallback callback) { varcallback = callback; } + + //! Actions to perform during program unload + void unload(); }; /*! @}