diff --git a/hipamd/src/hip_code_object.cpp b/hipamd/src/hip_code_object.cpp index 047ce5b301..224f0549d1 100644 --- a/hipamd/src/hip_code_object.cpp +++ b/hipamd/src/hip_code_object.cpp @@ -1379,13 +1379,10 @@ const char* StatCO::getStatFuncName(const void* hostFunction) { } hipError_t StatCO::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) { - amd::ScopedLock lock(sclock_); - const auto it = functions_.find(hostFunction); if (it == functions_.end()) { return hipErrorInvalidSymbol; } - return it->second->getStatFunc(hfunc, deviceId); } diff --git a/hipamd/src/hip_global.cpp b/hipamd/src/hip_global.cpp index 124b91090b..67f007efc0 100644 --- a/hipamd/src/hip_global.cpp +++ b/hipamd/src/hip_global.cpp @@ -90,7 +90,7 @@ DeviceVar::DeviceVar(std::string name, DeviceVar::~DeviceVar() { // device_ptr_ is being removed and its amd:Memory obj is being released/deleted during - // ihipFree in hip::StatCO::removeFatBinary however in DynCO path, it seems to bypass + // ihipFree in hip::StatCO::removeFatBinary however in DynCO path, it seems to bypass // ihipFree and hence it needs to be removed+released here. In order to avoid issue with // StatCO, It is better to check if mem obj is found. if (amd::MemObjMap::FindMemObj(device_ptr_) != nullptr && amd_mem_obj_ != nullptr) { @@ -155,21 +155,24 @@ bool Function::isValidDynFunc(const void* hfunc) { } hipError_t Function::getStatFunc(hipFunction_t* hfunc, int deviceId) { - guarantee(modules_ != nullptr, "Module not initialized"); - - if (dFunc_.size() != g_devices.size()) { + if (deviceId >= dFunc_.size()) { return hipErrorNoBinaryForGpu; } - + if (dFunc_[deviceId] != nullptr) { + *hfunc = dFunc_[deviceId]->asHipFunction(); + return hipSuccess; + } + amd::ScopedLock lock(fc_lock_); + // Check for the compiled kernel again, to make sure only one thread does compilation + if (dFunc_[deviceId] != nullptr) { + *hfunc = dFunc_[deviceId]->asHipFunction(); + return hipSuccess; + } hipModule_t hmod = nullptr; IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); - - if (dFunc_[deviceId] == nullptr) { - dFunc_[deviceId] = new DeviceFunc(name_, hmod); - } + dFunc_[deviceId] = new DeviceFunc(name_, hmod); *hfunc = dFunc_[deviceId]->asHipFunction(); - return hipSuccess; } diff --git a/hipamd/src/hip_global.hpp b/hipamd/src/hip_global.hpp index 22ba881809..65ecff6640 100644 --- a/hipamd/src/hip_global.hpp +++ b/hipamd/src/hip_global.hpp @@ -93,9 +93,10 @@ public: const std::string& name() const { return name_; } private: - std::vector dFunc_; //DeviceFuncObj per Device - std::string name_; //name of the func(not unique identifier) - FatBinaryInfo** modules_; // static module where it is referenced + std::vector dFunc_; //!< DeviceFuncObj per Device + std::string name_; //!< name of the func(not unique identifier) + FatBinaryInfo** modules_; //!< static module where it is referenced + amd::Monitor fc_lock_{true}; //!< Lock for the function create }; class Var {