From 17f613bfde1cce51948884757d82eabdca32b7d0 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 22 Apr 2019 15:19:24 -0400 Subject: [PATCH] P4 to Git Change 1772785 by cpaquot@cpaquot-ocl-lc-lnx on 2019/04/22 14:31:18 SWDEV-144570 - [HIP] Lazy build kernels to avoid overfilling dev memory. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#28 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#27 edit [ROCm/hip commit: 80fb2806434cc4c5d36c38e85687e8942f0a4e6e] --- projects/hip/api/hip/hip_internal.hpp | 23 +++-- projects/hip/api/hip/hip_platform.cpp | 133 +++++++++++++------------- 2 files changed, 82 insertions(+), 74 deletions(-) diff --git a/projects/hip/api/hip/hip_internal.hpp b/projects/hip/api/hip/hip_internal.hpp index ff5dc1673b..9606cd47d1 100644 --- a/projects/hip/api/hip/hip_internal.hpp +++ b/projects/hip/api/hip/hip_internal.hpp @@ -102,23 +102,30 @@ class PlatformState { public: struct RegisteredVar { public: - RegisteredVar(): hostVar_(nullptr), size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {} - RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr, amd::Memory* amd_mem_obj); + RegisteredVar(): size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {} ~RegisteredVar() {} hipDeviceptr_t getdeviceptr() const { return devicePtr_; }; size_t getvarsize() const { return size_; }; - private: - char* hostVar_; // Variable name in host code size_t size_; // Size of the variable hipDeviceptr_t devicePtr_; //Device Memory Address of the variable. amd::Memory* amd_mem_obj_; }; + struct DeviceFunction { + std::string deviceName; + std::vector< std::pair< hipModule_t, bool > >* modules; + std::vector functions; + }; + struct DeviceVar { + std::string hostVar; + std::vector< std::pair< hipModule_t, bool > >* modules; + std::vector rvars; + }; private: - std::unordered_map > functions_; - std::unordered_map > vars_; + std::unordered_map functions_; + std::unordered_map vars_; static PlatformState* platform_; @@ -129,8 +136,8 @@ public: return *platform_; } - void registerVar(const char* hostvar, const std::vector& rvar); - void registerFunction(const void* hostFunction, const std::vector& funcs); + void registerVar(const void* hostvar, const DeviceVar& var); + void registerFunction(const void* hostFunction, const DeviceFunction& func); hipFunction_t getFunc(const void* hostFunction, int deviceId); bool getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, diff --git a/projects/hip/api/hip/hip_platform.cpp b/projects/hip/api/hip/hip_platform.cpp index 36a79cf35f..54a8806d54 100644 --- a/projects/hip/api/hip/hip_platform.cpp +++ b/projects/hip/api/hip/hip_platform.cpp @@ -64,7 +64,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, hipDeviceptr_t* dptr, size_t* bytes); -extern "C" std::vector* __hipRegisterFatBinary(const void* data) +extern "C" std::vector< std::pair >* __hipRegisterFatBinary(const void* data) { HIP_INIT(); @@ -80,7 +80,7 @@ extern "C" std::vector* __hipRegisterFatBinary(const void* data) return nullptr; } - auto programs = new std::vector{g_devices.size()}; + auto programs = new std::vector< std::pair >{g_devices.size()}; const auto obheader = reinterpret_cast(fbwrapper->binary); const auto* desc = &obheader->desc[0]; @@ -115,9 +115,8 @@ extern "C" std::vector* __hipRegisterFatBinary(const void* data) if (program == nullptr) { return nullptr; } - if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], image, size) && - CL_SUCCESS == program->build(ctx->devices(), nullptr, nullptr, nullptr)) { - programs->at(dev) = reinterpret_cast(as_cl(program)); + if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], image, size)) { + programs->at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); } } } @@ -125,35 +124,45 @@ extern "C" std::vector* __hipRegisterFatBinary(const void* data) return programs; } -PlatformState::RegisteredVar::RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr, - amd::Memory* amd_mem_obj) : hostVar_(hostVar), - size_(size), devicePtr_(devicePtr), - amd_mem_obj_(amd_mem_obj) { - - /* Add the memory to the MemObjMap */ - amd::MemObjMap::AddMemObj(devicePtr_, amd_mem_obj_); -} - -void PlatformState::registerVar(const char* hostvar, - const std::vector& rvar) { +void PlatformState::registerVar(const void* hostvar, + const DeviceVar& rvar) { amd::ScopedLock lock(lock_); vars_.insert(std::make_pair(hostvar, rvar)); } void PlatformState::registerFunction(const void* hostFunction, - const std::vector& funcs) { + const DeviceFunction& func) { amd::ScopedLock lock(lock_); - functions_.insert(std::make_pair(hostFunction, funcs)); + functions_.insert(std::make_pair(hostFunction, func)); } hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { amd::ScopedLock lock(lock_); const auto it = functions_.find(hostFunction); if (it != functions_.cend()) { - return it->second[deviceId]; - } else { - return nullptr; + PlatformState::DeviceFunction& devFunc = it->second; + if (devFunc.functions[deviceId] == 0) { + hipModule_t module = (*devFunc.modules)[deviceId].first; + if (!(*devFunc.modules)[deviceId].second) { + amd::Program* program = as_amd(reinterpret_cast(module)); + if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + return nullptr; + } + (*devFunc.modules)[deviceId].second = true; + } + hipFunction_t function = nullptr; + if (hipSuccess == hipModuleGetFunction(&function, module, devFunc.deviceName.c_str()) && + function != nullptr) { + devFunc.functions[deviceId] = function; + } + else { + // tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for" + // " device %d\n", deviceName, deviceId); + } + } + return devFunc.functions[deviceId]; } + return nullptr; } bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, @@ -161,8 +170,32 @@ bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, amd::ScopedLock lock(lock_); const auto it = vars_.find(hostVar); if (it != vars_.cend()) { - *size_ptr = it->second[deviceId].getvarsize(); - *dev_ptr = it->second[deviceId].getdeviceptr(); + DeviceVar& dvar = it->second; + if (dvar.rvars[deviceId].getdeviceptr() == nullptr) { + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + amd::Memory* amd_mem_obj = nullptr; + + if (!(*dvar.modules)[deviceId].second) { + amd::Program* program = as_amd(reinterpret_cast((*dvar.modules)[deviceId].first)); + if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + return false; + } + (*dvar.modules)[deviceId].second = true; + } + if((hipSuccess == ihipCreateGlobalVarObj(dvar.hostVar.c_str(), (*dvar.modules)[deviceId].first, + &amd_mem_obj, &device_ptr, &sym_size)) + && (device_ptr != nullptr)) { + dvar.rvars[deviceId].size_ = sym_size; + dvar.rvars[deviceId].devicePtr_ = device_ptr; + dvar.rvars[deviceId].amd_mem_obj_ = amd_mem_obj; + amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj); + } else { + LogError("[HIP] __hipRegisterVar cannot find kernel for device \n"); + } + } + *size_ptr = dvar.rvars[deviceId].getvarsize(); + *dev_ptr = dvar.rvars[deviceId].getdeviceptr(); return true; } else { return false; @@ -190,7 +223,7 @@ void PlatformState::popExec(ihipExec_t& exec) { } extern "C" void __hipRegisterFunction( - std::vector* modules, + std::vector >* modules, const void* hostFunction, char* deviceFunction, const char* deviceName, @@ -203,21 +236,9 @@ extern "C" void __hipRegisterFunction( { HIP_INIT(); - std::vector functions{g_devices.size()}; + PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{ g_devices.size() }}; - for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) { - hipFunction_t function = nullptr; - if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName) && - function != nullptr) { - functions[deviceId] = function; - } - else { - // tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for" - // " device %d\n", deviceName, deviceId); - } - } - - PlatformState::instance().registerFunction(hostFunction, functions); + PlatformState::instance().registerFunction(hostFunction, func); } // Registers a device-side global variable. @@ -226,7 +247,7 @@ extern "C" void __hipRegisterFunction( // track of the value of the device side global variable between kernel // executions. extern "C" void __hipRegisterVar( - std::vector* modules, // The device modules containing code object + std::vector >* modules, // The device modules containing code object char* var, // The shadow variable in host code char* hostVar, // Variable name in host code char* deviceVar, // Variable name in device code @@ -237,38 +258,19 @@ extern "C" void __hipRegisterVar( { HIP_INIT(); - size_t sym_size = 0; - std::vector global_vars{g_devices.size()}; + PlatformState::DeviceVar dvar{ std::string{ hostVar }, modules, + std::vector{ g_devices.size() } }; - for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) { - hipDeviceptr_t device_ptr = nullptr; - amd::Memory* amd_mem_obj = nullptr; - - if((hipSuccess == ihipCreateGlobalVarObj(hostVar, modules->at(deviceId), &amd_mem_obj, - &device_ptr, &sym_size)) - && (device_ptr != nullptr)) { - - if (static_cast(size) != sym_size) { - LogError("[OCL] Size Mismatch with the HSA Symbol retrieved \n"); - } - - global_vars[deviceId] = PlatformState::RegisteredVar(hostVar, sym_size, device_ptr, amd_mem_obj); - - } else { - LogError("[OCL] __hipRegisterVar cannot find kernel for device \n"); - } - } - - PlatformState::instance().registerVar(hostVar, global_vars); + PlatformState::instance().registerVar(hostVar, dvar); } -extern "C" void __hipUnregisterFatBinary(std::vector* modules) +extern "C" void __hipUnregisterFatBinary(std::vector< std::pair >* modules) { HIP_INIT(); - std::for_each(modules->begin(), modules->end(), [](hipModule_t module){ - if (module != nullptr) { - as_amd(reinterpret_cast(module))->release(); + std::for_each(modules->begin(), modules->end(), [](std::pair module){ + if (module.first != nullptr) { + as_amd(reinterpret_cast(module.first))->release(); } }); delete modules; @@ -355,7 +357,6 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor if (dev_program == nullptr) { HIP_RETURN(hipErrorUnknown); } - /* Find the global Symbols */ if(!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) { HIP_RETURN(hipErrorUnknown);