2
0

P4 to Git Change 1772785 by cpaquot@cpaquot-ocl-lc-lnx on 2019/04/22 14:31:18

SWDEV-144570 - [HIP] Lazy build kernels to avoid overfilling dev memory.

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#28 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#27 edit


[ROCm/hip commit: 80fb280643]
Este cometimento está contido em:
foreman
2019-04-22 15:19:24 -04:00
ascendente b0a31b621c
cometimento 17f613bfde
2 ficheiros modificados com 82 adições e 74 eliminações
+15 -8
Ver ficheiro
@@ -102,23 +102,30 @@ class PlatformState {
public:
struct RegisteredVar {
public:
RegisteredVar(): hostVar_(nullptr), size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {}
RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr, amd::Memory* amd_mem_obj);
RegisteredVar(): size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {}
~RegisteredVar() {}
hipDeviceptr_t getdeviceptr() const { return devicePtr_; };
size_t getvarsize() const { return size_; };
private:
char* hostVar_; // Variable name in host code
size_t size_; // Size of the variable
hipDeviceptr_t devicePtr_; //Device Memory Address of the variable.
amd::Memory* amd_mem_obj_;
};
struct DeviceFunction {
std::string deviceName;
std::vector< std::pair< hipModule_t, bool > >* modules;
std::vector<hipFunction_t> functions;
};
struct DeviceVar {
std::string hostVar;
std::vector< std::pair< hipModule_t, bool > >* modules;
std::vector<RegisteredVar> rvars;
};
private:
std::unordered_map<const void*, std::vector<hipFunction_t> > functions_;
std::unordered_map<const void*, std::vector<RegisteredVar> > vars_;
std::unordered_map<const void*, DeviceFunction > functions_;
std::unordered_map<const void*, DeviceVar > vars_;
static PlatformState* platform_;
@@ -129,8 +136,8 @@ public:
return *platform_;
}
void registerVar(const char* hostvar, const std::vector<RegisteredVar>& rvar);
void registerFunction(const void* hostFunction, const std::vector<hipFunction_t>& funcs);
void registerVar(const void* hostvar, const DeviceVar& var);
void registerFunction(const void* hostFunction, const DeviceFunction& func);
hipFunction_t getFunc(const void* hostFunction, int deviceId);
bool getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr,
+67 -66
Ver ficheiro
@@ -64,7 +64,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
hipDeviceptr_t* dptr, size_t* bytes);
extern "C" std::vector<hipModule_t>* __hipRegisterFatBinary(const void* data)
extern "C" std::vector< std::pair<hipModule_t, bool> >* __hipRegisterFatBinary(const void* data)
{
HIP_INIT();
@@ -80,7 +80,7 @@ extern "C" std::vector<hipModule_t>* __hipRegisterFatBinary(const void* data)
return nullptr;
}
auto programs = new std::vector<hipModule_t>{g_devices.size()};
auto programs = new std::vector< std::pair<hipModule_t, bool> >{g_devices.size()};
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(fbwrapper->binary);
const auto* desc = &obheader->desc[0];
@@ -115,9 +115,8 @@ extern "C" std::vector<hipModule_t>* __hipRegisterFatBinary(const void* data)
if (program == nullptr) {
return nullptr;
}
if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], image, size) &&
CL_SUCCESS == program->build(ctx->devices(), nullptr, nullptr, nullptr)) {
programs->at(dev) = reinterpret_cast<hipModule_t>(as_cl(program));
if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], image, size)) {
programs->at(dev) = std::make_pair(reinterpret_cast<hipModule_t>(as_cl(program)) , false);
}
}
}
@@ -125,35 +124,45 @@ extern "C" std::vector<hipModule_t>* __hipRegisterFatBinary(const void* data)
return programs;
}
PlatformState::RegisteredVar::RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr,
amd::Memory* amd_mem_obj) : hostVar_(hostVar),
size_(size), devicePtr_(devicePtr),
amd_mem_obj_(amd_mem_obj) {
/* Add the memory to the MemObjMap */
amd::MemObjMap::AddMemObj(devicePtr_, amd_mem_obj_);
}
void PlatformState::registerVar(const char* hostvar,
const std::vector<RegisteredVar>& rvar) {
void PlatformState::registerVar(const void* hostvar,
const DeviceVar& rvar) {
amd::ScopedLock lock(lock_);
vars_.insert(std::make_pair(hostvar, rvar));
}
void PlatformState::registerFunction(const void* hostFunction,
const std::vector<hipFunction_t>& funcs) {
const DeviceFunction& func) {
amd::ScopedLock lock(lock_);
functions_.insert(std::make_pair(hostFunction, funcs));
functions_.insert(std::make_pair(hostFunction, func));
}
hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) {
amd::ScopedLock lock(lock_);
const auto it = functions_.find(hostFunction);
if (it != functions_.cend()) {
return it->second[deviceId];
} else {
return nullptr;
PlatformState::DeviceFunction& devFunc = it->second;
if (devFunc.functions[deviceId] == 0) {
hipModule_t module = (*devFunc.modules)[deviceId].first;
if (!(*devFunc.modules)[deviceId].second) {
amd::Program* program = as_amd(reinterpret_cast<cl_program>(module));
if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) {
return nullptr;
}
(*devFunc.modules)[deviceId].second = true;
}
hipFunction_t function = nullptr;
if (hipSuccess == hipModuleGetFunction(&function, module, devFunc.deviceName.c_str()) &&
function != nullptr) {
devFunc.functions[deviceId] = function;
}
else {
// tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for"
// " device %d\n", deviceName, deviceId);
}
}
return devFunc.functions[deviceId];
}
return nullptr;
}
bool PlatformState::getGlobalVar(const void* hostVar, int deviceId,
@@ -161,8 +170,32 @@ bool PlatformState::getGlobalVar(const void* hostVar, int deviceId,
amd::ScopedLock lock(lock_);
const auto it = vars_.find(hostVar);
if (it != vars_.cend()) {
*size_ptr = it->second[deviceId].getvarsize();
*dev_ptr = it->second[deviceId].getdeviceptr();
DeviceVar& dvar = it->second;
if (dvar.rvars[deviceId].getdeviceptr() == nullptr) {
size_t sym_size = 0;
hipDeviceptr_t device_ptr = nullptr;
amd::Memory* amd_mem_obj = nullptr;
if (!(*dvar.modules)[deviceId].second) {
amd::Program* program = as_amd(reinterpret_cast<cl_program>((*dvar.modules)[deviceId].first));
if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) {
return false;
}
(*dvar.modules)[deviceId].second = true;
}
if((hipSuccess == ihipCreateGlobalVarObj(dvar.hostVar.c_str(), (*dvar.modules)[deviceId].first,
&amd_mem_obj, &device_ptr, &sym_size))
&& (device_ptr != nullptr)) {
dvar.rvars[deviceId].size_ = sym_size;
dvar.rvars[deviceId].devicePtr_ = device_ptr;
dvar.rvars[deviceId].amd_mem_obj_ = amd_mem_obj;
amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj);
} else {
LogError("[HIP] __hipRegisterVar cannot find kernel for device \n");
}
}
*size_ptr = dvar.rvars[deviceId].getvarsize();
*dev_ptr = dvar.rvars[deviceId].getdeviceptr();
return true;
} else {
return false;
@@ -190,7 +223,7 @@ void PlatformState::popExec(ihipExec_t& exec) {
}
extern "C" void __hipRegisterFunction(
std::vector<hipModule_t>* modules,
std::vector<std::pair<hipModule_t,bool> >* modules,
const void* hostFunction,
char* deviceFunction,
const char* deviceName,
@@ -203,21 +236,9 @@ extern "C" void __hipRegisterFunction(
{
HIP_INIT();
std::vector<hipFunction_t> functions{g_devices.size()};
PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector<hipFunction_t>{ g_devices.size() }};
for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) {
hipFunction_t function = nullptr;
if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName) &&
function != nullptr) {
functions[deviceId] = function;
}
else {
// tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for"
// " device %d\n", deviceName, deviceId);
}
}
PlatformState::instance().registerFunction(hostFunction, functions);
PlatformState::instance().registerFunction(hostFunction, func);
}
// Registers a device-side global variable.
@@ -226,7 +247,7 @@ extern "C" void __hipRegisterFunction(
// track of the value of the device side global variable between kernel
// executions.
extern "C" void __hipRegisterVar(
std::vector<hipModule_t>* modules, // The device modules containing code object
std::vector<std::pair<hipModule_t,bool> >* modules, // The device modules containing code object
char* var, // The shadow variable in host code
char* hostVar, // Variable name in host code
char* deviceVar, // Variable name in device code
@@ -237,38 +258,19 @@ extern "C" void __hipRegisterVar(
{
HIP_INIT();
size_t sym_size = 0;
std::vector<PlatformState::RegisteredVar> global_vars{g_devices.size()};
PlatformState::DeviceVar dvar{ std::string{ hostVar }, modules,
std::vector<PlatformState::RegisteredVar>{ g_devices.size() } };
for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) {
hipDeviceptr_t device_ptr = nullptr;
amd::Memory* amd_mem_obj = nullptr;
if((hipSuccess == ihipCreateGlobalVarObj(hostVar, modules->at(deviceId), &amd_mem_obj,
&device_ptr, &sym_size))
&& (device_ptr != nullptr)) {
if (static_cast<size_t>(size) != sym_size) {
LogError("[OCL] Size Mismatch with the HSA Symbol retrieved \n");
}
global_vars[deviceId] = PlatformState::RegisteredVar(hostVar, sym_size, device_ptr, amd_mem_obj);
} else {
LogError("[OCL] __hipRegisterVar cannot find kernel for device \n");
}
}
PlatformState::instance().registerVar(hostVar, global_vars);
PlatformState::instance().registerVar(hostVar, dvar);
}
extern "C" void __hipUnregisterFatBinary(std::vector<hipModule_t>* modules)
extern "C" void __hipUnregisterFatBinary(std::vector< std::pair<hipModule_t, bool> >* modules)
{
HIP_INIT();
std::for_each(modules->begin(), modules->end(), [](hipModule_t module){
if (module != nullptr) {
as_amd(reinterpret_cast<cl_program>(module))->release();
std::for_each(modules->begin(), modules->end(), [](std::pair<hipModule_t, bool> module){
if (module.first != nullptr) {
as_amd(reinterpret_cast<cl_program>(module.first))->release();
}
});
delete modules;
@@ -355,7 +357,6 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor
if (dev_program == nullptr) {
HIP_RETURN(hipErrorUnknown);
}
/* Find the global Symbols */
if(!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) {
HIP_RETURN(hipErrorUnknown);