P4 to Git Change 1738965 by cpaquot@cpaquot-ocl-lc-lnx on 2019/02/01 13:17:10

SWDEV-145570 - [HIP] Handle multi device setup

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#21 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#21 edit
Tento commit je obsažen v:
foreman
2019-02-01 13:43:50 -05:00
rodič f024fdb770
revize d7f1cd1fcf
3 změnil soubory, kde provedl 68 přidání a 45 odebrání
+14 -8
Zobrazit soubor
@@ -391,21 +391,27 @@ hipError_t hipDeviceSynchronize ( void ) {
HIP_RETURN(hipSuccess);
}
int ihipGetDevice() {
for (unsigned int i = 0; i < g_devices.size(); i++) {
if (g_devices[i] == hip::getCurrentContext()) {
return i;
}
}
assert(0 && "Current device not found?!");
return -1;
}
hipError_t hipGetDevice ( int* deviceId ) {
HIP_INIT_API(deviceId);
if (deviceId != nullptr) {
for (unsigned int i = 0; i < g_devices.size(); i++) {
if (g_devices[i] == hip::getCurrentContext()) {
*deviceId = i;
HIP_RETURN(hipSuccess);
}
}
int dev = ihipGetDevice();
assert(dev != -1);
*deviceId = dev;
HIP_RETURN(hipSuccess);
} else {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(hipErrorUnknown);
}
hipError_t hipGetDeviceCount ( int* count ) {
+1
Zobrazit soubor
@@ -87,6 +87,7 @@ namespace hip {
};
extern std::vector<amd::Context*> g_devices;
extern hipError_t ihipDeviceGetCount(int* count);
extern int ihipGetDevice();
extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset);
#define HIP_RETURN(ret) \
+53 -37
Zobrazit soubor
@@ -55,7 +55,7 @@ struct __ClangOffloadBundleHeader {
__ClangOffloadBundleDesc desc[1];
};
extern "C" hipModule_t __hipRegisterFatBinary(const void* data)
extern "C" std::vector<hipModule_t>* __hipRegisterFatBinary(const void* data)
{
HIP_INIT();
@@ -71,9 +71,7 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data)
return nullptr;
}
amd::Program* program = new amd::Program(*hip::getCurrentContext());
if (!program)
return nullptr;
auto programs = new std::vector<hipModule_t>{g_devices.size()};
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(fbwrapper->binary);
const auto* desc = &obheader->desc[0];
@@ -87,19 +85,30 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data)
std::string target(desc->triple + sizeof(HIP_AMDGCN_AMDHSA_TRIPLE),
desc->tripleSize - sizeof(HIP_AMDGCN_AMDHSA_TRIPLE));
if (target.compare(hip::getCurrentContext()->devices()[0]->info().name_))
continue;
const void *image = reinterpret_cast<const void*>(
reinterpret_cast<uintptr_t>(obheader) + desc->offset);
size_t size = desc->size;
if (CL_SUCCESS == program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, size) &&
CL_SUCCESS == program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr))
break;
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
amd::Context* ctx = g_devices[dev];
if (target.compare(ctx->devices()[0]->info().name_)) {
continue;
}
amd::Program* program = new amd::Program(*ctx);
if (program == nullptr) {
return nullptr;
}
if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], image, size) &&
CL_SUCCESS == program->build(ctx->devices(), nullptr, nullptr, nullptr)) {
programs->at(dev) = reinterpret_cast<hipModule_t>(as_cl(program));
}
}
}
return reinterpret_cast<hipModule_t>(as_cl(program));
return programs;
}
struct ihipExec_t {
@@ -115,7 +124,7 @@ thread_local std::stack<ihipExec_t> execStack_;
class PlatformState {
amd::Monitor lock_;
private:
std::unordered_map<const void*, hip::Function*> functions_;
std::unordered_map<const void*, std::vector<hipFunction_t> > functions_;
struct RegisteredVar {
char* var;
@@ -125,22 +134,18 @@ private:
bool constant;
};
std::unordered_map<hipModule_t, RegisteredVar> vars_;
std::unordered_map<std::vector<hipModule_t>*, RegisteredVar> vars_;
static PlatformState* platform_;
PlatformState() : lock_("Guards global function map") {}
~PlatformState() {
for (const auto it : functions_) {
delete it.second;
}
}
~PlatformState() {}
public:
static PlatformState& instance() {
return *platform_;
}
void registerVar(hipModule_t modules,
void registerVar(std::vector<hipModule_t>* modules,
char* var,
char* hostVar,
char* deviceVar,
@@ -153,18 +158,17 @@ public:
vars_.insert(std::make_pair(modules, rvar));
}
void registerFunction(const void* hostFunction, amd::Kernel* func) {
void registerFunction(const void* hostFunction, const std::vector<hipFunction_t>& funcs) {
amd::ScopedLock lock(lock_);
hip::Function* f = new hip::Function(func);
functions_.insert(std::make_pair(hostFunction, f));
functions_.insert(std::make_pair(hostFunction, funcs));
}
hip::Function* getFunc(const void* hostFunction) {
hipFunction_t getFunc(const void* hostFunction, int deviceId) {
amd::ScopedLock lock(lock_);
const auto it = functions_.find(hostFunction);
if (it != functions_.cend()) {
return it->second;
return it->second[deviceId];
} else {
return nullptr;
}
@@ -197,7 +201,7 @@ public:
PlatformState* PlatformState::platform_ = new PlatformState();
extern "C" void __hipRegisterFunction(
hipModule_t module,
std::vector<hipModule_t>* modules,
const void* hostFunction,
char* deviceFunction,
const char* deviceName,
@@ -210,15 +214,21 @@ extern "C" void __hipRegisterFunction(
{
HIP_INIT();
amd::Program* program = as_amd(reinterpret_cast<cl_program>(module));
std::vector<hipFunction_t> functions{g_devices.size()};
const amd::Symbol* symbol = program->findSymbol(deviceName);
if (!symbol) return;
for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) {
hipFunction_t function = nullptr;
if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName) &&
function != nullptr) {
functions[deviceId] = function;
}
else {
// tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for"
// " device %d\n", deviceName, deviceId);
}
}
amd::Kernel* kernel = new amd::Kernel(*program, *symbol, deviceName);
if (!kernel) return;
PlatformState::instance().registerFunction(hostFunction, kernel);
PlatformState::instance().registerFunction(hostFunction, functions);
}
// Registers a device-side global variable.
@@ -227,7 +237,7 @@ extern "C" void __hipRegisterFunction(
// track of the value of the device side global variable between kernel
// executions.
extern "C" void __hipRegisterVar(
hipModule_t modules, // The device modules containing code object
std::vector<hipModule_t>* modules, // The device modules containing code object
char* var, // The shadow variable in host code
char* hostVar, // Variable name in host code
char* deviceVar, // Variable name in device code
@@ -241,11 +251,16 @@ extern "C" void __hipRegisterVar(
PlatformState::instance().registerVar(modules, var, hostVar, deviceVar, size, constant != 0);
}
extern "C" void __hipUnregisterFatBinary(
hipModule_t module
)
extern "C" void __hipUnregisterFatBinary(std::vector<hipModule_t>* modules)
{
HIP_INIT();
std::for_each(modules->begin(), modules->end(), [](hipModule_t module){
if (module != nullptr) {
as_amd(reinterpret_cast<cl_program>(module))->release();
}
});
delete modules;
}
extern "C" hipError_t hipConfigureCall(
@@ -277,7 +292,8 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
{
HIP_INIT_API(hostFunction);
hip::Function* func = PlatformState::instance().getFunc(hostFunction);
int deviceId = ihipGetDevice();
hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId);
if (func == nullptr) {
HIP_RETURN(hipErrorUnknown);
}
@@ -292,7 +308,7 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
HIP_LAUNCH_PARAM_END
};
HIP_RETURN(hipModuleLaunchKernel(func->asHipFunction(),
HIP_RETURN(hipModuleLaunchKernel(func,
exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z,
exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z,
exec.sharedMem_, exec.hStream_, nullptr, extra));