From d7f1cd1fcfac9ce81bbaaddfc7dad30aa007a4ef Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 1 Feb 2019 13:43:50 -0500 Subject: [PATCH] P4 to Git Change 1738965 by cpaquot@cpaquot-ocl-lc-lnx on 2019/02/01 13:17:10 SWDEV-145570 - [HIP] Handle multi device setup Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#21 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#21 edit --- hipamd/api/hip/hip_device_runtime.cpp | 22 ++++--- hipamd/api/hip/hip_internal.hpp | 1 + hipamd/api/hip/hip_platform.cpp | 90 ++++++++++++++++----------- 3 files changed, 68 insertions(+), 45 deletions(-) diff --git a/hipamd/api/hip/hip_device_runtime.cpp b/hipamd/api/hip/hip_device_runtime.cpp index f5e75d316b..bfaf5558d3 100644 --- a/hipamd/api/hip/hip_device_runtime.cpp +++ b/hipamd/api/hip/hip_device_runtime.cpp @@ -391,21 +391,27 @@ hipError_t hipDeviceSynchronize ( void ) { HIP_RETURN(hipSuccess); } +int ihipGetDevice() { + for (unsigned int i = 0; i < g_devices.size(); i++) { + if (g_devices[i] == hip::getCurrentContext()) { + return i; + } + } + assert(0 && "Current device not found?!"); + return -1; +} + hipError_t hipGetDevice ( int* deviceId ) { HIP_INIT_API(deviceId); if (deviceId != nullptr) { - for (unsigned int i = 0; i < g_devices.size(); i++) { - if (g_devices[i] == hip::getCurrentContext()) { - *deviceId = i; - HIP_RETURN(hipSuccess); - } - } + int dev = ihipGetDevice(); + assert(dev != -1); + *deviceId = dev; + HIP_RETURN(hipSuccess); } else { HIP_RETURN(hipErrorInvalidValue); } - - HIP_RETURN(hipErrorUnknown); } hipError_t hipGetDeviceCount ( int* count ) { diff --git a/hipamd/api/hip/hip_internal.hpp b/hipamd/api/hip/hip_internal.hpp index 3ca1ba61d7..8d918c608e 100644 --- a/hipamd/api/hip/hip_internal.hpp +++ b/hipamd/api/hip/hip_internal.hpp @@ -87,6 +87,7 @@ namespace hip { }; extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); +extern int ihipGetDevice(); extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); #define HIP_RETURN(ret) \ diff --git a/hipamd/api/hip/hip_platform.cpp b/hipamd/api/hip/hip_platform.cpp index da7f8beb62..795c55f87b 100644 --- a/hipamd/api/hip/hip_platform.cpp +++ b/hipamd/api/hip/hip_platform.cpp @@ -55,7 +55,7 @@ struct __ClangOffloadBundleHeader { __ClangOffloadBundleDesc desc[1]; }; -extern "C" hipModule_t __hipRegisterFatBinary(const void* data) +extern "C" std::vector* __hipRegisterFatBinary(const void* data) { HIP_INIT(); @@ -71,9 +71,7 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data) return nullptr; } - amd::Program* program = new amd::Program(*hip::getCurrentContext()); - if (!program) - return nullptr; + auto programs = new std::vector{g_devices.size()}; const auto obheader = reinterpret_cast(fbwrapper->binary); const auto* desc = &obheader->desc[0]; @@ -87,19 +85,30 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data) std::string target(desc->triple + sizeof(HIP_AMDGCN_AMDHSA_TRIPLE), desc->tripleSize - sizeof(HIP_AMDGCN_AMDHSA_TRIPLE)); - if (target.compare(hip::getCurrentContext()->devices()[0]->info().name_)) - continue; const void *image = reinterpret_cast( reinterpret_cast(obheader) + desc->offset); size_t size = desc->size; - if (CL_SUCCESS == program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, size) && - CL_SUCCESS == program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) - break; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]; + + if (target.compare(ctx->devices()[0]->info().name_)) { + continue; + } + + amd::Program* program = new amd::Program(*ctx); + if (program == nullptr) { + return nullptr; + } + if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], image, size) && + CL_SUCCESS == program->build(ctx->devices(), nullptr, nullptr, nullptr)) { + programs->at(dev) = reinterpret_cast(as_cl(program)); + } + } } - return reinterpret_cast(as_cl(program)); + return programs; } struct ihipExec_t { @@ -115,7 +124,7 @@ thread_local std::stack execStack_; class PlatformState { amd::Monitor lock_; private: - std::unordered_map functions_; + std::unordered_map > functions_; struct RegisteredVar { char* var; @@ -125,22 +134,18 @@ private: bool constant; }; - std::unordered_map vars_; + std::unordered_map*, RegisteredVar> vars_; static PlatformState* platform_; PlatformState() : lock_("Guards global function map") {} - ~PlatformState() { - for (const auto it : functions_) { - delete it.second; - } - } + ~PlatformState() {} public: static PlatformState& instance() { return *platform_; } - void registerVar(hipModule_t modules, + void registerVar(std::vector* modules, char* var, char* hostVar, char* deviceVar, @@ -153,18 +158,17 @@ public: vars_.insert(std::make_pair(modules, rvar)); } - void registerFunction(const void* hostFunction, amd::Kernel* func) { + void registerFunction(const void* hostFunction, const std::vector& funcs) { amd::ScopedLock lock(lock_); - hip::Function* f = new hip::Function(func); - functions_.insert(std::make_pair(hostFunction, f)); + functions_.insert(std::make_pair(hostFunction, funcs)); } - hip::Function* getFunc(const void* hostFunction) { + hipFunction_t getFunc(const void* hostFunction, int deviceId) { amd::ScopedLock lock(lock_); const auto it = functions_.find(hostFunction); if (it != functions_.cend()) { - return it->second; + return it->second[deviceId]; } else { return nullptr; } @@ -197,7 +201,7 @@ public: PlatformState* PlatformState::platform_ = new PlatformState(); extern "C" void __hipRegisterFunction( - hipModule_t module, + std::vector* modules, const void* hostFunction, char* deviceFunction, const char* deviceName, @@ -210,15 +214,21 @@ extern "C" void __hipRegisterFunction( { HIP_INIT(); - amd::Program* program = as_amd(reinterpret_cast(module)); + std::vector functions{g_devices.size()}; - const amd::Symbol* symbol = program->findSymbol(deviceName); - if (!symbol) return; + for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) { + hipFunction_t function = nullptr; + if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName) && + function != nullptr) { + functions[deviceId] = function; + } + else { + // tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for" + // " device %d\n", deviceName, deviceId); + } + } - amd::Kernel* kernel = new amd::Kernel(*program, *symbol, deviceName); - if (!kernel) return; - - PlatformState::instance().registerFunction(hostFunction, kernel); + PlatformState::instance().registerFunction(hostFunction, functions); } // Registers a device-side global variable. @@ -227,7 +237,7 @@ extern "C" void __hipRegisterFunction( // track of the value of the device side global variable between kernel // executions. extern "C" void __hipRegisterVar( - hipModule_t modules, // The device modules containing code object + std::vector* modules, // The device modules containing code object char* var, // The shadow variable in host code char* hostVar, // Variable name in host code char* deviceVar, // Variable name in device code @@ -241,11 +251,16 @@ extern "C" void __hipRegisterVar( PlatformState::instance().registerVar(modules, var, hostVar, deviceVar, size, constant != 0); } -extern "C" void __hipUnregisterFatBinary( - hipModule_t module -) +extern "C" void __hipUnregisterFatBinary(std::vector* modules) { HIP_INIT(); + + std::for_each(modules->begin(), modules->end(), [](hipModule_t module){ + if (module != nullptr) { + as_amd(reinterpret_cast(module))->release(); + } + }); + delete modules; } extern "C" hipError_t hipConfigureCall( @@ -277,7 +292,8 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) { HIP_INIT_API(hostFunction); - hip::Function* func = PlatformState::instance().getFunc(hostFunction); + int deviceId = ihipGetDevice(); + hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); if (func == nullptr) { HIP_RETURN(hipErrorUnknown); } @@ -292,7 +308,7 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) HIP_LAUNCH_PARAM_END }; - HIP_RETURN(hipModuleLaunchKernel(func->asHipFunction(), + HIP_RETURN(hipModuleLaunchKernel(func, exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z, exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z, exec.sharedMem_, exec.hStream_, nullptr, extra));