P4 to Git Change 1534050 by lmoriche@lmoriche_opencl_dev2 on 2018/03/28 19:09:26

SWDEV-145570 - Add support for clang offload bundles

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#3 edit


[ROCm/clr commit: 416cba1233]
This commit is contained in:
foreman
2018-03-28 19:23:57 -04:00
rodzic 7c6bdcb7a7
commit ec93df6513
3 zmienionych plików z 116 dodań i 33 usunięć
@@ -25,16 +25,16 @@ THE SOFTWARE.
#include "platform/runtime.hpp"
#include "utils/versions.hpp"
#include <stack>
#include <thread>
thread_local amd::Context* g_context = nullptr;
thread_local std::stack<amd::Context*> g_ctxtStack;
std::vector<amd::Context*> g_devices;
std::once_flag g_ihipInitialized;
hipError_t hipInit(unsigned int flags)
void ihipInit()
{
HIP_INIT_API(flags);
if (!amd::Runtime::initialized()) {
amd::Runtime::init();
}
@@ -44,18 +44,26 @@ hipError_t hipInit(unsigned int flags)
for (unsigned int i=0; i<devices.size(); i++) {
const std::vector<amd::Device*> device(1, devices[i]);
amd::Context* context = new amd::Context(device, amd::Context::Info());
if (!context) return hipErrorOutOfMemory;
if (!context) return;
if (context && CL_SUCCESS != context->create(nullptr)) {
context->release();
} else {
g_devices.push_back(context);
g_context = context;
}
}
}
// Public HIP initialization entry point.
//
// All the work happens inside HIP_INIT_API: that macro invokes HIP_INIT(),
// which runs ihipInit() exactly once through
// std::call_once(g_ihipInitialized, ihipInit). The function body itself does
// nothing further and does not inspect `flags`.
//
// Returns hipSuccess when control reaches the return statement below.
// NOTE(review): depending on which HIP_INIT_API definition is active, the
// macro may itself return an error before that point - confirm against
// hip_internal.hpp.
hipError_t hipInit(unsigned int flags)
{
HIP_INIT_API(flags);
return hipSuccess;
}
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
{
HIP_INIT_API(ctx, flags, device);
@@ -171,4 +179,4 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx)
g_ctxtStack.push(g_context);
return hipSuccess;
}
}
@@ -25,20 +25,26 @@ THE SOFTWARE.
#include "cl_common.hpp"
#define HIP_INIT()\
#include <thread>
#define HIP_INIT() \
std::call_once(g_ihipInitialized, ihipInit);
// This macro should be called at the beginning of every HIP API.
#define HIP_INIT_API(...) \
HIP_INIT(); \
\
amd::Thread* thread = amd::Thread::current(); \
if (!CL_CHECK_THREAD(thread)) { \
return hipErrorOutOfMemory; \
}
// This macro should be called at the beginning of every HIP API.
#define HIP_INIT_API(...) \
HIP_INIT()
extern std::once_flag g_ihipInitialized;
extern thread_local amd::Context* g_context;
extern std::vector<amd::Context*> g_devices;
hipError_t ihipDeviceGetCount(int* count);
extern hipError_t ihipDeviceGetCount(int* count);
extern void ihipInit();
#endif // HIP_SRC_HIP_INTERNAL_H
@@ -44,7 +44,7 @@ struct __CudaFatBinaryHeader {
unsigned long long int fatSize;
};
struct __CudaPartHeader{
struct __CudaPartHeader {
unsigned short type;
unsigned short dummy1;
unsigned int headerSize;
@@ -54,31 +54,20 @@ struct __CudaPartHeader{
unsigned int subarch;
};
extern "C" hipModule_t __hipRegisterFatBinary(void* bundle)
static hipModule_t registerCudaFatBinary(const __CudaFatBinaryHeader* fbheader)
{
if (!amd::Runtime::initialized()) { // FIXME: fix initialization
hipInit(0);
}
const __CudaPartHeader* pheader = reinterpret_cast<const __CudaPartHeader*>(
reinterpret_cast<uintptr_t>(fbheader) + fbheader->headerSize);
const __CudaPartHeader* end = reinterpret_cast<const __CudaPartHeader*>(
reinterpret_cast<uintptr_t>(pheader) + fbheader->fatSize);
amd::Program* program = new amd::Program(*g_context);
if (!program) return nullptr;
struct __CudaFatBinaryWrapper* fbwrapper = (struct __CudaFatBinaryWrapper*)bundle;
if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) {
return nullptr;
}
struct __CudaFatBinaryHeader* fbheader = (struct __CudaFatBinaryHeader*)fbwrapper->binary;
if (fbheader->magic != __cudaFatMAGIC3 || fbheader->version != 1) {
return nullptr;
}
struct __CudaPartHeader* pheader = (struct __CudaPartHeader*)(
(uintptr_t)fbheader + fbheader->headerSize);
struct __CudaPartHeader* end = (struct __CudaPartHeader*)(
(uintptr_t)pheader + fbheader->fatSize);
while (pheader < end) {
if (true/*pheader->subarch == match a device in the context*/) {
void *image = (void*)((uintptr_t)pheader + pheader->headerSize);
const void *image = reinterpret_cast<void*>(
reinterpret_cast<uintptr_t>(pheader) + pheader->headerSize);
size_t size = pheader->partSize;
if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, size) ||
CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) {
@@ -86,13 +75,83 @@ extern "C" hipModule_t __hipRegisterFatBinary(void* bundle)
}
break;
}
pheader = (struct __CudaPartHeader*)(
(uintptr_t)pheader + pheader->headerSize + pheader->partSize);
pheader = reinterpret_cast<const __CudaPartHeader*>(
reinterpret_cast<uintptr_t>(pheader) + pheader->headerSize + pheader->partSize);
}
return reinterpret_cast<hipModule_t>(as_cl(program));
}
#define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
#define AMDGCN_AMDHSA_TRIPLE "openmp-amdgcn--amdhsa"
// One entry of the descriptor table in a clang offload bundle.
// This struct is overlaid directly on the bundle bytes, so member order and
// sizes must not change. Entries are variable length: registerOffloadBundle
// finds the next one at &triple[0] + tripleSize.
struct __ClangOffloadBundleDesc {
// Byte offset of the code object, added to the address of the bundle header.
uint64_t offset;
// Size in bytes of the code object passed to addDeviceProgram.
uint64_t size;
// Number of bytes occupied by `triple`.
uint64_t tripleSize;
// First byte of the triple string; tripleSize bytes are read starting here.
// NOTE(review): the consumer never relies on a terminating NUL - presumably
// the string is not NUL-terminated in the file; confirm against the
// bundler format.
const char triple[1];
};
// Leading part of a clang offload bundle, overlaid directly on the bundle
// bytes, so member order and sizes must not change.
struct __ClangOffloadBundleHeader {
// Magic bytes, sized to CLANG_OFFLOAD_BUNDLER_MAGIC_STR without its
// terminating NUL.
const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1];
// Number of descriptor entries that follow.
uint64_t numBundles;
// First descriptor. Later ones cannot be indexed as desc[i] because each
// entry is variable length; they are reached by walking from this one.
__ClangOffloadBundleDesc desc[1];
};
// Creates an amd::Program in the current context and loads into it the first
// code object of a clang offload bundle that
//   - carries the AMDGCN_AMDHSA_TRIPLE prefix, and
//   - names a target equal to the first device of the context, and
//   - is accepted by both addDeviceProgram() and build().
//
// obheader: start of the bundle (magic already verified by the caller).
//
// Returns the program as a hipModule_t. The program is returned even when no
// bundle entry could be loaded (unchanged from the previous behaviour).
// Returns nullptr when there is no current context or the program object
// could not be allocated.
static hipModule_t registerOffloadBundle(const __ClangOffloadBundleHeader* obheader)
{
  // g_context is thread_local and starts out as nullptr, so it can still be
  // unset on the calling thread; dereferencing it below would be undefined.
  if (g_context == nullptr)
    return nullptr;

  amd::Program* program = new amd::Program(*g_context);
  if (!program)
    return nullptr;

  // Length of the triple prefix without its terminating NUL.
  const size_t prefixLen = sizeof(AMDGCN_AMDHSA_TRIPLE) - 1;
  // The target name starts one character past the prefix (the separator),
  // which is exactly sizeof() of the literal including its NUL.
  const size_t targetOffset = sizeof(AMDGCN_AMDHSA_TRIPLE);

  const __ClangOffloadBundleDesc* desc = &obheader->desc[0];
  for (uint64_t i = 0; i < obheader->numBundles; ++i,
       desc = reinterpret_cast<const __ClangOffloadBundleDesc*>(
           reinterpret_cast<uintptr_t>(&desc->triple[0]) + desc->tripleSize)) {

    // A triple shorter than prefix + separator cannot name a target. Skipping
    // it here prevents reading past the triple and prevents the unsigned
    // subtraction below from wrapping around to a huge length.
    if (desc->tripleSize < targetOffset)
      continue;

    const std::string triple(desc->triple, prefixLen);
    if (triple.compare(AMDGCN_AMDHSA_TRIPLE) != 0)
      continue;

    const std::string target(desc->triple + targetOffset,
                             desc->tripleSize - targetOffset);
    if (target.compare(g_context->devices()[0]->info().name_) != 0)
      continue;

    // The code object lives `offset` bytes from the start of the bundle.
    const void* image = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(obheader) + desc->offset);
    const size_t size = desc->size;

    // Stop at the first entry that both loads and builds; otherwise keep
    // looking at the remaining entries.
    if (CL_SUCCESS == program->addDeviceProgram(*g_context->devices()[0], image, size) &&
        CL_SUCCESS == program->build(g_context->devices(), nullptr, nullptr, nullptr))
      break;
  }

  return reinterpret_cast<hipModule_t>(as_cl(program));
}
// Entry point emitted by the compiler to register a fat binary.
//
// `data` points at a __CudaFatBinaryWrapper. After validating the wrapper,
// the payload it references is dispatched on its leading bytes:
//   - a CUDA fat binary header (magic __cudaFatMAGIC3, version 1) goes to
//     registerCudaFatBinary();
//   - a clang offload bundle (CLANG_OFFLOAD_BUNDLER_MAGIC_STR) goes to
//     registerOffloadBundle().
// Returns nullptr for a bad wrapper or an unrecognised payload.
extern "C" hipModule_t __hipRegisterFatBinary(const void* data)
{
  HIP_INIT();

  const __CudaFatBinaryWrapper* wrapper =
      reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
  const bool wrapperValid =
      (wrapper->magic == __cudaFatMAGIC2) && (wrapper->version == 1);
  if (!wrapperValid) {
    return nullptr;
  }

  // First interpretation of the payload: CUDA fat binary.
  const __CudaFatBinaryHeader* cudaHeader =
      reinterpret_cast<const __CudaFatBinaryHeader*>(wrapper->binary);
  if (cudaHeader->magic == __cudaFatMAGIC3 && cudaHeader->version == 1) {
    return registerCudaFatBinary(cudaHeader);
  }

  // Second interpretation: clang offload bundle, recognised by its magic string.
  const std::string bundleMagic(reinterpret_cast<const char*>(wrapper->binary),
                                sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1);
  if (bundleMagic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) == 0) {
    return registerOffloadBundle(
        reinterpret_cast<const __ClangOffloadBundleHeader*>(wrapper->binary));
  }

  return nullptr;
}
std::map<const void*, hipFunction_t> g_functions;
@@ -108,6 +167,8 @@ extern "C" void __hipRegisterFunction(
dim3* gridDim,
int* wSize)
{
HIP_INIT();
amd::Program* program = as_amd(reinterpret_cast<cl_program>(module));
const amd::Symbol* symbol = program->findSymbol(deviceName);
@@ -130,12 +191,14 @@ extern "C" void __hipRegisterVar(
int constant,
int global)
{
HIP_INIT();
}
extern "C" void __hipUnregisterFatBinary(
hipModule_t module
)
{
HIP_INIT();
}
dim3 g_gridDim; // FIXME: place in execution stack
@@ -149,6 +212,8 @@ extern "C" hipError_t hipConfigureCall(
size_t sharedMem,
hipStream_t stream)
{
HIP_INIT_API(gridDim, blockDim, sharedMem, stream);
// FIXME: should push a new entry on the execution stack
g_gridDim = gridDim;
@@ -166,6 +231,8 @@ extern "C" hipError_t hipSetupArgument(
size_t size,
size_t offset)
{
HIP_INIT_API(arg, size, offset);
// FIXME: should modify the top of the execution stack
::memcpy(g_arguments + offset, arg, size);
@@ -174,6 +241,8 @@ extern "C" hipError_t hipSetupArgument(
extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
{
HIP_INIT_API(hostFunction);
std::map<const void*, hipFunction_t>::iterator it;
if ((it = g_functions.find(hostFunction)) == g_functions.end())
return hipErrorUnknown;