From ec93df6513fda86e8441152998c8faea2230f951 Mon Sep 17 00:00:00 2001
From: foreman
Date: Wed, 28 Mar 2018 19:23:57 -0400
Subject: [PATCH] P4 to Git Change 1534050 by lmoriche@lmoriche_opencl_dev2 on
2018/03/28 19:09:26
SWDEV-145570 - Add support for clang offload bundles
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#3 edit
[ROCm/clr commit: 416cba12339ab0a8ada3e161f81074976bd1e96f]
---
projects/clr/hipamd/api/hip/hip_context.cpp | 18 ++-
projects/clr/hipamd/api/hip/hip_internal.hpp | 20 ++--
projects/clr/hipamd/api/hip/hip_platform.cpp | 111 +++++++++++++++----
3 files changed, 116 insertions(+), 33 deletions(-)
diff --git a/projects/clr/hipamd/api/hip/hip_context.cpp b/projects/clr/hipamd/api/hip/hip_context.cpp
index 983dc9b13c..9603b938b6 100644
--- a/projects/clr/hipamd/api/hip/hip_context.cpp
+++ b/projects/clr/hipamd/api/hip/hip_context.cpp
@@ -25,16 +25,16 @@ THE SOFTWARE.
#include "platform/runtime.hpp"
#include "utils/versions.hpp"
#include
+#include
thread_local amd::Context* g_context = nullptr;
thread_local std::stack g_ctxtStack;
std::vector g_devices;
+std::once_flag g_ihipInitialized;
-hipError_t hipInit(unsigned int flags)
+void ihipInit()
{
- HIP_INIT_API(flags);
-
if (!amd::Runtime::initialized()) {
amd::Runtime::init();
}
@@ -44,18 +44,26 @@ hipError_t hipInit(unsigned int flags)
for (unsigned int i=0; i device(1, devices[i]);
amd::Context* context = new amd::Context(device, amd::Context::Info());
- if (!context) return hipErrorOutOfMemory;
+ if (!context) return;
if (context && CL_SUCCESS != context->create(nullptr)) {
context->release();
} else {
g_devices.push_back(context);
+ g_context = context;
}
}
+}
+
+
+hipError_t hipInit(unsigned int flags)  // Public init entry point: all work happens inside HIP_INIT_API; flags is passed to that macro and not used otherwise.
+{
+ HIP_INIT_API(flags);  // expands to std::call_once(g_ihipInitialized, ihipInit) plus a thread check that returns hipErrorOutOfMemory on failure
return hipSuccess;
}
+
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
{
HIP_INIT_API(ctx, flags, device);
@@ -171,4 +179,4 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx)
g_ctxtStack.push(g_context);
return hipSuccess;
-}
\ No newline at end of file
+}
diff --git a/projects/clr/hipamd/api/hip/hip_internal.hpp b/projects/clr/hipamd/api/hip/hip_internal.hpp
index 239538e613..3d334fa2ac 100644
--- a/projects/clr/hipamd/api/hip/hip_internal.hpp
+++ b/projects/clr/hipamd/api/hip/hip_internal.hpp
@@ -25,20 +25,26 @@ THE SOFTWARE.
#include "cl_common.hpp"
-#define HIP_INIT()\
+#include
+
+#define HIP_INIT() \
+ std::call_once(g_ihipInitialized, ihipInit);  // runs ihipInit at most once; NOTE(review): the macro already ends in ';', so writing 'HIP_INIT();' yields an extra empty statement
+
+
+// This macro should be called at the beginning of every HIP API.
+#define HIP_INIT_API(...) \
+ HIP_INIT(); \
+ \
amd::Thread* thread = amd::Thread::current(); \
if (!CL_CHECK_THREAD(thread)) { \
return hipErrorOutOfMemory; \
}
-
-// This macro should be called at the beginning of every HIP API.
-#define HIP_INIT_API(...) \
- HIP_INIT()
-
+extern std::once_flag g_ihipInitialized;
extern thread_local amd::Context* g_context;
extern std::vector g_devices;
-hipError_t ihipDeviceGetCount(int* count);
+extern hipError_t ihipDeviceGetCount(int* count);
+extern void ihipInit();
#endif // HIP_SRC_HIP_INTERNAL_H
diff --git a/projects/clr/hipamd/api/hip/hip_platform.cpp b/projects/clr/hipamd/api/hip/hip_platform.cpp
index 0cc6a3b1c2..db7939c9e7 100644
--- a/projects/clr/hipamd/api/hip/hip_platform.cpp
+++ b/projects/clr/hipamd/api/hip/hip_platform.cpp
@@ -44,7 +44,7 @@ struct __CudaFatBinaryHeader {
unsigned long long int fatSize;
};
-struct __CudaPartHeader{
+struct __CudaPartHeader {
unsigned short type;
unsigned short dummy1;
unsigned int headerSize;
@@ -54,31 +54,20 @@ struct __CudaPartHeader{
unsigned int subarch;
};
-extern "C" hipModule_t __hipRegisterFatBinary(void* bundle)
+static hipModule_t registerCudaFatBinary(const __CudaFatBinaryHeader* fbheader)
{
- if (!amd::Runtime::initialized()) { // FIXME: fix initialization
- hipInit(0);
- }
+ const __CudaPartHeader* pheader = reinterpret_cast(
+ reinterpret_cast(fbheader) + fbheader->headerSize);
+ const __CudaPartHeader* end = reinterpret_cast(
+ reinterpret_cast(pheader) + fbheader->fatSize);
amd::Program* program = new amd::Program(*g_context);
if (!program) return nullptr;
- struct __CudaFatBinaryWrapper* fbwrapper = (struct __CudaFatBinaryWrapper*)bundle;
- if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) {
- return nullptr;
- }
- struct __CudaFatBinaryHeader* fbheader = (struct __CudaFatBinaryHeader*)fbwrapper->binary;
- if (fbheader->magic != __cudaFatMAGIC3 || fbheader->version != 1) {
- return nullptr;
- }
- struct __CudaPartHeader* pheader = (struct __CudaPartHeader*)(
- (uintptr_t)fbheader + fbheader->headerSize);
- struct __CudaPartHeader* end = (struct __CudaPartHeader*)(
- (uintptr_t)pheader + fbheader->fatSize);
-
while (pheader < end) {
if (true/*pheader->subarch == match a device in the context*/) {
- void *image = (void*)((uintptr_t)pheader + pheader->headerSize);
+ const void *image = reinterpret_cast(
+ reinterpret_cast(pheader) + pheader->headerSize);
size_t size = pheader->partSize;
if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, size) ||
CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) {
@@ -86,13 +75,83 @@ extern "C" hipModule_t __hipRegisterFatBinary(void* bundle)
}
break;
}
- pheader = (struct __CudaPartHeader*)(
- (uintptr_t)pheader + pheader->headerSize + pheader->partSize);
+ pheader = reinterpret_cast(
+ reinterpret_cast(pheader) + pheader->headerSize + pheader->partSize);
}
return reinterpret_cast(as_cl(program));
}
+#define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"  // leading bytes that identify an offload bundle payload in __hipRegisterFatBinary
+#define AMDGCN_AMDHSA_TRIPLE "openmp-amdgcn--amdhsa"  // prefix a bundle entry's triple must start with to be considered by registerOffloadBundle
+
+struct __ClangOffloadBundleDesc {  // One entry of the bundle table; entries are variable-sized because the triple text is stored inline.
+ uint64_t offset;  // added to the bundle header's address to locate this entry's image
+ uint64_t size;  // byte count handed to addDeviceProgram together with the image pointer
+ uint64_t tripleSize;  // length of the triple text in bytes; also the distance from triple[0] to the next entry
+ const char triple[1];  // first byte of the triple text; the remaining bytes follow directly in memory
+};
+
+struct __ClangOffloadBundleHeader {  // Layout overlaid on the start of a clang offload bundle payload.
+ const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1];  // magic text, sized without the terminating NUL
+ uint64_t numBundles;  // number of descriptor entries that registerOffloadBundle iterates over
+ __ClangOffloadBundleDesc desc[1];  // first entry only; later entries are reached by pointer arithmetic, not by indexing
+};
+
+static hipModule_t registerOffloadBundle(const __ClangOffloadBundleHeader* obheader)  // Creates an amd::Program and loads/builds the first bundle entry whose triple matches device 0 of g_context.
+{
+ amd::Program* program = new amd::Program(*g_context);  // g_context is thread_local; NOTE(review): confirm it is non-null on the calling thread before this dereference
+ if (!program)
+ return nullptr;
+
+ const __ClangOffloadBundleDesc* desc = &obheader->desc[0];
+ for (uint64_t i = 0; i < obheader->numBundles; ++i,
+ desc = reinterpret_cast(  // entries are variable-sized: the next one begins right after this entry's triple text
+ reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) {
+
+ std::string triple(desc->triple, sizeof(AMDGCN_AMDHSA_TRIPLE) - 1);  // reads a fixed-length prefix; NOTE(review): tripleSize is not checked before this read
+ if (triple.compare(AMDGCN_AMDHSA_TRIPLE))  // non-zero compare() means a different prefix, so skip the entry
+ continue;
+
+ std::string target(desc->triple + sizeof(AMDGCN_AMDHSA_TRIPLE),  // sizeof() counts the NUL, so one extra character after the prefix is skipped as well
+ desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE));  // remaining length; NOTE(review): unsigned wrap-around if tripleSize < sizeof(AMDGCN_AMDHSA_TRIPLE)
+ if (target.compare(g_context->devices()[0]->info().name_))  // only the first device of the context is matched against
+ continue;
+
+ const void *image = reinterpret_cast(
+ reinterpret_cast(obheader) + desc->offset);  // desc->offset is applied relative to the start of the bundle header
+ size_t size = desc->size;
+
+ if (CL_SUCCESS == program->addDeviceProgram(*g_context->devices()[0], image, size) &&
+ CL_SUCCESS == program->build(g_context->devices(), nullptr, nullptr, nullptr))
+ break;  // stop at the first entry that both loads and builds
+ }
+
+ return reinterpret_cast(as_cl(program));  // NOTE(review): also reached when no entry matched or built, so the caller still receives a non-null handle
+}
+
+
+extern "C" hipModule_t __hipRegisterFatBinary(const void* data)  // Inspects the wrapped payload's magic and hands it to the CUDA-style or offload-bundle loader; nullptr if neither fits.
+{
+ HIP_INIT();  // expands to std::call_once(g_ihipInitialized, ihipInit)
+
+ const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data);
+ if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) {  // reject anything that is not a version-1 wrapper
+ return nullptr;
+ }
+ const __CudaFatBinaryHeader* fbheader = reinterpret_cast(fbwrapper->binary);
+ if (fbheader->magic == __cudaFatMAGIC3 && fbheader->version == 1) {  // payload begins with a version-1 fat binary header
+ return registerCudaFatBinary(fbheader);
+ }
+
+ std::string magic((char*)fbwrapper->binary, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1);  // copies the payload's leading bytes, NUL excluded
+ if (!magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) {  // compare() == 0: payload begins with the offload bundle magic
+ return registerOffloadBundle(reinterpret_cast(fbwrapper->binary));
+ }
+
+ return nullptr;  // payload format not recognized
+}
+
std::map g_functions;
@@ -108,6 +167,8 @@ extern "C" void __hipRegisterFunction(
dim3* gridDim,
int* wSize)
{
+ HIP_INIT();
+
amd::Program* program = as_amd(reinterpret_cast(module));
const amd::Symbol* symbol = program->findSymbol(deviceName);
@@ -130,12 +191,14 @@ extern "C" void __hipRegisterVar(
int constant,
int global)
{
+ HIP_INIT();
}
extern "C" void __hipUnregisterFatBinary(
hipModule_t module
)
{
+ HIP_INIT();
}
dim3 g_gridDim; // FIXME: place in execution stack
@@ -149,6 +212,8 @@ extern "C" hipError_t hipConfigureCall(
size_t sharedMem,
hipStream_t stream)
{
+ HIP_INIT_API(gridDim, blockDim, sharedMem, stream);
+
// FIXME: should push and new entry on the execution stack
g_gridDim = gridDim;
@@ -166,6 +231,8 @@ extern "C" hipError_t hipSetupArgument(
size_t size,
size_t offset)
{
+ HIP_INIT_API(arg, size, offset);
+
// FIXME: should modify the top of the execution stack
::memcpy(g_arguments + offset, arg, size);
@@ -174,6 +241,8 @@ extern "C" hipError_t hipSetupArgument(
extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
{
+ HIP_INIT_API(hostFunction);
+
std::map::iterator it;
if ((it = g_functions.find(hostFunction)) == g_functions.end())
return hipErrorUnknown;