From 73cb309e2bd65be2a056c6c8dcda9f31381c953a Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 1 Mar 2018 22:57:20 -0500
Subject: [PATCH] P4 to Git Change 1521675 by lmoriche@lmoriche_opencl_dev2 on
2018/03/01 22:50:06
SWDEV-145570 - [HIP] - Hip Rearchitecture
- Add initial prototype implementation
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/Makefile#11 edit
... //depot/stg/opencl/drivers/opencl/api/hip/fixme.cpp#1 add
... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#1 add
... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_error.cpp#1 add
... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#1 add
... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#1 add
... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#1 add
... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.rc#1 add
... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#1 add
... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#1 add
---
api/hip/fixme.cpp | 33 +++++++
api/hip/hip_context.cpp | 58 ++++++++++++
api/hip/hip_device.cpp | 140 +++++++++++++---------------
api/hip/hip_error.cpp | 48 ++++++++++
api/hip/hip_event.cpp | 67 ++++++++++++++
api/hip/hip_hcc.def.in | 129 ++++++++++++++++++++++++++
api/hip/hip_hcc.map.in | 133 +++++++++++++++++++++++++++
api/hip/hip_hcc.rc | 75 +++++++++++++++
api/hip/hip_internal.hpp | 4 +-
api/hip/hip_memory.cpp | 133 +++++++++++++--------------
api/hip/hip_module.cpp | 144 +++++++++++++++++++++++++++++
api/hip/hip_platform.cpp | 193 +++++++++++++++++++++++++++++++++++++++
12 files changed, 1010 insertions(+), 147 deletions(-)
create mode 100644 api/hip/fixme.cpp
create mode 100644 api/hip/hip_context.cpp
create mode 100644 api/hip/hip_error.cpp
create mode 100644 api/hip/hip_event.cpp
create mode 100644 api/hip/hip_hcc.def.in
create mode 100644 api/hip/hip_hcc.map.in
create mode 100644 api/hip/hip_hcc.rc
create mode 100644 api/hip/hip_module.cpp
create mode 100644 api/hip/hip_platform.cpp
diff --git a/api/hip/fixme.cpp b/api/hip/fixme.cpp
new file mode 100644
index 0000000000..3d062e2dbc
--- /dev/null
+++ b/api/hip/fixme.cpp
@@ -0,0 +1,33 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "cl_common.hpp"
+
+KHRicdVendorDispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0};
+amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_};
+
+RUNTIME_ENTRY(cl_int, clGetDeviceIDs,
+ (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries,
+ cl_device_id* devices, cl_uint* num_devices)) {
+ return CL_SUCCESS;
+}
+RUNTIME_EXIT
diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp
new file mode 100644
index 0000000000..0e6ff2116a
--- /dev/null
+++ b/api/hip/hip_context.cpp
@@ -0,0 +1,58 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include
+
+#include "hip_internal.hpp"
+#include "platform/runtime.hpp"
+
+
+amd::Context* g_context = nullptr;
+
+hipError_t hipInit(unsigned int flags)
+{
+ HIP_INIT_API(flags);
+
+ if (!amd::Runtime::initialized()) {
+ amd::Runtime::init();
+ }
+
+ // FIXME: move the global VDI context to hipInit.
+ g_context = new amd::Context(
+ amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false), amd::Context::Info());
+ if (!g_context) return hipErrorOutOfMemory;
+
+ if (g_context && CL_SUCCESS != g_context->create(nullptr)) {
+ g_context->release();
+ return hipErrorUnknown;
+ }
+
+ return hipSuccess;
+}
+
+hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
+{
+ HIP_INIT_API(ctx, flags, device);
+
+ return hipSuccess;
+}
+
diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp
index 557ee56643..7296eabb17 100644
--- a/api/hip/hip_device.cpp
+++ b/api/hip/hip_device.cpp
@@ -24,9 +24,6 @@ THE SOFTWARE.
#include "hip_internal.hpp"
-cl_device_id* g_deviceArray = NULL;
-unsigned g_deviceCnt = 0;
-
hipError_t hipGetDevice(int *deviceId) {
HIP_INIT_API(deviceId);
@@ -54,7 +51,7 @@ hipError_t hipDeviceGet(hipDevice_t *device, int deviceId)
return hipSuccess;
};
-hipError_t hipDeviceCount(int* count) {
+hipError_t hipGetDeviceCount(int* count) {
HIP_INIT_API(count);
@@ -63,9 +60,7 @@ hipError_t hipDeviceCount(int* count) {
}
// Get all available devices
- if (!amd::Device::getDeviceIDs(CL_DEVICE_TYPE_GPU, 0, NULL, count, false)) {
- return hipErrorNoDevice;
- }
+ *count = g_context->devices().size();
return hipSuccess;
}
@@ -74,93 +69,95 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
HIP_INIT_API(pi, attr, device);
- if(pi == NULL) {
+ if (pi == nullptr) {
return hipErrorInvalidValue;
}
- auto deviceHandle = as_amd(g_deviceArray[hipDevice]);
+ //if (unsigned(device) >= g_context->devices().size()) {
+ // return hipErrorInvalidDevice;
+ //}
+ //auto* deviceHandle = g_context->devices()[device];
- if (deviceHandle == NULL) {
- return hipErrorInvalidDevice;
- }
-
- hipDeviceProp_t *prop = deviceHandle->_props;
+ //FIXME: should we cache the props, or just select from deviceHandle->info_?
+ hipDeviceProp_t prop = {0};
+ hipError_t err = hipGetDeviceProperties(&prop, device);
+ if (err != hipSuccess) return err;
switch (attr) {
case hipDeviceAttributeMaxThreadsPerBlock:
- *pi = prop->maxThreadsPerBlock;
+ *pi = prop.maxThreadsPerBlock;
break;
case hipDeviceAttributeMaxBlockDimX:
- *pi = prop->maxThreadsDim[0];
+ *pi = prop.maxThreadsDim[0];
break;
case hipDeviceAttributeMaxBlockDimY:
- *pi = prop->maxThreadsDim[1];
+ *pi = prop.maxThreadsDim[1];
break;
case hipDeviceAttributeMaxBlockDimZ:
- *pi = prop->maxThreadsDim[2];
+ *pi = prop.maxThreadsDim[2];
break;
case hipDeviceAttributeMaxGridDimX:
- *pi = prop->maxGridSize[0];
+ *pi = prop.maxGridSize[0];
break;
case hipDeviceAttributeMaxGridDimY:
- *pi = prop->maxGridSize[1];
+ *pi = prop.maxGridSize[1];
break;
case hipDeviceAttributeMaxGridDimZ:
- *pi = prop->maxGridSize[2];
+ *pi = prop.maxGridSize[2];
break;
case hipDeviceAttributeMaxSharedMemoryPerBlock:
- *pi = prop->sharedMemPerBlock;
+ *pi = prop.sharedMemPerBlock;
break;
case hipDeviceAttributeTotalConstantMemory:
- *pi = prop->totalConstMem;
+ *pi = prop.totalConstMem;
break;
case hipDeviceAttributeWarpSize:
- *pi = prop->warpSize;
+ *pi = prop.warpSize;
break;
case hipDeviceAttributeMaxRegistersPerBlock:
- *pi = prop->regsPerBlock;
+ *pi = prop.regsPerBlock;
break;
case hipDeviceAttributeClockRate:
- *pi = prop->clockRate;
+ *pi = prop.clockRate;
break;
case hipDeviceAttributeMemoryClockRate:
- *pi = prop->memoryClockRate;
+ *pi = prop.memoryClockRate;
break;
case hipDeviceAttributeMemoryBusWidth:
- *pi = prop->memoryBusWidth;
+ *pi = prop.memoryBusWidth;
break;
case hipDeviceAttributeMultiprocessorCount:
- *pi = prop->multiProcessorCount;
+ *pi = prop.multiProcessorCount;
break;
case hipDeviceAttributeComputeMode:
- *pi = prop->computeMode;
+ *pi = prop.computeMode;
break;
case hipDeviceAttributeL2CacheSize:
- *pi = prop->l2CacheSize;
+ *pi = prop.l2CacheSize;
break;
case hipDeviceAttributeMaxThreadsPerMultiProcessor:
- *pi = prop->maxThreadsPerMultiProcessor;
+ *pi = prop.maxThreadsPerMultiProcessor;
break;
case hipDeviceAttributeComputeCapabilityMajor:
- *pi = prop->major;
+ *pi = prop.major;
break;
case hipDeviceAttributeComputeCapabilityMinor:
- *pi = prop->minor;
+ *pi = prop.minor;
break;
case hipDeviceAttributePciBusId:
- *pi = prop->pciBusID;
+ *pi = prop.pciBusID;
break;
case hipDeviceAttributeConcurrentKernels:
- *pi = prop->concurrentKernels;
+ *pi = prop.concurrentKernels;
break;
case hipDeviceAttributePciDeviceId:
- *pi = prop->pciDeviceID;
+ *pi = prop.pciDeviceID;
break;
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
- *pi = prop->maxSharedMemoryPerMultiProcessor;
+ *pi = prop.maxSharedMemoryPerMultiProcessor;
break;
case hipDeviceAttributeIsMultiGpuBoard:
- *pi = prop->isMultiGpuBoard;
+ *pi = prop.isMultiGpuBoard;
break;
default:
return hipErrorInvalidValue;
@@ -177,10 +174,10 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) {
return hipErrorInvalidValue;
}
- auto deviceHandle = as_amd(g_deviceArray[device]);
- if (deviceHandle == NULL) {
+ if (unsigned(device) >= g_context->devices().size()) {
return hipErrorInvalidDevice;
}
+ auto* deviceHandle = g_context->devices()[device];
hipDeviceProp_t deviceProps = {0};
@@ -226,11 +223,11 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) {
deviceProps.arch.has3dGrid = 1;
deviceProps.arch.hasDynamicParallelism = 0;
deviceProps.concurrentKernels = 1;
- deviceProps.pciDomainID = info.deviceTopology_.function;
- deviceProps.pciBusID = info.deviceTopology_.bus;
- deviceProps.pciDeviceID = info.deviceTopology_.device;
+ deviceProps.pciDomainID = info.deviceTopology_.pcie.function;
+ deviceProps.pciBusID = info.deviceTopology_.pcie.bus;
+ deviceProps.pciDeviceID = info.deviceTopology_.pcie.device;
deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_;
- deviceProps.isMultiGpuBoard = info.;
+ //deviceProps.isMultiGpuBoard = info.;
deviceProps.canMapHostMemory = 1;
deviceProps.gcnArch = info.gfxipVersion_;
@@ -254,32 +251,16 @@ hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig) {
return hipErrorInvalidValue;
}
- *cacheConfig = 0;
+ *cacheConfig = hipFuncCache_t();
return hipSuccess;
}
-hipError_t hipGetDeviceProperties(hipDeviceProp_t* properties, int device) {
-
- HIP_INIT_API(properties, device);
- if ((properties == NULL) || (device < 0) || (device >= g_deviceCnt)) {
- return hipErrorInvalidDevice;
- }
-
- auto * deviceHandle = as_amd(g_deviceArray[device]);
- if (deviceHandle != NULL) {
- *properties = deviceHandle->_props;
- return hipSuccess;
- }
-
- return hipErrorInvalidDevice;
-}
-
hipError_t hipSetDeviceFlags(unsigned int flags) {
HIP_INIT_API(flags);
- assert(0 && "Unimplemented")
+ assert(0 && "Unimplemented");
return hipSuccess;
};
@@ -288,7 +269,7 @@ hipError_t hipDeviceGetLimit (size_t *pValue, hipLimit_t limit) {
HIP_INIT_API(pValue, limit);
- assert(0 && "Unimplemented")
+ assert(0 && "Unimplemented");
return hipSuccess;
}
@@ -297,7 +278,7 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig)
HIP_INIT_API(cacheConfig);
- assert(0 && "Not supported")
+ assert(0 && "Not supported");
return hipSuccess;
}
@@ -306,7 +287,7 @@ hipError_t hipDeviceSetSharedMemConfig (hipSharedMemConfig config) {
HIP_INIT_API(config);
- assert(0 && "Not Supported")
+ assert(0 && "Not Supported");
return hipSuccess;
}
@@ -315,17 +296,17 @@ hipError_t hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig) {
HIP_INIT_API(pConfig);
- assert(0 && "Not supported")
+ assert(0 && "Not supported");
return hipSuccess;
}
-hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
+hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
HIP_INIT_API(device, properties);
-
- assert(0 && "Unimplemented")
+
+ assert(0 && "Unimplemented");
return hipSuccess;
}
@@ -335,7 +316,7 @@ hipError_t hipDeviceGetByPCIBusId (int* device, const char* pciBusId) {
HIP_INIT_API(device,pciBusId);
- assert(0 && "Unimplemented")
+ assert(0 && "Unimplemented");
return hipSuccess;
}
@@ -345,7 +326,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) {
HIP_INIT_API(bytes, device);
- assert(0 && "Unimplemented")
+ assert(0 && "Unimplemented");
return hipSuccess;
}
@@ -353,7 +334,8 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) {
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) {
HIP_INIT_API(major,minor, device);
- assert(0 && "Unimplemented")
+
+ assert(0 && "Unimplemented");
return hipSuccess;
}
@@ -362,7 +344,7 @@ hipError_t hipDeviceGetName(char *name,int len, hipDevice_t device) {
HIP_INIT_API((void*)name,len, device);
- assert(0 && "Unimplemented")
+ assert(0 && "Unimplemented");
return hipSuccess;
}
@@ -371,7 +353,13 @@ hipError_t hipDeviceGetPCIBusId (char *pciBusId,int len, int device) {
HIP_INIT_API((void*)pciBusId, len, device);
- assert(0 && "Unimplemented")
+ assert(0 && "Unimplemented");
return hipSuccess;
-}
\ No newline at end of file
+}
+
+hipError_t hipDeviceSynchronize(void)
+{
+ // FIXME: should wait on all streams
+ return hipSuccess;
+}
diff --git a/api/hip/hip_error.cpp b/api/hip/hip_error.cpp
new file mode 100644
index 0000000000..2a8785c375
--- /dev/null
+++ b/api/hip/hip_error.cpp
@@ -0,0 +1,48 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include
+
+#include "hip_internal.hpp"
+
+hipError_t hipGetLastError()
+{
+ HIP_INIT_API();
+ return hipErrorUnknown;
+}
+
+hipError_t hipPeekAtLastError()
+{
+ HIP_INIT_API();
+ return hipErrorUnknown;
+}
+
+const char *hipGetErrorName(hipError_t hip_error)
+{
+ return "";
+}
+
+const char *hipGetErrorString(hipError_t hip_error)
+{
+ return "";
+}
+
diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp
new file mode 100644
index 0000000000..117b28355e
--- /dev/null
+++ b/api/hip/hip_event.cpp
@@ -0,0 +1,67 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include
+
+#include "hip_internal.hpp"
+
+hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags)
+{
+ HIP_INIT_API(event, flags);
+
+ return hipErrorUnknown;
+}
+
+hipError_t hipEventCreate(hipEvent_t* event)
+{
+ HIP_INIT_API(event);
+
+ return hipErrorUnknown;
+}
+
+hipError_t hipEventDestroy(hipEvent_t event)
+{
+ HIP_INIT_API(event);
+
+ return hipErrorUnknown;
+}
+
+hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
+{
+ HIP_INIT_API(ms, start, stop);
+
+ return hipErrorUnknown;
+}
+
+hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
+{
+ HIP_INIT_API(event, stream);
+
+ return hipErrorUnknown;
+}
+
+hipError_t hipEventSynchronize(hipEvent_t event)
+{
+ HIP_INIT_API(event);
+
+ return hipErrorUnknown;
+}
diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in
new file mode 100644
index 0000000000..6b4793ea94
--- /dev/null
+++ b/api/hip/hip_hcc.def.in
@@ -0,0 +1,129 @@
+EXPORTS
+hipChooseDevice
+hipCtxCreate
+hipCtxDestroy
+hipCtxDisablePeerAccess
+hipCtxEnablePeerAccess
+hipCtxGetApiVersion
+hipCtxGetCacheConfig
+hipCtxGetCurrent
+hipCtxGetDevice
+hipCtxGetFlags
+hipCtxGetSharedMemConfig
+hipCtxPopCurrent
+hipCtxPushCurrent
+hipCtxSetCacheConfig
+hipCtxSetCurrent
+hipCtxSetSharedMemConfig
+hipCtxSynchronize
+hipDeviceCanAccessPeer
+hipDeviceComputeCapability
+hipDeviceDisablePeerAccess
+hipDeviceEnablePeerAccess
+hipDeviceGet
+hipDeviceGetAttribute
+hipDeviceGetByPCIBusId
+hipDeviceGetCacheConfig
+hipDeviceGetLimit
+hipDeviceGetName
+hipDeviceGetPCIBusId
+hipDeviceGetSharedMemConfig
+hipDevicePrimaryCtxGetState
+hipDevicePrimaryCtxRelease
+hipDevicePrimaryCtxReset
+hipDevicePrimaryCtxRetain
+hipDevicePrimaryCtxSetFlags
+hipDeviceReset
+hipDeviceSetCacheConfig
+hipDeviceSetSharedMemConfig
+hipDeviceSynchronize
+hipDeviceTotalMem
+hipDriverGetVersion
+hipEventCreate
+hipEventCreateWithFlags
+hipEventDestroy
+hipEventElapsedTime
+hipEventQuery
+hipEventRecord
+hipEventSynchronize
+hipFree
+hipFreeArray
+hipFreeHost
+hipFuncSetCacheConfig
+hipGetDevice
+hipGetDeviceCount
+hipGetDeviceProperties
+hipGetErrorName
+hipGetErrorString
+hipGetLastError
+hipHostAlloc
+hipHostFree
+hipHostGetDevicePointer
+hipHostGetFlags
+hipHostMalloc
+hipHostRegister
+hipHostUnregister
+hipInit
+hipIpcCloseMemHandle
+hipIpcGetMemHandle
+hipIpcOpenMemHandle
+hipMalloc
+hipMalloc3DArray
+hipMallocArray
+hipMallocHost
+hipMallocPitch
+hipMemcpy
+hipMemcpy2D
+hipMemcpy2DAsync
+hipMemcpy2DToArray
+hipMemcpy3D
+hipMemcpyAsync
+hipMemcpyDtoD
+hipMemcpyDtoDAsync
+hipMemcpyDtoH
+hipMemcpyDtoHAsync
+hipMemcpyFromSymbol
+hipMemcpyFromSymbolAsync
+hipMemcpyHtoD
+hipMemcpyHtoDAsync
+hipMemcpyPeer
+hipMemcpyPeerAsync
+hipMemcpyToArray
+hipMemcpyToSymbol
+hipMemcpyToSymbolAsync
+hipMemGetAddressRange
+hipMemGetInfo
+hipMemPtrGetInfo
+hipMemset
+hipMemset2D
+hipMemsetAsync
+hipMemsetD8
+hipModuleGetFunction
+hipModuleGetGlobal
+hipModuleLaunchKernel
+hipModuleLoad
+hipModuleLoadData
+hipModuleLoadDataEx
+hipModuleUnload
+hipPeekAtLastError
+hipPointerGetAttributes
+hipProfilerStart
+hipProfilerStop
+hipRuntimeGetVersion
+hipSetDevice
+hipSetDeviceFlags
+hipStreamAddCallback
+hipStreamCreate
+hipStreamCreateWithFlags
+hipStreamDestroy
+hipStreamGetFlags
+hipStreamQuery
+hipStreamSynchronize
+hipStreamWaitEvent
+__cudaRegisterFatBinary
+__cudaRegisterFunction
+__cudaRegisterVariable
+__cudaUnregisterFatBinary
+cudaConfigureCall
+cudaSetupArgument
+cudaLaunch
diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in
new file mode 100644
index 0000000000..e4025606bc
--- /dev/null
+++ b/api/hip/hip_hcc.map.in
@@ -0,0 +1,133 @@
+HIP_1.0 {
+global:
+ hipChooseDevice;
+ hipCtxCreate;
+ hipCtxDestroy;
+ hipCtxDisablePeerAccess;
+ hipCtxEnablePeerAccess;
+ hipCtxGetApiVersion;
+ hipCtxGetCacheConfig;
+ hipCtxGetCurrent;
+ hipCtxGetDevice;
+ hipCtxGetFlags;
+ hipCtxGetSharedMemConfig;
+ hipCtxPopCurrent;
+ hipCtxPushCurrent;
+ hipCtxSetCacheConfig;
+ hipCtxSetCurrent;
+ hipCtxSetSharedMemConfig;
+ hipCtxSynchronize;
+ hipDeviceCanAccessPeer;
+ hipDeviceComputeCapability;
+ hipDeviceDisablePeerAccess;
+ hipDeviceEnablePeerAccess;
+ hipDeviceGet;
+ hipDeviceGetAttribute;
+ hipDeviceGetByPCIBusId;
+ hipDeviceGetCacheConfig;
+ hipDeviceGetLimit;
+ hipDeviceGetName;
+ hipDeviceGetPCIBusId;
+ hipDeviceGetSharedMemConfig;
+ hipDevicePrimaryCtxGetState;
+ hipDevicePrimaryCtxRelease;
+ hipDevicePrimaryCtxReset;
+ hipDevicePrimaryCtxRetain;
+ hipDevicePrimaryCtxSetFlags;
+ hipDeviceReset;
+ hipDeviceSetCacheConfig;
+ hipDeviceSetSharedMemConfig;
+ hipDeviceSynchronize;
+ hipDeviceTotalMem;
+ hipDriverGetVersion;
+ hipEventCreate;
+ hipEventCreateWithFlags;
+ hipEventDestroy;
+ hipEventElapsedTime;
+ hipEventQuery;
+ hipEventRecord;
+ hipEventSynchronize;
+ hipFree;
+ hipFreeArray;
+ hipFreeHost;
+ hipFuncSetCacheConfig;
+ hipGetDevice;
+ hipGetDeviceCount;
+ hipGetDeviceProperties;
+ hipGetErrorName;
+ hipGetErrorString;
+ hipGetLastError;
+ hipHostAlloc;
+ hipHostFree;
+ hipHostGetDevicePointer;
+ hipHostGetFlags;
+ hipHostMalloc;
+ hipHostRegister;
+ hipHostUnregister;
+ hipInit;
+ hipIpcCloseMemHandle;
+ hipIpcGetMemHandle;
+ hipIpcOpenMemHandle;
+ hipMalloc;
+ hipMalloc3DArray;
+ hipMallocArray;
+ hipMallocHost;
+ hipMallocPitch;
+ hipMemcpy;
+ hipMemcpy2D;
+ hipMemcpy2DAsync;
+ hipMemcpy2DToArray;
+ hipMemcpy3D;
+ hipMemcpyAsync;
+ hipMemcpyDtoD;
+ hipMemcpyDtoDAsync;
+ hipMemcpyDtoH;
+ hipMemcpyDtoHAsync;
+ hipMemcpyFromSymbol;
+ hipMemcpyFromSymbolAsync;
+ hipMemcpyHtoD;
+ hipMemcpyHtoDAsync;
+ hipMemcpyPeer;
+ hipMemcpyPeerAsync;
+ hipMemcpyToArray;
+ hipMemcpyToSymbol;
+ hipMemcpyToSymbolAsync;
+ hipMemGetAddressRange;
+ hipMemGetInfo;
+ hipMemPtrGetInfo;
+ hipMemset;
+ hipMemset2D;
+ hipMemsetAsync;
+ hipMemsetD8;
+ hipModuleGetFunction;
+ hipModuleGetGlobal;
+ hipModuleLaunchKernel;
+ hipModuleLoad;
+ hipModuleLoadData;
+ hipModuleLoadDataEx;
+ hipModuleUnload;
+ hipPeekAtLastError;
+ hipPointerGetAttributes;
+ hipProfilerStart;
+ hipProfilerStop;
+ hipRuntimeGetVersion;
+ hipSetDevice;
+ hipSetDeviceFlags;
+ hipStreamAddCallback;
+ hipStreamCreate;
+ hipStreamCreateWithFlags;
+ hipStreamDestroy;
+ hipStreamGetFlags;
+ hipStreamQuery;
+ hipStreamSynchronize;
+ hipStreamWaitEvent;
+ __cudaRegisterFatBinary;
+ __cudaRegisterFunction;
+ __cudaRegisterVariable;
+ __cudaUnregisterFatBinary;
+ cudaConfigureCall;
+ cudaSetupArgument;
+ cudaLaunch;
+local:
+ *;
+};
diff --git a/api/hip/hip_hcc.rc b/api/hip/hip_hcc.rc
new file mode 100644
index 0000000000..009dc30c18
--- /dev/null
+++ b/api/hip/hip_hcc.rc
@@ -0,0 +1,75 @@
+#define STR(__macro__) #__macro__
+#define XSTR(__macro__) STR(__macro__)
+
+#if defined(_DEBUG)
+#define DEBUG_ONLY(x) x
+#else
+#define DEBUG_ONLY(x)
+#endif
+
+#define VERSION_PREFIX_MAJOR 2
+#define VERSION_PREFIX_MINOR 0
+
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#include "winresrc.h"
+#include "utils/versions.hpp"
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// English (U.S.) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+#ifdef _WIN32
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+#pragma code_page(1252)
+#endif //_WIN32
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER
+ PRODUCTVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER
+ FILEFLAGSMASK 0x3fL
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x40004L
+ FILETYPE 0x2L
+ FILESUBTYPE 0x0L
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904b0"
+ BEGIN
+ VALUE "Comments", " \0"
+ VALUE "CompanyName", "Advanced Micro Devices Inc.\0"
+ VALUE "FileDescription", AMD_PLATFORM_NAME " OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " Runtime\0"
+ VALUE "FileVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER)
+ VALUE "InternalName", "OpenCL"
+ VALUE "LegalCopyright", "Copyright (C) 2011 Advanced Micro Devices Inc.\0"
+ VALUE "OriginalFilename", "OpenCL.dll"
+ VALUE "ProductName", "OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " " AMD_PLATFORM_INFO "\0"
+ VALUE "ProductVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER)
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x409, 1200
+ END
+END
+
+#endif // English (U.S.) resources
+/////////////////////////////////////////////////////////////////////////////
diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp
index 7e7b27c143..b1d906d870 100644
--- a/api/hip/hip_internal.hpp
+++ b/api/hip/hip_internal.hpp
@@ -36,8 +36,6 @@ THE SOFTWARE.
#define HIP_INIT_API(...) \
HIP_INIT()
-extern cl_device_id* g_deviceArray;
-extern unsigned g_deviceCnt;
-extern thread_local cl_context g_currentCtx;
+extern amd::Context* g_context;
#endif // HIP_SRC_HIP_INTERNAL_H
diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp
index 6edbe068fe..2dba003ba6 100644
--- a/api/hip/hip_memory.cpp
+++ b/api/hip/hip_memory.cpp
@@ -26,97 +26,94 @@ THE SOFTWARE.
hipError_t hipMalloc(void** ptr, size_t sizeBytes)
{
- HIP_INIT_API(ptr, sizeBytes);
-
- amd::Context* context = as_amd(g_currentCtx);
-
- if (sizeBytes == 0) {
- *ptr = nullptr;
- return hipSuccess;
- }
- else if (!is_valid(context) || !ptr) {
- return hipErrorInvalidValue;
- }
-
- auto deviceHandle = as_amd(g_deviceArray[0]);
- if ((deviceHandle->info().maxMemAllocSize_ < size)) {
- return hipErrorOutOfMemory;
- }
-
- amd::Memory* mem = new (*context) amd::Buffer(*context, 0, sizeBytes);
- if (!mem) {
- return hipErrorOutOfMemory;
- }
-
- if (!mem->create(nullptr)) {
- return hipErrorMemoryAllocation;
- }
-
- *ptr = reinterpret_cast(as_cl(mem));
+ HIP_INIT_API(ptr, sizeBytes);
+ if (sizeBytes == 0) {
+ *ptr = nullptr;
return hipSuccess;
+ }
+ else if (!ptr) {
+ return hipErrorInvalidValue;
+ }
+
+ if (g_context->devices()[0]->info().maxMemAllocSize_ < sizeBytes) {
+ return hipErrorOutOfMemory;
+ }
+
+ amd::Memory* mem = new (*g_context) amd::Buffer(*g_context, 0, sizeBytes);
+ if (!mem) {
+ return hipErrorOutOfMemory;
+ }
+
+ if (!mem->create(nullptr)) {
+ return hipErrorMemoryAllocation;
+ }
+
+ *ptr = reinterpret_cast(as_cl(mem));
+
+ return hipSuccess;
}
hipError_t hipFree(void* ptr)
{
- if (!is_valid(reinterpret_cast(ptr))) {
- return hipErrorInvalidValue;
- }
- as_amd(reinterpret_cast(ptr))->release();
- return hipSuccess;
+ if (!is_valid(reinterpret_cast(ptr))) {
+ return hipErrorInvalidValue;
+ }
+ as_amd(reinterpret_cast(ptr))->release();
+ return hipSuccess;
}
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind)
{
- HIP_INIT_API(dst, src, sizeBytes, kind);
+ HIP_INIT_API(dst, src, sizeBytes, kind);
- amd::Context* context = as_amd(g_currentCtx);
- amd::Device* device = context->devices()[0];
+ amd::Device* device = g_context->devices()[0];
- // FIXME : Do we create a queue here or create at init and just reuse
- amd::HostQueue* queue = new amd::HostQueue(*context, *device, 0,
- amd::CommandQueue::RealTimeDisabled,
- amd::CommandQueue::Priority::Normal);
- if (!queue) {
- return hipErrorOutOfMemory;
- }
+ amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0,
+ amd::CommandQueue::RealTimeDisabled,
+ amd::CommandQueue::Priority::Normal);
+ if (!queue) {
+ return hipErrorOutOfMemory;
+ }
- amd::Buffer* srcBuffer = as_amd(reinterpret_cast(const_cast(src)))->asBuffer();
- amd::Buffer* dstBuffer = as_amd(reinterpret_cast(const_cast(dst)))->asBuffer();
+ amd::Buffer* srcBuffer = as_amd(reinterpret_cast(const_cast(src)))->asBuffer();
+ amd::Buffer* dstBuffer = as_amd(reinterpret_cast(dst))->asBuffer();
- amd::Command* command;
- amd::Command::EventWaitList waitList;
+ amd::Command* command;
+ amd::Command::EventWaitList waitList;
- switch (kind) {
- case hipMemcpyDeviceToHost:
+ switch (kind) {
+ case hipMemcpyDeviceToHost:
command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList,
- srcBuffer, 0, sizeBytes, dst);
+ *srcBuffer, 0, sizeBytes, dst);
break;
- case hipMemcpyHostToDevice:
+ case hipMemcpyHostToDevice:
command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList,
- dstBuffer, 0, sizeBytes, src);
+ *dstBuffer, 0, sizeBytes, src);
break;
- default:
- assert(!"Shouldn't reach here");
+ default:
+ assert(!"Shouldn't reach here");
break;
- }
- if (!command) {
- return hipErrorOutOfMemory;
- }
+ }
+ if (!command) {
+ return hipErrorOutOfMemory;
+ }
- // Make sure we have memory for the command execution
- if (CL_SUCCESS != command->validateMemory()) {
- delete command;
- return hipErrorMemoryAllocation;
- }
+// FIXME: virtualize MemoryCommand::validateMemory()
+#if 0
+ // Make sure we have memory for the command execution
+ if (CL_SUCCESS != command->validateMemory()) {
+ delete command;
+ return hipErrorMemoryAllocation;
+ }
+#endif
+ command->enqueue();
+ command->awaitCompletion();
+ command->release();
- command->enqueue();
- command->awaitCompletion();
- command->release();
+ queue->release();
- queue->release();
-
- return hipSuccess;
+ return hipSuccess;
}
diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp
new file mode 100644
index 0000000000..dde0c4e790
--- /dev/null
+++ b/api/hip/hip_module.cpp
@@ -0,0 +1,144 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include
+#include
+
+#include "hip_internal.hpp"
+#include "platform/program.hpp"
+
+static uint64_t ElfSize(const void *emi)
+{
+ const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi;
+ const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff);
+
+ uint64_t max_offset = ehdr->e_shoff;
+ uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum;
+
+ for (uint16_t i=0; i < ehdr->e_shnum; ++i){
+ uint64_t cur_offset = static_cast(shdr[i].sh_offset);
+ if (max_offset < cur_offset) {
+ max_offset = cur_offset;
+ total_size = max_offset;
+ if(SHT_NOBITS != shdr[i].sh_type) {
+ total_size += static_cast(shdr[i].sh_size);
+ }
+ }
+ }
+ return total_size;
+}
+
+hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
+{
+ HIP_INIT_API(module, image);
+
+ amd::Program* program = new amd::Program(*g_context);
+ if (program == NULL) {
+ return hipErrorOutOfMemory;
+ }
+
+ if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, ElfSize(image)) ||
+ CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) {
+ return hipErrorUnknown;
+ }
+
+ *module = reinterpret_cast(as_cl(program));
+
+ return hipSuccess;
+}
+
+hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name)
+{
+ HIP_INIT_API(hfunc, hmod, name);
+
+ amd::Program* program = as_amd(reinterpret_cast(hmod));
+
+ const amd::Symbol* symbol = program->findSymbol(name);
+ if (!symbol) {
+ return hipErrorNotFound;
+ }
+
+ amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name);
+ if (!kernel) {
+ return hipErrorOutOfMemory;
+ }
+
+ *hfunc = reinterpret_cast(as_cl(kernel));
+
+ return hipSuccess;
+}
+
+hipError_t hipModuleLaunchKernel(hipFunction_t f,
+ uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
+ uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
+ uint32_t sharedMemBytes, hipStream_t hStream,
+ void **kernelParams, void **extra)
+{
+ HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ,
+ blockDimX, blockDimY, blockDimZ,
+ sharedMemBytes, hStream,
+ kernelParams, extra);
+
+ amd::Kernel* kernel = as_amd(reinterpret_cast(f));
+ amd::Device* device = g_context->devices()[0];
+
+ amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0,
+ amd::CommandQueue::RealTimeDisabled,
+ amd::CommandQueue::Priority::Normal);
+ if (!queue) {
+ return hipErrorOutOfMemory;
+ }
+
+ size_t globalWorkOffset[3] = {0};
+ size_t globalWorkSize[3] = { gridDimX, gridDimY, gridDimZ };
+ size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ };
+ amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize);
+ amd::Command::EventWaitList waitList;
+
+ assert(!kernelParams && extra && "check this code");
+ const amd::KernelSignature& signature = kernel->signature();
+ for (size_t i = 0; i < signature.numParameters(); ++i) {
+ const amd::KernelParameterDescriptor& desc = signature.at(i);
+ kernel->parameters().set(i, desc.size_, reinterpret_cast(extra[1]) + desc.offset_);
+ }
+
+ amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(*queue, waitList, *kernel, ndrange);
+ if (!command) {
+ return hipErrorOutOfMemory;
+ }
+
+ // Make sure we have memory for the command execution
+ if (CL_SUCCESS != command->validateMemory()) {
+ delete command;
+ return hipErrorMemoryAllocation;
+ }
+
+ command->enqueue();
+ command->awaitCompletion();
+ command->release();
+
+ queue->release();
+
+ return hipSuccess;
+}
+
+
diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp
new file mode 100644
index 0000000000..aed3342483
--- /dev/null
+++ b/api/hip/hip_platform.cpp
@@ -0,0 +1,193 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include
+
+#include "hip_internal.hpp"
+#include "platform/program.hpp"
+#include "platform/runtime.hpp"
+
+constexpr unsigned __cudaFatMAGIC = 0x1ee55a01;
+constexpr unsigned __cudaFatMAGIC2 = 0x466243b1;
+constexpr unsigned __cudaFatMAGIC3 = 0xba55ed50;
+
+struct __CudaFatBinaryWrapper {
+ unsigned int magic;
+ unsigned int version;
+ void* binary;
+ void* dummy1;
+};
+
+struct __CudaFatBinaryHeader {
+ unsigned int magic;
+ unsigned short version;
+ unsigned short headerSize;
+ unsigned long long int fatSize;
+};
+
+struct __CudaPartHeader{
+ unsigned short type;
+ unsigned short dummy1;
+ unsigned int headerSize;
+ unsigned long long int partSize;
+ unsigned long long int dummy2;
+ unsigned int dummy3;
+ unsigned int subarch;
+};
+
+extern "C" hipModule_t __cudaRegisterFatBinary(void* bundle)
+{
+ if (!amd::Runtime::initialized()) { // FIXME: fix initialization
+ hipInit(0);
+ }
+
+ amd::Program* program = new amd::Program(*g_context);
+ if (!program) return nullptr;
+
+ struct __CudaFatBinaryWrapper* fbwrapper = (struct __CudaFatBinaryWrapper*)bundle;
+ if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) {
+ return nullptr;
+ }
+ struct __CudaFatBinaryHeader* fbheader = (struct __CudaFatBinaryHeader*)fbwrapper->binary;
+ if (fbheader->magic != __cudaFatMAGIC3 || fbheader->version != 1) {
+ return nullptr;
+ }
+ struct __CudaPartHeader* pheader = (struct __CudaPartHeader*)(
+ (uintptr_t)fbheader + fbheader->headerSize);
+ struct __CudaPartHeader* end = (struct __CudaPartHeader*)(
+ (uintptr_t)pheader + fbheader->fatSize);
+
+ while (pheader < end) {
+ if (true/*pheader->subarch == match a device in the context*/) {
+ void *image = (void*)((uintptr_t)pheader + pheader->headerSize);
+ size_t size = pheader->partSize;
+ if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, size) ||
+ CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) {
+ return nullptr;
+ }
+ break;
+ }
+ pheader = (struct __CudaPartHeader*)(
+ (uintptr_t)pheader + pheader->headerSize + pheader->partSize);
+ }
+
+ return reinterpret_cast(as_cl(program));
+}
+
+std::map g_functions;
+
+
+extern "C" void __cudaRegisterFunction(
+ hipModule_t module,
+ const void* hostFunction,
+ char* deviceFunction,
+ const char* deviceName,
+ unsigned int threadLimit,
+ uint3* tid,
+ uint3* bid,
+ dim3* blockDim,
+ dim3* gridDim,
+ int* wSize)
+{
+ amd::Program* program = as_amd(reinterpret_cast(module));
+
+ const amd::Symbol* symbol = program->findSymbol(deviceName);
+ if (!symbol) return;
+
+ amd::Kernel* kernel = new amd::Kernel(*program, *symbol, deviceName);
+ if (!kernel) return;
+
+ // FIXME: not thread safe
+ g_functions.insert(std::make_pair(hostFunction, reinterpret_cast(as_cl(kernel))));
+}
+
+extern "C" void __cudaRegisterVar(
+ hipModule_t module,
+ char* hostVar,
+ char* deviceVar,
+ const char* deviceName,
+ int ext,
+ int size,
+ int constant,
+ int global)
+{
+}
+
+extern "C" void __cudaUnregisterFatBinary(
+ hipModule_t module
+)
+{
+}
+
+dim3 g_gridDim; // FIXME: place in execution stack
+dim3 g_blockDim; // FIXME: place in execution stack
+size_t g_sharedMem; // FIXME: place in execution stack
+hipStream_t g_stream; // FIXME: place in execution stack
+
+extern "C" hipError_t cudaConfigureCall(
+ dim3 gridDim,
+ dim3 blockDim,
+ size_t sharedMem,
+ hipStream_t stream)
+{
+ // FIXME: should push and new entry on the execution stack
+
+ g_gridDim = gridDim;
+ g_blockDim = blockDim;
+ g_sharedMem = sharedMem;
+ g_stream = stream;
+
+ return hipSuccess;
+}
+
+char* g_arguments[1024]; // FIXME: needs to grow
+
+extern "C" hipError_t cudaSetupArgument(
+ const void *arg,
+ size_t size,
+ size_t offset)
+{
+ // FIXME: should modify the top of the execution stack
+
+ ::memcpy(g_arguments + offset, arg, size);
+ return hipSuccess;
+}
+
+extern "C" hipError_t cudaLaunch(const void *hostFunction)
+{
+ std::map::iterator it;
+ if ((it = g_functions.find(hostFunction)) == g_functions.end())
+ return hipErrorUnknown;
+
+ // FIXME: should pop an entry from the execution stack
+
+ void *extra[] = {
+ HIP_LAUNCH_PARAM_BUFFER_POINTER, g_arguments,
+ HIP_LAUNCH_PARAM_BUFFER_SIZE, 0 /* FIXME: not needed, but should be correct*/,
+ HIP_LAUNCH_PARAM_END
+ };
+
+ return hipModuleLaunchKernel(it->second,
+ g_gridDim.x, g_gridDim.y, g_gridDim.z,
+ g_blockDim.x, g_blockDim.y, g_blockDim.z,
+ g_sharedMem, g_stream, nullptr, extra);
+}