From 73cb309e2bd65be2a056c6c8dcda9f31381c953a Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 1 Mar 2018 22:57:20 -0500 Subject: [PATCH] P4 to Git Change 1521675 by lmoriche@lmoriche_opencl_dev2 on 2018/03/01 22:50:06 SWDEV-145570 - [HIP] - Hip Rearchitecture - Add initial prototype implementation Affected files ... ... //depot/stg/opencl/drivers/opencl/api/Makefile#11 edit ... //depot/stg/opencl/drivers/opencl/api/hip/fixme.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_error.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.rc#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#1 add --- api/hip/fixme.cpp | 33 +++++++ api/hip/hip_context.cpp | 58 ++++++++++++ api/hip/hip_device.cpp | 140 +++++++++++++--------------- api/hip/hip_error.cpp | 48 ++++++++++ api/hip/hip_event.cpp | 67 ++++++++++++++ api/hip/hip_hcc.def.in | 129 ++++++++++++++++++++++++++ api/hip/hip_hcc.map.in | 133 +++++++++++++++++++++++++++ api/hip/hip_hcc.rc | 75 +++++++++++++++ api/hip/hip_internal.hpp | 4 +- api/hip/hip_memory.cpp | 133 +++++++++++++-------------- api/hip/hip_module.cpp | 144 +++++++++++++++++++++++++++++ api/hip/hip_platform.cpp | 193 +++++++++++++++++++++++++++++++++++++++ 12 files changed, 1010 insertions(+), 147 deletions(-) create mode 100644 api/hip/fixme.cpp create mode 100644 api/hip/hip_context.cpp create mode 100644 api/hip/hip_error.cpp create mode 100644 api/hip/hip_event.cpp create mode 100644 api/hip/hip_hcc.def.in create mode 100644 api/hip/hip_hcc.map.in create mode 100644 api/hip/hip_hcc.rc create mode 100644 api/hip/hip_module.cpp create mode 100644 api/hip/hip_platform.cpp diff --git a/api/hip/fixme.cpp b/api/hip/fixme.cpp new file mode 100644 index 0000000000..3d062e2dbc --- /dev/null +++ b/api/hip/fixme.cpp @@ -0,0 +1,33 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "cl_common.hpp" + +KHRicdVendorDispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0}; +amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_}; + +RUNTIME_ENTRY(cl_int, clGetDeviceIDs, + (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + return CL_SUCCESS; +} +RUNTIME_EXIT diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp new file mode 100644 index 0000000000..0e6ff2116a --- /dev/null +++ b/api/hip/hip_context.cpp @@ -0,0 +1,58 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" +#include "platform/runtime.hpp" + + +amd::Context* g_context = nullptr; + +hipError_t hipInit(unsigned int flags) +{ + HIP_INIT_API(flags); + + if (!amd::Runtime::initialized()) { + amd::Runtime::init(); + } + + // FIXME: move the global VDI context to hipInit. + g_context = new amd::Context( + amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false), amd::Context::Info()); + if (!g_context) return hipErrorOutOfMemory; + + if (g_context && CL_SUCCESS != g_context->create(nullptr)) { + g_context->release(); + return hipErrorUnknown; + } + + return hipSuccess; +} + +hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) +{ + HIP_INIT_API(ctx, flags, device); + + return hipSuccess; +} + diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 557ee56643..7296eabb17 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -24,9 +24,6 @@ THE SOFTWARE. #include "hip_internal.hpp" -cl_device_id* g_deviceArray = NULL; -unsigned g_deviceCnt = 0; - hipError_t hipGetDevice(int *deviceId) { HIP_INIT_API(deviceId); @@ -54,7 +51,7 @@ hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) return hipSuccess; }; -hipError_t hipDeviceCount(int* count) { +hipError_t hipGetDeviceCount(int* count) { HIP_INIT_API(count); @@ -63,9 +60,7 @@ hipError_t hipDeviceCount(int* count) { } // Get all available devices - if (!amd::Device::getDeviceIDs(CL_DEVICE_TYPE_GPU, 0, NULL, count, false)) { - return hipErrorNoDevice; - } + *count = g_context->devices().size(); return hipSuccess; } @@ -74,93 +69,95 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) HIP_INIT_API(pi, attr, device); - if(pi == NULL) { + if (pi == nullptr) { return hipErrorInvalidValue; } - auto deviceHandle = as_amd(g_deviceArray[hipDevice]); + //if (unsigned(device) >= g_context->devices().size()) { + // return hipErrorInvalidDevice; + //} + //auto* deviceHandle = g_context->devices()[device]; - if (deviceHandle == NULL) { - return hipErrorInvalidDevice; - } - - hipDeviceProp_t *prop = deviceHandle->_props; + //FIXME: should we cache the props, or just select from deviceHandle->info_? + hipDeviceProp_t prop = {0}; + hipError_t err = hipGetDeviceProperties(&prop, device); + if (err != hipSuccess) return err; switch (attr) { case hipDeviceAttributeMaxThreadsPerBlock: - *pi = prop->maxThreadsPerBlock; + *pi = prop.maxThreadsPerBlock; break; case hipDeviceAttributeMaxBlockDimX: - *pi = prop->maxThreadsDim[0]; + *pi = prop.maxThreadsDim[0]; break; case hipDeviceAttributeMaxBlockDimY: - *pi = prop->maxThreadsDim[1]; + *pi = prop.maxThreadsDim[1]; break; case hipDeviceAttributeMaxBlockDimZ: - *pi = prop->maxThreadsDim[2]; + *pi = prop.maxThreadsDim[2]; break; case hipDeviceAttributeMaxGridDimX: - *pi = prop->maxGridSize[0]; + *pi = prop.maxGridSize[0]; break; case hipDeviceAttributeMaxGridDimY: - *pi = prop->maxGridSize[1]; + *pi = prop.maxGridSize[1]; break; case hipDeviceAttributeMaxGridDimZ: - *pi = prop->maxGridSize[2]; + *pi = prop.maxGridSize[2]; break; case hipDeviceAttributeMaxSharedMemoryPerBlock: - *pi = prop->sharedMemPerBlock; + *pi = prop.sharedMemPerBlock; break; case hipDeviceAttributeTotalConstantMemory: - *pi = prop->totalConstMem; + *pi = prop.totalConstMem; break; case hipDeviceAttributeWarpSize: - *pi = prop->warpSize; + *pi = prop.warpSize; break; case hipDeviceAttributeMaxRegistersPerBlock: - *pi = prop->regsPerBlock; + *pi = prop.regsPerBlock; break; case hipDeviceAttributeClockRate: - *pi = prop->clockRate; + *pi = prop.clockRate; break; case hipDeviceAttributeMemoryClockRate: - *pi = prop->memoryClockRate; + *pi = prop.memoryClockRate; break; case hipDeviceAttributeMemoryBusWidth: - *pi = prop->memoryBusWidth; + *pi = prop.memoryBusWidth; break; case hipDeviceAttributeMultiprocessorCount: - *pi = prop->multiProcessorCount; + *pi = prop.multiProcessorCount; break; case hipDeviceAttributeComputeMode: - *pi = prop->computeMode; + *pi = prop.computeMode; break; case hipDeviceAttributeL2CacheSize: - *pi = prop->l2CacheSize; + *pi = prop.l2CacheSize; break; case hipDeviceAttributeMaxThreadsPerMultiProcessor: - *pi = prop->maxThreadsPerMultiProcessor; + *pi = prop.maxThreadsPerMultiProcessor; break; case hipDeviceAttributeComputeCapabilityMajor: - *pi = prop->major; + *pi = prop.major; break; case hipDeviceAttributeComputeCapabilityMinor: - *pi = prop->minor; + *pi = prop.minor; break; case hipDeviceAttributePciBusId: - *pi = prop->pciBusID; + *pi = prop.pciBusID; break; case hipDeviceAttributeConcurrentKernels: - *pi = prop->concurrentKernels; + *pi = prop.concurrentKernels; break; case hipDeviceAttributePciDeviceId: - *pi = prop->pciDeviceID; + *pi = prop.pciDeviceID; break; case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: - *pi = prop->maxSharedMemoryPerMultiProcessor; + *pi = prop.maxSharedMemoryPerMultiProcessor; break; case hipDeviceAttributeIsMultiGpuBoard: - *pi = prop->isMultiGpuBoard; + *pi = prop.isMultiGpuBoard; break; default: return hipErrorInvalidValue; @@ -177,10 +174,10 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) { return hipErrorInvalidValue; } - auto deviceHandle = as_amd(g_deviceArray[device]); - if (deviceHandle == NULL) { + if (unsigned(device) >= g_context->devices().size()) { return hipErrorInvalidDevice; } + auto* deviceHandle = g_context->devices()[device]; hipDeviceProp_t deviceProps = {0}; @@ -226,11 +223,11 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) { deviceProps.arch.has3dGrid = 1; deviceProps.arch.hasDynamicParallelism = 0; deviceProps.concurrentKernels = 1; - deviceProps.pciDomainID = info.deviceTopology_.function; - deviceProps.pciBusID = info.deviceTopology_.bus; - deviceProps.pciDeviceID = info.deviceTopology_.device; + deviceProps.pciDomainID = info.deviceTopology_.pcie.function; + deviceProps.pciBusID = info.deviceTopology_.pcie.bus; + deviceProps.pciDeviceID = info.deviceTopology_.pcie.device; deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; - deviceProps.isMultiGpuBoard = info.; + //deviceProps.isMultiGpuBoard = info.; deviceProps.canMapHostMemory = 1; deviceProps.gcnArch = info.gfxipVersion_; @@ -254,32 +251,16 @@ hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig) { return hipErrorInvalidValue; } - *cacheConfig = 0; + *cacheConfig = hipFuncCache_t(); return hipSuccess; } -hipError_t hipGetDeviceProperties(hipDeviceProp_t* properties, int device) { - - HIP_INIT_API(properties, device); - if ((properties == NULL) || (device < 0) || (device >= g_deviceCnt)) { - return hipErrorInvalidDevice; - } - - auto * deviceHandle = as_amd(g_deviceArray[device]); - if (deviceHandle != NULL) { - *properties = deviceHandle->_props; - return hipSuccess; - } - - return hipErrorInvalidDevice; -} - hipError_t hipSetDeviceFlags(unsigned int flags) { HIP_INIT_API(flags); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; }; @@ -288,7 +269,7 @@ hipError_t hipDeviceGetLimit (size_t *pValue, hipLimit_t limit) { HIP_INIT_API(pValue, limit); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; } @@ -297,7 +278,7 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) HIP_INIT_API(cacheConfig); - assert(0 && "Not supported") + assert(0 && "Not supported"); return hipSuccess; } @@ -306,7 +287,7 @@ hipError_t hipDeviceSetSharedMemConfig (hipSharedMemConfig config) { HIP_INIT_API(config); - assert(0 && "Not Supported") + assert(0 && "Not Supported"); return hipSuccess; } @@ -315,17 +296,17 @@ hipError_t hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig) { HIP_INIT_API(pConfig); - assert(0 && "Not supported") + assert(0 && "Not supported"); return hipSuccess; } -hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { HIP_INIT_API(device, properties); - - assert(0 && "Unimplemented") + + assert(0 && "Unimplemented"); return hipSuccess; } @@ -335,7 +316,7 @@ hipError_t hipDeviceGetByPCIBusId (int* device, const char* pciBusId) { HIP_INIT_API(device,pciBusId); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; } @@ -345,7 +326,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) { HIP_INIT_API(bytes, device); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; } @@ -353,7 +334,8 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) { hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { HIP_INIT_API(major,minor, device); - assert(0 && "Unimplemented") + + assert(0 && "Unimplemented"); return hipSuccess; } @@ -362,7 +344,7 @@ hipError_t hipDeviceGetName(char *name,int len, hipDevice_t device) { HIP_INIT_API((void*)name,len, device); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; } @@ -371,7 +353,13 @@ hipError_t hipDeviceGetPCIBusId (char *pciBusId,int len, int device) { HIP_INIT_API((void*)pciBusId, len, device); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; -} \ No newline at end of file +} + +hipError_t hipDeviceSynchronize(void) +{ + // FIXME: should wait on all streams + return hipSuccess; +} diff --git a/api/hip/hip_error.cpp b/api/hip/hip_error.cpp new file mode 100644 index 0000000000..2a8785c375 --- /dev/null +++ b/api/hip/hip_error.cpp @@ -0,0 +1,48 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipGetLastError() +{ + HIP_INIT_API(); + return hipErrorUnknown; +} + +hipError_t hipPeekAtLastError() +{ + HIP_INIT_API(); + return hipErrorUnknown; +} + +const char *hipGetErrorName(hipError_t hip_error) +{ + return ""; +} + +const char *hipGetErrorString(hipError_t hip_error) +{ + return ""; +} + diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp new file mode 100644 index 0000000000..117b28355e --- /dev/null +++ b/api/hip/hip_event.cpp @@ -0,0 +1,67 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) +{ + HIP_INIT_API(event, flags); + + return hipErrorUnknown; +} + +hipError_t hipEventCreate(hipEvent_t* event) +{ + HIP_INIT_API(event); + + return hipErrorUnknown; +} + +hipError_t hipEventDestroy(hipEvent_t event) +{ + HIP_INIT_API(event); + + return hipErrorUnknown; +} + +hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) +{ + HIP_INIT_API(ms, start, stop); + + return hipErrorUnknown; +} + +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) +{ + HIP_INIT_API(event, stream); + + return hipErrorUnknown; +} + +hipError_t hipEventSynchronize(hipEvent_t event) +{ + HIP_INIT_API(event); + + return hipErrorUnknown; +} diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in new file mode 100644 index 0000000000..6b4793ea94 --- /dev/null +++ b/api/hip/hip_hcc.def.in @@ -0,0 +1,129 @@ +EXPORTS +hipChooseDevice +hipCtxCreate +hipCtxDestroy +hipCtxDisablePeerAccess +hipCtxEnablePeerAccess +hipCtxGetApiVersion +hipCtxGetCacheConfig +hipCtxGetCurrent +hipCtxGetDevice +hipCtxGetFlags +hipCtxGetSharedMemConfig +hipCtxPopCurrent +hipCtxPushCurrent +hipCtxSetCacheConfig +hipCtxSetCurrent +hipCtxSetSharedMemConfig +hipCtxSynchronize +hipDeviceCanAccessPeer +hipDeviceComputeCapability +hipDeviceDisablePeerAccess +hipDeviceEnablePeerAccess +hipDeviceGet +hipDeviceGetAttribute +hipDeviceGetByPCIBusId +hipDeviceGetCacheConfig +hipDeviceGetLimit +hipDeviceGetName +hipDeviceGetPCIBusId +hipDeviceGetSharedMemConfig +hipDevicePrimaryCtxGetState +hipDevicePrimaryCtxRelease +hipDevicePrimaryCtxReset +hipDevicePrimaryCtxRetain +hipDevicePrimaryCtxSetFlags +hipDeviceReset +hipDeviceSetCacheConfig +hipDeviceSetSharedMemConfig +hipDeviceSynchronize +hipDeviceTotalMem +hipDriverGetVersion +hipEventCreate +hipEventCreateWithFlags +hipEventDestroy +hipEventElapsedTime +hipEventQuery +hipEventRecord +hipEventSynchronize +hipFree +hipFreeArray +hipFreeHost +hipFuncSetCacheConfig +hipGetDevice +hipGetDeviceCount +hipGetDeviceProperties +hipGetErrorName +hipGetErrorString +hipGetLastError +hipHostAlloc +hipHostFree +hipHostGetDevicePointer +hipHostGetFlags +hipHostMalloc +hipHostRegister +hipHostUnregister +hipInit +hipIpcCloseMemHandle +hipIpcGetMemHandle +hipIpcOpenMemHandle +hipMalloc +hipMalloc3DArray +hipMallocArray +hipMallocHost +hipMallocPitch +hipMemcpy +hipMemcpy2D +hipMemcpy2DAsync +hipMemcpy2DToArray +hipMemcpy3D +hipMemcpyAsync +hipMemcpyDtoD +hipMemcpyDtoDAsync +hipMemcpyDtoH +hipMemcpyDtoHAsync +hipMemcpyFromSymbol +hipMemcpyFromSymbolAsync +hipMemcpyHtoD +hipMemcpyHtoDAsync +hipMemcpyPeer +hipMemcpyPeerAsync +hipMemcpyToArray +hipMemcpyToSymbol +hipMemcpyToSymbolAsync +hipMemGetAddressRange +hipMemGetInfo +hipMemPtrGetInfo +hipMemset +hipMemset2D +hipMemsetAsync +hipMemsetD8 +hipModuleGetFunction +hipModuleGetGlobal +hipModuleLaunchKernel +hipModuleLoad +hipModuleLoadData +hipModuleLoadDataEx +hipModuleUnload +hipPeekAtLastError +hipPointerGetAttributes +hipProfilerStart +hipProfilerStop +hipRuntimeGetVersion +hipSetDevice +hipSetDeviceFlags +hipStreamAddCallback +hipStreamCreate +hipStreamCreateWithFlags +hipStreamDestroy +hipStreamGetFlags +hipStreamQuery +hipStreamSynchronize +hipStreamWaitEvent +__cudaRegisterFatBinary +__cudaRegisterFunction +__cudaRegisterVariable +__cudaUnregisterFatBinary +cudaConfigureCall +cudaSetupArgument +cudaLaunch diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in new file mode 100644 index 0000000000..e4025606bc --- /dev/null +++ b/api/hip/hip_hcc.map.in @@ -0,0 +1,133 @@ +HIP_1.0 { +global: + hipChooseDevice; + hipCtxCreate; + hipCtxDestroy; + hipCtxDisablePeerAccess; + hipCtxEnablePeerAccess; + hipCtxGetApiVersion; + hipCtxGetCacheConfig; + hipCtxGetCurrent; + hipCtxGetDevice; + hipCtxGetFlags; + hipCtxGetSharedMemConfig; + hipCtxPopCurrent; + hipCtxPushCurrent; + hipCtxSetCacheConfig; + hipCtxSetCurrent; + hipCtxSetSharedMemConfig; + hipCtxSynchronize; + hipDeviceCanAccessPeer; + hipDeviceComputeCapability; + hipDeviceDisablePeerAccess; + hipDeviceEnablePeerAccess; + hipDeviceGet; + hipDeviceGetAttribute; + hipDeviceGetByPCIBusId; + hipDeviceGetCacheConfig; + hipDeviceGetLimit; + hipDeviceGetName; + hipDeviceGetPCIBusId; + hipDeviceGetSharedMemConfig; + hipDevicePrimaryCtxGetState; + hipDevicePrimaryCtxRelease; + hipDevicePrimaryCtxReset; + hipDevicePrimaryCtxRetain; + hipDevicePrimaryCtxSetFlags; + hipDeviceReset; + hipDeviceSetCacheConfig; + hipDeviceSetSharedMemConfig; + hipDeviceSynchronize; + hipDeviceTotalMem; + hipDriverGetVersion; + hipEventCreate; + hipEventCreateWithFlags; + hipEventDestroy; + hipEventElapsedTime; + hipEventQuery; + hipEventRecord; + hipEventSynchronize; + hipFree; + hipFreeArray; + hipFreeHost; + hipFuncSetCacheConfig; + hipGetDevice; + hipGetDeviceCount; + hipGetDeviceProperties; + hipGetErrorName; + hipGetErrorString; + hipGetLastError; + hipHostAlloc; + hipHostFree; + hipHostGetDevicePointer; + hipHostGetFlags; + hipHostMalloc; + hipHostRegister; + hipHostUnregister; + hipInit; + hipIpcCloseMemHandle; + hipIpcGetMemHandle; + hipIpcOpenMemHandle; + hipMalloc; + hipMalloc3DArray; + hipMallocArray; + hipMallocHost; + hipMallocPitch; + hipMemcpy; + hipMemcpy2D; + hipMemcpy2DAsync; + hipMemcpy2DToArray; + hipMemcpy3D; + hipMemcpyAsync; + hipMemcpyDtoD; + hipMemcpyDtoDAsync; + hipMemcpyDtoH; + hipMemcpyDtoHAsync; + hipMemcpyFromSymbol; + hipMemcpyFromSymbolAsync; + hipMemcpyHtoD; + hipMemcpyHtoDAsync; + hipMemcpyPeer; + hipMemcpyPeerAsync; + hipMemcpyToArray; + hipMemcpyToSymbol; + hipMemcpyToSymbolAsync; + hipMemGetAddressRange; + hipMemGetInfo; + hipMemPtrGetInfo; + hipMemset; + hipMemset2D; + hipMemsetAsync; + hipMemsetD8; + hipModuleGetFunction; + hipModuleGetGlobal; + hipModuleLaunchKernel; + hipModuleLoad; + hipModuleLoadData; + hipModuleLoadDataEx; + hipModuleUnload; + hipPeekAtLastError; + hipPointerGetAttributes; + hipProfilerStart; + hipProfilerStop; + hipRuntimeGetVersion; + hipSetDevice; + hipSetDeviceFlags; + hipStreamAddCallback; + hipStreamCreate; + hipStreamCreateWithFlags; + hipStreamDestroy; + hipStreamGetFlags; + hipStreamQuery; + hipStreamSynchronize; + hipStreamWaitEvent; + __cudaRegisterFatBinary; + __cudaRegisterFunction; + __cudaRegisterVariable; + __cudaUnregisterFatBinary; + cudaConfigureCall; + cudaSetupArgument; + cudaLaunch; +local: + *; +}; diff --git a/api/hip/hip_hcc.rc b/api/hip/hip_hcc.rc new file mode 100644 index 0000000000..009dc30c18 --- /dev/null +++ b/api/hip/hip_hcc.rc @@ -0,0 +1,75 @@ +#define STR(__macro__) #__macro__ +#define XSTR(__macro__) STR(__macro__) + +#if defined(_DEBUG) +#define DEBUG_ONLY(x) x +#else +#define DEBUG_ONLY(x) +#endif + +#define VERSION_PREFIX_MAJOR 2 +#define VERSION_PREFIX_MINOR 0 + + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. +// +#include "winresrc.h" +#include "utils/versions.hpp" + +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// English (U.S.) resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +#ifdef _WIN32 +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +#pragma code_page(1252) +#endif //_WIN32 + + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER + PRODUCTVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER + FILEFLAGSMASK 0x3fL +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x40004L + FILETYPE 0x2L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "Comments", " \0" + VALUE "CompanyName", "Advanced Micro Devices Inc.\0" + VALUE "FileDescription", AMD_PLATFORM_NAME " OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " Runtime\0" + VALUE "FileVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER) + VALUE "InternalName", "OpenCL" + VALUE "LegalCopyright", "Copyright (C) 2011 Advanced Micro Devices Inc.\0" + VALUE "OriginalFilename", "OpenCL.dll" + VALUE "ProductName", "OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " " AMD_PLATFORM_INFO "\0" + VALUE "ProductVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER) + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END + +#endif // English (U.S.) resources +///////////////////////////////////////////////////////////////////////////// diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 7e7b27c143..b1d906d870 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -36,8 +36,6 @@ THE SOFTWARE. #define HIP_INIT_API(...) \ HIP_INIT() -extern cl_device_id* g_deviceArray; -extern unsigned g_deviceCnt; -extern thread_local cl_context g_currentCtx; +extern amd::Context* g_context; #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 6edbe068fe..2dba003ba6 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -26,97 +26,94 @@ THE SOFTWARE. hipError_t hipMalloc(void** ptr, size_t sizeBytes) { - HIP_INIT_API(ptr, sizeBytes); - - amd::Context* context = as_amd(g_currentCtx); - - if (sizeBytes == 0) { - *ptr = nullptr; - return hipSuccess; - } - else if (!is_valid(context) || !ptr) { - return hipErrorInvalidValue; - } - - auto deviceHandle = as_amd(g_deviceArray[0]); - if ((deviceHandle->info().maxMemAllocSize_ < size)) { - return hipErrorOutOfMemory; - } - - amd::Memory* mem = new (*context) amd::Buffer(*context, 0, sizeBytes); - if (!mem) { - return hipErrorOutOfMemory; - } - - if (!mem->create(nullptr)) { - return hipErrorMemoryAllocation; - } - - *ptr = reinterpret_cast(as_cl(mem)); + HIP_INIT_API(ptr, sizeBytes); + if (sizeBytes == 0) { + *ptr = nullptr; return hipSuccess; + } + else if (!ptr) { + return hipErrorInvalidValue; + } + + if (g_context->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { + return hipErrorOutOfMemory; + } + + amd::Memory* mem = new (*g_context) amd::Buffer(*g_context, 0, sizeBytes); + if (!mem) { + return hipErrorOutOfMemory; + } + + if (!mem->create(nullptr)) { + return hipErrorMemoryAllocation; + } + + *ptr = reinterpret_cast(as_cl(mem)); + + return hipSuccess; } hipError_t hipFree(void* ptr) { - if (!is_valid(reinterpret_cast(ptr))) { - return hipErrorInvalidValue; - } - as_amd(reinterpret_cast(ptr))->release(); - return hipSuccess; + if (!is_valid(reinterpret_cast(ptr))) { + return hipErrorInvalidValue; + } + as_amd(reinterpret_cast(ptr))->release(); + return hipSuccess; } hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { - HIP_INIT_API(dst, src, sizeBytes, kind); + HIP_INIT_API(dst, src, sizeBytes, kind); - amd::Context* context = as_amd(g_currentCtx); - amd::Device* device = context->devices()[0]; + amd::Device* device = g_context->devices()[0]; - // FIXME : Do we create a queue here or create at init and just reuse - amd::HostQueue* queue = new amd::HostQueue(*context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); - if (!queue) { - return hipErrorOutOfMemory; - } + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } - amd::Buffer* srcBuffer = as_amd(reinterpret_cast(const_cast(src)))->asBuffer(); - amd::Buffer* dstBuffer = as_amd(reinterpret_cast(const_cast(dst)))->asBuffer(); + amd::Buffer* srcBuffer = as_amd(reinterpret_cast(const_cast(src)))->asBuffer(); + amd::Buffer* dstBuffer = as_amd(reinterpret_cast(dst))->asBuffer(); - amd::Command* command; - amd::Command::EventWaitList waitList; + amd::Command* command; + amd::Command::EventWaitList waitList; - switch (kind) { - case hipMemcpyDeviceToHost: + switch (kind) { + case hipMemcpyDeviceToHost: command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, - srcBuffer, 0, sizeBytes, dst); + *srcBuffer, 0, sizeBytes, dst); break; - case hipMemcpyHostToDevice: + case hipMemcpyHostToDevice: command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, - dstBuffer, 0, sizeBytes, src); + *dstBuffer, 0, sizeBytes, src); break; - default: - assert(!"Shouldn't reach here"); + default: + assert(!"Shouldn't reach here"); break; - } - if (!command) { - return hipErrorOutOfMemory; - } + } + if (!command) { + return hipErrorOutOfMemory; + } - // Make sure we have memory for the command execution - if (CL_SUCCESS != command->validateMemory()) { - delete command; - return hipErrorMemoryAllocation; - } +// FIXME: virtualize MemoryCommand::validateMemory() +#if 0 + // Make sure we have memory for the command execution + if (CL_SUCCESS != command->validateMemory()) { + delete command; + return hipErrorMemoryAllocation; + } +#endif + command->enqueue(); + command->awaitCompletion(); + command->release(); - command->enqueue(); - command->awaitCompletion(); - command->release(); + queue->release(); - queue->release(); - - return hipSuccess; + return hipSuccess; } diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp new file mode 100644 index 0000000000..dde0c4e790 --- /dev/null +++ b/api/hip/hip_module.cpp @@ -0,0 +1,144 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#include "hip_internal.hpp" +#include "platform/program.hpp" + +static uint64_t ElfSize(const void *emi) +{ + const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi; + const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff); + + uint64_t max_offset = ehdr->e_shoff; + uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum; + + for (uint16_t i=0; i < ehdr->e_shnum; ++i){ + uint64_t cur_offset = static_cast(shdr[i].sh_offset); + if (max_offset < cur_offset) { + max_offset = cur_offset; + total_size = max_offset; + if(SHT_NOBITS != shdr[i].sh_type) { + total_size += static_cast(shdr[i].sh_size); + } + } + } + return total_size; +} + +hipError_t hipModuleLoadData(hipModule_t *module, const void *image) +{ + HIP_INIT_API(module, image); + + amd::Program* program = new amd::Program(*g_context); + if (program == NULL) { + return hipErrorOutOfMemory; + } + + if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, ElfSize(image)) || + CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) { + return hipErrorUnknown; + } + + *module = reinterpret_cast(as_cl(program)); + + return hipSuccess; +} + +hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name) +{ + HIP_INIT_API(hfunc, hmod, name); + + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + const amd::Symbol* symbol = program->findSymbol(name); + if (!symbol) { + return hipErrorNotFound; + } + + amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name); + if (!kernel) { + return hipErrorOutOfMemory; + } + + *hfunc = reinterpret_cast(as_cl(kernel)); + + return hipSuccess; +} + +hipError_t hipModuleLaunchKernel(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra) +{ + HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra); + + amd::Kernel* kernel = as_amd(reinterpret_cast(f)); + amd::Device* device = g_context->devices()[0]; + + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } + + size_t globalWorkOffset[3] = {0}; + size_t globalWorkSize[3] = { gridDimX, gridDimY, gridDimZ }; + size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ }; + amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); + amd::Command::EventWaitList waitList; + + assert(!kernelParams && extra && "check this code"); + const amd::KernelSignature& signature = kernel->signature(); + for (size_t i = 0; i < signature.numParameters(); ++i) { + const amd::KernelParameterDescriptor& desc = signature.at(i); + kernel->parameters().set(i, desc.size_, reinterpret_cast
(extra[1]) + desc.offset_); + } + + amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(*queue, waitList, *kernel, ndrange); + if (!command) { + return hipErrorOutOfMemory; + } + + // Make sure we have memory for the command execution + if (CL_SUCCESS != command->validateMemory()) { + delete command; + return hipErrorMemoryAllocation; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + queue->release(); + + return hipSuccess; +} + + diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp new file mode 100644 index 0000000000..aed3342483 --- /dev/null +++ b/api/hip/hip_platform.cpp @@ -0,0 +1,193 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" +#include "platform/program.hpp" +#include "platform/runtime.hpp" + +constexpr unsigned __cudaFatMAGIC = 0x1ee55a01; +constexpr unsigned __cudaFatMAGIC2 = 0x466243b1; +constexpr unsigned __cudaFatMAGIC3 = 0xba55ed50; + +struct __CudaFatBinaryWrapper { + unsigned int magic; + unsigned int version; + void* binary; + void* dummy1; +}; + +struct __CudaFatBinaryHeader { + unsigned int magic; + unsigned short version; + unsigned short headerSize; + unsigned long long int fatSize; +}; + +struct __CudaPartHeader{ + unsigned short type; + unsigned short dummy1; + unsigned int headerSize; + unsigned long long int partSize; + unsigned long long int dummy2; + unsigned int dummy3; + unsigned int subarch; +}; + +extern "C" hipModule_t __cudaRegisterFatBinary(void* bundle) +{ + if (!amd::Runtime::initialized()) { // FIXME: fix initialization + hipInit(0); + } + + amd::Program* program = new amd::Program(*g_context); + if (!program) return nullptr; + + struct __CudaFatBinaryWrapper* fbwrapper = (struct __CudaFatBinaryWrapper*)bundle; + if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) { + return nullptr; + } + struct __CudaFatBinaryHeader* fbheader = (struct __CudaFatBinaryHeader*)fbwrapper->binary; + if (fbheader->magic != __cudaFatMAGIC3 || fbheader->version != 1) { + return nullptr; + } + struct __CudaPartHeader* pheader = (struct __CudaPartHeader*)( + (uintptr_t)fbheader + fbheader->headerSize); + struct __CudaPartHeader* end = (struct __CudaPartHeader*)( + (uintptr_t)pheader + fbheader->fatSize); + + while (pheader < end) { + if (true/*pheader->subarch == match a device in the context*/) { + void *image = (void*)((uintptr_t)pheader + pheader->headerSize); + size_t size = pheader->partSize; + if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, size) || + CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) { + return nullptr; + } + break; + } + pheader = (struct __CudaPartHeader*)( + (uintptr_t)pheader + pheader->headerSize + pheader->partSize); + } + + return reinterpret_cast(as_cl(program)); +} + +std::map g_functions; + + +extern "C" void __cudaRegisterFunction( + hipModule_t module, + const void* hostFunction, + char* deviceFunction, + const char* deviceName, + unsigned int threadLimit, + uint3* tid, + uint3* bid, + dim3* blockDim, + dim3* gridDim, + int* wSize) +{ + amd::Program* program = as_amd(reinterpret_cast(module)); + + const amd::Symbol* symbol = program->findSymbol(deviceName); + if (!symbol) return; + + amd::Kernel* kernel = new amd::Kernel(*program, *symbol, deviceName); + if (!kernel) return; + + // FIXME: not thread safe + g_functions.insert(std::make_pair(hostFunction, reinterpret_cast(as_cl(kernel)))); +} + +extern "C" void __cudaRegisterVar( + hipModule_t module, + char* hostVar, + char* deviceVar, + const char* deviceName, + int ext, + int size, + int constant, + int global) +{ +} + +extern "C" void __cudaUnregisterFatBinary( + hipModule_t module +) +{ +} + +dim3 g_gridDim; // FIXME: place in execution stack +dim3 g_blockDim; // FIXME: place in execution stack +size_t g_sharedMem; // FIXME: place in execution stack +hipStream_t g_stream; // FIXME: place in execution stack + +extern "C" hipError_t cudaConfigureCall( + dim3 gridDim, + dim3 blockDim, + size_t sharedMem, + hipStream_t stream) +{ + // FIXME: should push and new entry on the execution stack + + g_gridDim = gridDim; + g_blockDim = blockDim; + g_sharedMem = sharedMem; + g_stream = stream; + + return hipSuccess; +} + +char* g_arguments[1024]; // FIXME: needs to grow + +extern "C" hipError_t cudaSetupArgument( + const void *arg, + size_t size, + size_t offset) +{ + // FIXME: should modify the top of the execution stack + + ::memcpy(g_arguments + offset, arg, size); + return hipSuccess; +} + +extern "C" hipError_t cudaLaunch(const void *hostFunction) +{ + std::map::iterator it; + if ((it = g_functions.find(hostFunction)) == g_functions.end()) + return hipErrorUnknown; + + // FIXME: should pop an entry from the execution stack + + void *extra[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, g_arguments, + HIP_LAUNCH_PARAM_BUFFER_SIZE, 0 /* FIXME: not needed, but should be correct*/, + HIP_LAUNCH_PARAM_END + }; + + return hipModuleLaunchKernel(it->second, + g_gridDim.x, g_gridDim.y, g_gridDim.z, + g_blockDim.x, g_blockDim.y, g_blockDim.z, + g_sharedMem, g_stream, nullptr, extra); +}