P4 to Git Change 1521675 by lmoriche@lmoriche_opencl_dev2 on 2018/03/01 22:50:06
SWDEV-145570 - [HIP] - Hip Rearchitecture - Add initial prototype implementation Affected files ... ... //depot/stg/opencl/drivers/opencl/api/Makefile#11 edit ... //depot/stg/opencl/drivers/opencl/api/hip/fixme.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_error.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.rc#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#1 add
This commit is contained in:
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "cl_common.hpp"
|
||||
|
||||
KHRicdVendorDispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0};
|
||||
amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_};
|
||||
|
||||
RUNTIME_ENTRY(cl_int, clGetDeviceIDs,
|
||||
(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries,
|
||||
cl_device_id* devices, cl_uint* num_devices)) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
RUNTIME_EXIT
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
#include "platform/runtime.hpp"
|
||||
|
||||
|
||||
amd::Context* g_context = nullptr;
|
||||
|
||||
hipError_t hipInit(unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(flags);
|
||||
|
||||
if (!amd::Runtime::initialized()) {
|
||||
amd::Runtime::init();
|
||||
}
|
||||
|
||||
// FIXME: move the global VDI context to hipInit.
|
||||
g_context = new amd::Context(
|
||||
amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false), amd::Context::Info());
|
||||
if (!g_context) return hipErrorOutOfMemory;
|
||||
|
||||
if (g_context && CL_SUCCESS != g_context->create(nullptr)) {
|
||||
g_context->release();
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device)
|
||||
{
|
||||
HIP_INIT_API(ctx, flags, device);
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
+64
-76
@@ -24,9 +24,6 @@ THE SOFTWARE.
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
|
||||
cl_device_id* g_deviceArray = NULL;
|
||||
unsigned g_deviceCnt = 0;
|
||||
|
||||
hipError_t hipGetDevice(int *deviceId) {
|
||||
|
||||
HIP_INIT_API(deviceId);
|
||||
@@ -54,7 +51,7 @@ hipError_t hipDeviceGet(hipDevice_t *device, int deviceId)
|
||||
return hipSuccess;
|
||||
};
|
||||
|
||||
hipError_t hipDeviceCount(int* count) {
|
||||
hipError_t hipGetDeviceCount(int* count) {
|
||||
|
||||
HIP_INIT_API(count);
|
||||
|
||||
@@ -63,9 +60,7 @@ hipError_t hipDeviceCount(int* count) {
|
||||
}
|
||||
|
||||
// Get all available devices
|
||||
if (!amd::Device::getDeviceIDs(CL_DEVICE_TYPE_GPU, 0, NULL, count, false)) {
|
||||
return hipErrorNoDevice;
|
||||
}
|
||||
*count = g_context->devices().size();
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -74,93 +69,95 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
|
||||
HIP_INIT_API(pi, attr, device);
|
||||
|
||||
if(pi == NULL) {
|
||||
if (pi == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
auto deviceHandle = as_amd(g_deviceArray[hipDevice]);
|
||||
//if (unsigned(device) >= g_context->devices().size()) {
|
||||
// return hipErrorInvalidDevice;
|
||||
//}
|
||||
//auto* deviceHandle = g_context->devices()[device];
|
||||
|
||||
if (deviceHandle == NULL) {
|
||||
return hipErrorInvalidDevice;
|
||||
}
|
||||
|
||||
hipDeviceProp_t *prop = deviceHandle->_props;
|
||||
//FIXME: should we cache the props, or just select from deviceHandle->info_?
|
||||
hipDeviceProp_t prop = {0};
|
||||
hipError_t err = hipGetDeviceProperties(&prop, device);
|
||||
if (err != hipSuccess) return err;
|
||||
|
||||
switch (attr) {
|
||||
case hipDeviceAttributeMaxThreadsPerBlock:
|
||||
*pi = prop->maxThreadsPerBlock;
|
||||
*pi = prop.maxThreadsPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimX:
|
||||
*pi = prop->maxThreadsDim[0];
|
||||
*pi = prop.maxThreadsDim[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimY:
|
||||
*pi = prop->maxThreadsDim[1];
|
||||
*pi = prop.maxThreadsDim[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimZ:
|
||||
*pi = prop->maxThreadsDim[2];
|
||||
*pi = prop.maxThreadsDim[2];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimX:
|
||||
*pi = prop->maxGridSize[0];
|
||||
*pi = prop.maxGridSize[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimY:
|
||||
*pi = prop->maxGridSize[1];
|
||||
*pi = prop.maxGridSize[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimZ:
|
||||
*pi = prop->maxGridSize[2];
|
||||
*pi = prop.maxGridSize[2];
|
||||
break;
|
||||
case hipDeviceAttributeMaxSharedMemoryPerBlock:
|
||||
*pi = prop->sharedMemPerBlock;
|
||||
*pi = prop.sharedMemPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeTotalConstantMemory:
|
||||
*pi = prop->totalConstMem;
|
||||
*pi = prop.totalConstMem;
|
||||
break;
|
||||
case hipDeviceAttributeWarpSize:
|
||||
*pi = prop->warpSize;
|
||||
*pi = prop.warpSize;
|
||||
break;
|
||||
case hipDeviceAttributeMaxRegistersPerBlock:
|
||||
*pi = prop->regsPerBlock;
|
||||
*pi = prop.regsPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeClockRate:
|
||||
*pi = prop->clockRate;
|
||||
*pi = prop.clockRate;
|
||||
break;
|
||||
case hipDeviceAttributeMemoryClockRate:
|
||||
*pi = prop->memoryClockRate;
|
||||
*pi = prop.memoryClockRate;
|
||||
break;
|
||||
case hipDeviceAttributeMemoryBusWidth:
|
||||
*pi = prop->memoryBusWidth;
|
||||
*pi = prop.memoryBusWidth;
|
||||
break;
|
||||
case hipDeviceAttributeMultiprocessorCount:
|
||||
*pi = prop->multiProcessorCount;
|
||||
*pi = prop.multiProcessorCount;
|
||||
break;
|
||||
case hipDeviceAttributeComputeMode:
|
||||
*pi = prop->computeMode;
|
||||
*pi = prop.computeMode;
|
||||
break;
|
||||
case hipDeviceAttributeL2CacheSize:
|
||||
*pi = prop->l2CacheSize;
|
||||
*pi = prop.l2CacheSize;
|
||||
break;
|
||||
case hipDeviceAttributeMaxThreadsPerMultiProcessor:
|
||||
*pi = prop->maxThreadsPerMultiProcessor;
|
||||
*pi = prop.maxThreadsPerMultiProcessor;
|
||||
break;
|
||||
case hipDeviceAttributeComputeCapabilityMajor:
|
||||
*pi = prop->major;
|
||||
*pi = prop.major;
|
||||
break;
|
||||
case hipDeviceAttributeComputeCapabilityMinor:
|
||||
*pi = prop->minor;
|
||||
*pi = prop.minor;
|
||||
break;
|
||||
case hipDeviceAttributePciBusId:
|
||||
*pi = prop->pciBusID;
|
||||
*pi = prop.pciBusID;
|
||||
break;
|
||||
case hipDeviceAttributeConcurrentKernels:
|
||||
*pi = prop->concurrentKernels;
|
||||
*pi = prop.concurrentKernels;
|
||||
break;
|
||||
case hipDeviceAttributePciDeviceId:
|
||||
*pi = prop->pciDeviceID;
|
||||
*pi = prop.pciDeviceID;
|
||||
break;
|
||||
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
|
||||
*pi = prop->maxSharedMemoryPerMultiProcessor;
|
||||
*pi = prop.maxSharedMemoryPerMultiProcessor;
|
||||
break;
|
||||
case hipDeviceAttributeIsMultiGpuBoard:
|
||||
*pi = prop->isMultiGpuBoard;
|
||||
*pi = prop.isMultiGpuBoard;
|
||||
break;
|
||||
default:
|
||||
return hipErrorInvalidValue;
|
||||
@@ -177,10 +174,10 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
auto deviceHandle = as_amd(g_deviceArray[device]);
|
||||
if (deviceHandle == NULL) {
|
||||
if (unsigned(device) >= g_context->devices().size()) {
|
||||
return hipErrorInvalidDevice;
|
||||
}
|
||||
auto* deviceHandle = g_context->devices()[device];
|
||||
|
||||
hipDeviceProp_t deviceProps = {0};
|
||||
|
||||
@@ -226,11 +223,11 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) {
|
||||
deviceProps.arch.has3dGrid = 1;
|
||||
deviceProps.arch.hasDynamicParallelism = 0;
|
||||
deviceProps.concurrentKernels = 1;
|
||||
deviceProps.pciDomainID = info.deviceTopology_.function;
|
||||
deviceProps.pciBusID = info.deviceTopology_.bus;
|
||||
deviceProps.pciDeviceID = info.deviceTopology_.device;
|
||||
deviceProps.pciDomainID = info.deviceTopology_.pcie.function;
|
||||
deviceProps.pciBusID = info.deviceTopology_.pcie.bus;
|
||||
deviceProps.pciDeviceID = info.deviceTopology_.pcie.device;
|
||||
deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_;
|
||||
deviceProps.isMultiGpuBoard = info.;
|
||||
//deviceProps.isMultiGpuBoard = info.;
|
||||
deviceProps.canMapHostMemory = 1;
|
||||
deviceProps.gcnArch = info.gfxipVersion_;
|
||||
|
||||
@@ -254,32 +251,16 @@ hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
*cacheConfig = 0;
|
||||
*cacheConfig = hipFuncCache_t();
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipGetDeviceProperties(hipDeviceProp_t* properties, int device) {
|
||||
|
||||
HIP_INIT_API(properties, device);
|
||||
if ((properties == NULL) || (device < 0) || (device >= g_deviceCnt)) {
|
||||
return hipErrorInvalidDevice;
|
||||
}
|
||||
|
||||
auto * deviceHandle = as_amd(g_deviceArray[device]);
|
||||
if (deviceHandle != NULL) {
|
||||
*properties = deviceHandle->_props;
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
return hipErrorInvalidDevice;
|
||||
}
|
||||
|
||||
hipError_t hipSetDeviceFlags(unsigned int flags) {
|
||||
|
||||
HIP_INIT_API(flags);
|
||||
|
||||
assert(0 && "Unimplemented")
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
return hipSuccess;
|
||||
};
|
||||
@@ -288,7 +269,7 @@ hipError_t hipDeviceGetLimit (size_t *pValue, hipLimit_t limit) {
|
||||
|
||||
HIP_INIT_API(pValue, limit);
|
||||
|
||||
assert(0 && "Unimplemented")
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -297,7 +278,7 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig)
|
||||
|
||||
HIP_INIT_API(cacheConfig);
|
||||
|
||||
assert(0 && "Not supported")
|
||||
assert(0 && "Not supported");
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -306,7 +287,7 @@ hipError_t hipDeviceSetSharedMemConfig (hipSharedMemConfig config) {
|
||||
|
||||
HIP_INIT_API(config);
|
||||
|
||||
assert(0 && "Not Supported")
|
||||
assert(0 && "Not Supported");
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -315,17 +296,17 @@ hipError_t hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig) {
|
||||
|
||||
HIP_INIT_API(pConfig);
|
||||
|
||||
assert(0 && "Not supported")
|
||||
assert(0 && "Not supported");
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
|
||||
hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
|
||||
|
||||
HIP_INIT_API(device, properties);
|
||||
|
||||
assert(0 && "Unimplemented")
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -335,7 +316,7 @@ hipError_t hipDeviceGetByPCIBusId (int* device, const char* pciBusId) {
|
||||
|
||||
HIP_INIT_API(device,pciBusId);
|
||||
|
||||
assert(0 && "Unimplemented")
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -345,7 +326,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) {
|
||||
|
||||
HIP_INIT_API(bytes, device);
|
||||
|
||||
assert(0 && "Unimplemented")
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -353,7 +334,8 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) {
|
||||
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) {
|
||||
|
||||
HIP_INIT_API(major,minor, device);
|
||||
assert(0 && "Unimplemented")
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -362,7 +344,7 @@ hipError_t hipDeviceGetName(char *name,int len, hipDevice_t device) {
|
||||
|
||||
HIP_INIT_API((void*)name,len, device);
|
||||
|
||||
assert(0 && "Unimplemented")
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -371,7 +353,13 @@ hipError_t hipDeviceGetPCIBusId (char *pciBusId,int len, int device) {
|
||||
|
||||
HIP_INIT_API((void*)pciBusId, len, device);
|
||||
|
||||
assert(0 && "Unimplemented")
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSynchronize(void)
|
||||
{
|
||||
// FIXME: should wait on all streams
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
|
||||
hipError_t hipGetLastError()
|
||||
{
|
||||
HIP_INIT_API();
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
|
||||
hipError_t hipPeekAtLastError()
|
||||
{
|
||||
HIP_INIT_API();
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
|
||||
const char *hipGetErrorName(hipError_t hip_error)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
const char *hipGetErrorString(hipError_t hip_error)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
|
||||
hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags)
|
||||
{
|
||||
HIP_INIT_API(event, flags);
|
||||
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
|
||||
hipError_t hipEventCreate(hipEvent_t* event)
|
||||
{
|
||||
HIP_INIT_API(event);
|
||||
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
|
||||
hipError_t hipEventDestroy(hipEvent_t event)
|
||||
{
|
||||
HIP_INIT_API(event);
|
||||
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
|
||||
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
|
||||
{
|
||||
HIP_INIT_API(ms, start, stop);
|
||||
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
|
||||
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(event, stream);
|
||||
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
|
||||
hipError_t hipEventSynchronize(hipEvent_t event)
|
||||
{
|
||||
HIP_INIT_API(event);
|
||||
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
@@ -0,0 +1,129 @@
|
||||
EXPORTS
|
||||
hipChooseDevice
|
||||
hipCtxCreate
|
||||
hipCtxDestroy
|
||||
hipCtxDisablePeerAccess
|
||||
hipCtxEnablePeerAccess
|
||||
hipCtxGetApiVersion
|
||||
hipCtxGetCacheConfig
|
||||
hipCtxGetCurrent
|
||||
hipCtxGetDevice
|
||||
hipCtxGetFlags
|
||||
hipCtxGetSharedMemConfig
|
||||
hipCtxPopCurrent
|
||||
hipCtxPushCurrent
|
||||
hipCtxSetCacheConfig
|
||||
hipCtxSetCurrent
|
||||
hipCtxSetSharedMemConfig
|
||||
hipCtxSynchronize
|
||||
hipDeviceCanAccessPeer
|
||||
hipDeviceComputeCapability
|
||||
hipDeviceDisablePeerAccess
|
||||
hipDeviceEnablePeerAccess
|
||||
hipDeviceGet
|
||||
hipDeviceGetAttribute
|
||||
hipDeviceGetByPCIBusId
|
||||
hipDeviceGetCacheConfig
|
||||
hipDeviceGetLimit
|
||||
hipDeviceGetName
|
||||
hipDeviceGetPCIBusId
|
||||
hipDeviceGetSharedMemConfig
|
||||
hipDevicePrimaryCtxGetState
|
||||
hipDevicePrimaryCtxRelease
|
||||
hipDevicePrimaryCtxReset
|
||||
hipDevicePrimaryCtxRetain
|
||||
hipDevicePrimaryCtxSetFlags
|
||||
hipDeviceReset
|
||||
hipDeviceSetCacheConfig
|
||||
hipDeviceSetSharedMemConfig
|
||||
hipDeviceSynchronize
|
||||
hipDeviceTotalMem
|
||||
hipDriverGetVersion
|
||||
hipEventCreate
|
||||
hipEventCreateWithFlags
|
||||
hipEventDestroy
|
||||
hipEventElapsedTime
|
||||
hipEventQuery
|
||||
hipEventRecord
|
||||
hipEventSynchronize
|
||||
hipFree
|
||||
hipFreeArray
|
||||
hipFreeHost
|
||||
hipFuncSetCacheConfig
|
||||
hipGetDevice
|
||||
hipGetDeviceCount
|
||||
hipGetDeviceProperties
|
||||
hipGetErrorName
|
||||
hipGetErrorString
|
||||
hipGetLastError
|
||||
hipHostAlloc
|
||||
hipHostFree
|
||||
hipHostGetDevicePointer
|
||||
hipHostGetFlags
|
||||
hipHostMalloc
|
||||
hipHostRegister
|
||||
hipHostUnregister
|
||||
hipInit
|
||||
hipIpcCloseMemHandle
|
||||
hipIpcGetMemHandle
|
||||
hipIpcOpenMemHandle
|
||||
hipMalloc
|
||||
hipMalloc3DArray
|
||||
hipMallocArray
|
||||
hipMallocHost
|
||||
hipMallocPitch
|
||||
hipMemcpy
|
||||
hipMemcpy2D
|
||||
hipMemcpy2DAsync
|
||||
hipMemcpy2DToArray
|
||||
hipMemcpy3D
|
||||
hipMemcpyAsync
|
||||
hipMemcpyDtoD
|
||||
hipMemcpyDtoDAsync
|
||||
hipMemcpyDtoH
|
||||
hipMemcpyDtoHAsync
|
||||
hipMemcpyFromSymbol
|
||||
hipMemcpyFromSymbolAsync
|
||||
hipMemcpyHtoD
|
||||
hipMemcpyHtoDAsync
|
||||
hipMemcpyPeer
|
||||
hipMemcpyPeerAsync
|
||||
hipMemcpyToArray
|
||||
hipMemcpyToSymbol
|
||||
hipMemcpyToSymbolAsync
|
||||
hipMemGetAddressRange
|
||||
hipMemGetInfo
|
||||
hipMemPtrGetInfo
|
||||
hipMemset
|
||||
hipMemset2D
|
||||
hipMemsetAsync
|
||||
hipMemsetD8
|
||||
hipModuleGetFunction
|
||||
hipModuleGetGlobal
|
||||
hipModuleLaunchKernel
|
||||
hipModuleLoad
|
||||
hipModuleLoadData
|
||||
hipModuleLoadDataEx
|
||||
hipModuleUnload
|
||||
hipPeekAtLastError
|
||||
hipPointerGetAttributes
|
||||
hipProfilerStart
|
||||
hipProfilerStop
|
||||
hipRuntimeGetVersion
|
||||
hipSetDevice
|
||||
hipSetDeviceFlags
|
||||
hipStreamAddCallback
|
||||
hipStreamCreate
|
||||
hipStreamCreateWithFlags
|
||||
hipStreamDestroy
|
||||
hipStreamGetFlags
|
||||
hipStreamQuery
|
||||
hipStreamSynchronize
|
||||
hipStreamWaitEvent
|
||||
__cudaRegisterFatBinary
|
||||
__cudaRegisterFunction
|
||||
__cudaRegisterVariable
|
||||
__cudaUnregisterFatBinary
|
||||
cudaConfigureCall
|
||||
cudaSetupArgument
|
||||
cudaLaunch
|
||||
@@ -0,0 +1,133 @@
|
||||
HIP_1.0 {
|
||||
global:
|
||||
hipChooseDevice;
|
||||
hipCtxCreate;
|
||||
hipCtxDestroy;
|
||||
hipCtxDisablePeerAccess;
|
||||
hipCtxEnablePeerAccess;
|
||||
hipCtxGetApiVersion;
|
||||
hipCtxGetCacheConfig;
|
||||
hipCtxGetCurrent;
|
||||
hipCtxGetDevice;
|
||||
hipCtxGetFlags;
|
||||
hipCtxGetSharedMemConfig;
|
||||
hipCtxPopCurrent;
|
||||
hipCtxPushCurrent;
|
||||
hipCtxSetCacheConfig;
|
||||
hipCtxSetCurrent;
|
||||
hipCtxSetSharedMemConfig;
|
||||
hipCtxSynchronize;
|
||||
hipDeviceCanAccessPeer;
|
||||
hipDeviceComputeCapability;
|
||||
hipDeviceDisablePeerAccess;
|
||||
hipDeviceEnablePeerAccess;
|
||||
hipDeviceGet;
|
||||
hipDeviceGetAttribute;
|
||||
hipDeviceGetByPCIBusId;
|
||||
hipDeviceGetCacheConfig;
|
||||
hipDeviceGetLimit;
|
||||
hipDeviceGetName;
|
||||
hipDeviceGetPCIBusId;
|
||||
hipDeviceGetSharedMemConfig;
|
||||
hipDevicePrimaryCtxGetState;
|
||||
hipDevicePrimaryCtxRelease;
|
||||
hipDevicePrimaryCtxReset;
|
||||
hipDevicePrimaryCtxRetain;
|
||||
hipDevicePrimaryCtxSetFlags;
|
||||
hipDeviceReset;
|
||||
hipDeviceSetCacheConfig;
|
||||
hipDeviceSetSharedMemConfig;
|
||||
hipDeviceSynchronize;
|
||||
hipDeviceTotalMem;
|
||||
hipDriverGetVersion;
|
||||
hipEventCreate;
|
||||
hipEventCreateWithFlags;
|
||||
hipEventDestroy;
|
||||
hipEventElapsedTime;
|
||||
hipEventQuery;
|
||||
hipEventRecord;
|
||||
hipEventSynchronize;
|
||||
hipFree;
|
||||
hipFreeArray;
|
||||
hipFreeHost;
|
||||
hipFuncSetCacheConfig;
|
||||
hipGetDevice;
|
||||
hipGetDeviceCount;
|
||||
hipGetDeviceProperties;
|
||||
hipGetErrorName;
|
||||
hipGetErrorString;
|
||||
hipGetLastError;
|
||||
hipHostAlloc;
|
||||
hipHostFree;
|
||||
hipHostGetDevicePointer;
|
||||
hipHostGetFlags;
|
||||
hipHostMalloc;
|
||||
hipHostRegister;
|
||||
hipHostUnregister;
|
||||
hipInit;
|
||||
hipIpcCloseMemHandle;
|
||||
hipIpcGetMemHandle;
|
||||
hipIpcOpenMemHandle;
|
||||
hipMalloc;
|
||||
hipMalloc3DArray;
|
||||
hipMallocArray;
|
||||
hipMallocHost;
|
||||
hipMallocPitch;
|
||||
hipMemcpy;
|
||||
hipMemcpy2D;
|
||||
hipMemcpy2DAsync;
|
||||
hipMemcpy2DToArray;
|
||||
hipMemcpy3D;
|
||||
hipMemcpyAsync;
|
||||
hipMemcpyDtoD;
|
||||
hipMemcpyDtoDAsync;
|
||||
hipMemcpyDtoH;
|
||||
hipMemcpyDtoHAsync;
|
||||
hipMemcpyFromSymbol;
|
||||
hipMemcpyFromSymbolAsync;
|
||||
hipMemcpyHtoD;
|
||||
hipMemcpyHtoDAsync;
|
||||
hipMemcpyPeer;
|
||||
hipMemcpyPeerAsync;
|
||||
hipMemcpyToArray;
|
||||
hipMemcpyToSymbol;
|
||||
hipMemcpyToSymbolAsync;
|
||||
hipMemGetAddressRange;
|
||||
hipMemGetInfo;
|
||||
hipMemPtrGetInfo;
|
||||
hipMemset;
|
||||
hipMemset2D;
|
||||
hipMemsetAsync;
|
||||
hipMemsetD8;
|
||||
hipModuleGetFunction;
|
||||
hipModuleGetGlobal;
|
||||
hipModuleLaunchKernel;
|
||||
hipModuleLoad;
|
||||
hipModuleLoadData;
|
||||
hipModuleLoadDataEx;
|
||||
hipModuleUnload;
|
||||
hipPeekAtLastError;
|
||||
hipPointerGetAttributes;
|
||||
hipProfilerStart;
|
||||
hipProfilerStop;
|
||||
hipRuntimeGetVersion;
|
||||
hipSetDevice;
|
||||
hipSetDeviceFlags;
|
||||
hipStreamAddCallback;
|
||||
hipStreamCreate;
|
||||
hipStreamCreateWithFlags;
|
||||
hipStreamDestroy;
|
||||
hipStreamGetFlags;
|
||||
hipStreamQuery;
|
||||
hipStreamSynchronize;
|
||||
hipStreamWaitEvent;
|
||||
__cudaRegisterFatBinary;
|
||||
__cudaRegisterFunction;
|
||||
__cudaRegisterVariable;
|
||||
__cudaUnregisterFatBinary;
|
||||
cudaConfigureCall;
|
||||
cudaSetupArgument;
|
||||
cudaLaunch;
|
||||
local:
|
||||
*;
|
||||
};
|
||||
@@ -0,0 +1,75 @@
|
||||
#define STR(__macro__) #__macro__
|
||||
#define XSTR(__macro__) STR(__macro__)
|
||||
|
||||
#if defined(_DEBUG)
|
||||
#define DEBUG_ONLY(x) x
|
||||
#else
|
||||
#define DEBUG_ONLY(x)
|
||||
#endif
|
||||
|
||||
#define VERSION_PREFIX_MAJOR 2
|
||||
#define VERSION_PREFIX_MINOR 0
|
||||
|
||||
|
||||
#define APSTUDIO_READONLY_SYMBOLS
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Generated from the TEXTINCLUDE 2 resource.
|
||||
//
|
||||
#include "winresrc.h"
|
||||
#include "utils/versions.hpp"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#undef APSTUDIO_READONLY_SYMBOLS
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// English (U.S.) resources
|
||||
|
||||
#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
|
||||
#ifdef _WIN32
|
||||
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
|
||||
#pragma code_page(1252)
|
||||
#endif //_WIN32
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Version
|
||||
//
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER
|
||||
PRODUCTVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
#else
|
||||
FILEFLAGS 0x0L
|
||||
#endif
|
||||
FILEOS 0x40004L
|
||||
FILETYPE 0x2L
|
||||
FILESUBTYPE 0x0L
|
||||
BEGIN
|
||||
BLOCK "StringFileInfo"
|
||||
BEGIN
|
||||
BLOCK "040904b0"
|
||||
BEGIN
|
||||
VALUE "Comments", " \0"
|
||||
VALUE "CompanyName", "Advanced Micro Devices Inc.\0"
|
||||
VALUE "FileDescription", AMD_PLATFORM_NAME " OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " Runtime\0"
|
||||
VALUE "FileVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER)
|
||||
VALUE "InternalName", "OpenCL"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2011 Advanced Micro Devices Inc.\0"
|
||||
VALUE "OriginalFilename", "OpenCL.dll"
|
||||
VALUE "ProductName", "OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " " AMD_PLATFORM_INFO "\0"
|
||||
VALUE "ProductVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER)
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
BEGIN
|
||||
VALUE "Translation", 0x409, 1200
|
||||
END
|
||||
END
|
||||
|
||||
#endif // English (U.S.) resources
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
@@ -36,8 +36,6 @@ THE SOFTWARE.
|
||||
#define HIP_INIT_API(...) \
|
||||
HIP_INIT()
|
||||
|
||||
extern cl_device_id* g_deviceArray;
|
||||
extern unsigned g_deviceCnt;
|
||||
extern thread_local cl_context g_currentCtx;
|
||||
extern amd::Context* g_context;
|
||||
|
||||
#endif // HIP_SRC_HIP_INTERNAL_H
|
||||
|
||||
+65
-68
@@ -26,97 +26,94 @@ THE SOFTWARE.
|
||||
|
||||
hipError_t hipMalloc(void** ptr, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_API(ptr, sizeBytes);
|
||||
|
||||
amd::Context* context = as_amd(g_currentCtx);
|
||||
|
||||
if (sizeBytes == 0) {
|
||||
*ptr = nullptr;
|
||||
return hipSuccess;
|
||||
}
|
||||
else if (!is_valid(context) || !ptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
auto deviceHandle = as_amd(g_deviceArray[0]);
|
||||
if ((deviceHandle->info().maxMemAllocSize_ < size)) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
amd::Memory* mem = new (*context) amd::Buffer(*context, 0, sizeBytes);
|
||||
if (!mem) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
if (!mem->create(nullptr)) {
|
||||
return hipErrorMemoryAllocation;
|
||||
}
|
||||
|
||||
*ptr = reinterpret_cast<void*>(as_cl(mem));
|
||||
HIP_INIT_API(ptr, sizeBytes);
|
||||
|
||||
if (sizeBytes == 0) {
|
||||
*ptr = nullptr;
|
||||
return hipSuccess;
|
||||
}
|
||||
else if (!ptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
if (g_context->devices()[0]->info().maxMemAllocSize_ < sizeBytes) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
amd::Memory* mem = new (*g_context) amd::Buffer(*g_context, 0, sizeBytes);
|
||||
if (!mem) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
if (!mem->create(nullptr)) {
|
||||
return hipErrorMemoryAllocation;
|
||||
}
|
||||
|
||||
*ptr = reinterpret_cast<void*>(as_cl(mem));
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipFree(void* ptr)
|
||||
{
|
||||
if (!is_valid(reinterpret_cast<cl_mem>(ptr))) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
as_amd(reinterpret_cast<cl_mem>(ptr))->release();
|
||||
return hipSuccess;
|
||||
if (!is_valid(reinterpret_cast<cl_mem>(ptr))) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
as_amd(reinterpret_cast<cl_mem>(ptr))->release();
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes, kind);
|
||||
HIP_INIT_API(dst, src, sizeBytes, kind);
|
||||
|
||||
amd::Context* context = as_amd(g_currentCtx);
|
||||
amd::Device* device = context->devices()[0];
|
||||
amd::Device* device = g_context->devices()[0];
|
||||
|
||||
// FIXME : Do we create a queue here or create at init and just reuse
|
||||
amd::HostQueue* queue = new amd::HostQueue(*context, *device, 0,
|
||||
amd::CommandQueue::RealTimeDisabled,
|
||||
amd::CommandQueue::Priority::Normal);
|
||||
if (!queue) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0,
|
||||
amd::CommandQueue::RealTimeDisabled,
|
||||
amd::CommandQueue::Priority::Normal);
|
||||
if (!queue) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
amd::Buffer* srcBuffer = as_amd(reinterpret_cast<cl_mem>(const_cast<void*>(src)))->asBuffer();
|
||||
amd::Buffer* dstBuffer = as_amd(reinterpret_cast<cl_mem>(const_cast<void*>(dst)))->asBuffer();
|
||||
amd::Buffer* srcBuffer = as_amd(reinterpret_cast<cl_mem>(const_cast<void*>(src)))->asBuffer();
|
||||
amd::Buffer* dstBuffer = as_amd(reinterpret_cast<cl_mem>(dst))->asBuffer();
|
||||
|
||||
amd::Command* command;
|
||||
amd::Command::EventWaitList waitList;
|
||||
amd::Command* command;
|
||||
amd::Command::EventWaitList waitList;
|
||||
|
||||
switch (kind) {
|
||||
case hipMemcpyDeviceToHost:
|
||||
switch (kind) {
|
||||
case hipMemcpyDeviceToHost:
|
||||
command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList,
|
||||
srcBuffer, 0, sizeBytes, dst);
|
||||
*srcBuffer, 0, sizeBytes, dst);
|
||||
break;
|
||||
case hipMemcpyHostToDevice:
|
||||
case hipMemcpyHostToDevice:
|
||||
command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList,
|
||||
dstBuffer, 0, sizeBytes, src);
|
||||
*dstBuffer, 0, sizeBytes, src);
|
||||
break;
|
||||
default:
|
||||
assert(!"Shouldn't reach here");
|
||||
default:
|
||||
assert(!"Shouldn't reach here");
|
||||
break;
|
||||
}
|
||||
if (!command) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
}
|
||||
if (!command) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
// Make sure we have memory for the command execution
|
||||
if (CL_SUCCESS != command->validateMemory()) {
|
||||
delete command;
|
||||
return hipErrorMemoryAllocation;
|
||||
}
|
||||
// FIXME: virtualize MemoryCommand::validateMemory()
|
||||
#if 0
|
||||
// Make sure we have memory for the command execution
|
||||
if (CL_SUCCESS != command->validateMemory()) {
|
||||
delete command;
|
||||
return hipErrorMemoryAllocation;
|
||||
}
|
||||
#endif
|
||||
|
||||
command->enqueue();
|
||||
command->awaitCompletion();
|
||||
command->release();
|
||||
|
||||
command->enqueue();
|
||||
command->awaitCompletion();
|
||||
command->release();
|
||||
queue->release();
|
||||
|
||||
queue->release();
|
||||
|
||||
return hipSuccess;
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <libelf.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
#include "platform/program.hpp"
|
||||
|
||||
static uint64_t ElfSize(const void *emi)
|
||||
{
|
||||
const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi;
|
||||
const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff);
|
||||
|
||||
uint64_t max_offset = ehdr->e_shoff;
|
||||
uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum;
|
||||
|
||||
for (uint16_t i=0; i < ehdr->e_shnum; ++i){
|
||||
uint64_t cur_offset = static_cast<uint64_t>(shdr[i].sh_offset);
|
||||
if (max_offset < cur_offset) {
|
||||
max_offset = cur_offset;
|
||||
total_size = max_offset;
|
||||
if(SHT_NOBITS != shdr[i].sh_type) {
|
||||
total_size += static_cast<uint64_t>(shdr[i].sh_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
return total_size;
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
|
||||
{
|
||||
HIP_INIT_API(module, image);
|
||||
|
||||
amd::Program* program = new amd::Program(*g_context);
|
||||
if (program == NULL) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, ElfSize(image)) ||
|
||||
CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) {
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
|
||||
*module = reinterpret_cast<hipModule_t>(as_cl(program));
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name)
|
||||
{
|
||||
HIP_INIT_API(hfunc, hmod, name);
|
||||
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(hmod));
|
||||
|
||||
const amd::Symbol* symbol = program->findSymbol(name);
|
||||
if (!symbol) {
|
||||
return hipErrorNotFound;
|
||||
}
|
||||
|
||||
amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name);
|
||||
if (!kernel) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
*hfunc = reinterpret_cast<hipFunction_t>(as_cl(kernel));
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra)
|
||||
{
|
||||
HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra);
|
||||
|
||||
amd::Kernel* kernel = as_amd(reinterpret_cast<cl_kernel>(f));
|
||||
amd::Device* device = g_context->devices()[0];
|
||||
|
||||
amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0,
|
||||
amd::CommandQueue::RealTimeDisabled,
|
||||
amd::CommandQueue::Priority::Normal);
|
||||
if (!queue) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
size_t globalWorkOffset[3] = {0};
|
||||
size_t globalWorkSize[3] = { gridDimX, gridDimY, gridDimZ };
|
||||
size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ };
|
||||
amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize);
|
||||
amd::Command::EventWaitList waitList;
|
||||
|
||||
assert(!kernelParams && extra && "check this code");
|
||||
const amd::KernelSignature& signature = kernel->signature();
|
||||
for (size_t i = 0; i < signature.numParameters(); ++i) {
|
||||
const amd::KernelParameterDescriptor& desc = signature.at(i);
|
||||
kernel->parameters().set(i, desc.size_, reinterpret_cast<address>(extra[1]) + desc.offset_);
|
||||
}
|
||||
|
||||
amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(*queue, waitList, *kernel, ndrange);
|
||||
if (!command) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
// Make sure we have memory for the command execution
|
||||
if (CL_SUCCESS != command->validateMemory()) {
|
||||
delete command;
|
||||
return hipErrorMemoryAllocation;
|
||||
}
|
||||
|
||||
command->enqueue();
|
||||
command->awaitCompletion();
|
||||
command->release();
|
||||
|
||||
queue->release();
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include "platform/runtime.hpp"
|
||||
|
||||
constexpr unsigned __cudaFatMAGIC = 0x1ee55a01;
|
||||
constexpr unsigned __cudaFatMAGIC2 = 0x466243b1;
|
||||
constexpr unsigned __cudaFatMAGIC3 = 0xba55ed50;
|
||||
|
||||
struct __CudaFatBinaryWrapper {
|
||||
unsigned int magic;
|
||||
unsigned int version;
|
||||
void* binary;
|
||||
void* dummy1;
|
||||
};
|
||||
|
||||
struct __CudaFatBinaryHeader {
|
||||
unsigned int magic;
|
||||
unsigned short version;
|
||||
unsigned short headerSize;
|
||||
unsigned long long int fatSize;
|
||||
};
|
||||
|
||||
struct __CudaPartHeader{
|
||||
unsigned short type;
|
||||
unsigned short dummy1;
|
||||
unsigned int headerSize;
|
||||
unsigned long long int partSize;
|
||||
unsigned long long int dummy2;
|
||||
unsigned int dummy3;
|
||||
unsigned int subarch;
|
||||
};
|
||||
|
||||
extern "C" hipModule_t __cudaRegisterFatBinary(void* bundle)
|
||||
{
|
||||
if (!amd::Runtime::initialized()) { // FIXME: fix initialization
|
||||
hipInit(0);
|
||||
}
|
||||
|
||||
amd::Program* program = new amd::Program(*g_context);
|
||||
if (!program) return nullptr;
|
||||
|
||||
struct __CudaFatBinaryWrapper* fbwrapper = (struct __CudaFatBinaryWrapper*)bundle;
|
||||
if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) {
|
||||
return nullptr;
|
||||
}
|
||||
struct __CudaFatBinaryHeader* fbheader = (struct __CudaFatBinaryHeader*)fbwrapper->binary;
|
||||
if (fbheader->magic != __cudaFatMAGIC3 || fbheader->version != 1) {
|
||||
return nullptr;
|
||||
}
|
||||
struct __CudaPartHeader* pheader = (struct __CudaPartHeader*)(
|
||||
(uintptr_t)fbheader + fbheader->headerSize);
|
||||
struct __CudaPartHeader* end = (struct __CudaPartHeader*)(
|
||||
(uintptr_t)pheader + fbheader->fatSize);
|
||||
|
||||
while (pheader < end) {
|
||||
if (true/*pheader->subarch == match a device in the context*/) {
|
||||
void *image = (void*)((uintptr_t)pheader + pheader->headerSize);
|
||||
size_t size = pheader->partSize;
|
||||
if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, size) ||
|
||||
CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) {
|
||||
return nullptr;
|
||||
}
|
||||
break;
|
||||
}
|
||||
pheader = (struct __CudaPartHeader*)(
|
||||
(uintptr_t)pheader + pheader->headerSize + pheader->partSize);
|
||||
}
|
||||
|
||||
return reinterpret_cast<hipModule_t>(as_cl(program));
|
||||
}
|
||||
|
||||
std::map<const void*, hipFunction_t> g_functions;
|
||||
|
||||
|
||||
extern "C" void __cudaRegisterFunction(
|
||||
hipModule_t module,
|
||||
const void* hostFunction,
|
||||
char* deviceFunction,
|
||||
const char* deviceName,
|
||||
unsigned int threadLimit,
|
||||
uint3* tid,
|
||||
uint3* bid,
|
||||
dim3* blockDim,
|
||||
dim3* gridDim,
|
||||
int* wSize)
|
||||
{
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(module));
|
||||
|
||||
const amd::Symbol* symbol = program->findSymbol(deviceName);
|
||||
if (!symbol) return;
|
||||
|
||||
amd::Kernel* kernel = new amd::Kernel(*program, *symbol, deviceName);
|
||||
if (!kernel) return;
|
||||
|
||||
// FIXME: not thread safe
|
||||
g_functions.insert(std::make_pair(hostFunction, reinterpret_cast<hipFunction_t>(as_cl(kernel))));
|
||||
}
|
||||
|
||||
extern "C" void __cudaRegisterVar(
|
||||
hipModule_t module,
|
||||
char* hostVar,
|
||||
char* deviceVar,
|
||||
const char* deviceName,
|
||||
int ext,
|
||||
int size,
|
||||
int constant,
|
||||
int global)
|
||||
{
|
||||
}
|
||||
|
||||
extern "C" void __cudaUnregisterFatBinary(
|
||||
hipModule_t module
|
||||
)
|
||||
{
|
||||
}
|
||||
|
||||
dim3 g_gridDim; // FIXME: place in execution stack
|
||||
dim3 g_blockDim; // FIXME: place in execution stack
|
||||
size_t g_sharedMem; // FIXME: place in execution stack
|
||||
hipStream_t g_stream; // FIXME: place in execution stack
|
||||
|
||||
extern "C" hipError_t cudaConfigureCall(
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
size_t sharedMem,
|
||||
hipStream_t stream)
|
||||
{
|
||||
// FIXME: should push and new entry on the execution stack
|
||||
|
||||
g_gridDim = gridDim;
|
||||
g_blockDim = blockDim;
|
||||
g_sharedMem = sharedMem;
|
||||
g_stream = stream;
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
char* g_arguments[1024]; // FIXME: needs to grow
|
||||
|
||||
extern "C" hipError_t cudaSetupArgument(
|
||||
const void *arg,
|
||||
size_t size,
|
||||
size_t offset)
|
||||
{
|
||||
// FIXME: should modify the top of the execution stack
|
||||
|
||||
::memcpy(g_arguments + offset, arg, size);
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
extern "C" hipError_t cudaLaunch(const void *hostFunction)
|
||||
{
|
||||
std::map<const void*, hipFunction_t>::iterator it;
|
||||
if ((it = g_functions.find(hostFunction)) == g_functions.end())
|
||||
return hipErrorUnknown;
|
||||
|
||||
// FIXME: should pop an entry from the execution stack
|
||||
|
||||
void *extra[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, g_arguments,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, 0 /* FIXME: not needed, but should be correct*/,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
|
||||
return hipModuleLaunchKernel(it->second,
|
||||
g_gridDim.x, g_gridDim.y, g_gridDim.z,
|
||||
g_blockDim.x, g_blockDim.y, g_blockDim.z,
|
||||
g_sharedMem, g_stream, nullptr, extra);
|
||||
}
|
||||
Reference in New Issue
Block a user