diff --git a/vdi/fixme.cpp b/vdi/fixme.cpp new file mode 100644 index 0000000000..95880cd630 --- /dev/null +++ b/vdi/fixme.cpp @@ -0,0 +1,34 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vdi_common.hpp" +#include + +KHRicdVendorDispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0}; +amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_}; + +RUNTIME_ENTRY(cl_int, clGetDeviceIDs, + (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + return CL_SUCCESS; +} +RUNTIME_EXIT diff --git a/vdi/hip_activity.cpp b/vdi/hip_activity.cpp new file mode 100644 index 0000000000..045abc7380 --- /dev/null +++ b/vdi/hip_activity.cpp @@ -0,0 +1,34 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "platform/activity.hpp" + +extern "C" void hipInitActivityCallback(void* id_callback, void* op_callback, void* arg) { + activity_prof::CallbacksTable::init(reinterpret_cast(id_callback), + reinterpret_cast(op_callback), + arg); +} + +extern "C" bool hipEnableActivityCallback(unsigned op, bool enable) { + return activity_prof::CallbacksTable::SetEnabled(op, enable); +} + +extern "C" const char* hipGetCmdName(unsigned op) { + return getOclCommandKindString(static_cast(op)); +} diff --git a/vdi/hip_context.cpp b/vdi/hip_context.cpp new file mode 100644 index 0000000000..c4a68777ab --- /dev/null +++ b/vdi/hip_context.cpp @@ -0,0 +1,382 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "hip_internal.hpp" +#include "platform/runtime.hpp" +#include "utils/flags.hpp" +#include "utils/versions.hpp" + +std::vector g_devices; + +namespace hip { + +thread_local amd::Context* g_context = nullptr; +thread_local std::stack g_ctxtStack; +thread_local hipError_t g_lastError = hipSuccess; +std::once_flag g_ihipInitialized; +amd::Context* host_context = nullptr; + +std::map g_nullStreams; + +void init() { + if (!amd::Runtime::initialized()) { + amd::IS_HIP = true; + GPU_NUM_MEM_DEPENDENCY = 0; + amd::Runtime::init(); + } + + const std::vector& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false); + + for (unsigned int i=0; i device(1, devices[i]); + amd::Context* context = new amd::Context(device, amd::Context::Info()); + if (!context) return; + + if (context && CL_SUCCESS != context->create(nullptr)) { + context->release(); + } else { + g_devices.push_back(context); + } + } + + host_context = new amd::Context(devices, amd::Context::Info()); + if (!host_context) return; + + if (host_context && CL_SUCCESS != host_context->create(nullptr)) { + host_context->release(); + } + + PlatformState::instance().init(); +} + +amd::Context* getCurrentContext() { + return g_context; +} + +void setCurrentContext(unsigned int index) { + assert(index(stream); + if ((s->flags & hipStreamNonBlocking) == 0) { + getNullStream()->finish(); + } + return s->asHostQueue(); + } +} + +amd::HostQueue* getNullStream(amd::Context& context) { + auto stream = g_nullStreams.find(&context); + if (stream == g_nullStreams.end()) { + amd::Device* device = context.devices()[0]; + cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; + amd::HostQueue* queue = new amd::HostQueue(context, *device, properties, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + g_nullStreams[&context] = queue; + return queue; + } + return stream->second; +} + +amd::HostQueue* getNullStream() { + amd::Context* context = getCurrentContext(); + return context ? getNullStream(*context) : nullptr; +} + +}; + +using namespace hip; + +hipError_t hipInit(unsigned int flags) { + HIP_INIT_API(hipInit, flags); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) { + HIP_INIT_API(hipCtxCreate, ctx, flags, device); + + if (static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidValue); + } + + *ctx = reinterpret_cast(g_devices[device]); + + // Increment ref count for device primary context + g_devices[device]->retain(); + g_ctxtStack.push(g_devices[device]); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxSetCurrent(hipCtx_t ctx) { + HIP_INIT_API(hipCtxSetCurrent, ctx); + + if (ctx == nullptr) { + if(!g_ctxtStack.empty()) { + g_ctxtStack.pop(); + } + } else { + hip::g_context = reinterpret_cast(as_amd(ctx)); + if(!g_ctxtStack.empty()) { + g_ctxtStack.pop(); + } + g_ctxtStack.push(hip::getCurrentContext()); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { + HIP_INIT_API(hipCtxGetCurrent, ctx); + + *ctx = reinterpret_cast(hip::getCurrentContext()); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { + HIP_INIT_API(hipCtxGetSharedMemConfig, pConfig); + + *pConfig = hipSharedMemBankSizeFourByte; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipRuntimeGetVersion(int *runtimeVersion) { + HIP_INIT_API(hipRuntimeGetVersion, runtimeVersion); + + if (!runtimeVersion) { + HIP_RETURN(hipErrorInvalidValue); + } + + *runtimeVersion = AMD_PLATFORM_BUILD_NUMBER; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxDestroy(hipCtx_t ctx) { + HIP_INIT_API(hipCtxDestroy, ctx); + + amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); + if (amdContext == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Release last tracked command + hip::getNullStream()->setLastQueuedCommand(nullptr); + + // Need to remove the ctx of calling thread if its the top one + if (!g_ctxtStack.empty() && g_ctxtStack.top() == amdContext) { + g_ctxtStack.pop(); + } + + // Remove context from global context list + for (unsigned int i = 0; i < g_devices.size(); i++) { + if (g_devices[i] == amdContext) { + // Decrement ref count for device primary context + amdContext->release(); + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { + HIP_INIT_API(hipCtxPopCurrent, ctx); + + amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); + if (amdContext == nullptr) { + HIP_RETURN(hipErrorInvalidContext); + } + + if (!g_ctxtStack.empty()) { + amdContext = g_ctxtStack.top(); + g_ctxtStack.pop(); + } else { + HIP_RETURN(hipErrorInvalidContext); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxPushCurrent(hipCtx_t ctx) { + HIP_INIT_API(hipCtxPushCurrent, ctx); + + amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); + if (amdContext == nullptr) { + HIP_RETURN(hipErrorInvalidContext); + } + + hip::g_context = amdContext; + g_ctxtStack.push(hip::getCurrentContext()); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDriverGetVersion(int* driverVersion) { + HIP_INIT_API(hipDriverGetVersion, driverVersion); + + auto* deviceHandle = g_devices[0]->devices()[0]; + const auto& info = deviceHandle->info(); + + if (driverVersion) { + *driverVersion = AMD_PLATFORM_BUILD_NUMBER * 100 + + AMD_PLATFORM_REVISION_NUMBER; + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetDevice(hipDevice_t* device) { + HIP_INIT_API(hipCtxGetDevice, device); + + if (device != nullptr) { + for (unsigned int i = 0; i < g_devices.size(); i++) { + if (g_devices[i] == hip::getCurrentContext()) { + *device = static_cast(i); + HIP_RETURN(hipSuccess); + } + } + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipErrorInvalidContext); +} + +hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { + HIP_INIT_API(hipCtxGetApiVersion, apiVersion); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) { + HIP_INIT_API(hipCtxGetCacheConfig, cacheConfig); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) { + HIP_INIT_API(hipCtxSetCacheConfig, cacheConfig); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { + HIP_INIT_API(hipCtxSetSharedMemConfig, config); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxSynchronize(void) { + HIP_INIT_API(hipCtxSynchronize, 1); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxGetFlags(unsigned int* flags) { + HIP_INIT_API(hipCtxGetFlags, flags); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) { + HIP_INIT_API(hipDevicePrimaryCtxGetState, dev, flags, active); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (flags != nullptr) { + *flags = 0; + } + + if (active != nullptr) { + *active = (g_devices[dev] == hip::getCurrentContext())? 1 : 0; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxRelease, dev); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxRetain, pctx, dev); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + if (pctx == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pctx = reinterpret_cast(g_devices[dev]); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxReset, dev); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { + HIP_INIT_API(hipDevicePrimaryCtxSetFlags, dev, flags); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } else { + HIP_RETURN(hipErrorContextAlreadyInUse); + } +} diff --git a/vdi/hip_device.cpp b/vdi/hip_device.cpp new file mode 100644 index 0000000000..16d14b2b12 --- /dev/null +++ b/vdi/hip_device.cpp @@ -0,0 +1,234 @@ +/* +Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { + HIP_INIT_API(hipDeviceGet, device, deviceId); + + if (device != nullptr) { + *device = deviceId; + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +}; + +hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) { + + HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig); + + // No way to set cache config yet. + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { + + HIP_INIT_API(hipDeviceTotalMem, bytes, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (bytes == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + + *bytes = info.globalMemSize_; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { + + HIP_INIT_API(hipDeviceComputeCapability, major, minor, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (major == nullptr || minor == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + *major = info.gfxipVersion_ / 100; + *minor = info.gfxipVersion_ % 100; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetCount(int* count) { + HIP_INIT_API(NONE, count); + + HIP_RETURN(ihipDeviceGetCount(count)); +} + +hipError_t ihipDeviceGetCount(int* count) { + if (count == nullptr) { + return hipErrorInvalidValue; + } + + // Get all available devices + *count = g_devices.size(); + + if (*count < 1) { + return hipErrorNoDevice; + } + + return hipSuccess; +} + +hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { + + HIP_INIT_API(hipDeviceGetName, (void*)name, len, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (name == nullptr || len <= 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + const auto nameLen = ::strlen(info.boardName_); + + // Make sure that the size of `dest` is big enough to hold `src` including + // trailing zero byte + if (nameLen > (cl_uint)(len - 1)) { + HIP_RETURN(hipErrorInvalidValue); + } + + ::strncpy(name, info.boardName_, (nameLen + 1)); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) { + HIP_INIT_API(hipGetDeviceProperties, props, device); + + if (props == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (unsigned(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + auto* deviceHandle = g_devices[device]->devices()[0]; + + hipDeviceProp_t deviceProps = {0}; + + const auto& info = deviceHandle->info(); + ::strncpy(deviceProps.name, info.boardName_, 128); + deviceProps.totalGlobalMem = info.globalMemSize_; + deviceProps.sharedMemPerBlock = info.localMemSizePerCU_; + deviceProps.regsPerBlock = info.availableSGPRs_; + deviceProps.warpSize = info.wavefrontWidth_; + deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_; + deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0]; + deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1]; + deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2]; + deviceProps.maxGridSize[0] = INT32_MAX; + deviceProps.maxGridSize[1] = INT32_MAX; + deviceProps.maxGridSize[2] = INT32_MAX; + deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000; + deviceProps.memoryClockRate = info.maxMemoryClockFrequency_ * 1000; + deviceProps.memoryBusWidth = info.globalMemChannels_ * 32; + deviceProps.totalConstMem = info.maxConstantBufferSize_; + deviceProps.major = info.gfxipVersion_ / 100; + deviceProps.minor = info.gfxipVersion_ % 100; + deviceProps.multiProcessorCount = info.maxComputeUnits_; + deviceProps.l2CacheSize = info.l2CacheSize_; + deviceProps.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_; + deviceProps.computeMode = 0; + deviceProps.clockInstructionRate = info.timeStampFrequency_; + deviceProps.arch.hasGlobalInt32Atomics = 1; + deviceProps.arch.hasGlobalFloatAtomicExch = 1; + deviceProps.arch.hasSharedInt32Atomics = 1; + deviceProps.arch.hasSharedFloatAtomicExch = 1; + deviceProps.arch.hasFloatAtomicAdd = 0; + deviceProps.arch.hasGlobalInt64Atomics = 1; + deviceProps.arch.hasSharedInt64Atomics = 1; + deviceProps.arch.hasDoubles = 1; + deviceProps.arch.hasWarpVote = 0; + deviceProps.arch.hasWarpBallot = 0; + deviceProps.arch.hasWarpShuffle = 0; + deviceProps.arch.hasFunnelShift = 0; + deviceProps.arch.hasThreadFenceSystem = 1; + deviceProps.arch.hasSyncThreadsExt = 0; + deviceProps.arch.hasSurfaceFuncs = 0; + deviceProps.arch.has3dGrid = 1; + deviceProps.arch.hasDynamicParallelism = 0; + deviceProps.concurrentKernels = 1; + deviceProps.pciDomainID = info.deviceTopology_.pcie.function; + deviceProps.pciBusID = info.deviceTopology_.pcie.bus; + deviceProps.pciDeviceID = info.deviceTopology_.pcie.device; + deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; + //deviceProps.isMultiGpuBoard = info.; + deviceProps.canMapHostMemory = 1; + deviceProps.gcnArch = info.gfxipVersion_; + deviceProps.cooperativeLaunch = info.cooperativeGroups_; + deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_; + + deviceProps.maxTexture1D = info.imageMaxBufferSize_; + deviceProps.maxTexture2D[0] = info.image2DMaxWidth_; + deviceProps.maxTexture2D[1] = info.image2DMaxHeight_; + deviceProps.maxTexture3D[0] = info.image3DMaxWidth_; + deviceProps.maxTexture3D[1] = info.image3DMaxHeight_; + deviceProps.maxTexture3D[2] = info.image3DMaxDepth_; + deviceProps.hdpMemFlushCntl = nullptr; + deviceProps.hdpRegFlushCntl = nullptr; + + deviceProps.memPitch = info.maxMemAllocSize_; + deviceProps.textureAlignment = std::max(info.imageBaseAddressAlignment_, info.imagePitchAlignment_); + deviceProps.kernelExecTimeoutEnabled = 0; + deviceProps.ECCEnabled = info.errorCorrectionSupport_? 1:0; + + *props = deviceProps; + HIP_RETURN(hipSuccess); +} + +hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) { + HIP_INIT_API(NONE, deviceId, acc); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) { + HIP_INIT_API(NONE, stream, av); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} diff --git a/vdi/hip_device_runtime.cpp b/vdi/hip_device_runtime.cpp new file mode 100644 index 0000000000..1abefdf609 --- /dev/null +++ b/vdi/hip_device_runtime.cpp @@ -0,0 +1,543 @@ +/* +Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { + + HIP_INIT_API(hipChooseDevice, device, properties); + + if (device == nullptr || properties == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *device = 0; + cl_uint maxMatchedCount = 0; + int count = 0; + ihipDeviceGetCount(&count); + + for (cl_int i = 0; i< count; ++i) { + hipDeviceProp_t currentProp = {0}; + cl_uint validPropCount = 0; + cl_uint matchedCount = 0; + hipError_t err = hipGetDeviceProperties(¤tProp, i); + if (properties->major != 0) { + validPropCount++; + if(currentProp.major >= properties->major) { + matchedCount++; + } + } + if (properties->minor != 0) { + validPropCount++; + if(currentProp.minor >= properties->minor) { + matchedCount++; + } + } + if(properties->totalGlobalMem != 0) { + validPropCount++; + if(currentProp.totalGlobalMem >= properties->totalGlobalMem) { + matchedCount++; + } + } + if(properties->sharedMemPerBlock != 0) { + validPropCount++; + if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) { + matchedCount++; + } + } + if(properties->maxThreadsPerBlock != 0) { + validPropCount++; + if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) { + matchedCount++; + } + } + if(properties->totalConstMem != 0) { + validPropCount++; + if(currentProp.totalConstMem >= properties->totalConstMem ) { + matchedCount++; + } + } + if(properties->multiProcessorCount != 0) { + validPropCount++; + if(currentProp.multiProcessorCount >= + properties->multiProcessorCount ) { + matchedCount++; + } + } + if(properties->maxThreadsPerMultiProcessor != 0) { + validPropCount++; + if(currentProp.maxThreadsPerMultiProcessor >= + properties->maxThreadsPerMultiProcessor ) { + matchedCount++; + } + } + if(properties->memoryClockRate != 0) { + validPropCount++; + if(currentProp.memoryClockRate >= properties->memoryClockRate ) { + matchedCount++; + } + } + if(properties->memoryBusWidth != 0) { + validPropCount++; + if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) { + matchedCount++; + } + } + if(properties->l2CacheSize != 0) { + validPropCount++; + if(currentProp.l2CacheSize >= properties->l2CacheSize ) { + matchedCount++; + } + } + if(properties->regsPerBlock != 0) { + validPropCount++; + if(currentProp.regsPerBlock >= properties->regsPerBlock ) { + matchedCount++; + } + } + if(properties->maxSharedMemoryPerMultiProcessor != 0) { + validPropCount++; + if(currentProp.maxSharedMemoryPerMultiProcessor >= + properties->maxSharedMemoryPerMultiProcessor ) { + matchedCount++; + } + } + if(properties->warpSize != 0) { + validPropCount++; + if(currentProp.warpSize >= properties->warpSize ) { + matchedCount++; + } + } + if(validPropCount == matchedCount) { + *device = matchedCount > maxMatchedCount ? i : *device; + maxMatchedCount = std::max(matchedCount, maxMatchedCount); + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { + + HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device); + + if (pi == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + int count = 0; + ihipDeviceGetCount(&count); + if (device < 0 || device >= count) { + HIP_RETURN(hipErrorInvalidDevice); + } + + //FIXME: should we cache the props, or just select from deviceHandle->info_? + hipDeviceProp_t prop = {0}; + hipError_t err = hipGetDeviceProperties(&prop, device); + if (err != hipSuccess) { + HIP_RETURN(err); + } + + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + *pi = prop.maxThreadsPerBlock; + break; + case hipDeviceAttributeMaxBlockDimX: + *pi = prop.maxThreadsDim[0]; + break; + case hipDeviceAttributeMaxBlockDimY: + *pi = prop.maxThreadsDim[1]; + break; + case hipDeviceAttributeMaxBlockDimZ: + *pi = prop.maxThreadsDim[2]; + break; + case hipDeviceAttributeMaxGridDimX: + *pi = prop.maxGridSize[0]; + break; + case hipDeviceAttributeMaxGridDimY: + *pi = prop.maxGridSize[1]; + break; + case hipDeviceAttributeMaxGridDimZ: + *pi = prop.maxGridSize[2]; + break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + *pi = prop.sharedMemPerBlock; + break; + case hipDeviceAttributeTotalConstantMemory: + *pi = prop.totalConstMem; + break; + case hipDeviceAttributeWarpSize: + *pi = prop.warpSize; + break; + case hipDeviceAttributeMaxRegistersPerBlock: + *pi = prop.regsPerBlock; + break; + case hipDeviceAttributeClockRate: + *pi = prop.clockRate; + break; + case hipDeviceAttributeMemoryClockRate: + *pi = prop.memoryClockRate; + break; + case hipDeviceAttributeMemoryBusWidth: + *pi = prop.memoryBusWidth; + break; + case hipDeviceAttributeMultiprocessorCount: + *pi = prop.multiProcessorCount; + break; + case hipDeviceAttributeComputeMode: + *pi = prop.computeMode; + break; + case hipDeviceAttributeL2CacheSize: + *pi = prop.l2CacheSize; + break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + *pi = prop.maxThreadsPerMultiProcessor; + break; + case hipDeviceAttributeComputeCapabilityMajor: + *pi = prop.major; + break; + case hipDeviceAttributeComputeCapabilityMinor: + *pi = prop.minor; + break; + case hipDeviceAttributePciBusId: + *pi = prop.pciBusID; + break; + case hipDeviceAttributeConcurrentKernels: + *pi = prop.concurrentKernels; + break; + case hipDeviceAttributePciDeviceId: + *pi = prop.pciDeviceID; + break; + case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: + *pi = prop.maxSharedMemoryPerMultiProcessor; + break; + case hipDeviceAttributeIsMultiGpuBoard: + *pi = prop.isMultiGpuBoard; + break; + case hipDeviceAttributeCooperativeLaunch: + *pi = prop.cooperativeLaunch; + break; + case hipDeviceAttributeCooperativeMultiDeviceLaunch: + *pi = prop.cooperativeMultiDeviceLaunch; + break; + case hipDeviceAttributeMaxTexture1DWidth: + *pi = prop.maxTexture1D; + break; + case hipDeviceAttributeMaxTexture2DWidth: + *pi = prop.maxTexture2D[0]; + break; + case hipDeviceAttributeMaxTexture2DHeight: + *pi = prop.maxTexture2D[1]; + break; + case hipDeviceAttributeMaxTexture3DWidth: + *pi = prop.maxTexture3D[0]; + break; + case hipDeviceAttributeMaxTexture3DHeight: + *pi = prop.maxTexture3D[1]; + break; + case hipDeviceAttributeMaxTexture3DDepth: + *pi = prop.maxTexture3D[2]; + break; + case hipDeviceAttributeHdpMemFlushCntl: + *reinterpret_cast(pi) = prop.hdpMemFlushCntl; + break; + case hipDeviceAttributeHdpRegFlushCntl: + *reinterpret_cast(pi) = prop.hdpRegFlushCntl; + break; + case hipDeviceAttributeMaxPitch: + *pi = prop.memPitch; + break; + case hipDeviceAttributeTextureAlignment: + *pi = prop.textureAlignment; + break; + case hipDeviceAttributeKernelExecTimeout: + *pi = prop.kernelExecTimeoutEnabled; + break; + case hipDeviceAttributeCanMapHostMemory: + *pi = prop.canMapHostMemory; + break; + case hipDeviceAttributeEccEnabled: + *pi = prop.ECCEnabled; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { + + HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusIdstr); + + if (device == nullptr || pciBusIdstr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + int pciBusID = -1; + int pciDeviceID = -1; + int pciDomainID = -1; + + if (sscanf (pciBusIdstr, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID) == 0x3) { + int count = 0; + ihipDeviceGetCount(&count); + for (cl_int i = 0; i < count; i++) { + int pi = 0; + hipDevice_t dev; + hipDeviceGet(&dev, i); + hipDeviceGetAttribute(&pi, hipDeviceAttributePciBusId, dev); + + if (pciBusID == pi) { + *device = i; + break; + } + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) { + HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig); + + if(cacheConfig == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *cacheConfig = hipFuncCache_t(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { + + HIP_INIT_API(hipDeviceGetLimit, pValue, limit); + + if(pValue == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if(limit == hipLimitMallocHeapSize) { + hipDeviceProp_t prop; + hipGetDeviceProperties(&prop, ihipGetDevice()); + + *pValue = prop.totalGlobalMem; + HIP_RETURN(hipSuccess); + } else { + HIP_RETURN(hipErrorUnsupportedLimit); + } +} + +/** +hipError_t hipDeviceGetP2PAttribute ( int* value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice ) { + assert(0); + HIP_RETURN(hipSuccess); +} +**/ + +hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { + + HIP_INIT_API(hipDeviceGetPCIBusId, (void*)pciBusId, len, device); + + int count; + ihipDeviceGetCount(&count); + if (device < 0 || device > count) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (pciBusId == nullptr || len < 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipDeviceProp_t prop; + hipGetDeviceProperties(&prop, device); + + snprintf (pciBusId, len, "%04x:%02x:%02x.0", + prop.pciDomainID, + prop.pciBusID, + prop.pciDeviceID); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { + HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig); + + *pConfig = hipSharedMemBankSizeFourByte; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceReset ( void ) { + HIP_INIT_API(hipDeviceReset); + + /* FIXME */ + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { + HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig); + + // No way to set cache config yet. + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) { + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { + HIP_INIT_API(hipDeviceSetSharedMemConfig, config); + + // No way to set cache config yet. + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceSynchronize ( void ) { + HIP_INIT_API(hipDeviceSynchronize); + + hip::syncStreams(); + + amd::HostQueue* queue = hip::getNullStream(); + + if (!queue) { + HIP_RETURN(hipErrorOutOfMemory); + } + + queue->finish(); + HIP_RETURN(hipSuccess); +} + +int ihipGetDevice() { + for (unsigned int i = 0; i < g_devices.size(); i++) { + if (g_devices[i] == hip::getCurrentContext()) { + return i; + } + } + return -1; +} + +hipError_t hipGetDevice ( int* deviceId ) { + HIP_INIT_API(hipGetDevice, deviceId); + + if (deviceId != nullptr) { + int dev = ihipGetDevice(); + if (dev == -1) { + HIP_RETURN(hipErrorNoDevice); + } + *deviceId = dev; + HIP_RETURN(hipSuccess); + } else { + HIP_RETURN(hipErrorInvalidValue); + } +} + +hipError_t hipGetDeviceCount ( int* count ) { + HIP_INIT_API(hipGetDeviceCount, count); + + HIP_RETURN(ihipDeviceGetCount(count)); +} + +hipError_t hipGetDeviceFlags ( unsigned int* flags ) { + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) { + HIP_INIT_API(NONE, handle, event); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) { + HIP_INIT_API(NONE, event, handle); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipSetDevice ( int device ) { + HIP_INIT_API(hipSetDevice, device); + + if (static_cast(device) < g_devices.size()) { + hip::setCurrentContext(device); + + HIP_RETURN(hipSuccess); + } + HIP_RETURN(hipErrorInvalidDevice); +} + +hipError_t hipSetDeviceFlags ( unsigned int flags ) { + HIP_INIT_API(hipSetDeviceFlags, flags); + + /* FIXME */ + /* Not all of Ctx may be implemented */ + + unsigned supportedFlags = + hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax; + + if (flags & (~supportedFlags)) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipSetValidDevices ( int* device_arr, int len ) { + HIP_INIT_API(NONE, device_arr, len); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount) { + HIP_INIT_API(hipExtGetLinkTypeAndHopCount, device1, device2, linktype, hopcount); + + amd::Device* amd_dev_obj1 = nullptr; + amd::Device* amd_dev_obj2 = nullptr; + const int numDevices = static_cast(g_devices.size()); + + if ((device1 < 0) || (device1 >= numDevices) || (device2 < 0) || (device2 >= numDevices)) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if ((linktype == nullptr) || (hopcount == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd_dev_obj1 = g_devices[device1]->devices()[0]; + amd_dev_obj2 = g_devices[device2]->devices()[0]; + + if (!amd_dev_obj1->findLinkTypeAndHopCount(amd_dev_obj2, linktype, hopcount)) { + HIP_RETURN(hipErrorInvalidHandle); + } + + HIP_RETURN(hipSuccess); +} + diff --git a/vdi/hip_error.cpp b/vdi/hip_error.cpp new file mode 100644 index 0000000000..6c0a01e215 --- /dev/null +++ b/vdi/hip_error.cpp @@ -0,0 +1,166 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipGetLastError() +{ + HIP_INIT_API(hipGetLastError); + hipError_t err = hip::g_lastError; + hip::g_lastError = hipSuccess; + return err; +} + +hipError_t hipPeekAtLastError() +{ + HIP_INIT_API(hipPeekAtLastError); + hipError_t err = hip::g_lastError; + HIP_RETURN(err); +} + +const char *hipGetErrorName(hipError_t hip_error) +{ + switch (hip_error) { + case hipSuccess: + return "hipSuccess"; + case hipErrorOutOfMemory: + return "hipErrorOutOfMemory"; + case hipErrorNotInitialized: + return "hipErrorNotInitialized"; + case hipErrorDeinitialized: + return "hipErrorDeinitialized"; + case hipErrorProfilerDisabled: + return "hipErrorProfilerDisabled"; + case hipErrorProfilerNotInitialized: + return "hipErrorProfilerNotInitialized"; + case hipErrorProfilerAlreadyStarted: + return "hipErrorProfilerAlreadyStarted"; + case hipErrorProfilerAlreadyStopped: + return "hipErrorProfilerAlreadyStopped"; + case hipErrorInvalidImage: + return "hipErrorInvalidImage"; + case hipErrorInvalidContext: + return "hipErrorInvalidContext"; + case hipErrorContextAlreadyCurrent: + return "hipErrorContextAlreadyCurrent"; + case hipErrorMapFailed: + return "hipErrorMapFailed"; + case hipErrorUnmapFailed: + return "hipErrorUnmapFailed"; + case hipErrorArrayIsMapped: + return "hipErrorArrayIsMapped"; + case hipErrorAlreadyMapped: + return "hipErrorAlreadyMapped"; + case hipErrorNoBinaryForGpu: + return "hipErrorNoBinaryForGpu"; + case hipErrorAlreadyAcquired: + return "hipErrorAlreadyAcquired"; + case hipErrorNotMapped: + return "hipErrorNotMapped"; + case hipErrorNotMappedAsArray: + return "hipErrorNotMappedAsArray"; + case hipErrorNotMappedAsPointer: + return "hipErrorNotMappedAsPointer"; + case hipErrorECCNotCorrectable: + return "hipErrorECCNotCorrectable"; + case hipErrorUnsupportedLimit: + return "hipErrorUnsupportedLimit"; + case hipErrorContextAlreadyInUse: + return "hipErrorContextAlreadyInUse"; + case hipErrorPeerAccessUnsupported: + return "hipErrorPeerAccessUnsupported"; + case hipErrorInvalidKernelFile: + return "hipErrorInvalidKernelFile"; + case hipErrorInvalidGraphicsContext: + return "hipErrorInvalidGraphicsContext"; + case hipErrorInvalidSource: + return "hipErrorInvalidSource"; + case hipErrorFileNotFound: + return "hipErrorFileNotFound"; + case hipErrorSharedObjectSymbolNotFound: + return "hipErrorSharedObjectSymbolNotFound"; + case hipErrorSharedObjectInitFailed: + return "hipErrorSharedObjectInitFailed"; + case hipErrorOperatingSystem: + return "hipErrorOperatingSystem"; + case hipErrorSetOnActiveProcess: + return "hipErrorSetOnActiveProcess"; + case hipErrorInvalidHandle: + return "hipErrorInvalidHandle"; + case hipErrorNotFound: + return "hipErrorNotFound"; + case hipErrorIllegalAddress: + return "hipErrorIllegalAddress"; + case hipErrorMissingConfiguration: + return "hipErrorMissingConfiguration"; + case hipErrorLaunchFailure: + return "hipErrorLaunchFailure"; + case hipErrorPriorLaunchFailure: + return "hipErrorPriorLaunchFailure"; + case hipErrorLaunchTimeOut: + return "hipErrorLaunchTimeOut"; + case hipErrorLaunchOutOfResources: + return "hipErrorLaunchOutOfResources"; + case hipErrorInvalidDeviceFunction: + return "hipErrorInvalidDeviceFunction"; + case hipErrorInvalidConfiguration: + return "hipErrorInvalidConfiguration"; + case hipErrorInvalidDevice: + return "hipErrorInvalidDevice"; + case hipErrorInvalidValue: + return "hipErrorInvalidValue"; + case hipErrorInvalidDevicePointer: + return "hipErrorInvalidDevicePointer"; + case hipErrorInvalidMemcpyDirection: + return "hipErrorInvalidMemcpyDirection"; + case hipErrorUnknown: + return "hipErrorUnknown"; + case hipErrorNotReady: + return "hipErrorNotReady"; + case hipErrorNoDevice: + return "hipErrorNoDevice"; + case hipErrorPeerAccessAlreadyEnabled: + return "hipErrorPeerAccessAlreadyEnabled"; + case hipErrorPeerAccessNotEnabled: + return "hipErrorPeerAccessNotEnabled"; + case hipErrorRuntimeMemory: + return "hipErrorRuntimeMemory"; + case hipErrorRuntimeOther: + return "hipErrorRuntimeOther"; + case hipErrorHostMemoryAlreadyRegistered: + return "hipErrorHostMemoryAlreadyRegistered"; + case hipErrorHostMemoryNotRegistered: + return "hipErrorHostMemoryNotRegistered"; + case hipErrorTbd: + return "hipErrorTbd"; + default: + return "hipErrorUnknown"; + }; +} + +const char *hipGetErrorString(hipError_t hip_error) +{ + return hipGetErrorName(hip_error); +} + diff --git a/vdi/hip_event.cpp b/vdi/hip_event.cpp new file mode 100644 index 0000000000..1de32d8fff --- /dev/null +++ b/vdi/hip_event.cpp @@ -0,0 +1,264 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_event.hpp" + +namespace hip { + +bool Event::ready() { + event_->notifyCmdQueue(); + + return (event_->status() == CL_COMPLETE); +} + +hipError_t Event::query() { + amd::ScopedLock lock(lock_); + + if (event_ == nullptr) { + return hipErrorInvalidHandle; + } + + return ready() ? hipSuccess : hipErrorNotReady; +} + +hipError_t Event::synchronize() { + amd::ScopedLock lock(lock_); + + if (event_ == nullptr) { + return hipErrorInvalidHandle; + } + + event_->awaitCompletion(); + + return hipSuccess; +} + +hipError_t Event::elapsedTime(Event& eStop, float& ms) { + amd::ScopedLock startLock(lock_); + + if (this == &eStop) { + if (event_ == nullptr) { + return hipErrorInvalidHandle; + } + + if (flags & hipEventDisableTiming) { + return hipErrorInvalidHandle; + } + + if (!ready()) { + return hipErrorNotReady; + } + + ms = 0.f; + return hipSuccess; + } + amd::ScopedLock stopLock(eStop.lock_); + + if (event_ == nullptr || + eStop.event_ == nullptr) { + return hipErrorInvalidHandle; + } + + if ((flags | eStop.flags) & hipEventDisableTiming) { + return hipErrorInvalidHandle; + } + + if (!ready() || !eStop.ready()) { + return hipErrorNotReady; + } + + if (event_ != eStop.event_) { + ms = static_cast(static_cast(eStop.event_->profilingInfo().end_ - + event_->profilingInfo().start_))/1000000.f; + } else { + ms = 0.f; + } + + return hipSuccess; +} + +hipError_t Event::streamWait(amd::HostQueue* hostQueue, uint flags) { + if (stream_ == hostQueue) return hipSuccess; + + amd::ScopedLock lock(lock_); + bool retain = false; + + if (event_ == nullptr) { + event_ = stream_->getLastQueuedCommand(true); + retain = true; + } + + if (!event_->notifyCmdQueue()) { + return hipErrorLaunchOutOfResources; + } + amd::Command::EventWaitList eventWaitList; + eventWaitList.push_back(event_); + + amd::Command* command = new amd::Marker(*hostQueue, false, eventWaitList); + if (command == NULL) { + return hipErrorOutOfMemory; + } + command->enqueue(); + command->release(); + + if (retain) { + event_->release(); + event_ = nullptr; + } + + return hipSuccess; +} + +void Event::addMarker(amd::HostQueue* queue, amd::Command* command) { + amd::ScopedLock lock(lock_); + + stream_ = queue; + + if (event_ != nullptr) { + event_->release(); + } + + event_ = &command->event(); +} + +} + +hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { + if (event == nullptr) { + return hipErrorInvalidValue; + } + + unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming | + hipEventReleaseToDevice | hipEventReleaseToSystem; + const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem); + + const bool illegalFlags = + (flags & ~supportedFlags) || // can't set any unsupported flags. + (flags & releaseFlags) == releaseFlags; // can't set both release flags + + if (!illegalFlags) { + hip::Event* e = new hip::Event(flags); + + if (e == nullptr) { + return hipErrorOutOfMemory; + } + + *event = reinterpret_cast(e); + } else { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipEventQuery(hipEvent_t event) { + if (event == nullptr) { + return hipErrorInvalidHandle; + } + + hip::Event* e = reinterpret_cast(event); + + return e->query(); +} + +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { + HIP_INIT_API(hipEventCreateWithFlags, event, flags); + + HIP_RETURN(ihipEventCreateWithFlags(event, flags)); +} + +hipError_t hipEventCreate(hipEvent_t* event) { + HIP_INIT_API(hipEventCreate, event); + + HIP_RETURN(ihipEventCreateWithFlags(event, 0)); +} + +hipError_t hipEventDestroy(hipEvent_t event) { + HIP_INIT_API(hipEventDestroy, event); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + delete reinterpret_cast(event); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { + HIP_INIT_API(hipEventElapsedTime, ms, start, stop); + + if (start == nullptr || stop == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + if (ms == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + hip::Event* eStart = reinterpret_cast(start); + hip::Event* eStop = reinterpret_cast(stop); + + HIP_RETURN(eStart->elapsedTime(*eStop, *ms)); +} + +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { + HIP_INIT_API(hipEventRecord, event, stream); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + hip::Event* e = reinterpret_cast(event); + + amd::HostQueue* queue = hip::getQueue(stream); + + amd::Command* command = queue->getLastQueuedCommand(true); + + if (command == nullptr) { + command = new amd::Marker(*queue, false); + command->enqueue(); + } + + e->addMarker(queue, command); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipEventSynchronize(hipEvent_t event) { + HIP_INIT_API(hipEventSynchronize, event); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + hip::Event* e = reinterpret_cast(event); + + HIP_RETURN(e->synchronize()); +} + +hipError_t hipEventQuery(hipEvent_t event) { + HIP_INIT_API(hipEventQuery, event); + + HIP_RETURN(ihipEventQuery(event)); +} diff --git a/vdi/hip_event.hpp b/vdi/hip_event.hpp new file mode 100644 index 0000000000..ddf7a9c06b --- /dev/null +++ b/vdi/hip_event.hpp @@ -0,0 +1,67 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_EVENT_H +#define HIP_EVENT_H + +#include "hip_internal.hpp" +#include "thread/monitor.hpp" + +namespace hip { + +class TimerMarker: public amd::Marker { +public: + TimerMarker(amd::HostQueue& queue) : amd::Marker(queue, false) { + profilingInfo_.enabled_ = true; + profilingInfo_.callback_ = nullptr; + profilingInfo_.start_ = profilingInfo_.end_ = 0; + } +}; + +class Event { +public: + Event(unsigned int flags) : flags(flags), lock_("hipEvent_t"), stream_(getNullStream()), event_(nullptr) {} + ~Event() { + if (event_ != nullptr) { + event_->release(); + } + } + unsigned int flags; + + hipError_t query(); + hipError_t synchronize(); + hipError_t elapsedTime(Event& stop, float& ms); + hipError_t streamWait(amd::HostQueue* queue, uint flags); + + void addMarker(amd::HostQueue* queue, amd::Command* command); + +private: + amd::Monitor lock_; + amd::HostQueue* stream_; + amd::Event* event_; + + bool ready(); +}; + +}; + +#endif // HIP_EVEMT_H diff --git a/vdi/hip_hcc.def.in b/vdi/hip_hcc.def.in new file mode 100644 index 0000000000..54492eb48e --- /dev/null +++ b/vdi/hip_hcc.def.in @@ -0,0 +1,219 @@ +EXPORTS +hipChooseDevice +hipCtxCreate +hipCtxDestroy +hipCtxDisablePeerAccess +hipCtxEnablePeerAccess +hipCtxGetApiVersion +hipCtxGetCacheConfig +hipCtxGetCurrent +hipCtxGetDevice +hipCtxGetFlags +hipCtxGetSharedMemConfig +hipCtxPopCurrent +hipCtxPushCurrent +hipCtxSetCacheConfig +hipCtxSetCurrent +hipCtxSetSharedMemConfig +hipCtxSynchronize +hipDeviceCanAccessPeer +hipDeviceComputeCapability +hipDeviceDisablePeerAccess +hipDeviceEnablePeerAccess +hipDeviceGet +hipDeviceGetAttribute +hipDeviceGetByPCIBusId +hipDeviceGetCacheConfig +hipDeviceGetStreamPriorityRange +hipDeviceGetLimit +hipDeviceGetName +hipDeviceGetPCIBusId +hipDeviceGetSharedMemConfig +hipDevicePrimaryCtxGetState +hipDevicePrimaryCtxRelease +hipDevicePrimaryCtxReset +hipDevicePrimaryCtxRetain +hipDevicePrimaryCtxSetFlags +hipDeviceReset +hipDeviceSetCacheConfig +hipDeviceSetSharedMemConfig +hipDeviceSynchronize +hipDeviceTotalMem +hipDriverGetVersion +hipEventCreate +hipEventCreateWithFlags +hipEventDestroy +hipEventElapsedTime +hipEventQuery +hipEventRecord +hipEventSynchronize +hipExtGetLinkTypeAndHopCount +hipExtLaunchMultiKernelMultiDevice +hipExtMallocWithFlags +hipExtModuleLaunchKernel +hipFree +hipFreeArray +hipFuncSetCacheConfig +hipGetDevice +hipGetDeviceCount +hipGetDeviceProperties +hipGetErrorName +hipGetErrorString +hipGetLastError +hipMemAllocHost +hipHostAlloc +hipHostFree +hipHostGetDevicePointer +hipHostGetFlags +hipHostMalloc +hipHostRegister +hipHostUnregister +hipInit +hipIpcCloseMemHandle +hipIpcGetMemHandle +hipIpcOpenMemHandle +hipMalloc +hipMalloc3D +hipMalloc3DArray +hipMallocManaged +hipArrayCreate +hipArray3DCreate +hipMallocArray +hipMemAllocPitch +hipMallocPitch +hipMemcpy +hipMemcpyWithStream +hipMemcpyParam2D +hipMemcpy2D +hipMemcpy2DAsync +hipMemcpy2DToArray +hipMemcpy3D +hipMemcpy3DAsync +hipMemcpyAsync +hipMemcpyDtoD +hipMemcpyDtoDAsync +hipMemcpyDtoH +hipMemcpyDtoHAsync +hipMemcpyFromSymbol +hipMemcpyFromSymbolAsync +hipMemcpyHtoD +hipMemcpyHtoDAsync +hipMemcpyPeer +hipMemcpyPeerAsync +hipMemcpyToArray +hipMemcpyFromArray +hipMemcpyToSymbol +hipMemcpyToSymbolAsync +hipMemGetAddressRange +hipGetSymbolAddress +hipGetSymbolSize +hipMemGetInfo +hipMemPtrGetInfo +hipMemset +hipMemsetAsync +hipMemsetD8 +hipMemsetD8Async +hipMemsetD16 +hipMemsetD16Async +hipMemsetD32 +hipMemsetD32Async +hipMemset2D +hipMemset2DAsync +hipMemset3D +hipMemset3DAsync +hipModuleGetFunction +hipModuleGetGlobal +hipModuleGetTexRef +hipModuleLaunchKernel +hipModuleLaunchKernelExt +hipLaunchCooperativeKernel +hipLaunchCooperativeKernelMultiDevice +hipHccModuleLaunchKernel +hipModuleLoad +hipModuleLoadData +hipModuleLoadDataEx +hipModuleUnload +hipOccupancyMaxActiveBlocksPerMultiprocessor +hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags +hipFuncGetAttributes +hipPeekAtLastError +hipPointerGetAttributes +hipProfilerStart +hipProfilerStop +hipRuntimeGetVersion +hipSetDevice +hipSetDeviceFlags +hipStreamAddCallback +hipStreamCreate +hipStreamCreateWithFlags +hipStreamCreateWithPriority +hipStreamDestroy +hipStreamGetFlags +hipStreamQuery +hipStreamSynchronize +hipStreamWaitEvent +__hipPopCallConfiguration +__hipPushCallConfiguration +__hipRegisterFatBinary +__hipRegisterFunction +__hipRegisterVar +__hipUnregisterFatBinary +__gnu_h2f_ieee +__gnu_f2h_ieee +hipConfigureCall +hipSetupArgument +hipLaunchByPtr +hipLaunchKernel +hipRegisterApiCallback +hipRemoveApiCallback +hipRegisterActivityCallback +hipRemoveActivityCallback +hipApiName +hipKernelNameRef +hipCreateTextureObject +hipDestroyTextureObject +hipGetTextureObjectResourceDesc +hipGetTextureObjectResourceViewDesc +hipGetTextureObjectTextureDesc +hipBindTexture +hipBindTexture2D +hipBindTextureToArray +hipBindTextureToMipmappedArray +hipUnbindTexture +ihipUnbindTextureImpl +hipGetChannelDesc +hipGetTextureAlignmentOffset +hipGetTextureReference +hipTexRefSetFormat +hipTexRefSetFlags +hipTexRefSetFilterMode +hipTexRefSetAddressMode +hipTexRefSetArray +hipTexRefSetAddress +hipTexRefSetAddress2D +hipTexRefGetAddress +hipTexRefGetAddressMode +hipTexRefGetArray +hipTexRefSetArray +ihipBindTextureImpl +ihipBindTextureToArrayImpl +hipCreateChannelDesc +hipProfilerStart +hipProfilerStop +hipHccGetAccelerator +hipHccGetAcceleratorView +hipCreateSurfaceObject +hipDestroySurfaceObject +hipInitActivityCallback +hipEnableActivityCallback +hipGetCmdName +hiprtcAddNameExpression +hiprtcCompileProgram +hiprtcCreateProgram +hiprtcDestroyProgram +hiprtcGetLoweredName +hiprtcGetProgramLog +hiprtcGetProgramLogSize +hiprtcGetCode +hiprtcGetCodeSize +hiprtcGetErrorString diff --git a/vdi/hip_hcc.map.in b/vdi/hip_hcc.map.in new file mode 100644 index 0000000000..554da1d41b --- /dev/null +++ b/vdi/hip_hcc.map.in @@ -0,0 +1,239 @@ +HIP_1.0 { +global: + hipChooseDevice; + hipCtxCreate; + hipCtxDestroy; + hipCtxDisablePeerAccess; + hipCtxEnablePeerAccess; + hipCtxGetApiVersion; + hipCtxGetCacheConfig; + hipCtxGetCurrent; + hipCtxGetDevice; + hipCtxGetFlags; + hipCtxGetSharedMemConfig; + hipCtxPopCurrent; + hipCtxPushCurrent; + hipCtxSetCacheConfig; + hipCtxSetCurrent; + hipCtxSetSharedMemConfig; + hipCtxSynchronize; + hipDeviceCanAccessPeer; + hipDeviceComputeCapability; + hipDeviceDisablePeerAccess; + hipDeviceEnablePeerAccess; + hipDeviceGet; + hipDeviceGetAttribute; + hipDeviceGetByPCIBusId; + hipDeviceGetCacheConfig; + hipDeviceGetStreamPriorityRange; + hipDeviceGetLimit; + hipDeviceGetName; + hipDeviceGetPCIBusId; + hipDeviceGetSharedMemConfig; + hipDevicePrimaryCtxGetState; + hipDevicePrimaryCtxRelease; + hipDevicePrimaryCtxReset; + hipDevicePrimaryCtxRetain; + hipDevicePrimaryCtxSetFlags; + hipDeviceReset; + hipDeviceSetCacheConfig; + hipDeviceSetSharedMemConfig; + hipDeviceSynchronize; + hipDeviceTotalMem; + hipDriverGetVersion; + hipEventCreate; + hipEventCreateWithFlags; + hipEventDestroy; + hipEventElapsedTime; + hipEventQuery; + hipEventRecord; + hipEventSynchronize; + hipExtGetLinkTypeAndHopCount; + hipExtLaunchMultiKernelMultiDevice; + hipExtMallocWithFlags; + hipExtModuleLaunchKernel; + hipFree; + hipFreeArray; + hipFuncSetCacheConfig; + hipGetDevice; + hipGetDeviceCount; + hipGetDeviceProperties; + hipGetErrorName; + hipGetErrorString; + hipGetLastError; + hipMemAllocHost; + hipHostAlloc; + hipHostFree; + hipHostGetDevicePointer; + hipHostGetFlags; + hipHostMalloc; + hipHostRegister; + hipHostUnregister; + hipInit; + hipIpcCloseMemHandle; + hipIpcGetMemHandle; + hipIpcOpenMemHandle; + hipMalloc; + hipMalloc3D; + hipMalloc3DArray; + hipMallocManaged; + hipArrayCreate; + hipArray3DCreate; + hipMallocArray; + hipMallocPitch; + hipMemAllocPitch; + hipMemcpy; + hipMemcpyWithStream; + hipMemcpyParam2D; + hipMemcpy2D; + hipMemcpy2DAsync; + hipMemcpy2DToArray; + hipMemcpy3D; + hipMemcpy3DAsync; + hipMemcpyAsync; + hipMemcpyDtoD; + hipMemcpyDtoDAsync; + hipMemcpyDtoH; + hipMemcpyDtoHAsync; + hipMemcpyFromSymbol; + hipMemcpyFromSymbolAsync; + hipMemcpyHtoD; + hipMemcpyHtoDAsync; + hipMemcpyPeer; + hipMemcpyPeerAsync; + hipMemcpyToArray; + hipMemcpyFromArray; + hipMemcpyToSymbol; + hipMemcpyToSymbolAsync; + hipMemGetAddressRange; + hipGetSymbolAddress; + hipGetSymbolSize; + hipMemGetInfo; + hipMemPtrGetInfo; + hipMemset; + hipMemsetAsync; + hipMemsetD8; + hipMemsetD8Async; + hipMemsetD16; + hipMemsetD16Async; + hipMemsetD32; + hipMemsetD32Async; + hipMemset2D; + hipMemset2DAsync; + hipMemset3D; + hipMemset3DAsync; + hipModuleGetFunction; + hipModuleGetGlobal; + hipModuleGetTexRef; + hipModuleLaunchKernel; + hipModuleLaunchKernelExt; + hipLaunchCooperativeKernel; + hipLaunchCooperativeKernelMultiDevice; + hipModuleLoad; + hipModuleLoadData; + hipModuleLoadDataEx; + hipModuleUnload; + hipOccupancyMaxActiveBlocksPerMultiprocessor; + hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + hipFuncGetAttributes; + hipPeekAtLastError; + hipPointerGetAttributes; + hipProfilerStart; + hipProfilerStop; + hipRuntimeGetVersion; + hipSetDevice; + hipSetDeviceFlags; + hipStreamAddCallback; + hipStreamCreate; + hipStreamCreateWithFlags; + hipStreamCreateWithPriority; + hipStreamDestroy; + hipStreamGetFlags; + hipStreamQuery; + hipStreamSynchronize; + hipStreamWaitEvent; + __hipPopCallConfiguration; + __hipPushCallConfiguration; + __hipRegisterFatBinary; + __hipRegisterFunction; + __hipRegisterVar; + __hipUnregisterFatBinary; + __gnu_h2f_ieee; + __gnu_f2h_ieee; + hipConfigureCall; + hipSetupArgument; + hipLaunchByPtr; + hipLaunchKernel; + hipRegisterApiCallback; + hipRemoveApiCallback; + hipRegisterActivityCallback; + hipRemoveActivityCallback; + hipApiName; + hipKernelNameRef; + hipProfilerStart; + hipProfilerStop; + hiprtcCompileProgram; + hiprtcCreateProgram; + hiprtcDestroyProgram; + hiprtcGetLoweredName; + hiprtcGetProgramLog; + hiprtcGetProgramLogSize; + hiprtcGetCode; + hiprtcGetCodeSize; + hiprtcGetErrorString; + extern "C++" { + hip_impl::hipLaunchKernelGGLImpl*; + hip_impl::demangle*; + hipCreateTextureObject*; + hipDestroyTextureObject*; + hipGetTextureObjectResourceDesc*; + hipGetTextureObjectResourceViewDesc*; + hipGetTextureObjectTextureDesc*; + hipBindTexture*; + hipBindTexture2D*; + hipBindTextureToArray*; + hipBindTextureToMipmappedArray*; + hipUnbindTexture*; + ihipUnbindTextureImpl*; + hipGetChannelDesc*; + hipGetTextureAlignmentOffset*; + hipGetTextureReference*; + hipTexRefSetFormat*; + hipTexRefSetFlags*; + hipTexRefSetFilterMode*; + hipTexRefSetAddressMode*; + hipTexRefSetArray*; + hipTexRefSetAddress*; + hipTexRefSetAddress2D*; + hipTexRefGetAddress*; + hipTexRefGetAddressMode*; + hipTexRefGetArray*; + hipTexRefSetArray*; + hipCreateChannelDesc*; + ihipBindTextureImpl*; + ihipBindTextureToArrayImpl*; + hipHccGetAccelerator*; + hipHccGetAcceleratorView*; + hipCreateSurfaceObject*; + hipDestroySurfaceObject*; + hipHccModuleLaunchKernel*; + hipExtModuleLaunchKernel*; + hipInitActivityCallback*; + hipEnableActivityCallback*; + hipGetCmdName*; + hiprtcAddNameExpression*; + hiprtcCompileProgram*; + hiprtcCreateProgram*; + hiprtcDestroyProgram*; + hiprtcGetLoweredName*; + hiprtcGetProgramLog*; + hiprtcGetProgramLogSize*; + hiprtcGetCode*; + hiprtcGetCodeSize*; + hiprtcGetErrorString*; + hiprtcVersion*; + hiprtcGetTypeName*; + }; +local: + *; +}; diff --git a/vdi/hip_hcc.rc b/vdi/hip_hcc.rc new file mode 100644 index 0000000000..009dc30c18 --- /dev/null +++ b/vdi/hip_hcc.rc @@ -0,0 +1,75 @@ +#define STR(__macro__) #__macro__ +#define XSTR(__macro__) STR(__macro__) + +#if defined(_DEBUG) +#define DEBUG_ONLY(x) x +#else +#define DEBUG_ONLY(x) +#endif + +#define VERSION_PREFIX_MAJOR 2 +#define VERSION_PREFIX_MINOR 0 + + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. +// +#include "winresrc.h" +#include "utils/versions.hpp" + +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// English (U.S.) resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +#ifdef _WIN32 +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +#pragma code_page(1252) +#endif //_WIN32 + + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER + PRODUCTVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER + FILEFLAGSMASK 0x3fL +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x40004L + FILETYPE 0x2L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "Comments", " \0" + VALUE "CompanyName", "Advanced Micro Devices Inc.\0" + VALUE "FileDescription", AMD_PLATFORM_NAME " OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " Runtime\0" + VALUE "FileVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER) + VALUE "InternalName", "OpenCL" + VALUE "LegalCopyright", "Copyright (C) 2011 Advanced Micro Devices Inc.\0" + VALUE "OriginalFilename", "OpenCL.dll" + VALUE "ProductName", "OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " " AMD_PLATFORM_INFO "\0" + VALUE "ProductVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER) + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END + +#endif // English (U.S.) resources +///////////////////////////////////////////////////////////////////////////// diff --git a/vdi/hip_intercept.cpp b/vdi/hip_intercept.cpp new file mode 100644 index 0000000000..f4936c065d --- /dev/null +++ b/vdi/hip_intercept.cpp @@ -0,0 +1,58 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "hip/hip_runtime.h" +#include "hip_prof_api.h" + +// HIP API callback/activity + +api_callbacks_table_t callbacks_table; + +extern const std::string& FunctionName(const hipFunction_t f); +const char* hipKernelNameRef(const hipFunction_t f) { return FunctionName(f).c_str(); } + +hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) { + return callbacks_table.set_callback(id, reinterpret_cast(fun), arg) ? + hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRemoveApiCallback(uint32_t id) { + return callbacks_table.set_callback(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) { + return callbacks_table.set_activity(id, reinterpret_cast(fun), arg) ? + hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRemoveActivityCallback(uint32_t id) { + return callbacks_table.set_activity(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipEnableTracing(bool enabled) { + callbacks_table.set_enabled(enabled); + return hipSuccess; +} + +const char* hipApiName(uint32_t id) { + return hip_api_name(id); +} diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp new file mode 100644 index 0000000000..dd6639488f --- /dev/null +++ b/vdi/hip_internal.hpp @@ -0,0 +1,230 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_SRC_HIP_INTERNAL_H +#define HIP_SRC_HIP_INTERNAL_H + +#include "vdi_common.hpp" +#include "hip_prof_api.h" +#include "trace_helper.h" +#include "utils/debug.hpp" +#include +#include +#include +#include +#include + + +/*! IHIP IPC MEMORY Structure */ +#define IHIP_IPC_MEM_HANDLE_SIZE 32 +#define IHIP_IPC_MEM_RESERVED_SIZE LP64_SWITCH(28,24) + +typedef struct ihipIpcMemHandle_st { + char ipc_handle[IHIP_IPC_MEM_HANDLE_SIZE]; ///< ipc memory handle on ROCr + size_t psize; + char reserved[IHIP_IPC_MEM_RESERVED_SIZE]; +} ihipIpcMemHandle_t; + +#define HIP_INIT() \ + std::call_once(hip::g_ihipInitialized, hip::init); \ + if (hip::g_context == nullptr && g_devices.size() > 0) { \ + hip::g_context = g_devices[0]; \ + } + +// This macro should be called at the beginning of every HIP API. +#define HIP_INIT_API(cid, ...) \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ + amd::Thread* thread = amd::Thread::current(); \ + if (!VDI_CHECK_THREAD(thread)) { \ + HIP_RETURN(hipErrorOutOfMemory); \ + } \ + HIP_INIT() \ + HIP_CB_SPAWNER_OBJECT(cid); + +#define HIP_RETURN(ret) \ + hip::g_lastError = ret; \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, hipGetErrorName(hip::g_lastError)); \ + return hip::g_lastError; + +namespace hc { +class accelerator; +class accelerator_view; +}; + +namespace hip { + extern std::once_flag g_ihipInitialized; + extern thread_local amd::Context* g_context; + extern thread_local hipError_t g_lastError; + extern amd::Context* host_context; + + extern void init(); + + extern amd::Context* getCurrentContext(); + extern void setCurrentContext(unsigned int index); + + extern amd::HostQueue* getQueue(hipStream_t s); + extern amd::HostQueue* getNullStream(amd::Context&); + extern amd::HostQueue* getNullStream(); + extern void syncStreams(); + + + struct Function { + amd::Kernel* function_; + amd::Monitor lock_; + + Function(amd::Kernel* f) : function_(f), lock_("function lock") {} + hipFunction_t asHipFunction() { return reinterpret_cast(this); } + + static Function* asFunction(hipFunction_t f) { return reinterpret_cast(f); } + }; + + struct Stream { + amd::HostQueue* queue; + + amd::Device* device; + amd::Context* context; + amd::CommandQueue::Priority priority; + unsigned int flags; + + Stream(amd::Device* dev, amd::Context* ctx, amd::CommandQueue::Priority p, unsigned int f); + void create(); + amd::HostQueue* asHostQueue(); + void destroy(); + void finish(); + }; + +}; + +struct ihipExec_t { + dim3 gridDim_; + dim3 blockDim_; + size_t sharedMem_; + hipStream_t hStream_; + std::vector arguments_; +}; + +class PlatformState { + amd::Monitor lock_{"Guards global function map"}; + + std::unordered_map>> modules_; + bool initialized_{false}; + + void digestFatBinary(const void* data, std::vector>& programs); +public: + void init(); + std::vector>* addFatBinary(const void*data) + { + if (initialized_) { + digestFatBinary(data, modules_[data]); + } + return &modules_[data]; + } + void removeFatBinary(std::vector>* module) + { + for (auto& mod : modules_) { + if (&mod.second == module) { + modules_.erase(&mod); + return; + } + } + } + + struct RegisteredVar { + public: + RegisteredVar(): size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {} + ~RegisteredVar() {} + + hipDeviceptr_t getdeviceptr() const { return devicePtr_; }; + size_t getvarsize() const { return size_; }; + + size_t size_; // Size of the variable + hipDeviceptr_t devicePtr_; //Device Memory Address of the variable. + amd::Memory* amd_mem_obj_; + }; + + struct DeviceFunction { + std::string deviceName; + std::vector< std::pair< hipModule_t, bool > >* modules; + std::vector functions; + }; + struct DeviceVar { + void* shadowVptr; + std::string hostVar; + size_t size; + std::vector< std::pair< hipModule_t, bool > >* modules; + std::vector rvars; + bool dyn_undef; + }; +private: + std::unordered_map functions_; + std::unordered_multimap vars_; + + static PlatformState* platform_; + + PlatformState() {} + ~PlatformState() {} +public: + static PlatformState& instance() { + return *platform_; + } + + std::vector< std::pair >* unregisterVar(hipModule_t hmod); + + + PlatformState::DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod); + void registerVar(const void* hostvar, const DeviceVar& var); + void registerFunction(const void* hostFunction, const DeviceFunction& func); + + hipFunction_t getFunc(const void* hostFunction, int deviceId); + bool getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr); + bool getGlobalVar(const void* hostVar, int deviceId, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr); + bool getTexRef(const char* hostVar, textureReference** texRef); + + bool getShadowVarInfo(std::string var_name, hipModule_t hmod, + void** var_addr, size_t* var_size); + void setupArgument(const void *arg, size_t size, size_t offset); + void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); + + void popExec(ihipExec_t& exec); +}; + +extern std::vector g_devices; +extern hipError_t ihipDeviceGetCount(int* count); +extern int ihipGetDevice(); +extern hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags); +extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); +extern bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr, + size_t* var_size); + +inline std::ostream& operator<<(std::ostream& os, const dim3& s) { + os << '{'; + os << s.x; + os << ','; + os << s.y; + os << ','; + os << s.z; + os << '}'; + return os; +} + +#endif // HIP_SRC_HIP_INTERNAL_H diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp new file mode 100644 index 0000000000..a61b4d83e1 --- /dev/null +++ b/vdi/hip_memory.cpp @@ -0,0 +1,1714 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "hip_internal.hpp" +#include "platform/context.hpp" +#include "platform/command.hpp" +#include "platform/memory.hpp" + +extern void getChannelOrderAndType(const hipChannelFormatDesc& desc, + enum hipTextureReadMode readMode, + cl_channel_order* channelOrder, + cl_channel_type* channelType); + +extern void getDrvChannelOrderAndType(const enum hipArray_Format Format, + unsigned int NumChannels, + cl_channel_order* channelOrder, + cl_channel_type* channelType); + +extern void setDescFromChannelType(cl_channel_type channelType, hipChannelFormatDesc* desc); + +extern void getByteSizeFromChannelFormatKind(enum hipChannelFormatKind channelFormatKind, size_t* byteSize); + +amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { + amd::Memory *memObj = amd::MemObjMap::FindMemObj(ptr); + if (memObj != nullptr) { + if (memObj->getSvmPtr() != nullptr) { + // SVM pointer + offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getSvmPtr()); + } else if (memObj->getHostMem() != nullptr) { + // Prepinned memory + offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getHostMem()); + } else { + ShouldNotReachHere(); + } + } + return memObj; +} + +hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) +{ + if (sizeBytes == 0) { + *ptr = nullptr; + return hipSuccess; + } + else if (ptr == nullptr) { + return hipErrorInvalidValue; + } + + amd::Context* amdContext = ((flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0)? + hip::host_context : hip::getCurrentContext(); + + if (amdContext == nullptr) { + return hipErrorOutOfMemory; + } + + if (amdContext->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { + return hipErrorOutOfMemory; + } + + *ptr = amd::SvmBuffer::malloc(*amdContext, flags, sizeBytes, amdContext->devices()[0]->info().memBaseAddrAlign_); + if (*ptr == nullptr) { + + hip::syncStreams(); + hip::getNullStream()->finish(); + + *ptr = amd::SvmBuffer::malloc(*amdContext, flags, sizeBytes, amdContext->devices()[0]->info().memBaseAddrAlign_); + if (*ptr == nullptr) { + return hipErrorOutOfMemory; + } + } + ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipMalloc ptr=0x%zx", *ptr); + return hipSuccess; +} + +hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + amd::HostQueue& queue, bool isAsync = false) { + if (sizeBytes == 0) { + // Skip if nothing needs writing. + return hipSuccess; + } + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + + size_t sOffset = 0; + amd::Memory *srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory *dstMemory = getMemoryObject(dst, dOffset); + amd::Device* queueDevice = &queue.device(); + + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (kind == hipMemcpyHostToHost)) { + queue.finish(); + memcpy(dst, src, sizeBytes); + return hipSuccess; + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + amd::HostQueue* pQueue = &queue; + if (queueDevice != dstMemory->getContext().devices()[0]) { + pQueue = hip::getNullStream(dstMemory->getContext()); + } + command = new amd::WriteMemoryCommand(*pQueue, CL_COMMAND_WRITE_BUFFER, waitList, + *dstMemory->asBuffer(), dOffset, sizeBytes, src); + isAsync = false; + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + amd::HostQueue* pQueue = &queue; + if (queueDevice != srcMemory->getContext().devices()[0]) { + pQueue = hip::getNullStream(srcMemory->getContext()); + } + command = new amd::ReadMemoryCommand(*pQueue, CL_COMMAND_READ_BUFFER, waitList, + *srcMemory->asBuffer(), sOffset, sizeBytes, dst); + isAsync = false; + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + if (queueDevice != srcMemory->getContext().devices()[0]) { + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + return hipSuccess; + } + if (queueDevice != dstMemory->getContext().devices()[0]) { + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + return hipSuccess; + } + command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, dOffset, sizeBytes); + } + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipExtMallocWithFlags, ptr, sizeBytes, flags); + + if (flags != hipDeviceMallocDefault && + flags != hipDeviceMallocFinegrained) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipMalloc(ptr, sizeBytes, (flags & hipDeviceMallocFinegrained)? CL_MEM_SVM_ATOMICS: 0)); +} + +hipError_t hipMalloc(void** ptr, size_t sizeBytes) { + HIP_INIT_API(hipMalloc, ptr, sizeBytes); + + HIP_RETURN(ihipMalloc(ptr, sizeBytes, 0)); +} + +hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipHostMalloc, ptr, sizeBytes, flags); + + if (ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *ptr = nullptr; + + const unsigned int coherentFlags = hipHostMallocCoherent | hipHostMallocNonCoherent; + + // can't have both Coherent and NonCoherent flags set at the same time + if ((flags & coherentFlags) == coherentFlags) { + HIP_RETURN(hipErrorInvalidValue); + } + + unsigned int ihipFlags = CL_MEM_SVM_FINE_GRAIN_BUFFER | (flags << 16); + if (flags & hipHostMallocCoherent || + (!(flags & hipHostMallocNonCoherent) && HIP_HOST_COHERENT)) { + ihipFlags |= CL_MEM_SVM_ATOMICS; + } + + HIP_RETURN(ihipMalloc(ptr, sizeBytes, ihipFlags)); +} + +hipError_t hipMallocManaged(void** devPtr, size_t size, + unsigned int flags) { + HIP_INIT_API(hipMallocManaged, devPtr, size, flags); + + if (flags != hipMemAttachGlobal) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipMalloc(devPtr, size, CL_MEM_SVM_FINE_GRAIN_BUFFER)); +} + +hipError_t hipFree(void* ptr) { + HIP_INIT_API(hipFree, ptr); + + if (ptr == nullptr) { + HIP_RETURN(hipSuccess); + } + if (amd::SvmBuffer::malloced(ptr)) { + hip::syncStreams(); + for (size_t i=0; ifinish(); + } + amd::SvmBuffer::free(*hip::getCurrentContext(), ptr); + HIP_RETURN(hipSuccess); + } + HIP_RETURN(hipErrorInvalidValue); +} + +hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue)); +} + +hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyWithStream, dst, src, sizeBytes, kind, stream); + + amd::HostQueue* queue = hip::getQueue(stream); + + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue, true)); +} + +hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { + HIP_INIT_API(hipMemPtrGetInfo, ptr, size); + + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(ptr, offset); + + if (svmMem == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *size = svmMem->getSize(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipHostFree(void* ptr) { + HIP_INIT_API(hipHostFree, ptr); + + if (amd::SvmBuffer::malloced(ptr)) { + amd::SvmBuffer::free(*hip::getCurrentContext(), ptr); + HIP_RETURN(hipSuccess); + } + HIP_RETURN(hipErrorInvalidValue); +} + +hipError_t hipFreeArray(hipArray* array) { + HIP_INIT_API(hipFreeArray, array); + + if (amd::SvmBuffer::malloced(array->data)) { + amd::SvmBuffer::free(*hip::getCurrentContext(), array->data); + HIP_RETURN(hipSuccess); + } + HIP_RETURN(hipErrorInvalidValue); +} + +hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) { + HIP_INIT_API(hipMemGetAddressRange, pbase, psize, dptr); + + // Since we are using SVM buffer DevicePtr and HostPtr is the same + void* ptr = dptr; + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(ptr, offset); + + if (svmMem == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + *pbase = svmMem->getSvmPtr(); + *psize = svmMem->getSize(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipMemGetInfo(size_t* free, size_t* total) { + HIP_INIT_API(hipMemGetInfo, free, total); + + size_t freeMemory[2]; + amd::Device* device = hip::getCurrentContext()->devices()[0]; + if(device == nullptr) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if(!device->globalFreeMemory(freeMemory)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *free = freeMemory[0] * Ki; + *total = device->info().globalMemSize_; + + HIP_RETURN(hipSuccess); +} + +hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth, + cl_mem_object_type imageType, const cl_image_format* image_format) { + + amd::Device* device = hip::getCurrentContext()->devices()[0]; + + if ((width == 0) || (height == 0)) { + *ptr = nullptr; + return hipSuccess; + } + else if (!(device->info().image2DMaxWidth_ >= width && + device->info().image2DMaxHeight_ >= height ) || (ptr == nullptr)) { + return hipErrorInvalidValue; + } + + if (device->info().maxMemAllocSize_ < (width * height)) { + return hipErrorOutOfMemory; + } + + const amd::Image::Format imageFormat(*image_format); + + *pitch = amd::alignUp(width * imageFormat.getElementSize(), device->info().imagePitchAlignment_); + + size_t sizeBytes = *pitch * height * depth; + *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), 0, sizeBytes, + device->info().memBaseAddrAlign_); + + if (*ptr == nullptr) { + return hipErrorOutOfMemory; + } + + return hipSuccess; +} + + +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { + HIP_INIT_API(hipMallocPitch, ptr, pitch, width, height); + + const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 }; + HIP_RETURN(ihipMallocPitch(ptr, pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D, &image_format)); +} + +hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { + HIP_INIT_API(hipMalloc3D, pitchedDevPtr, &extent); + + size_t pitch = 0; + + if (pitchedDevPtr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 }; + hipError_t status = hipSuccess; + status = ihipMallocPitch(&pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth, + CL_MEM_OBJECT_IMAGE3D, &image_format); + + if (status == hipSuccess) { + pitchedDevPtr->pitch = pitch; + pitchedDevPtr->xsize = extent.width; + pitchedDevPtr->ysize = extent.height; + } + + HIP_RETURN(status); +} + +hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { + HIP_INIT_API(hipArrayCreate, array, pAllocateArray); + + if (pAllocateArray->Width == 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + *array = (hipArray*)malloc(sizeof(hipArray)); + array[0]->width = pAllocateArray->Width; + array[0]->height = pAllocateArray->Height; + array[0]->isDrv = true; + array[0]->textureType = hipTextureType2D; + void** ptr = &array[0]->data; + + cl_channel_order channelOrder; + cl_channel_type channelType; + getDrvChannelOrderAndType(pAllocateArray->Format, pAllocateArray->NumChannels, + &channelOrder, &channelType); + + const cl_image_format image_format = { channelOrder, channelType }; + setDescFromChannelType(channelType, &(array[0]->desc)); + + size_t pitch = 0; + hipError_t status = ihipMallocPitch(ptr, &pitch, array[0]->width, array[0]->height, 1, CL_MEM_OBJECT_IMAGE2D, + &image_format); + + HIP_RETURN(status); +} + +hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, + size_t width, size_t height, unsigned int flags) { + HIP_INIT_API(hipMallocArray, array, desc, width, height, flags); + + if (width == 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + *array = (hipArray*)malloc(sizeof(hipArray)); + array[0]->type = flags; + array[0]->width = width; + array[0]->height = height; + array[0]->depth = 1; + array[0]->desc = *desc; + array[0]->isDrv = false; + array[0]->textureType = hipTextureType2D; + void** ptr = &array[0]->data; + + cl_channel_order channelOrder; + cl_channel_type channelType; + getChannelOrderAndType(*desc, hipReadModeElementType, &channelOrder, &channelType); + + const cl_image_format image_format = { channelOrder, channelType }; + + // Dummy flags check + switch (flags) { + case hipArrayLayered: + case hipArrayCubemap: + case hipArraySurfaceLoadStore: + case hipArrayTextureGather: + assert(0 && "Unspported"); + break; + case hipArrayDefault: + default: + break; + } + size_t pitch = 0; + hipError_t status = ihipMallocPitch(ptr, &pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D, + &image_format); + + HIP_RETURN(status); +} + +hipError_t hipArray3DCreate(hipArray** array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray) { + HIP_INIT_API(hipArray3DCreate, array, pAllocateArray); + + *array = (hipArray*)malloc(sizeof(hipArray)); + array[0]->type = pAllocateArray->Flags; + array[0]->width = pAllocateArray->Width; + array[0]->height = pAllocateArray->Height; + array[0]->depth = pAllocateArray->Depth; + array[0]->Format = pAllocateArray->Format; + array[0]->NumChannels = pAllocateArray->NumChannels; + array[0]->isDrv = true; + array[0]->textureType = hipTextureType3D; + void** ptr = &array[0]->data; + + cl_channel_order channelOrder; + cl_channel_type channelType; + getDrvChannelOrderAndType(pAllocateArray->Format, pAllocateArray->NumChannels, + &channelOrder, &channelType); + + const cl_image_format image_format = { channelOrder, channelType }; + size_t pitch = 0; + hipError_t status = ihipMallocPitch(ptr, &pitch, array[0]->width, array[0]->height, array[0]->depth, CL_MEM_OBJECT_IMAGE3D, + &image_format); + + HIP_RETURN(status); +} + +hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc* desc, + struct hipExtent extent, unsigned int flags) { + HIP_INIT_API(hipMalloc3DArray, array, desc, &extent, flags); + + *array = (hipArray*)malloc(sizeof(hipArray)); + array[0]->type = flags; + array[0]->width = extent.width; + array[0]->height = extent.height; + array[0]->depth = extent.depth; + array[0]->desc = *desc; + array[0]->isDrv = false; + array[0]->textureType = hipTextureType3D; + void** ptr = &array[0]->data; + + cl_channel_order channelOrder; + cl_channel_type channelType; + getChannelOrderAndType(*desc, hipReadModeElementType, &channelOrder, &channelType); + + const cl_image_format image_format = { channelOrder, channelType }; + + // Dummy flags check + switch (flags) { + case hipArrayCubemap: + case hipArraySurfaceLoadStore: + case hipArrayTextureGather: + assert(0 && "Unspported"); + break; + case hipArrayLayered: + case hipArrayDefault: + default: + break; + } + size_t pitch = 0; + hipError_t status = ihipMallocPitch(ptr, &pitch, extent.width, extent.height, extent.depth, + CL_MEM_OBJECT_IMAGE3D, &image_format); + + HIP_RETURN(status); +} + +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { + HIP_INIT_API(hipHostGetFlags, flagsPtr, hostPtr); + + if (flagsPtr == nullptr || + hostPtr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(hostPtr, offset); + + if (svmMem == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *flagsPtr = svmMem->getMemFlags() >> 16; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipHostRegister, hostPtr, sizeBytes, flags); + if(hostPtr != nullptr) { + amd::Memory* mem = new (*hip::host_context) amd::Buffer(*hip::host_context, CL_MEM_USE_HOST_PTR | CL_MEM_SVM_ATOMICS, sizeBytes); + + constexpr bool sysMemAlloc = false; + constexpr bool skipAlloc = false; + constexpr bool forceAlloc = true; + if (!mem->create(hostPtr, sysMemAlloc, skipAlloc, forceAlloc)) { + mem->release(); + HIP_RETURN(hipErrorOutOfMemory); + } + + for (const auto& device: hip::getCurrentContext()->devices()) { + // Since the amd::Memory object is shared between all devices + // it's fine to have multiple addresses mapped to it + const device::Memory* devMem = mem->getDeviceMemory(*device); + amd::MemObjMap::AddMemObj(reinterpret_cast(devMem->virtualAddress()), mem); + } + + amd::MemObjMap::AddMemObj(hostPtr, mem); + HIP_RETURN(hipSuccess); + } else { + HIP_RETURN(ihipMalloc(&hostPtr, sizeBytes, flags)); + } +} + +hipError_t hipHostUnregister(void* hostPtr) { + HIP_INIT_API(hipHostUnregister, hostPtr); + + if (amd::SvmBuffer::malloced(hostPtr)) { + hip::syncStreams(); + hip::getNullStream()->finish(); + amd::SvmBuffer::free(*hip::host_context, hostPtr); + HIP_RETURN(hipSuccess); + } else { + size_t offset = 0; + amd::Memory* mem = getMemoryObject(hostPtr, offset); + + if(mem) { + hip::syncStreams(); + hip::getNullStream()->finish(); + for (const auto& device: hip::getCurrentContext()->devices()) { + const device::Memory* devMem = mem->getDeviceMemory(*device); + amd::MemObjMap::RemoveMemObj(reinterpret_cast(devMem->virtualAddress())); + } + amd::MemObjMap::RemoveMemObj(hostPtr); + mem->release(); + HIP_RETURN(hipSuccess); + } + } + + HIP_RETURN(hipErrorInvalidValue); +} + +// Deprecated function: +hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_RETURN(ihipMalloc(ptr, sizeBytes, flags)); +}; + + +hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t count, + size_t offset, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyToSymbol, symbolName, src, count, offset, kind); + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorInvalidDevicePointer); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpy(device_ptr, src, count, kind)); +} + +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, + size_t offset, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyFromSymbol, symbolName, dst, count, offset, kind); + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorInvalidDevicePointer); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpy(dst, device_ptr, count, kind)); +} + +hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t count, + size_t offset, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyToSymbolAsync, symbolName, src, count, offset, kind, stream); + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorInvalidDevicePointer); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpyAsync(device_ptr, src, count, kind, stream)); +} + +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t count, + size_t offset, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyFromSymbolAsync, symbolName, dst, count, offset, kind, stream); + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorInvalidDevicePointer); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpyAsync(dst, device_ptr, count, kind, stream)); +} + +hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { + HIP_INIT_API(hipMemcpyHtoD, dst, src, sizeBytes); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, *queue)); +} + +hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { + HIP_INIT_API(hipMemcpyDtoH, dst, src, sizeBytes); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, *queue)); +} + +hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) { + HIP_INIT_API(hipMemcpyDtoD, dst, src, sizeBytes); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, *queue)); +} + +hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { + HIP_INIT_API(NONE, dst, src, sizeBytes); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToHost, *queue)); +} + +hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyAsync, dst, src, sizeBytes, kind, stream); + + amd::HostQueue* queue = hip::getQueue(stream); + + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue, true)); +} + + +hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyHtoDAsync, dst, src, sizeBytes, stream); + + amd::HostQueue* queue = hip::getQueue(stream); + + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, + *queue, true)); +} + +hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyDtoDAsync, dst, src, sizeBytes, stream); + + amd::HostQueue* queue = hip::getQueue(stream); + + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, + *queue, true)); +} + +hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyDtoHAsync, dst, src, sizeBytes, stream); + + amd::HostQueue* queue = hip::getQueue(stream); + + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, + *queue, true)); +} + +hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, amd::HostQueue& queue, + bool isAsync = false) { + // Create buffer rectangle info structure + amd::BufferRect srcRect; + amd::BufferRect dstRect; + + size_t region[3] = {width, height, 1}; + size_t src_slice_pitch = spitch * height; + size_t dst_slice_pitch = dpitch * height; + size_t sOrigin[3] = { }; + size_t dOrigin[3] = { }; + amd::Memory* srcMemory = getMemoryObject(src, sOrigin[0]); + amd::Memory* dstMemory = getMemoryObject(dst, dOrigin[0]); + + if (src_slice_pitch == 0 || + dst_slice_pitch == 0 || + dst == nullptr || + src == nullptr) { + return hipSuccess; + } + + if (!srcRect.create(sOrigin, region, spitch, src_slice_pitch) || + !dstRect.create(dOrigin, region, dpitch, dst_slice_pitch)) { + return hipErrorInvalidValue; + } + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D size(region[0], region[1], region[2]); + + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (kind == hipMemcpyHostToHost)) { + for(unsigned int y = 0; y < height; y++) { + void* pDst = reinterpret_cast(reinterpret_cast(dst) + y * dpitch); + void* pSrc = reinterpret_cast(reinterpret_cast(src) + y * spitch); + memcpy(pDst, pSrc, width); + } + return hipSuccess; + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstMemory->asBuffer(), dstStart, size, src, dstRect, srcRect); + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcMemory->asBuffer(), srcStart, size, dst, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcMemory->asBuffer(), + *dstMemory->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); + } + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { + HIP_INIT_API(hipMemcpyParam2D, pCopy); + hipError_t e = hipSuccess; + if (pCopy == nullptr) { + e = hipErrorInvalidValue; + } else { + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + e = ihipMemcpy2D(pCopy->dstArray->data, pCopy->WidthInBytes, pCopy->srcHost, pCopy->srcPitch, + pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault, *queue); + } + HIP_RETURN(e); +} + +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2D, dst, dpitch, src, spitch, width, height, kind); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + HIP_RETURN(ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, *queue)); +} + + +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DAsync, dst, dpitch, src, spitch, width, height, kind, stream); + + amd::HostQueue* queue = hip::getQueue(stream); + + HIP_RETURN(ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, *queue, true)); +} + +hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind); + + if (dst->data == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + size_t dpitch = dst->width; + getByteSizeFromChannelFormatKind(dst[0].desc.f, &dpitch); + + if ((wOffset + width > (dpitch)) || width > spitch) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + // Create buffer rectangle info structure + amd::BufferRect srcRect; + amd::BufferRect dstRect; + + size_t region[3] = {width, height, 1}; + size_t src_slice_pitch = spitch * height; + size_t dst_slice_pitch = dpitch * height; + size_t sOrigin[3] = { }; + size_t dOrigin[3] = {wOffset, hOffset, 0}; + size_t offset = 0; + amd::Memory* srcMemory = getMemoryObject(src, offset); + amd::Memory* dstMemory = getMemoryObject(dst->data, offset); + + assert(offset == 0); + + if (!srcRect.create(sOrigin, region, spitch, src_slice_pitch) || + !dstRect.create(dOrigin, region, dpitch, dst_slice_pitch)) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D size(region[0], region[1], region[2]); + + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (kind == hipMemcpyHostToHost)) { + void* newDst = reinterpret_cast(reinterpret_cast(dst->data) + + dpitch * hOffset + wOffset); + for(unsigned int y = 0; y < height; y++) { + void* pDst = reinterpret_cast(reinterpret_cast(newDst) + y * dpitch); + void* pSrc = reinterpret_cast(reinterpret_cast(src) + y * spitch); + memcpy(pDst, pSrc, width); + } + HIP_RETURN(hipSuccess); + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstMemory->asBuffer(), dstStart, size, src, dstRect, srcRect); + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcMemory->asBuffer(), srcStart, size, dst, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcMemory->asBuffer(), + *dstMemory->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); + } + + if (command == nullptr) { + HIP_RETURN(hipErrorOutOfMemory); + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + HIP_RETURN(hipSuccess); + +} + +hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, const void* src, + size_t count, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyToArray, dstArray, wOffset, hOffset, src, count, kind); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstArray->data, dOffset); + + assert(dOffset == 0); + + assert((kind == hipMemcpyHostToDevice) && "Invalid case"); + + if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, + *dstMemory->asBuffer(), {wOffset, hOffset}, count, src); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, {wOffset, hOffset}, count); + } else { + ShouldNotReachHere(); + } + + if (command == nullptr) { + HIP_RETURN(hipErrorOutOfMemory); + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, + size_t count, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyFromArray, dst, srcArray, wOffset, hOffset, count, kind); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcArray->data, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst, dOffset); + + assert(sOffset == 0); + + assert((kind == hipMemcpyDeviceToHost) && "Invalid case"); + + if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, + *srcMemory->asBuffer(), {wOffset, hOffset}, count, dst); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(), *dstMemory->asBuffer(), {wOffset, hOffset}, dOffset, count); + } else { + ShouldNotReachHere(); + } + + if (command == nullptr) { + HIP_RETURN(hipErrorOutOfMemory); + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) { + HIP_INIT_API(hipMemcpyHtoA, dstArray, dstOffset, srcHost, count); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcHost, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstArray->data, dOffset); + + assert(dOffset == 0); + + if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, + *dstMemory->asBuffer(), dstOffset, count, srcHost); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dstOffset, count); + } else { + ShouldNotReachHere(); + } + + if (command == nullptr) { + HIP_RETURN(hipErrorOutOfMemory); + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count) { + HIP_INIT_API(hipMemcpyAtoH, dst, srcArray, srcOffset, count); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcArray->data, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst, dOffset); + + assert(sOffset == 0); + + if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, + *srcMemory->asBuffer(), srcOffset, count, dst); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(), *dstMemory->asBuffer(), srcOffset, dOffset, count); + } else { + ShouldNotReachHere(); + } + + if (command == nullptr) { + HIP_RETURN(hipErrorOutOfMemory); + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + HIP_RETURN(hipSuccess); +} + +hipError_t ihipMemcpy3D_V1(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) { + const void* srcPtr = nullptr; + size_t srcElementSizeInBytes = sizeof(unsigned char); + size_t srcRowPitchInBytes = 0; + size_t srcSlicePitchInBytes = 0; + if (p->srcMemoryType == hipMemoryTypeHost) { + srcPtr = p->srcHost; + srcRowPitchInBytes = p->srcPitch; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcHeight; + } else if ((p->srcMemoryType == hipMemoryTypeDevice) || + (p->srcMemoryType == hipMemoryTypeUnified)) { + srcPtr = p->srcDevice; + srcRowPitchInBytes = p->srcPitch; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcHeight; + } else if (p->srcMemoryType == hipMemoryTypeArray) { + srcPtr = p->srcArray->data; + getByteSizeFromChannelFormatKind(p->srcArray->desc.f, &srcElementSizeInBytes); + srcElementSizeInBytes *= p->srcArray->NumChannels; + srcRowPitchInBytes = srcElementSizeInBytes * p->srcArray->width; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcArray->height; + } else { + ShouldNotReachHere(); + } + + void* dstPtr = nullptr; + size_t dstElementSizeInBytes = sizeof(unsigned char); + size_t dstRowPitchInBytes = 0; + size_t dstSlicePitchInBytes = 0; + if (p->dstMemoryType == hipMemoryTypeHost) { + dstPtr = p->dstHost; + dstRowPitchInBytes = p->dstPitch; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstHeight; + } else if ((p->dstMemoryType == hipMemoryTypeDevice) || + (p->dstMemoryType == hipMemoryTypeUnified)) { + dstPtr = p->dstDevice; + dstRowPitchInBytes = p->dstPitch; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstHeight; + } else if (p->dstMemoryType == hipMemoryTypeArray) { + dstPtr = p->dstArray->data; + getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &dstElementSizeInBytes); + dstElementSizeInBytes *= p->dstArray->NumChannels; + dstRowPitchInBytes = dstElementSizeInBytes * p->dstArray->width; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstArray->height; + } else { + ShouldNotReachHere(); + } + + // For HIP arrays, srcXInBytes must be evenly divisible by the array element size. + if ((p->srcMemoryType == hipMemoryTypeArray) && + ((p->srcXInBytes % srcElementSizeInBytes) != 0)) { + return hipErrorInvalidValue; + } + + // If specified, srcPitch must be greater than or equal to WidthInBytes + srcXInBytes + if ((p->srcMemoryType != hipMemoryTypeArray) && + (p->srcPitch < (p->WidthInBytes + p->srcXInBytes))) { + return hipErrorInvalidValue; + } + + // If specified, srcHeight must be greater than or equal to Height + srcY + if ((p->srcMemoryType != hipMemoryTypeArray) && + (p->srcHeight < (p->Height + p->srcY))) { + return hipErrorInvalidValue; + } + + // For HIP arrays, dstXInBytes must be evenly divisible by the array element size. + if ((p->dstMemoryType == hipMemoryTypeArray) && + ((p->dstXInBytes % dstElementSizeInBytes) != 0)) { + return hipErrorInvalidValue; + } + + // If specified, srcPitch must be greater than or equal to WidthInBytes + srcXInBytes + if ((p->dstMemoryType != hipMemoryTypeArray) && + (p->dstPitch < (p->WidthInBytes + p->dstXInBytes))) { + return hipErrorInvalidValue; + } + + // If specified, srcHeight must be greater than or equal to Height + srcY + if ((p->dstMemoryType != hipMemoryTypeArray) && + (p->dstHeight < (p->Height + p->dstY))) { + return hipErrorInvalidValue; + } + + // The srcLOD and dstLOD members of the CUDA_MEMCPY3D structure must be set to 0. + if ((p->srcLOD != 0) || (p->dstLOD != 0)) { + return hipErrorInvalidValue; + } + + size_t region[3]; + region[0] = p->WidthInBytes; + region[1] = p->Height; + region[2] = p->Depth; + + size_t srcOrigin[3]; + srcOrigin[0] = p->srcXInBytes; + srcOrigin[1] = p->srcY; + srcOrigin[2] = p->srcZ; + + size_t dstOrigin[3]; + dstOrigin[0] = p->dstXInBytes; + dstOrigin[1] = p->dstY; + dstOrigin[2] = p->dstZ; + + amd::BufferRect srcRect; + if (!srcRect.create(srcOrigin, region, srcRowPitchInBytes, srcSlicePitchInBytes)) { + return hipErrorInvalidValue; + } + + size_t srcMemoryOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcPtr, srcMemoryOffset); + amd::Coord3D srcStart(srcRect.start_ + srcMemoryOffset, 0, 0); + + amd::BufferRect dstRect; + if (!dstRect.create(dstOrigin, region, dstRowPitchInBytes, dstSlicePitchInBytes)) { + return hipErrorInvalidValue; + } + + size_t dstMemoryOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstPtr, dstMemoryOffset); + amd::Coord3D dstStart(dstRect.start_ + dstMemoryOffset, 0, 0); + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + amd::HostQueue* queue = hip::getQueue(stream); + amd::Coord3D regionSize(region[0], region[1], region[2]); + + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (p->kind == hipMemcpyHostToHost)) { + memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]); + return hipSuccess; + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstMemory->asBuffer(), srcStart, regionSize, srcPtr, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcMemory->asBuffer(), srcStart, regionSize, dstPtr, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, regionSize, + srcRect, dstRect); + } + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpy3D_V2(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) { + void* srcPtr = nullptr; + size_t srcElementSizeInBytes = sizeof(unsigned char); + size_t srcRowPitchInBytes = 0; + size_t srcSlicePitchInBytes = 0; + if ((p->srcArray != nullptr) && (p->srcPtr.ptr == nullptr)) { + srcPtr = p->srcArray->data; + getByteSizeFromChannelFormatKind(p->srcArray->desc.f, &srcElementSizeInBytes); + srcElementSizeInBytes *= p->srcArray->NumChannels; + srcRowPitchInBytes = srcElementSizeInBytes * p->srcArray->width; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcArray->height; + } else if ((p->srcArray == nullptr) && (p->srcPtr.ptr != nullptr)) { + srcPtr = p->srcPtr.ptr; + srcRowPitchInBytes = p->srcPtr.pitch; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcPtr.ysize; + } else { + ShouldNotReachHere(); + } + + void* dstPtr = nullptr; + size_t dstElementSizeInBytes = sizeof(unsigned char); + size_t dstRowPitchInBytes = 0; + size_t dstSlicePitchInBytes = 0; + if ((p->dstArray != nullptr) && (p->dstPtr.ptr == nullptr)) { + dstPtr = p->dstArray->data; + getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &dstElementSizeInBytes); + dstElementSizeInBytes *= p->dstArray->NumChannels; + dstRowPitchInBytes = dstElementSizeInBytes * p->dstArray->width; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstArray->height; + } else if ((p->dstArray == nullptr) && (p->dstPtr.ptr != nullptr)) { + dstPtr = p->dstPtr.ptr; + dstRowPitchInBytes = p->srcPtr.pitch; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstPtr.ysize; + } else { + ShouldNotReachHere(); + } + + // If the source and destination are both arrays, they must have the same element size. + if (((p->srcArray != nullptr) && (p->dstArray != nullptr)) && + (srcElementSizeInBytes != dstElementSizeInBytes)) { + return hipErrorInvalidValue; + } + + // If a HIP array is participating in the copy, the extent is defined in terms of that array's elements. + // If no HIP array is participating in the copy, the extent is defined in elements of unsigned char. + size_t region[3]; + if (p->srcArray != nullptr) { + region[0] = srcRowPitchInBytes; + } else if (p->dstArray != nullptr) { + region[0] = dstRowPitchInBytes; + } else { + region[0] = sizeof(unsigned char) * p->extent.width; + } + region[1] = p->extent.height; + region[2] = p->extent.depth; + + // The offset into the object is defined in units of the object's elements. + size_t srcOrigin[3]; + srcOrigin[0] = srcElementSizeInBytes * p->srcPos.x; + srcOrigin[1] = p->srcPos.y; + srcOrigin[2] = p->srcPos.z; + + amd::BufferRect srcRect; + if (!srcRect.create(srcOrigin, region, srcRowPitchInBytes, srcSlicePitchInBytes)) { + return hipErrorInvalidValue; + } + + size_t srcMemoryOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcPtr, srcMemoryOffset); + amd::Coord3D srcStart(srcRect.start_ + srcMemoryOffset, 0, 0); + + size_t dstOrigin[3]; + dstOrigin[0] = dstElementSizeInBytes * p->dstPos.x; + dstOrigin[1] = p->dstPos.y; + dstOrigin[2] = p->dstPos.z; + + amd::BufferRect dstRect; + if (!dstRect.create(dstOrigin, region, dstRowPitchInBytes, dstSlicePitchInBytes)) { + return hipErrorInvalidValue; + } + + size_t dstMemoryOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstPtr, dstMemoryOffset); + amd::Coord3D dstStart(dstRect.start_ + dstMemoryOffset, 0, 0); + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + amd::HostQueue* queue = hip::getQueue(stream); + amd::Coord3D regionSize(region[0], region[1], region[2]); + + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (p->kind == hipMemcpyHostToHost)) { + memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]); + return hipSuccess; + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstMemory->asBuffer(), srcStart, regionSize, srcPtr, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcMemory->asBuffer(), srcStart, regionSize, dstPtr, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, regionSize, + srcRect, dstRect); + } + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) { + // Having src and dst be an array is ambigous, since we can't tell if the user intended to call hipMemcpy3D_V1() or hipMemcpy3D_V2(). + // For now hope that we never encounter this case. + assert((p->srcArray == nullptr) || (p->dstArray == nullptr)); + + // When calling hipMemcpy3D_V1(), the user must specify + // one of srcHost, srcDevice or srcArray and + // one of dstHost, dstDevice or dstArray. + if (((p->srcHost != nullptr) || (p->srcDevice != nullptr) || (p->srcArray != nullptr)) && + ((p->dstHost != nullptr) || (p->dstDevice != nullptr) || (p->dstArray != nullptr))) { + return ihipMemcpy3D_V1(p, stream, isAsync); + } + + // When calling hipMemcpy3D_V2(), the user must specify + // one of srcArray or srcPtr and + // one of dstArray or dstPtr. + if (((p->srcArray != nullptr) || (p->srcPtr.ptr != nullptr)) && + ((p->dstArray != nullptr) || (p->dstPtr.ptr != nullptr))) { + return ihipMemcpy3D_V2(p, stream, isAsync); + } + + // If we got here, then the user specified an invalid combination of src/dst parameters. + return hipErrorInvalidValue; +} + +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { + HIP_INIT_API(hipMemcpy3D, p); + + HIP_RETURN(ihipMemcpy3D(p, nullptr)); +} + +hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream) { + HIP_INIT_API(hipMemcpy3DAsync, p, stream); + + HIP_RETURN(ihipMemcpy3D(p, stream, true)); +} + +hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, + hipStream_t stream, bool isAsync = false) { + if (sizeBytes == 0) { + // Skip if nothing needs filling. + return hipSuccess; + } + + if (dst == nullptr) { + return hipErrorInvalidValue; + } + + size_t offset = 0; + amd::HostQueue* queue = hip::getQueue(stream); + amd::Memory* memory = getMemoryObject(dst, offset); + + if (memory != nullptr) { + // Device memory + amd::Command::EventWaitList waitList; + amd::Coord3D fillOffset(offset, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, valueSize, fillOffset, fillSize); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + } else { + // Host alloced memory + memset(dst, value, sizeBytes); + } + + return hipSuccess; +} + +hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { + HIP_INIT_API(hipMemset, dst, value, sizeBytes); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, nullptr)); +} + +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(hipMemsetAsync, dst, value, sizeBytes, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, stream, true)); +} + +hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t count) { + HIP_INIT_API(hipMemsetD8, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), count * sizeof(int8_t), nullptr)); +} + +hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD8Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), count * sizeof(int8_t), stream, true)); +} + +hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t count) { + HIP_INIT_API(hipMemsetD16, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int16_t), count * sizeof(int16_t), nullptr)); +} + +hipError_t hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD16Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int16_t), count * sizeof(int16_t), stream, true)); +} + +hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { + HIP_INIT_API(hipMemsetD32, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int32_t), count * sizeof(int32_t), nullptr)); +} + +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD32Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int32_t), count * sizeof(int32_t), stream, true)); +} + +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { + HIP_INIT_API(hipMemset2D, dst, pitch, value, width, height); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), pitch * height, nullptr)); +} + +hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, + size_t width, size_t height, hipStream_t stream) { + HIP_INIT_API(hipMemset2DAsync, dst, pitch, value, width, height, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), pitch * height, stream, true)); +} + +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { + HIP_INIT_API(hipMemset3D, pitchedDevPtr, value, &extent); + + void *dst = pitchedDevPtr.ptr; + size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, nullptr)); +} + +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream) { + HIP_INIT_API(hipMemset3DAsync, pitchedDevPtr, value, &extent, stream); + + void *dst = pitchedDevPtr.ptr; + size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, stream, true)); +} + +hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, + size_t height, unsigned int elementSizeBytes) { + HIP_INIT_API(hipMemAllocPitch, dptr, pitch, widthInBytes, height, elementSizeBytes); + + HIP_RETURN(hipMallocPitch(dptr, pitch, widthInBytes, height)); +} + +hipError_t hipMemAllocHost(void** ptr, size_t size) { + HIP_INIT_API(hipMemAllocHost, ptr, size); + + HIP_RETURN(hipHostMalloc(ptr, size, 0)); +} + +hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* dev_ptr) { + HIP_INIT_API(hipIpcGetMemHandle, handle, dev_ptr); + + size_t offset = 0; + amd::Memory* amd_mem_obj = nullptr; + device::Memory* dev_mem_obj = nullptr; + ihipIpcMemHandle_t* ihandle = nullptr; + + if ((handle == nullptr) || (dev_ptr == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Get AMD::Memory object corresponding to this pointer */ + amd_mem_obj = getMemoryObject(dev_ptr, offset); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Get Device::Memory object pointer */ + dev_mem_obj = amd_mem_obj->getDeviceMemory(*hip::getCurrentContext()->devices()[0],false); + if (dev_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Create an handle for IPC. Store the memory size inside the handle */ + ihandle = reinterpret_cast(handle); + dev_mem_obj->IpcCreate(offset, &(ihandle->psize), &(ihandle->ipc_handle)); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipIpcOpenMemHandle(void** dev_ptr, hipIpcMemHandle_t handle, unsigned int flags) { + HIP_INIT_API(hipIpcOpenMemHandle, dev_ptr, &handle, flags); + + amd::Memory* amd_mem_obj = nullptr; + amd::Device* device = nullptr; + ihipIpcMemHandle_t* ihandle = nullptr; + + if (dev_ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Call the IPC Attach from Device class */ + device = hip::getCurrentContext()->devices()[0]; + ihandle = reinterpret_cast(&handle); + + amd_mem_obj = device->IpcAttach(&(ihandle->ipc_handle), ihandle->psize, flags, dev_ptr); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Add the memory to the MemObjMap */ + amd::MemObjMap::AddMemObj(*dev_ptr, amd_mem_obj); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipIpcCloseMemHandle(void* dev_ptr) { + HIP_INIT_API(hipIpcCloseMemHandle, dev_ptr); + + size_t offset = 0; + amd::Device* device = nullptr; + amd::Memory* amd_mem_obj = nullptr; + + hip::syncStreams(); + hip::getNullStream()->finish(); + + if (dev_ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Get the amd::Memory object */ + amd_mem_obj = getMemoryObject(dev_ptr, offset); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Call IPC Detach from Device class */ + device = hip::getCurrentContext()->devices()[0]; + if (device == nullptr) { + HIP_RETURN(hipErrorNoDevice); + } + + /* Remove the memory from MemObjMap */ + amd::MemObjMap::RemoveMemObj(amd_mem_obj); + + /* detach the memory */ + device->IpcDetach(*amd_mem_obj); + + HIP_RETURN(hipSuccess); +} + +hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { + hipChannelFormatDesc cd; + cd.x = x; + cd.y = y; + cd.z = z; + cd.w = w; + cd.f = f; + return cd; +} + +hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsigned flags) { + HIP_INIT_API(hipHostGetDevicePointer, devicePointer, hostPointer, flags); + + size_t offset = 0; + + amd::Memory* memObj = getMemoryObject(hostPointer, offset); + if (!memObj) { + HIP_RETURN(hipErrorInvalidValue); + } + *devicePointer = reinterpret_cast(memObj->getDeviceMemory(*hip::getCurrentContext()->devices()[0])->virtualAddress() + offset); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { + HIP_INIT_API(hipPointerGetAttributes, attributes, ptr); + + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(ptr, offset); + int device = 0; + + if (memObj != nullptr) { + attributes->memoryType = (CL_MEM_SVM_FINE_GRAIN_BUFFER & memObj->getMemFlags())? hipMemoryTypeHost : hipMemoryTypeDevice; + attributes->hostPointer = memObj->getSvmPtr(); + attributes->devicePointer = memObj->getSvmPtr(); + attributes->isManaged = 0; + attributes->allocationFlags = memObj->getMemFlags() >> 16; + + amd::Context &memObjCtx = memObj->getContext(); + if (*hip::host_context == memObjCtx) { + attributes->device = ihipGetDevice(); + HIP_RETURN(hipSuccess); + } + for (auto& ctx : g_devices) { + if (*ctx == memObjCtx) { + attributes->device = device; + HIP_RETURN(hipSuccess); + } + ++device; + } + HIP_RETURN(hipErrorInvalidDevice); + } + + HIP_RETURN(hipErrorInvalidValue); +} diff --git a/vdi/hip_module.cpp b/vdi/hip_module.cpp new file mode 100644 index 0000000000..2d9fd4468c --- /dev/null +++ b/vdi/hip_module.cpp @@ -0,0 +1,534 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#include "hip_internal.hpp" +#include "platform/program.hpp" +#include "hip_event.hpp" + +hipError_t ihipModuleLoadData(hipModule_t *module, const void *image); + +const std::string& FunctionName(const hipFunction_t f) +{ + return hip::Function::asFunction(f)->function_->name(); +} + +static uint64_t ElfSize(const void *emi) +{ + const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi; + const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff); + + uint64_t max_offset = ehdr->e_shoff; + uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum; + + for (uint16_t i=0; i < ehdr->e_shnum; ++i){ + uint64_t cur_offset = static_cast(shdr[i].sh_offset); + if (max_offset < cur_offset) { + max_offset = cur_offset; + total_size = max_offset; + if(SHT_NOBITS != shdr[i].sh_type) { + total_size += static_cast(shdr[i].sh_size); + } + } + } + return total_size; +} + +hipError_t hipModuleLoad(hipModule_t* module, const char* fname) +{ + HIP_INIT_API(hipModuleLoad, module, fname); + + if (!fname) { + HIP_RETURN(hipErrorInvalidValue); + } + + std::ifstream file(fname, std::ios::binary); + + if (!file.is_open()) { + HIP_RETURN(hipErrorFileNotFound); + } + + std::vector tmp{std::istreambuf_iterator{file}, std::istreambuf_iterator{}}; + + HIP_RETURN(ihipModuleLoadData(module, tmp.data())); +} + +bool ihipModuleUnregisterGlobal(hipModule_t hmod) { + std::vector< std::pair >* modules = + PlatformState::instance().unregisterVar(hmod); + if (modules != nullptr) { + delete modules; + } + return true; +} + +hipError_t hipModuleUnload(hipModule_t hmod) +{ + HIP_INIT_API(hipModuleUnload, hmod); + + if (hmod == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + if(!ihipModuleUnregisterGlobal(hmod)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + program->release(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipModuleLoadData(hipModule_t *module, const void *image) +{ + HIP_INIT_API(hipModuleLoadData, module, image); + + HIP_RETURN(ihipModuleLoadData(module, image)); +} + +hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, + unsigned int numOptions, hipJitOption* options, + void** optionsValues) +{ + /* TODO: Pass options to Program */ + HIP_INIT_API(hipModuleLoadData, module, image); + + HIP_RETURN(ihipModuleLoadData(module, image)); +} + +extern bool __hipExtractCodeObjectFromFatBinary(const void* data, + const std::vector& devices, + std::vector>& code_objs); + +bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* module) { + + std::vector undef_vars; + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + if (!dev_program->getUndefinedVarFromCodeObj(&undef_vars)) { + return false; + } + + for (auto it = undef_vars.begin(); it != undef_vars.end(); ++it) { + auto modules = new std::vector >(g_devices.size()); + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, true); + } + + texture* tex_hptr + = new texture(); + memset(tex_hptr, 0x00, sizeof(texture)); + + PlatformState::DeviceVar dvar{ reinterpret_cast(tex_hptr), it->c_str(), sizeof(*tex_hptr), modules, + std::vector{ g_devices.size()}, true }; + PlatformState::instance().registerVar(it->c_str(), dvar); + } + + return true; +} + +bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) { + + size_t var_size = 0; + hipDeviceptr_t device_ptr = nullptr; + std::vector var_names; + + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { + return false; + } + + for (auto it = var_names.begin(); it != var_names.end(); ++it) { + auto modules = new std::vector >(g_devices.size()); + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, true); + } + + PlatformState::DeviceVar dvar{nullptr, it->c_str(), 0, modules, + std::vector{ g_devices.size()}, false }; + PlatformState::instance().registerVar(it->c_str(), dvar); + } + + return true; +} + +hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) +{ + std::vector> code_objs; + if (__hipExtractCodeObjectFromFatBinary(image, {hip::getCurrentContext()->devices()[0]->info().name_}, code_objs)) + image = code_objs[0].first; + + amd::Program* program = new amd::Program(*hip::getCurrentContext()); + if (program == NULL) { + return hipErrorOutOfMemory; + } + + program->setVarInfoCallBack(&getSvarInfo); + + if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image))) { + return hipErrorInvalidKernelFile; + } + + *module = reinterpret_cast(as_cl(program)); + + if (!ihipModuleRegisterGlobal(program, module)) { + return hipErrorSharedObjectSymbolNotFound; + } + + if (!ihipModuleRegisterUndefined(program, module)) { + return hipErrorSharedObjectSymbolNotFound; + } + + if(CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) { + return hipErrorSharedObjectInitFailed; + } + + return hipSuccess; +} + +hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name) +{ + HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name); + + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + const amd::Symbol* symbol = program->findSymbol(name); + if (!symbol) { + HIP_RETURN(hipErrorNotFound); + } + + amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name); + if (!kernel) { + HIP_RETURN(hipErrorOutOfMemory); + } + + hip::Function* f = new hip::Function(kernel); + *hfunc = f->asHipFunction(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name) +{ + HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name); + + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), hmod, + dptr, bytes)) { + HIP_RETURN(hipErrorNotFound); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) +{ + HIP_INIT_API(hipFuncGetAttributes, attr, func); + + if (!PlatformState::instance().getFuncAttr(func, attr)) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + HIP_RETURN(hipSuccess); +} + + +hipError_t ihipModuleLaunchKernel(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra, + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0, + uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0, + uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0) { + HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params); + + hip::Function* function = hip::Function::asFunction(f); + amd::Kernel* kernel = function->function_; + + amd::ScopedLock lock(function->lock_); + + hip::Event* eStart = reinterpret_cast(startEvent); + hip::Event* eStop = reinterpret_cast(stopEvent); + amd::HostQueue* queue = hip::getQueue(hStream); + const amd::Device& device = queue->vdev()->device(); + + if ((params & amd::NDRangeKernelCommand::CooperativeGroups) && + !device.info().cooperativeGroups_) { + return hipErrorLaunchFailure; + } + if ((params & amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups) && + !device.info().cooperativeMultiDeviceGroups_) { + return hipErrorLaunchFailure; + } + if (!queue) { + return hipErrorOutOfMemory; + } + + size_t globalWorkOffset[3] = {0}; + size_t globalWorkSize[3] = { gridDimX, gridDimY, gridDimZ }; + size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ }; + amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); + amd::Command::EventWaitList waitList; + + address kernargs = nullptr; + + // 'extra' is a struct that contains the following info: { + // HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs, + // HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size, + // HIP_LAUNCH_PARAM_END } + if (extra != nullptr) { + if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER || + extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE || extra[4] != HIP_LAUNCH_PARAM_END) { + return hipErrorNotInitialized; + } + kernargs = reinterpret_cast
(extra[1]); + } + + const amd::KernelSignature& signature = kernel->signature(); + for (size_t i = 0; i < signature.numParameters(); ++i) { + const amd::KernelParameterDescriptor& desc = signature.at(i); + if (kernelParams == nullptr) { + assert(kernargs != nullptr); + kernel->parameters().set(i, desc.size_, kernargs + desc.offset_, + desc.type_ == T_POINTER/*svmBound*/); + } else { + assert(extra == nullptr); + kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER/*svmBound*/); + } + } + + if(startEvent != nullptr) { + amd::Command* startCommand = new hip::TimerMarker(*queue); + startCommand->enqueue(); + eStart->addMarker(queue, startCommand); + } + + amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand( + *queue, waitList, *kernel, ndrange, sharedMemBytes, + params, gridId, numGrids, prevGridSum, allGridSum, firstDevice); + if (!command) { + return hipErrorOutOfMemory; + } + + // Capture the kernel arguments + if (CL_SUCCESS != command->captureAndValidate()) { + delete command; + return hipErrorOutOfMemory; + } + + command->enqueue(); + + if(stopEvent != nullptr) { + eStop->addMarker(queue, command); + command->retain(); + } + + command->release(); + + return hipSuccess; +} + +hipError_t hipModuleLaunchKernel(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra) +{ + HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra); + + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX * blockDimX, gridDimY * blockDimY, gridDimZ * blockDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr)); +} + +hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) +{ + HIP_INIT_API(NONE, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + localWorkSizeX, localWorkSizeY, localWorkSizeZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent, flags); + + HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, + localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags)); +} + + + +hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, + uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, + uint32_t blockDimZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, + hipEvent_t stopEvent) +{ + HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent); + + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); +} + +hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX, + uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, + uint32_t blockDimZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, + hipEvent_t stopEvent) +{ + HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent); + + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); +} + +hipError_t hipLaunchCooperativeKernel(const void* f, + dim3 gridDim, dim3 blockDim, + void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream) +{ + HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, + sharedMemBytes, hStream); + + int deviceId = ihipGetDevice(); + hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); + if (func == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + HIP_RETURN(ihipModuleLaunchKernel(func, gridDim.x * blockDim.x, gridDim.y * blockDim.y, gridDim.z * blockDim.z, + blockDim.x, blockDim.y, blockDim.z, + sharedMemBytes, hStream, kernelParams, nullptr, nullptr, nullptr, 0, + amd::NDRangeKernelCommand::CooperativeGroups)); +} + +hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags, uint32_t extFlags) +{ + int numActiveGPUs = 0; + ihipDeviceGetCount(&numActiveGPUs); + + if ((numDevices > numActiveGPUs) || (launchParamsList == nullptr)) { + return hipErrorInvalidValue; + } + + hipError_t result = hipErrorUnknown; + uint64_t allGridSize = 0; + for (int i = 0; i < numDevices; ++i) { + const hipLaunchParams& launch = launchParamsList[i]; + allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z; + } + uint64_t prevGridSize = 0; + uint32_t firstDevice = 0; + for (int i = 0; i < numDevices; ++i) { + const hipLaunchParams& launch = launchParamsList[i]; + amd::HostQueue* queue = reinterpret_cast(launch.stream)->asHostQueue(); + hipFunction_t func = nullptr; + // The order of devices in the launch may not match the order in the global array + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + // Find the matching device and request the kernel function + if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) { + func = PlatformState::instance().getFunc(launch.func, dev); + // Save VDI index of the first device in the launch + if (i == 0) { + firstDevice = queue->vdev()->device().index(); + } + break; + } + } + if (func == nullptr) { + result = hipErrorInvalidDeviceFunction; + HIP_RETURN(result); + } + + result = ihipModuleLaunchKernel(func, + launch.gridDim.x * launch.blockDim.x, + launch.gridDim.y * launch.blockDim.y, + launch.gridDim.z * launch.blockDim.z, + launch.blockDim.x, launch.blockDim.y, launch.blockDim.z, + launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr, + flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice); + if (result != hipSuccess) { + break; + } + prevGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z; + } + + return result; +} + +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) +{ + HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, numDevices, flags); + + return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, + (amd::NDRangeKernelCommand::CooperativeGroups | + amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups)); +} + +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) { + HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags); + + return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0); +} + +hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) { + HIP_INIT_API(hipModuleGetTexRef, texRef, hmod, name); + + /* input args check */ + if ((texRef == nullptr) || (name == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getTexRef(name, texRef)) { + HIP_RETURN(hipErrorNotFound); + } + + HIP_RETURN(hipSuccess); +} + diff --git a/vdi/hip_peer.cpp b/vdi/hip_peer.cpp new file mode 100644 index 0000000000..65d92de363 --- /dev/null +++ b/vdi/hip_peer.cpp @@ -0,0 +1,121 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) { + HIP_INIT_API(NONE, canAccessPeer, thisCtx, peerCtx); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx, + size_t sizeBytes) { + HIP_INIT_API(NONE, dst, dstCtx, src, srcCtx, sizeBytes); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice, + size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(NONE, dst, dstDevice, src, srcDevice, sizeBytes, stream); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId) { + HIP_INIT_API(hipDeviceCanAccessPeer, canAccessPeer, deviceId, peerDeviceId); + + amd::Device* device = nullptr; + amd::Device* peer_device = nullptr; + + if (canAccessPeer == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Peer cannot be self */ + if (deviceId == peerDeviceId) { + *canAccessPeer = 0; + return HIP_RETURN(hipSuccess); + } + + /* Cannot exceed the max number of devices */ + if (static_cast(deviceId) >= g_devices.size() + || static_cast(peerDeviceId) >= g_devices.size()) { + return HIP_RETURN(hipErrorInvalidValue); + } + + device = g_devices[deviceId]->devices()[0]; + peer_device = g_devices[peerDeviceId]->devices()[0]; + + *canAccessPeer = static_cast(std::find(device->p2pDevices_.begin(), + device->p2pDevices_.end(), as_cl(peer_device)) + != device->p2pDevices_.end()); + + return HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) { + HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) { + HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, + size_t sizeBytes) { + HIP_INIT_API(hipMemcpyPeer, dst, dstDevice, src, srcDevice, sizeBytes); + + HIP_RETURN(hipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice)); +} + +hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, + size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(hipMemcpyPeerAsync, dst, dstDevice, src, srcDevice, sizeBytes, stream); + + HIP_RETURN(hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream)); +} + +hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { + HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { + HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx); + + HIP_RETURN(hipSuccess); +} diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp new file mode 100644 index 0000000000..c91199c077 --- /dev/null +++ b/vdi/hip_platform.cpp @@ -0,0 +1,993 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" +#include "platform/program.hpp" +#include "platform/runtime.hpp" + +#include +#include "elfio.hpp" + +constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" + +thread_local std::stack execStack_; +PlatformState* PlatformState::platform_ = new PlatformState(); + +struct __CudaFatBinaryWrapper { + unsigned int magic; + unsigned int version; + void* binary; + void* dummy1; +}; + +#define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" +#define HIP_AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa" +#define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn-amd-amdhsa-" + +struct __ClangOffloadBundleDesc { + uint64_t offset; + uint64_t size; + uint64_t tripleSize; + const char triple[1]; +}; + +struct __ClangOffloadBundleHeader { + const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1]; + uint64_t numBundles; + __ClangOffloadBundleDesc desc[1]; +}; + +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, + hipModule_t hmod, const char* name); + +hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, + hipDeviceptr_t* dptr, size_t* bytes); + +static bool isCompatibleCodeObject(const std::string& codeobj_target_id, + const char* device_name) { + // Workaround for device name mismatch. + // Device name may contain feature strings delimited by '+', e.g. + // gfx900+xnack. Currently HIP-Clang does not include feature strings + // in code object target id in fat binary. Therefore drop the feature + // strings from device name before comparing it with code object target id. + std::string short_name(device_name); + auto feature_loc = short_name.find('+'); + if (feature_loc != std::string::npos) { + short_name.erase(feature_loc); + } + return codeobj_target_id == short_name; +} + +// Extracts code objects from fat binary in data for device names given in devices. +// Returns true if code objects are extracted successfully. +bool __hipExtractCodeObjectFromFatBinary(const void* data, + const std::vector& devices, + std::vector>& code_objs) +{ + std::string magic((const char*)data, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); + if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { + return false; + } + + code_objs.resize(devices.size()); + const auto obheader = reinterpret_cast(data); + const auto* desc = &obheader->desc[0]; + unsigned num_code_objs = 0; + for (uint64_t i = 0; i < obheader->numBundles; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { + + std::string triple(desc->triple, sizeof(HIP_AMDGCN_AMDHSA_TRIPLE) - 1); + if (triple.compare(HIP_AMDGCN_AMDHSA_TRIPLE)) + continue; + + std::string target(desc->triple + sizeof(HIP_AMDGCN_AMDHSA_TRIPLE), + desc->tripleSize - sizeof(HIP_AMDGCN_AMDHSA_TRIPLE)); + + const void *image = reinterpret_cast( + reinterpret_cast(obheader) + desc->offset); + size_t size = desc->size; + + for (size_t dev = 0; dev < devices.size(); ++dev) { + const char* name = devices[dev]; + + if (!isCompatibleCodeObject(target, name)) { + continue; + } + code_objs[dev] = std::make_pair(image, size); + num_code_objs++; + } + } + if (num_code_objs == devices.size()) + return true; + else + return false; +} + +extern "C" std::vector>* __hipRegisterFatBinary(const void* data) +{ + const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); + if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { + return nullptr; + } + + return PlatformState::instance().addFatBinary(fbwrapper->binary); +} + +void PlatformState::digestFatBinary(const void* data, std::vector>& programs) +{ + if (programs.size() > 0) { + return; + } + + std::vector> code_objs; + std::vector devices; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]; + devices.push_back(ctx->devices()[0]->info().name_); + } + + if (!__hipExtractCodeObjectFromFatBinary((char*)data, devices, code_objs)) { + return; + } + + programs.resize(g_devices.size()); + + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]; + amd::Program* program = new amd::Program(*ctx); + if (program == nullptr) { + return; + } + if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) { + programs.at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); + } + } +} + +void PlatformState::init() +{ + amd::ScopedLock lock(lock_); + + if(initialized_ || g_devices.empty()) { + return; + } + initialized_ = true; + + for (auto& it : modules_) { + digestFatBinary(it.first, it.second); + } + for (auto& it : functions_) { + it.second.functions.resize(g_devices.size()); + } + for (auto& it : vars_) { + it.second.rvars.resize(g_devices.size()); + } +} + +std::vector< std::pair >* PlatformState::unregisterVar(hipModule_t hmod) { + amd::ScopedLock lock(lock_); + std::vector< std::pair >* rmodules = nullptr; + auto it = vars_.begin(); + while (it != vars_.end()) { + DeviceVar& dvar = it->second; + if ((*dvar.modules)[0].first == hmod) { + rmodules = dvar.modules; + if (dvar.dyn_undef) { + texture* tex_hptr + = reinterpret_cast *>(dvar.shadowVptr); + delete tex_hptr; + } + vars_.erase(it++); + } else { + ++it; + } + } + return rmodules; +} + +PlatformState::DeviceVar* PlatformState::findVar(std::string hostVar, int deviceId, hipModule_t hmod) { + DeviceVar* dvar = nullptr; + if (hmod != nullptr) { + // If module is provided, then get the var only from that module + auto var_range = vars_.equal_range(hostVar); + for (auto it = var_range.first; it != var_range.second; ++it) { + if ((*it->second.modules)[deviceId].first == hmod) { + dvar = &(it->second); + break; + } + } + } else { + // If var count is < 2, return the var + if (vars_.count(hostVar) < 2) { + auto it = vars_.find(hostVar); + dvar = ((it == vars_.end()) ? nullptr : &(it->second)); + } else { + // If var count is > 2, return the original var, + // if original var count != 1, return vars_.end()/Invalid + size_t orig_global_count = 0; + auto var_range = vars_.equal_range(hostVar); + for (auto it = var_range.first; it != var_range.second; ++it) { + // when dyn_undef is set, it is a shadow var + if (it->second.dyn_undef == false) { + ++orig_global_count; + dvar = &(it->second); + } + } + dvar = ((orig_global_count == 1) ? dvar : nullptr); + } + } + + return dvar; +} + +void PlatformState::registerVar(const void* hostvar, + const DeviceVar& rvar) { + amd::ScopedLock lock(lock_); + vars_.insert(std::make_pair(std::string(reinterpret_cast(hostvar)), rvar)); +} + +void PlatformState::registerFunction(const void* hostFunction, + const DeviceFunction& func) { + amd::ScopedLock lock(lock_); + functions_.insert(std::make_pair(hostFunction, func)); +} + +bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFuncAttributes* func_attr) { + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + const auto it = dev_program->kernels().find(std::string(func_name)); + if (it == dev_program->kernels().cend()) { + return false; + } + + const device::Kernel::WorkGroupInfo* wginfo = it->second->workGroupInfo(); + func_attr->localSizeBytes = wginfo->localMemSize_; + func_attr->sharedSizeBytes = wginfo->size_; + func_attr->maxThreadsPerBlock = wginfo->wavefrontSize_; + func_attr->numRegs = wginfo->usedVGPRs_; + + return true; +} + +bool PlatformState::getShadowVarInfo(std::string var_name, hipModule_t hmod, + void** var_addr, size_t* var_size) { + DeviceVar* dvar = findVar(var_name, ihipGetDevice(), hmod); + if (dvar != nullptr) { + *var_addr = dvar->shadowVptr; + *var_size = dvar->size; + return true; + } else { + return false; + } +} + +bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr, + size_t* var_size) { + return PlatformState::instance().getShadowVarInfo(var_name, reinterpret_cast(program), + var_addr, var_size); +} + +hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { + amd::ScopedLock lock(lock_); + const auto it = functions_.find(hostFunction); + if (it != functions_.cend()) { + PlatformState::DeviceFunction& devFunc = it->second; + if (devFunc.functions[deviceId] == 0) { + hipModule_t module = (*devFunc.modules)[deviceId].first; + if (!(*devFunc.modules)[deviceId].second) { + amd::Program* program = as_amd(reinterpret_cast(module)); + program->setVarInfoCallBack(&getSvarInfo); + if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + return nullptr; + } + (*devFunc.modules)[deviceId].second = true; + } + hipFunction_t function = nullptr; + if (hipSuccess == hipModuleGetFunction(&function, module, devFunc.deviceName.c_str()) && + function != nullptr) { + devFunc.functions[deviceId] = function; + } + else { + // tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for" + // " device %d\n", deviceName, deviceId); + } + } + return devFunc.functions[deviceId]; + } + return nullptr; +} + +bool PlatformState::getFuncAttr(const void* hostFunction, + hipFuncAttributes* func_attr) { + if (func_attr == nullptr) { + return false; + } + + const auto it = functions_.find(hostFunction); + if (it == functions_.cend()) { + return false; + } + + PlatformState::DeviceFunction& devFunc = it->second; + int deviceId = ihipGetDevice(); + + /* If module has not been initialized yet, build the kernel now*/ + if (!(*devFunc.modules)[deviceId].second) { + if (nullptr == PlatformState::instance().getFunc(hostFunction, deviceId)) { + return false; + } + } + + amd::Program* program = as_amd(reinterpret_cast((*devFunc.modules)[deviceId].first)); + if (!ihipGetFuncAttributes(devFunc.deviceName.c_str(), program, func_attr)) { + return false; + } + return true; +} + +bool PlatformState::getTexRef(const char* hostVar, textureReference** texRef) { + amd::ScopedLock lock(lock_); + DeviceVar* dvar = findVar(std::string(hostVar), ihipGetDevice(), nullptr); + if (dvar == nullptr) { + return false; + } + + if (!dvar->dyn_undef) { + return false; + } + + *texRef = reinterpret_cast(dvar->shadowVptr); + return true; +} + +bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr) { + amd::ScopedLock lock(lock_); + DeviceVar* dvar = findVar(std::string(reinterpret_cast(hostVar)), deviceId, hmod); + if (dvar != nullptr) { + if (dvar->rvars[deviceId].getdeviceptr() == nullptr) { + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + amd::Memory* amd_mem_obj = nullptr; + + if (!(*dvar->modules)[deviceId].second) { + amd::Program* program = as_amd(reinterpret_cast((*dvar->modules)[deviceId].first)); + program->setVarInfoCallBack(&getSvarInfo); + if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + return false; + } + (*dvar->modules)[deviceId].second = true; + } + if((hipSuccess == ihipCreateGlobalVarObj(dvar->hostVar.c_str(), (*dvar->modules)[deviceId].first, + &amd_mem_obj, &device_ptr, &sym_size)) + && (device_ptr != nullptr)) { + dvar->rvars[deviceId].size_ = sym_size; + dvar->rvars[deviceId].devicePtr_ = device_ptr; + dvar->rvars[deviceId].amd_mem_obj_ = amd_mem_obj; + amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj); + } else { + LogError("[HIP] __hipRegisterVar cannot find kernel for device \n"); + } + } + *size_ptr = dvar->rvars[deviceId].getvarsize(); + *dev_ptr = dvar->rvars[deviceId].getdeviceptr(); + return true; + } else { + return false; + } +} + +void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) { + auto& arguments = execStack_.top().arguments_; + + if (arguments.size() < offset + size) { + arguments.resize(offset + size); + } + + ::memcpy(&arguments[offset], arg, size); +} + +void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream) { + execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); +} + +void PlatformState::popExec(ihipExec_t& exec) { + exec = std::move(execStack_.top()); + execStack_.pop(); +} + +extern "C" void __hipRegisterFunction( + std::vector >* modules, + const void* hostFunction, + char* deviceFunction, + const char* deviceName, + unsigned int threadLimit, + uint3* tid, + uint3* bid, + dim3* blockDim, + dim3* gridDim, + int* wSize) +{ + PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{g_devices.size()}}; + PlatformState::instance().registerFunction(hostFunction, func); +// for (size_t i = 0; i < g_devices.size(); ++i) { +// PlatformState::instance().getFunc(hostFunction, i); +// } +} + +// Registers a device-side global variable. +// For each global variable in device code, there is a corresponding shadow +// global variable in host code. The shadow host variable is used to keep +// track of the value of the device side global variable between kernel +// executions. +extern "C" void __hipRegisterVar( + std::vector >* modules, // The device modules containing code object + char* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int ext, // Whether this variable is external + int size, // Size of the variable + int constant, // Whether this variable is constant + int global) // Unknown, always 0 +{ + PlatformState::DeviceVar dvar{var, std::string{ hostVar }, static_cast(size), modules, + std::vector{g_devices.size()}, false }; + + PlatformState::instance().registerVar(hostVar, dvar); +} + +extern "C" void __hipUnregisterFatBinary(std::vector< std::pair >* modules) +{ + HIP_INIT(); + + std::for_each(modules->begin(), modules->end(), [](std::pair module){ + if (module.first != nullptr) { + as_amd(reinterpret_cast(module.first))->release(); + } + }); + if (modules->size() > 0) { + PlatformState::instance().unregisterVar((*modules)[0].first); + } + PlatformState::instance().removeFatBinary(modules); +} + +extern "C" hipError_t hipConfigureCall( + dim3 gridDim, + dim3 blockDim, + size_t sharedMem, + hipStream_t stream) +{ + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + + PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t __hipPushCallConfiguration( + dim3 gridDim, + dim3 blockDim, + size_t sharedMem, + hipStream_t stream) +{ + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + + PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim, + dim3 *blockDim, + size_t *sharedMem, + hipStream_t *stream) { + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + + ihipExec_t exec; + PlatformState::instance().popExec(exec); + *gridDim = exec.gridDim_; + *blockDim = exec.blockDim_; + *sharedMem = exec.sharedMem_; + *stream = exec.hStream_; + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t hipSetupArgument( + const void *arg, + size_t size, + size_t offset) +{ + HIP_INIT_API(NONE, arg, size, offset); + + PlatformState::instance().setupArgument(arg, size, offset); + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) +{ + HIP_INIT_API(NONE, hostFunction); + + int deviceId = ihipGetDevice(); + if (deviceId == -1) { + HIP_RETURN(hipErrorNoDevice); + } + hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); + if (func == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + ihipExec_t exec; + PlatformState::instance().popExec(exec); + + size_t size = exec.arguments_.size(); + void *extra[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0], + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END + }; + + HIP_RETURN(hipModuleLaunchKernel(func, + exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z, + exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z, + exec.sharedMem_, exec.hStream_, nullptr, extra)); +} + +extern "C" hipError_t hipLaunchKernel(const void *hostFunction, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMemBytes, + hipStream_t stream) +{ + HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes, + stream); + + int deviceId = ihipGetDevice(); + if (deviceId == -1) { + HIP_RETURN(hipErrorNoDevice); + } + hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); + if (func == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + HIP_RETURN(hipModuleLaunchKernel(func, gridDim.x, gridDim.y, gridDim.z, + blockDim.x, blockDim.y, blockDim.z, + sharedMemBytes, stream, args, nullptr)); +} + + +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { + size_t size = 0; + if(!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + devPtr, &size)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbolName) { + hipDeviceptr_t devPtr = nullptr; + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + &devPtr, sizePtr)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + HIP_RETURN(hipSuccess); +} + +hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, + hipDeviceptr_t* dptr, size_t* bytes) +{ + HIP_INIT(); + + amd::Program* program = nullptr; + device::Program* dev_program = nullptr; + + /* Get Device Program pointer*/ + program = as_amd(reinterpret_cast(hmod)); + dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + if (dev_program == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + /* Find the global Symbols */ + if(!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + HIP_RETURN(hipSuccess); +} + + +namespace hip_impl { + +hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + hipFunction_t f, + int blockSize, + size_t dynamicSMemSize) +{ + HIP_INIT_API(NONE, f, blockSize, dynamicSMemSize); + int deviceId = ihipGetDevice(); + // FIXME: Function may not be a device function and may have been obtaiend via + // hipModuleGetFunction and thus not in the functions_ map. Check the map + // else interpret as a hip::Function for now. + hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); + if (func == nullptr) { + func = f; + } + hip::Function* function = hip::Function::asFunction(func); + if (function == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + amd::Kernel* kernel = function->function_; + if (!kernel) { + HIP_RETURN(hipErrorOutOfMemory); + } + if (blockSize == 0) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::Device* device = hip::getCurrentContext()->devices()[0]; + const device::Kernel::WorkGroupInfo* wrkGrpInfo = kernel->getDeviceKernel(*device)->workGroupInfo(); + + // Find threads accupancy per CU => simd_per_cu * GPR usage + constexpr size_t MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD + size_t VgprWaves = wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4); + + size_t GprWaves; + if (wrkGrpInfo->usedSGPRs_ > 0) { + const size_t maxSGPRs = (device->info().gfxipVersion_ < 800) ? 512 : 800; + size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16); + GprWaves = std::min(VgprWaves, SgprWaves); + } + else { + GprWaves = VgprWaves; + } + + size_t alu_accupancy = device->info().simdPerCU_ * std::min(MaxWavesPerSimd, GprWaves); + alu_accupancy *= wrkGrpInfo->wavefrontSize_; + // Calculate blocks occupancy per CU + *numBlocks = alu_accupancy / amd::alignUp(blockSize, wrkGrpInfo->wavefrontSize_); + + size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSize; + if (total_used_lds != 0) { + // Calculate LDS occupancy per CU. lds_per_cu / (static_lsd + dynamic_lds) + int lds_occupancy = static_cast(device->info().localMemSize_ / total_used_lds); + *numBlocks = std::min(*numBlocks, lds_occupancy); + } + + HIP_RETURN(hipSuccess); +} +} + +extern "C" { +// FIXME: Need to replace `uint32_t` with `int` finally. +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(uint32_t* numBlocks, + hipFunction_t f, + uint32_t blockSize, + size_t dynamicSMemSize) +{ + int NB; + hipError_t Ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(&NB, f, blockSize, dynamicSMemSize); + *numBlocks = NB; + HIP_RETURN(Ret); +} + +// FIXME: Need to replace `uint32_t` with `int` finally. +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(uint32_t* numBlocks, + hipFunction_t f, + uint32_t blockSize, + size_t dynamicSMemSize, + unsigned int flags) +{ + int NB; + hipError_t Ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(&NB, f, blockSize, dynamicSMemSize); + *numBlocks = NB; + HIP_RETURN(Ret); +} +} + + +#if defined(ATI_OS_LINUX) + +namespace hip_impl { + +struct dl_phdr_info { + ELFIO::Elf64_Addr dlpi_addr; + const char *dlpi_name; + const ELFIO::Elf64_Phdr *dlpi_phdr; + ELFIO::Elf64_Half dlpi_phnum; +}; + +extern "C" int dl_iterate_phdr( + int (*callback) (struct dl_phdr_info *info, size_t size, void *data), void *data +); + +struct Symbol { + std::string name; + ELFIO::Elf64_Addr value = 0; + ELFIO::Elf_Xword size = 0; + ELFIO::Elf_Half sect_idx = 0; + uint8_t bind = 0; + uint8_t type = 0; + uint8_t other = 0; +}; + +inline Symbol read_symbol(const ELFIO::symbol_section_accessor& section, unsigned int idx) { + assert(idx < section.get_symbols_num()); + + Symbol r; + section.get_symbol(idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other); + + return r; +} + +template +inline ELFIO::section* find_section_if(ELFIO::elfio& reader, P p) { + const auto it = find_if(reader.sections.begin(), reader.sections.end(), std::move(p)); + + return it != reader.sections.end() ? *it : nullptr; +} + +std::vector> function_names_for(const ELFIO::elfio& reader, + ELFIO::section* symtab) { + std::vector> r; + ELFIO::symbol_section_accessor symbols{reader, symtab}; + + for (auto i = 0u; i != symbols.get_symbols_num(); ++i) { + auto tmp = read_symbol(symbols, i); + + if (tmp.type == STT_FUNC && tmp.sect_idx != SHN_UNDEF && !tmp.name.empty()) { + r.emplace_back(tmp.value, tmp.name); + } + } + + return r; +} + +const std::vector>& function_names_for_process() { + static constexpr const char self[] = "/proc/self/exe"; + + static std::vector> r; + static std::once_flag f; + + std::call_once(f, []() { + ELFIO::elfio reader; + + if (reader.load(self)) { + const auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; }); + + if (it) r = function_names_for(reader, it); + } + }); + + return r; +} + + +const std::unordered_map& function_names() +{ + static std::unordered_map r{ + function_names_for_process().cbegin(), + function_names_for_process().cend()}; + static std::once_flag f; + + std::call_once(f, []() { + dl_iterate_phdr([](dl_phdr_info* info, size_t, void*) { + ELFIO::elfio reader; + + if (reader.load(info->dlpi_name)) { + const auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; }); + + if (it) { + auto n = function_names_for(reader, it); + + for (auto&& f : n) f.first += info->dlpi_addr; + + r.insert(make_move_iterator(n.begin()), make_move_iterator(n.end())); + } + } + return 0; + }, + nullptr); + }); + + return r; +} + +std::vector bundles_for_process() { + static constexpr const char self[] = "/proc/self/exe"; + static constexpr const char kernel_section[] = ".kernel"; + std::vector r; + + ELFIO::elfio reader; + + if (reader.load(self)) { + auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_name() == kernel_section; }); + + if (it) r.insert(r.end(), it->get_data(), it->get_data() + it->get_size()); + } + + return r; +} + +const std::vector& modules() { + static std::vector r; + static std::once_flag f; + + std::call_once(f, []() { + static std::vector> bundles{bundles_for_process()}; + + dl_iterate_phdr( + [](dl_phdr_info* info, std::size_t, void*) { + ELFIO::elfio tmp; + if (tmp.load(info->dlpi_name)) { + const auto it = find_section_if( + tmp, [](const ELFIO::section* x) { return x->get_name() == ".kernel"; }); + + if (it) bundles.emplace_back(it->get_data(), it->get_data() + it->get_size()); + } + return 0; + }, + nullptr); + + for (auto&& bundle : bundles) { + if (bundle.empty()) { + continue; + } + std::string magic(&bundle[0], sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); + if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) + continue; + + const auto obheader = reinterpret_cast(&bundle[0]); + const auto* desc = &obheader->desc[0]; + for (uint64_t i = 0; i < obheader->numBundles; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { + + std::string triple(desc->triple, sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1); + if (triple.compare(HCC_AMDGCN_AMDHSA_TRIPLE)) + continue; + + std::string target(desc->triple + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE), + desc->tripleSize - sizeof(HCC_AMDGCN_AMDHSA_TRIPLE)); + + if (isCompatibleCodeObject(target, hip::getCurrentContext()->devices()[0]->info().name_)) { + hipModule_t module; + if (hipSuccess == hipModuleLoadData(&module, reinterpret_cast( + reinterpret_cast(obheader) + desc->offset))) + r.push_back(module); + break; + } + } + } + }); + + return r; +} + +const std::unordered_map& functions() +{ + static std::unordered_map r; + static std::once_flag f; + + std::call_once(f, []() { + for (auto&& function : function_names()) { + for (auto&& module : modules()) { + hipFunction_t f; + if (hipSuccess == hipModuleGetFunction(&f, module, function.second.c_str())) { + r[function.first] = f; + } + } + } + }); + + return r; +} + + +void hipLaunchKernelGGLImpl( + uintptr_t function_address, + const dim3& numBlocks, + const dim3& dimBlocks, + uint32_t sharedMemBytes, + hipStream_t stream, + void** kernarg) +{ + HIP_INIT(); + + const auto it = functions().find(function_address); + if (it == functions().cend()) + assert(0); + + hipModuleLaunchKernel(it->second, + numBlocks.x, numBlocks.y, numBlocks.z, + dimBlocks.x, dimBlocks.y, dimBlocks.z, + sharedMemBytes, stream, nullptr, kernarg); +} + +void hipLaunchCooperativeKernelGGLImpl( + uintptr_t function_address, + const dim3& numBlocks, + const dim3& dimBlocks, + uint32_t sharedMemBytes, + hipStream_t stream, + void** kernarg) +{ + HIP_INIT(); + + hipLaunchCooperativeKernel(reinterpret_cast(function_address), + numBlocks, dimBlocks, kernarg, sharedMemBytes, stream); +} + +} + +#endif // defined(ATI_OS_LINUX) + +// conversion routines between float and half precision +static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; } +static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; } +static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); } + +// half float, the f16 is in the low 16 bits of the input argument +static inline float __convert_half_to_float(std::uint32_t a) noexcept { + std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U; + std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U; + u = (a & 0x7fff) != 0 ? v : u; + return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/; +} + +// float half with nearest even rounding +// The lower 16 bits of the result is the bit pattern for the f16 +static inline std::uint32_t __convert_float_to_half(float a) noexcept { + std::uint32_t u = f32_as_u32(a); + int e = static_cast((u >> 23) & 0xff) - 127 + 15; + std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0); + std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0); + std::uint32_t n = ((std::uint32_t)e << 12) | m; + std::uint32_t s = (u >> 16) & 0x8000; + int b = clamp_int(1-e, 0, 13); + std::uint32_t d = (0x1000 | m) >> b; + d |= (d << b) != (0x1000 | m); + std::uint32_t v = e < 1 ? d : n; + v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5)); + v = e > 30 ? 0x7c00 : v; + v = e == 143 ? i : v; + return s | v; +} + +extern "C" float __gnu_h2f_ieee(unsigned short h){ + return __convert_half_to_float((std::uint32_t) h); +} + +extern "C" unsigned short __gnu_f2h_ieee(float f){ + return (unsigned short)__convert_float_to_half(f); +} diff --git a/vdi/hip_prof_api.h b/vdi/hip_prof_api.h new file mode 100644 index 0000000000..b05e50608d --- /dev/null +++ b/vdi/hip_prof_api.h @@ -0,0 +1,252 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_SRC_HIP_PROF_API_H +#define HIP_SRC_HIP_PROF_API_H + +#include +#include +#include + +#if USE_PROF_API +#include "hip/hcc_detail/hip_prof_str.h" +#include "platform/prof_protocol.h" + +// HIP API callbacks spawner object macro +#define HIP_CB_SPAWNER_OBJECT(CB_ID) \ + api_callbacks_spawner_t __api_tracer; \ + { \ + hip_api_data_t* api_data = __api_tracer.get_api_data_ptr(); \ + if (api_data != NULL) { \ + hip_api_data_t& api_data_ref = *api_data; \ + INIT_CB_ARGS_DATA(CB_ID, api_data_ref); \ + __api_tracer.call(); \ + } \ + } + +static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API; +typedef activity_record_t hip_api_record_t; +typedef activity_rtapi_callback_t hip_api_callback_t; +typedef activity_sync_callback_t hip_act_callback_t; + +class api_callbacks_table_t { + public: + typedef std::mutex mutex_t; + + typedef hip_api_record_t record_t; + typedef hip_api_callback_t fun_t; + typedef hip_act_callback_t act_t; + + // HIP API callbacks table + struct hip_cb_table_entry_t { + volatile std::atomic sync; + volatile std::atomic sem; + act_t act; + void* a_arg; + fun_t fun; + void* arg; + }; + + struct hip_cb_table_t { + hip_cb_table_entry_t arr[HIP_API_ID_NUMBER]; + }; + + api_callbacks_table_t() { + memset(&callbacks_table_, 0, sizeof(callbacks_table_)); + } + + bool set_activity(uint32_t id, act_t fun, void* arg) { + std::lock_guard lock(mutex_); + bool ret = true; + + if (id < HIP_API_ID_NUMBER) { + cb_sync(id); + callbacks_table_.arr[id].act = fun; + callbacks_table_.arr[id].a_arg = arg; + enabled_ = true; + cb_release(id); + } else { + ret = false; + } + + return ret; + } + + bool set_callback(uint32_t id, fun_t fun, void* arg) { + std::lock_guard lock(mutex_); + bool ret = true; + + if (id < HIP_API_ID_NUMBER) { + cb_sync(id); + callbacks_table_.arr[id].fun = fun; + callbacks_table_.arr[id].arg = arg; + enabled_ = true; + cb_release(id); + } else { + ret = false; + } + + return ret; + } + + void set_enabled(const bool& enabled) { + enabled_ = enabled; + } + + inline hip_cb_table_entry_t& entry(const uint32_t& id) { + return callbacks_table_.arr[id]; + } + + inline void sem_sync(const uint32_t& id) { + sem_increment(id); + if (entry(id).sync.load() == true) sync_wait(id); + } + + inline void sem_release(const uint32_t& id) { + sem_decrement(id); + } + + inline bool is_enabled() const { + return enabled_; + } + + private: + inline void cb_sync(const uint32_t& id) { + entry(id).sync.store(true); + while (entry(id).sem.load() != 0) {} + } + + inline void cb_release(const uint32_t& id) { + entry(id).sync.store(false); + } + + inline void sem_increment(const uint32_t& id) { + const uint32_t prev = entry(id).sem.fetch_add(1); + if (prev == UINT32_MAX) { + std::cerr << "sem overflow id = " << id << std::endl << std::flush; + abort(); + } + } + + inline void sem_decrement(const uint32_t& id) { + const uint32_t prev = entry(id).sem.fetch_sub(1); + if (prev == 0) { + std::cerr << "sem corrupted id = " << id << std::endl << std::flush; + abort(); + } + } + + void sync_wait(const uint32_t& id) { + sem_decrement(id); + while (entry(id).sync.load() == true) {} + sem_increment(id); + } + + mutex_t mutex_; + hip_cb_table_t callbacks_table_; + bool enabled_; +}; + +extern api_callbacks_table_t callbacks_table; + +template +class api_callbacks_spawner_t { + public: + api_callbacks_spawner_t() : + api_data_(NULL) + { + if (!is_enabled()) return; + + if (cid_ >= HIP_API_ID_NUMBER) { + fprintf(stderr, "HIP %s bad id %d\n", __FUNCTION__, cid_); + abort(); + } + callbacks_table.sem_sync(cid_); + + hip_act_callback_t act = entry(cid_).act; + if (act != NULL) api_data_ = (hip_api_data_t*) act(cid_, NULL, NULL, NULL); + } + + void call() { + hip_api_callback_t fun = entry(cid_).fun; + void* arg = entry(cid_).arg; + if (fun != NULL) { + fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + api_data_->phase = ACTIVITY_API_PHASE_EXIT; + } + } + + ~api_callbacks_spawner_t() { + if (!is_enabled()) return; + + if (api_data_ != NULL) { + hip_api_callback_t fun = entry(cid_).fun; + void* arg = entry(cid_).arg; + hip_act_callback_t act = entry(cid_).act; + void* a_arg = entry(cid_).a_arg; + if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + if (act != NULL) act(cid_, NULL, NULL, a_arg); + } + + callbacks_table.sem_release(cid_); + } + + hip_api_data_t* get_api_data_ptr() { + return api_data_; + } + + bool is_enabled() const { + return callbacks_table.is_enabled(); + } + + private: + inline api_callbacks_table_t::hip_cb_table_entry_t& entry(const uint32_t& id) { + return callbacks_table.entry(id); + } + + hip_api_data_t* api_data_; +}; + +template <> +class api_callbacks_spawner_t { + public: + api_callbacks_spawner_t() {} + void call() {} + hip_api_data_t* get_api_data_ptr() { return NULL; } + bool is_enabled() const { return false; } +}; + +#else + +#define HIP_CB_SPAWNER_OBJECT(x) do {} while(0) + +class api_callbacks_table_t { + public: + typedef void* act_t; + typedef void* fun_t; + bool set_activity(uint32_t id, act_t fun, void* arg) { return false; } + bool set_callback(uint32_t id, fun_t fun, void* arg) { return false; } +}; + +#endif + +#endif // HIP_SRC_HIP_PROF_API_H diff --git a/vdi/hip_profile.cpp b/vdi/hip_profile.cpp new file mode 100644 index 0000000000..7ff93445c4 --- /dev/null +++ b/vdi/hip_profile.cpp @@ -0,0 +1,42 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipProfilerStart() { + HIP_INIT_API(hipProfilerStart); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + + +hipError_t hipProfilerStop() { + HIP_INIT_API(hipProfilerStop); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} \ No newline at end of file diff --git a/vdi/hip_rtc.cpp b/vdi/hip_rtc.cpp new file mode 100644 index 0000000000..093c828975 --- /dev/null +++ b/vdi/hip_rtc.cpp @@ -0,0 +1,377 @@ +/* +Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "hiprtc_internal.hpp" +#include +#include "platform/program.hpp" + +extern "C" char * __cxa_demangle(const char *mangled_name, char *output_buffer, + size_t *length, int *status); + + +namespace hiprtc { +thread_local hiprtcResult g_lastRtcError = HIPRTC_SUCCESS; +} + +class ProgramState { + amd::Monitor lock_; +private: + static ProgramState* programState_; + + ProgramState() : lock_("Guards program state") {} + ~ProgramState() {} +public: + std::unordered_map, std::vector>> progHeaders_; + + std::map> nameExpresssion_; + + static ProgramState& instance(); + void createProgramHeaders(amd::Program* program, int numHeaders, + const char** headers, const char** headerNames); + void getProgramHeaders(amd::Program* program, int* numHeaders, char** headers, char ** headerNames); + uint32_t addNameExpression(const char* name_expression); + char* getLoweredName(const char* name_expression); +}; + +ProgramState* ProgramState::programState_ = nullptr; + +ProgramState& ProgramState::instance() { + if (programState_ == nullptr) { + programState_ = new ProgramState; + } + return *programState_; +} + +void ProgramState::createProgramHeaders(amd::Program* program, int numHeaders, + const char** headers, const char** headerNames) { + amd::ScopedLock lock(lock_); + std::vector vHeaderNames; + std::vector vHeaders; + for (auto i = 0; i != numHeaders; ++i) { + vHeaders.emplace_back(headers[i]); + vHeaderNames.emplace_back(headerNames[i]); + progHeaders_[program] = std::make_pair(std::move(vHeaders), std::move(vHeaderNames)); + } +} + +void ProgramState::getProgramHeaders(amd::Program* program, int* numHeaders, + char** headers, char ** headerNames) { + amd::ScopedLock lock(lock_); + + const auto it = progHeaders_.find(program); + if (it != progHeaders_.cend()) { + *numHeaders = it->second.first.size(); + *headers = reinterpret_cast(it->second.first.data()); + *headerNames = reinterpret_cast(it->second.second.data()); + } +} + +uint32_t ProgramState::addNameExpression(const char* name_expression) { + amd::ScopedLock lock(lock_); + + // Strip clean of any '(' or ')' or '&' + std::string strippedName(name_expression); + if (strippedName.back() == ')') { + strippedName.pop_back(); + strippedName.erase(0, strippedName.find('(')); + } + if (strippedName.front() == '&') { + strippedName.erase(0, 1); + } + auto it = nameExpresssion_.find(name_expression); + if (it == nameExpresssion_.end()) { + nameExpresssion_.insert(std::pair> + (name_expression, std::make_pair(strippedName,""))); + } + return nameExpresssion_.size(); +} + +namespace hip_impl { + +char* demangle(const char* loweredName) { +#ifdef ATI_OS_LINUX + if (!loweredName) { + return nullptr; + } + + int status = 0; + char* demangledName = __cxa_demangle(loweredName, nullptr, nullptr, &status); + if (status != 0) { + return nullptr; + } + + return demangledName; +#else + return nullptr; +#endif +} +} // hip_impl + +static std::string handleMangledName(std::string name) { + std::string loweredName; + char* demangled = hip_impl::demangle(name.c_str()); + loweredName.assign(demangled == nullptr ? std::string() : demangled); + free(demangled); + + if (loweredName.empty()) { + return name; + } + + if (loweredName.find(".kd") != std::string::npos) { + return {}; + } + + if (loweredName.find("void ") == 0) { + loweredName.erase(0, strlen("void ")); + } + + auto dx{loweredName.find_first_of("(<")}; + + if (dx == std::string::npos) { + return loweredName; + } + + if (loweredName[dx] == '<') { + uint32_t count = 1; + do { + ++dx; + count += (loweredName[dx] == '<') ? 1 : ((loweredName[dx] == '>') ? -1 : 0); + } while (count); + + loweredName.erase(++dx); + } else { + loweredName.erase(dx); + } + + return loweredName; +} + + +const char* hiprtcGetErrorString(hiprtcResult x) { + switch (x) { + case HIPRTC_SUCCESS: + return "HIPRTC_SUCCESS"; + case HIPRTC_ERROR_OUT_OF_MEMORY: + return "HIPRTC_ERROR_OUT_OF_MEMORY"; + case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE: + return "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE"; + case HIPRTC_ERROR_INVALID_INPUT: + return "HIPRTC_ERROR_INVALID_INPUT"; + case HIPRTC_ERROR_INVALID_PROGRAM: + return "HIPRTC_ERROR_INVALID_PROGRAM"; + case HIPRTC_ERROR_INVALID_OPTION: + return "HIPRTC_ERROR_INVALID_OPTION"; + case HIPRTC_ERROR_COMPILATION: + return "HIPRTC_ERROR_COMPILATION"; + case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE: + return "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE"; + case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: + return "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION"; + case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: + return "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION"; + case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID: + return "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID"; + case HIPRTC_ERROR_INTERNAL_ERROR: + return "HIPRTC_ERROR_INTERNAL_ERROR"; + default: + throw std::logic_error{"Invalid HIPRTC result."}; + }; +} + +hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, + int numHeaders, const char** headers, const char** headerNames) { + HIPRTC_INIT_API(prog, src, name, numHeaders, headers, headerNames); + + if (prog == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_PROGRAM); + } + if (numHeaders < 0) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + if (numHeaders && (headers == nullptr || headerNames == nullptr)) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + amd::Program* program = new amd::Program(*hip::getCurrentContext(), src, amd::Program::HIP); + if (program == NULL) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0])) { + program->release(); + HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE); + } + + ProgramState::instance().createProgramHeaders(program, numHeaders, headers, headerNames); + + *prog = reinterpret_cast(as_cl(program)); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + + +hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) { + + // FIXME[skudchad] Add headers to amd::Program::build and device::Program::build, + // pass the saved from ProgramState to amd::Program::build + HIPRTC_INIT_API(prog, numOptions, options); + + amd::Program* program = as_amd(reinterpret_cast(prog)); + + std::ostringstream ostrstr; + std::vector oarr(&options[0], &options[numOptions]); + std::copy(oarr.begin(), oarr.end(), std::ostream_iterator(ostrstr, " ")); + + std::vector devices{hip::getCurrentContext()->devices()[0]}; + if (CL_SUCCESS != program->build(devices, ostrstr.str().c_str(), nullptr, nullptr)) { + HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION); + } + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression) { + HIPRTC_INIT_API(prog, name_expression); + + if (name_expression == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + amd::Program* program = as_amd(reinterpret_cast(prog)); + + uint32_t id = ProgramState::instance().addNameExpression(name_expression); + + const auto var{"__hiprtc_" + std::to_string(id)}; + const auto code{"\nextern \"C\" constexpr auto " + var + " = " + name_expression + ';'}; + + program->appendToSource(code.c_str()); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, + const char** loweredName) { + HIPRTC_INIT_API(prog, name_expression, loweredName); + + if (name_expression == nullptr || loweredName == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + amd::Program* program = as_amd(reinterpret_cast(prog)); + + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + auto it = ProgramState::instance().nameExpresssion_.find(name_expression); + if (it == ProgramState::instance().nameExpresssion_.end()) { + return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID; + } + + std::string strippedName = it->second.first; + std::vector mangledNames; + + if (!dev_program->getLoweredNames(&mangledNames)) { + HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION); + } + + for (auto &name : mangledNames) { + std::string demangledName = handleMangledName(name); + if (demangledName == strippedName) { + it->second.second.assign(name); + } + } + + *loweredName = it->second.second.c_str(); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) { + HIPRTC_INIT_API(prog); + + if (prog == NULL) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + // Release program. hiprtcProgram is a double pointer so free *prog + amd::Program* program = as_amd(reinterpret_cast(*prog)); + + program->release(); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* binaryMem) { + HIPRTC_INIT_API(prog, binaryMem); + + + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program::binary_t& binary = + program->getDeviceProgram(*hip::getCurrentContext()->devices()[0])->binary(); + + ::memcpy(binaryMem, binary.first, binary.second); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* binarySizeRet) { + + HIPRTC_INIT_API(prog, binarySizeRet); + + amd::Program* program = as_amd(reinterpret_cast(prog)); + + *binarySizeRet = + program->getDeviceProgram(*hip::getCurrentContext()->devices()[0])->binary().second; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* dst) { + + HIPRTC_INIT_API(prog, dst); + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program* devProgram = + program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + auto log = program->programLog() + devProgram->buildLog().c_str(); + + log.copy(dst, log.size()); + dst[log.size()] = '\0'; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) { + + HIPRTC_INIT_API(prog, logSizeRet); + + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program* devProgram = + program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + auto log = program->programLog() + devProgram->buildLog().c_str(); + + *logSizeRet = log.size() + 1; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} diff --git a/vdi/hip_stream.cpp b/vdi/hip_stream.cpp new file mode 100644 index 0000000000..d01a863980 --- /dev/null +++ b/vdi/hip_stream.cpp @@ -0,0 +1,263 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "hip_internal.hpp" +#include "hip_event.hpp" +#include "thread/monitor.hpp" + +static amd::Monitor streamSetLock("Guards global stream set"); +static std::unordered_set streamSet; + +// Internal structure for stream callback handler +class StreamCallback { + public: + StreamCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + amd::Command* command) + : stream_(stream), callBack_(callback), + userData_(userData), command_(command) { + }; + hipStream_t stream_; + hipStreamCallback_t callBack_; + void* userData_; + amd::Command* command_; +}; + +namespace hip { + +void syncStreams() { + amd::ScopedLock lock(streamSetLock); + + for (const auto& it : streamSet) { + it->finish(); + } +} + +Stream::Stream(amd::Device* dev, amd::Context* ctx, amd::CommandQueue::Priority p, unsigned int f) : + queue(nullptr), device(dev), context(ctx), priority(p), flags(f) {} + +void Stream::create() { + cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; + queue = new amd::HostQueue(*context, *device, properties, + amd::CommandQueue::RealTimeDisabled, priority); + assert(queue != nullptr); + queue->create(); +} + +amd::HostQueue* Stream::asHostQueue() { + if (queue == nullptr) { + create(); + } + return queue; +} + +void Stream::destroy() { + if (queue != nullptr) { + queue->release(); + queue = nullptr; + } +} + +void Stream::finish() { + if (queue != nullptr) { + queue->finish(); + } +} + +}; + +void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { + + hipError_t status = hipSuccess; + StreamCallback* cbo = reinterpret_cast(user_data); + cbo->callBack_(cbo->stream_, status, cbo->userData_); + cbo->command_->release(); + delete cbo; +} + +static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd::CommandQueue::Priority priority) { + amd::Device* device = hip::getCurrentContext()->devices()[0]; + + hip::Stream* hStream = new hip::Stream(device, hip::getCurrentContext(), priority, flags); + + if (hStream == nullptr) { + return hipErrorOutOfMemory; + } + + if (!(flags & hipStreamNonBlocking)) { + hip::syncStreams(); + + { + amd::ScopedLock lock(streamSetLock); + streamSet.insert(hStream); + } + } + + *stream = reinterpret_cast(hStream); + + ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipStreamCreate: %zx", hStream); + + return hipSuccess; +} + +hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { + HIP_INIT_API(hipStreamCreateWithFlags, stream, flags); + + HIP_RETURN(ihipStreamCreate(stream, flags, amd::CommandQueue::Priority::Normal)); +} + +hipError_t hipStreamCreate(hipStream_t *stream) { + HIP_INIT_API(hipStreamCreate, stream); + + HIP_RETURN(ihipStreamCreate(stream, hipStreamDefault, amd::CommandQueue::Priority::Normal)); +} + +hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { + HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority); + + if (priority > static_cast(amd::CommandQueue::Priority::High)) { + priority = static_cast(amd::CommandQueue::Priority::High); + } else if (priority < static_cast(amd::CommandQueue::Priority::Normal)) { + priority = static_cast(amd::CommandQueue::Priority::Normal); + } + + return HIP_RETURN(ihipStreamCreate(stream, flags, static_cast(priority))); +} + +hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { + HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority); + + if (leastPriority != nullptr) { + *leastPriority = static_cast(amd::CommandQueue::Priority::Normal); + } + if (greatestPriority != nullptr) { + // Only report one kind of priority for now. + *greatestPriority = static_cast(amd::CommandQueue::Priority::Normal); + } + return HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { + HIP_INIT_API(hipStreamGetFlags, stream, flags); + + hip::Stream* hStream = reinterpret_cast(stream); + + if(flags != nullptr && hStream != nullptr) { + *flags = hStream->flags; + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamSynchronize(hipStream_t stream) { + HIP_INIT_API(hipStreamSynchronize, stream); + + amd::HostQueue* hostQueue = hip::getQueue(stream); + hostQueue->finish(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamDestroy(hipStream_t stream) { + HIP_INIT_API(hipStreamDestroy, stream); + + if (stream == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + amd::ScopedLock lock(streamSetLock); + + hip::Stream* hStream = reinterpret_cast(stream); + + hStream->destroy(); + streamSet.erase(hStream); + + delete hStream; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { + HIP_INIT_API(hipStreamWaitEvent, stream, event, flags); + + amd::HostQueue* queue; + + if (stream == nullptr) { + queue = hip::getNullStream(); + } else { + queue = reinterpret_cast(stream)->asHostQueue(); + } + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + hip::Event* e = reinterpret_cast(event); + + return HIP_RETURN(e->streamWait(queue, flags)); +} + +hipError_t hipStreamQuery(hipStream_t stream) { + HIP_INIT_API(hipStreamQuery, stream); + + amd::HostQueue* hostQueue; + if (stream == nullptr) { + hostQueue = hip::getNullStream(); + } else { + hostQueue = reinterpret_cast(stream)->asHostQueue(); + } + + amd::Command* command = hostQueue->getLastQueuedCommand(false); + if (command == nullptr) { + HIP_RETURN(hipSuccess); + } + + amd::Event& event = command->event(); + if (command->type() != 0) { + event.notifyCmdQueue(); + } + HIP_RETURN((command->status() == CL_COMPLETE) ? hipSuccess : hipErrorNotReady); +} + +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + unsigned int flags) { + HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); + + amd::HostQueue* hostQueue = reinterpret_cast + (stream)->asHostQueue(); + amd::Command* command = hostQueue->getLastQueuedCommand(true); + amd::Event& event = command->event(); + StreamCallback* cbo = new StreamCallback(stream, callback, userData, command); + + if(!event.setCallback(CL_COMPLETE, ihipStreamCallback, reinterpret_cast(cbo))) { + command->release(); + return hipErrorInvalidHandle; + } + + event.notifyCmdQueue(); + + HIP_RETURN(hipSuccess); +} + + diff --git a/vdi/hip_surface.cpp b/vdi/hip_surface.cpp new file mode 100644 index 0000000000..2a7b58d191 --- /dev/null +++ b/vdi/hip_surface.cpp @@ -0,0 +1,96 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" +#include + +namespace hip { + +static amd::Monitor surfaceLock("Guards surface objects"); + +struct hipSurface { + hipSurface(const hipResourceDesc* pResDesc): array(nullptr) + { + memcpy(&resDesc, pResDesc, sizeof(hipResourceDesc)); + } + + hipArray* array; + hipResourceDesc resDesc; +}; + +static std::unordered_map surfaceHash; + +}; + +using namespace hip; + +hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, + const hipResourceDesc* pResDesc) { + HIP_INIT_API(NONE, pSurfObject, pResDesc); + + hipSurface* pSurface = new hipSurface(pResDesc); + assert(pSurface != nullptr); + + switch (pResDesc->resType) { + case hipResourceTypeArray: + pSurface->array = pResDesc->res.array.array; + break; + default: + break; + } + hipSurfaceObject_t surfObj; + hipError_t err = hipMalloc(reinterpret_cast(&surfObj), sizeof(hipArray)); + if (err != hipSuccess) { + delete pSurface; + HIP_RETURN(hipErrorOutOfMemory); + } + err = hipMemcpy(reinterpret_cast(surfObj), reinterpret_cast(pResDesc->res.array.array), sizeof(hipArray), + hipMemcpyHostToDevice); + if (err != hipSuccess) { + delete pSurface; + hipFree(reinterpret_cast(surfObj)); + HIP_RETURN(err); + } + *pSurfObject = surfObj; + + amd::ScopedLock lock(surfaceLock); + surfaceHash[*pSurfObject] = pSurface; + + HIP_RETURN(hipSuccess); +} + + +hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { + HIP_INIT_API(NONE, surfaceObject); + + amd::ScopedLock lock(surfaceLock); + hipSurface* pSurface = surfaceHash[surfaceObject]; + if (pSurface != nullptr) { + delete pSurface; + surfaceHash.erase(surfaceObject); + HIP_RETURN(hipFree(reinterpret_cast(surfaceObject))); + } + + HIP_RETURN(hipErrorInvalidValue); +} diff --git a/vdi/hip_texture.cpp b/vdi/hip_texture.cpp new file mode 100644 index 0000000000..0d7683a2d2 --- /dev/null +++ b/vdi/hip_texture.cpp @@ -0,0 +1,808 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include "hip_internal.hpp" +#include "platform/sampler.hpp" + +namespace hip { + struct TextureObject { + uint32_t imageSRD[HIP_IMAGE_OBJECT_SIZE_DWORD]; + uint32_t samplerSRD[HIP_SAMPLER_OBJECT_SIZE_DWORD]; + amd::Image* image; + amd::Sampler* sampler; + hipResourceDesc resDesc; + }; +}; + +void getDrvChannelOrderAndType(const enum hipArray_Format Format, unsigned int NumChannels, + cl_channel_order* channelOrder, + cl_channel_type* channelType) { + switch (Format) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + *channelType = CL_UNSIGNED_INT8; + break; + case HIP_AD_FORMAT_UNSIGNED_INT16: + *channelType = CL_UNSIGNED_INT16; + break; + case HIP_AD_FORMAT_UNSIGNED_INT32: + *channelType = CL_UNSIGNED_INT32; + break; + case HIP_AD_FORMAT_SIGNED_INT8: + *channelType = CL_SIGNED_INT8; + break; + case HIP_AD_FORMAT_SIGNED_INT16: + *channelType = CL_SIGNED_INT16; + break; + case HIP_AD_FORMAT_SIGNED_INT32: + *channelType = CL_SIGNED_INT32; + break; + case HIP_AD_FORMAT_HALF: + *channelType = CL_HALF_FLOAT; + break; + case HIP_AD_FORMAT_FLOAT: + *channelType = CL_FLOAT; + break; + default: + break; + } + + if (NumChannels == 4) { + *channelOrder = CL_RGBA; + } else if (NumChannels == 2) { + *channelOrder = CL_RG; + } else if (NumChannels == 1) { + *channelOrder = CL_R; + } +} + +void setDescFromChannelType(cl_channel_type channelType, hipChannelFormatDesc* desc) { + + memset(desc, 0x00, sizeof(hipChannelFormatDesc)); + + switch (channelType) { + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + desc->f = hipChannelFormatKindSigned; + break; + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + desc->f = hipChannelFormatKindUnsigned; + break; + case CL_HALF_FLOAT: + case CL_FLOAT: + desc->f = hipChannelFormatKindFloat; + break; + default: + desc->f = hipChannelFormatKindNone; + break; + } + + switch (channelType) { + case CL_SIGNED_INT8: + case CL_UNSIGNED_INT8: + desc->x = 8; + break; + case CL_SIGNED_INT16: + case CL_UNSIGNED_INT16: + case CL_HALF_FLOAT: + desc->x = 16; + break; + case CL_SIGNED_INT32: + case CL_UNSIGNED_INT32: + case CL_FLOAT: + desc->x = 32; + break; + default: + desc->x = 0; + break; + } +} + +void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureReadMode readMode, + cl_channel_order* channelOrder, cl_channel_type* channelType) { + if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w != 0) { + *channelOrder = CL_RGBA; + } else if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w == 0) { + *channelOrder = CL_RGB; + } else if (desc.x != 0 && desc.y != 0 && desc.z == 0 && desc.w == 0) { + *channelOrder = CL_RG; + } else if (desc.x != 0 && desc.y == 0 && desc.z == 0 && desc.w == 0) { + *channelOrder = CL_R; + } else { + } + + switch (desc.f) { + case hipChannelFormatKindUnsigned: + switch (desc.x) { + case 32: + *channelType = CL_UNSIGNED_INT32; + break; + case 16: + *channelType = readMode == hipReadModeNormalizedFloat + ? CL_UNORM_INT16 + : CL_UNSIGNED_INT16; + break; + case 8: + *channelType = readMode == hipReadModeNormalizedFloat + ? CL_UNORM_INT8 + : CL_UNSIGNED_INT8; + break; + default: + *channelType = CL_UNSIGNED_INT32; + } + break; + case hipChannelFormatKindSigned: + switch (desc.x) { + case 32: + *channelType = CL_SIGNED_INT32; + break; + case 16: + *channelType = readMode == hipReadModeNormalizedFloat + ? CL_SNORM_INT16 + : CL_SIGNED_INT16; + break; + case 8: + *channelType = readMode == hipReadModeNormalizedFloat + ? CL_SNORM_INT8 + : CL_SIGNED_INT8; + break; + default: + *channelType = CL_SIGNED_INT32; + } + break; + case hipChannelFormatKindFloat: + switch (desc.x) { + case 32: + *channelType = CL_FLOAT; + break; + case 16: + *channelType = CL_HALF_FLOAT; + break; + case 8: + break; + default: + *channelType = CL_FLOAT; + } + break; + case hipChannelFormatKindNone: + default: + break; + } +} + +void getByteSizeFromChannelFormatKind(enum hipChannelFormatKind channelFormatKind, size_t* byteSize) { + switch (channelFormatKind) + { + case hipChannelFormatKindSigned: + *byteSize = sizeof(int); + break; + case hipChannelFormatKindUnsigned: + *byteSize = sizeof(unsigned int); + break; + case hipChannelFormatKindFloat: + *byteSize = sizeof(float); + break; + case hipChannelFormatKindNone: + *byteSize = sizeof(size_t); + break; + default: + *byteSize = 1; + break; + } +} + +amd::Sampler* fillSamplerDescriptor(enum hipTextureAddressMode addressMode, + enum hipTextureFilterMode filterMode, int normalizedCoords) { +#ifndef CL_FILTER_NONE +#define CL_FILTER_NONE 0x1142 +#endif + uint32_t filter_mode = CL_FILTER_NONE; + switch (filterMode) { + case hipFilterModePoint: + filter_mode = CL_FILTER_NEAREST; + break; + case hipFilterModeLinear: + filter_mode = CL_FILTER_LINEAR; + break; + } + + uint32_t address_mode = CL_ADDRESS_NONE; + switch (addressMode) { + case hipAddressModeWrap: + address_mode = CL_ADDRESS_REPEAT; + break; + case hipAddressModeClamp: + address_mode = CL_ADDRESS_CLAMP; + break; + case hipAddressModeMirror: + address_mode = CL_ADDRESS_MIRRORED_REPEAT; + break; + case hipAddressModeBorder: + address_mode = CL_ADDRESS_CLAMP_TO_EDGE; + break; + } + amd::Sampler* sampler = new amd::Sampler(*hip::getCurrentContext(), + normalizedCoords == CL_TRUE, + address_mode, filter_mode, CL_FILTER_NONE, 0.f, CL_MAXFLOAT); + if (sampler == nullptr) { + return nullptr; + } + if (!sampler->create()) { + delete sampler; + return nullptr; + } + return sampler; +} + +hip::TextureObject* ihipCreateTextureObject(const hipResourceDesc& resDesc, amd::Image& image, amd::Sampler& sampler) { + hip::TextureObject* texture; + ihipMalloc(reinterpret_cast(&texture), sizeof(hip::TextureObject), CL_MEM_SVM_FINE_GRAIN_BUFFER); + + if (texture == nullptr) { + return nullptr; + } + + device::Memory* imageMem = image.getDeviceMemory(*hip::getCurrentContext()->devices()[0]); + memcpy(texture->imageSRD, imageMem->cpuSrd(), sizeof(uint32_t)*HIP_IMAGE_OBJECT_SIZE_DWORD); + texture->image = ℑ + + device::Sampler* devSampler = sampler.getDeviceSampler(*hip::getCurrentContext()->devices()[0]); + memcpy(texture->samplerSRD, devSampler->hwState(), sizeof(uint32_t)*HIP_SAMPLER_OBJECT_SIZE_DWORD); + texture->sampler = &sampler; + + memcpy(&texture->resDesc, &resDesc, sizeof(hipResourceDesc)); + + return texture; +} + +hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) { + HIP_INIT_API(NONE, pTexObject, pResDesc, pTexDesc, pResViewDesc); + + amd::Device* device = hip::getCurrentContext()->devices()[0]; + + if (!device->info().imageSupport_) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Image* image = nullptr; + + cl_image_format image_format; + getChannelOrderAndType(pResDesc->res.pitch2D.desc, pTexDesc->readMode, + &image_format.image_channel_order, &image_format.image_channel_data_type); + + const amd::Image::Format imageFormat(image_format); + + amd::Memory* memory = nullptr; + size_t offset = 0; + cl_mem_object_type clType; + + switch (pResDesc->resType) { + case hipResourceTypeArray: + { + memory = getMemoryObject(pResDesc->res.array.array->data, offset); + + getChannelOrderAndType(pResDesc->res.array.array->desc, pTexDesc->readMode, + &image_format.image_channel_order, &image_format.image_channel_data_type); + const amd::Image::Format imageFormat(image_format); + switch (pResDesc->res.array.array->type) { + case hipArrayLayered: + case hipArrayCubemap: + assert(0); + break; + case hipArraySurfaceLoadStore: + case hipArrayTextureGather: + case hipArrayDefault: + default: + switch(pResDesc->res.array.array->textureType) { + case hipTextureType3D: + clType = CL_MEM_OBJECT_IMAGE3D; + image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + clType, memory->getMemFlags(), imageFormat, + pResDesc->res.array.array->width, pResDesc->res.array.array->height, + pResDesc->res.array.array->depth, 0, 0); + break; + case hipTextureType2D: + clType = CL_MEM_OBJECT_IMAGE2D; + image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + clType, memory->getMemFlags(), imageFormat, + pResDesc->res.array.array->width, pResDesc->res.array.array->height, 1, 0, 0); + break; + default: + break; + } + break; + } + } + break; + case hipResourceTypeMipmappedArray: + assert(0); + break; + case hipResourceTypeLinear: + { + assert(pResViewDesc == nullptr); + memory = getMemoryObject(pResDesc->res.linear.devPtr, offset); + + getChannelOrderAndType(pResDesc->res.linear.desc, pTexDesc->readMode, + &image_format.image_channel_order, &image_format.image_channel_data_type); + const amd::Image::Format imageFormat(image_format); + + image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat, + pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize(), 1, 1, + pResDesc->res.linear.sizeInBytes, 0); + } + break; + case hipResourceTypePitch2D: + assert(pResViewDesc == nullptr); + memory = getMemoryObject(pResDesc->res.pitch2D.devPtr, offset); + + image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat, + pResDesc->res.pitch2D.width, pResDesc->res.pitch2D.height, 1, + pResDesc->res.pitch2D.pitchInBytes, 0); + break; + default: HIP_RETURN(hipErrorInvalidValue); + } + + if (!image->create()) { + delete image; + HIP_RETURN(hipErrorOutOfMemory); + } + + amd::Sampler* sampler = fillSamplerDescriptor(pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords); + + *pTexObject = reinterpret_cast(ihipCreateTextureObject(*pResDesc, *image, *sampler)); + + HIP_RETURN(hipSuccess); +} + +void ihipDestroyTextureObject(hip::TextureObject* texture) { + texture->image->release(); + texture->sampler->release(); + + hipFree(texture); +} + +hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { + HIP_INIT_API(NONE, textureObject); + + hip::TextureObject* texture = reinterpret_cast(textureObject); + + ihipDestroyTextureObject(texture); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + hipTextureObject_t textureObject) { + HIP_INIT_API(NONE, pResDesc, textureObject); + + hip::TextureObject* texture = reinterpret_cast(textureObject); + + if (pResDesc != nullptr && texture != nullptr) { + memcpy(pResDesc, &(texture->resDesc), sizeof(hipResourceDesc)); + } + + HIP_RETURN(hipErrorInvalidValue); +} + +hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, + hipTextureObject_t textureObject) { + HIP_INIT_API(NONE, pResViewDesc, textureObject); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, + hipTextureObject_t textureObject) { + HIP_INIT_API(NONE, pTexDesc, textureObject); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t ihipBindTexture(cl_mem_object_type type, + size_t* offset, textureReference* tex, const void* devPtr, + const hipChannelFormatDesc& desc, size_t width, size_t height, + size_t pitch) { + if (tex == nullptr) { + return hipErrorInvalidImage; + } + if (hip::getCurrentContext()) { + cl_image_format image_format; + size_t byteSize; + size_t rowPitch = 0; + size_t depth = 0; + size_t slicePitch = 0; + + getChannelOrderAndType(desc, hipReadModeElementType, + &image_format.image_channel_order, &image_format.image_channel_data_type); + getByteSizeFromChannelFormatKind(desc.f, &byteSize); + const amd::Image::Format imageFormat(image_format); + amd::Memory* memory = getMemoryObject(devPtr, *offset); + + switch (type) { + case CL_MEM_OBJECT_IMAGE3D: + rowPitch = width * byteSize; + depth = pitch; + slicePitch = rowPitch * height; + break; + case CL_MEM_OBJECT_IMAGE2D: + default: + rowPitch = pitch; + depth = 1; + slicePitch = 0; + break; + } + + amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + type, memory->getMemFlags(), imageFormat, width, height, depth, rowPitch, slicePitch); + if (!image->create()) { + delete image; + return hipErrorOutOfMemory; + } + + *offset = 0; + if (tex->textureObject) { + ihipDestroyTextureObject(reinterpret_cast(tex->textureObject)); + } + amd::Sampler* sampler = fillSamplerDescriptor(tex->addressMode[0], tex->filterMode, tex->normalized); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(hipResourceDesc)); + switch (type) { + case CL_MEM_OBJECT_IMAGE1D: + resDesc.resType = hipResourceTypeLinear; + resDesc.res.linear.devPtr = const_cast(devPtr); + resDesc.res.linear.desc = desc; + resDesc.res.linear.sizeInBytes = image->getSize(); + break; + case CL_MEM_OBJECT_IMAGE2D: + resDesc.resType = hipResourceTypePitch2D; + resDesc.res.pitch2D.devPtr = const_cast(devPtr); + resDesc.res.pitch2D.desc = desc; + resDesc.res.pitch2D.width = width; + resDesc.res.pitch2D.height = height; + resDesc.res.pitch2D.pitchInBytes = pitch; + break; + case CL_MEM_OBJECT_IMAGE3D: + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = (hipArray*)malloc(sizeof(hipArray)); + resDesc.res.array.array->desc = desc; + resDesc.res.array.array->width = width; + resDesc.res.array.array->height = height; + resDesc.res.array.array->depth = depth; + resDesc.res.array.array->Format = tex->format; + resDesc.res.array.array->NumChannels = tex->numChannels; + resDesc.res.array.array->isDrv = false; + resDesc.res.array.array->textureType = hipTextureType3D; + resDesc.res.array.array->data = const_cast(devPtr); + break; + default: + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = nullptr; + break; + } + + tex->textureObject = reinterpret_cast(ihipCreateTextureObject(resDesc, *image, *sampler)); + if(type == CL_MEM_OBJECT_IMAGE3D) { + free(resDesc.res.array.array); + } + memset(&resDesc, 0, sizeof(hipResourceDesc)); + return hipSuccess; + } + return hipErrorInvalidValue; +} + +hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t size) { + HIP_INIT_API(NONE, offset, tex, devPtr, desc, size); + + if (desc == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + cl_image_format image_format; + getChannelOrderAndType(*desc, hipReadModeElementType, + &image_format.image_channel_order, &image_format.image_channel_data_type); + const amd::Image::Format imageFormat(image_format); + + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size / imageFormat.getElementSize(), 1, size)); +} + +hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t width, size_t height, + size_t pitch) { + HIP_INIT_API(NONE, offset, tex, devPtr, desc, width, height, pitch); + + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, *desc, width, height, pitch)); +} + +hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, + const hipChannelFormatDesc* desc) { + HIP_INIT_API(NONE, tex, array, desc); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t ihipBindTextureImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, size_t* offset, + const void* devPtr, const struct hipChannelFormatDesc* desc, + size_t size, textureReference* tex) { + HIP_INIT_API(NONE, dim, readMode, offset, devPtr, size, tex); + + assert(1 == dim); + + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size, 1, 0)); +} + +hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, + hipArray_const_t array, + const struct hipChannelFormatDesc& desc, + textureReference* tex) { + HIP_INIT_API(NONE, dim, readMode, &desc, array, tex); + + cl_mem_object_type clType; + size_t offset = 0; + + switch (dim) { + case 1: + clType = CL_MEM_OBJECT_IMAGE1D; + break; + case 2: + clType = CL_MEM_OBJECT_IMAGE2D; + break; + case 3: + case hipTextureType2DLayered: + clType = CL_MEM_OBJECT_IMAGE3D; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, desc, array->width, + array->height, array->depth)); +} + +hipError_t hipBindTextureToMipmappedArray(textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc) { + HIP_INIT_API(NONE, tex, mipmappedArray, desc); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) { + + ihipDestroyTextureObject(reinterpret_cast(textureObject)); + + return hipSuccess; +} + +hipError_t hipUnbindTexture(const textureReference* tex) { + HIP_INIT_API(NONE, tex); + + ihipDestroyTextureObject(reinterpret_cast(tex->textureObject)); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) { + HIP_INIT_API(NONE, desc, array); + + if (desc != nullptr) { + *desc = array->desc; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex) { + HIP_INIT_API(NONE, offset, tex); + + if ((offset == nullptr) || (tex == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *offset = 0; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureReference(const textureReference** tex, const void* symbol) { + HIP_INIT_API(NONE, tex, symbol); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents) { + HIP_INIT_API(NONE, tex, fmt, NumPackedComponents); + + if (tex == nullptr) { + HIP_RETURN(hipErrorInvalidImage); + } + + tex->format = fmt; + tex->numChannels = NumPackedComponents; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags) { + HIP_INIT_API(NONE, tex, flags); + + if (tex == nullptr) { + HIP_RETURN(hipErrorInvalidImage); + } + + tex->normalized = flags; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm) { + HIP_INIT_API(NONE, tex, fm); + + if (tex == nullptr) { + HIP_RETURN(hipErrorInvalidImage); + } + + tex->filterMode = fm; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* am, textureReference tex, int dim) { + HIP_INIT_API(NONE, am, &tex, dim); + + if ((am == nullptr) || (dim >= 3)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *am = tex.addressMode[dim]; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am) { + HIP_INIT_API(NONE, tex, dim, am); + + if (tex == nullptr) { + HIP_RETURN(hipErrorInvalidImage); + } + + tex->addressMode[dim] = am; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex) { + HIP_INIT_API(NONE, array, &tex); + + hip::TextureObject* texture = nullptr; + + if ((array == nullptr) || (*array == nullptr)) { + HIP_RETURN(hipErrorInvalidImage); + } + + texture = reinterpret_cast(tex.textureObject); + if(hipResourceTypeArray != texture->resDesc.resType){ + HIP_RETURN(hipErrorInvalidValue); + } + + if (texture->resDesc.res.array.array == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + **array = *(texture->resDesc.res.array.array); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags) { + HIP_INIT_API(NONE, tex, array, flags); + + size_t offset = 0; + cl_mem_object_type clType; + + if ((tex == nullptr) || (array == nullptr)) { + HIP_RETURN(hipErrorInvalidImage); + } + + switch(array->textureType) { + case hipTextureType3D: + clType = CL_MEM_OBJECT_IMAGE3D; + break; + case hipTextureType2D: + clType = CL_MEM_OBJECT_IMAGE2D; + break; + case hipTextureType1D: + clType = CL_MEM_OBJECT_IMAGE1D; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, array->desc, array->width, + array->height, array->depth)); +} + +hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex) { + HIP_INIT_API(NONE, dev_ptr, &tex); + + hip::TextureObject* texture = nullptr; + device::Memory* dev_mem = nullptr; + + texture = reinterpret_cast(tex.textureObject); + if ((texture == nullptr) || (texture->image == nullptr)) { + HIP_RETURN(hipErrorInvalidImage); + } + + dev_mem = texture->image->getDeviceMemory(*hip::getCurrentContext()->devices()[0]); + if (dev_mem == nullptr) { + HIP_RETURN(hipErrorInvalidImage); + } + + *dev_ptr = reinterpret_cast(dev_mem->virtualAddress()); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr, + size_t size) { + HIP_INIT_API(NONE, offset, tex, devPtr, size); + + if (tex == nullptr) { + HIP_RETURN(hipErrorInvalidImage); + } + + cl_image_format image_format; + getDrvChannelOrderAndType(tex->format, tex->numChannels, + &image_format.image_channel_order, &image_format.image_channel_data_type); + const amd::Image::Format imageFormat(image_format); + + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, tex->channelDesc, size / imageFormat.getElementSize(), 1, size)); +} + +hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t devPtr, size_t pitch) { + HIP_INIT_API(NONE, tex, desc, devPtr, pitch); + + if (desc == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + size_t offset; + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, tex->channelDesc, desc->Width, desc->Height, pitch)); +} diff --git a/vdi/hiprtc_internal.hpp b/vdi/hiprtc_internal.hpp new file mode 100644 index 0000000000..5f1838ffac --- /dev/null +++ b/vdi/hiprtc_internal.hpp @@ -0,0 +1,44 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIPRTC_SRC_HIP_INTERNAL_H +#define HIPRTC_SRC_HIP_INTERNAL_H + +#include "hip_internal.hpp" + +// This macro should be called at the beginning of every HIP RTC API. +#define HIPRTC_INIT_API(...) \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ + amd::Thread* thread = amd::Thread::current(); \ + if (!VDI_CHECK_THREAD(thread)) { \ + HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \ + } \ + HIP_INIT(); + +#define HIPRTC_RETURN(ret) \ + hiprtc::g_lastRtcError = ret; \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, \ + hiprtcGetErrorString(hiprtc::g_lastRtcError)); \ + return hiprtc::g_lastRtcError; + + +#endif // HIPRTC_SRC_HIP_INTERNAL_H diff --git a/vdi/trace_helper.h b/vdi/trace_helper.h new file mode 100644 index 0000000000..5beb3f34e4 --- /dev/null +++ b/vdi/trace_helper.h @@ -0,0 +1,247 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include +//--- +// Helper functions to convert HIP function arguments into strings. +// Handles POD data types as well as enumerations (ie hipMemcpyKind). +// The implementation uses C++11 variadic templates and template specialization. +// The hipMemcpyKind example below is a good example that shows how to implement conversion for a +// new HSA type. + + +// Handy macro to convert an enumeration to a stringified version of same: +#define CASE_STR(x) \ + case x: \ + return #x; + +inline const char* ihipErrorString(hipError_t hip_error) { + switch (hip_error) { + CASE_STR(hipSuccess); + CASE_STR(hipErrorOutOfMemory); + CASE_STR(hipErrorNotInitialized); + CASE_STR(hipErrorDeinitialized); + CASE_STR(hipErrorProfilerDisabled); + CASE_STR(hipErrorProfilerNotInitialized); + CASE_STR(hipErrorProfilerAlreadyStarted); + CASE_STR(hipErrorProfilerAlreadyStopped); + CASE_STR(hipErrorInvalidImage); + CASE_STR(hipErrorInvalidContext); + CASE_STR(hipErrorContextAlreadyCurrent); + CASE_STR(hipErrorMapFailed); + CASE_STR(hipErrorUnmapFailed); + CASE_STR(hipErrorArrayIsMapped); + CASE_STR(hipErrorAlreadyMapped); + CASE_STR(hipErrorNoBinaryForGpu); + CASE_STR(hipErrorAlreadyAcquired); + CASE_STR(hipErrorNotMapped); + CASE_STR(hipErrorNotMappedAsArray); + CASE_STR(hipErrorNotMappedAsPointer); + CASE_STR(hipErrorECCNotCorrectable); + CASE_STR(hipErrorUnsupportedLimit); + CASE_STR(hipErrorContextAlreadyInUse); + CASE_STR(hipErrorPeerAccessUnsupported); + CASE_STR(hipErrorInvalidKernelFile); + CASE_STR(hipErrorInvalidGraphicsContext); + CASE_STR(hipErrorInvalidSource); + CASE_STR(hipErrorFileNotFound); + CASE_STR(hipErrorSharedObjectSymbolNotFound); + CASE_STR(hipErrorSharedObjectInitFailed); + CASE_STR(hipErrorOperatingSystem); + CASE_STR(hipErrorSetOnActiveProcess); + CASE_STR(hipErrorInvalidHandle); + CASE_STR(hipErrorNotFound); + CASE_STR(hipErrorIllegalAddress); + CASE_STR(hipErrorMissingConfiguration); + CASE_STR(hipErrorLaunchFailure); + CASE_STR(hipErrorPriorLaunchFailure); + CASE_STR(hipErrorLaunchTimeOut); + CASE_STR(hipErrorLaunchOutOfResources); + CASE_STR(hipErrorInvalidDeviceFunction); + CASE_STR(hipErrorInvalidConfiguration); + CASE_STR(hipErrorInvalidDevice); + CASE_STR(hipErrorInvalidValue); + CASE_STR(hipErrorInvalidDevicePointer); + CASE_STR(hipErrorInvalidMemcpyDirection); + CASE_STR(hipErrorUnknown); + CASE_STR(hipErrorNotReady); + CASE_STR(hipErrorNoDevice); + CASE_STR(hipErrorPeerAccessAlreadyEnabled); + CASE_STR(hipErrorPeerAccessNotEnabled); + CASE_STR(hipErrorRuntimeMemory); + CASE_STR(hipErrorRuntimeOther); + CASE_STR(hipErrorHostMemoryAlreadyRegistered); + CASE_STR(hipErrorHostMemoryNotRegistered); + CASE_STR(hipErrorTbd); + default: + return "hipErrorUnknown"; + }; +}; + +// Building block functions: +template +inline std::string ToHexString(T v) { + std::ostringstream ss; + ss << "0x" << std::hex << v; + return ss.str(); +}; + +template +inline std::string ToString(T* v) { + std::ostringstream ss; + if (v == NULL) { + ss << "char array:"; + } else { + ss << v; + } + return ss.str(); +}; + +template +inline std::string ToString(T** v) { + std::ostringstream ss; + if (v == NULL) { + ss << "char array:"; + } else { + ss << v; + } + return ss.str(); +}; + +//--- +// Template overloads for ToString to handle specific types + +// This is the default which works for most types: +template +inline std::string ToString(T v) { + std::ostringstream ss; + ss << v; + return ss.str(); +}; + +template <> +inline std::string ToString(hipFunction_t v) { + std::ostringstream ss; + ss << "0x" << std::hex << static_cast(v); + return ss.str(); +}; + +// hipEvent_t specialization. TODO - maybe add an event ID for debug? +template <> +inline std::string ToString(hipEvent_t v) { + std::ostringstream ss; + ss << "event:" << std::hex << static_cast(v); + return ss.str(); +}; +// hipStream_t +template <> +inline std::string ToString(hipStream_t v) { + std::ostringstream ss; + if (v == NULL) { + ss << "stream:"; + } else { + ss << "stream:" << std::hex << static_cast(v); + } + + return ss.str(); +}; + +// hipCtx_t +template <> +inline std::string ToString(hipCtx_t v) { + std::ostringstream ss; + if (v == NULL) { + ss << "context:"; + } else { + ss << "context:" << std::hex << static_cast(v); + } + + return ss.str(); +}; + +// hipPitchedPtr +template <> +inline std::string ToString(hipPitchedPtr v) { + std::ostringstream ss; + ss << "pitchPtr:" << std::hex << static_cast(v.ptr); + return ss.str(); +}; + +// hipMemcpyKind specialization +template <> +inline std::string ToString(hipMemcpyKind v) { + switch (v) { + CASE_STR(hipMemcpyHostToHost); + CASE_STR(hipMemcpyHostToDevice); + CASE_STR(hipMemcpyDeviceToHost); + CASE_STR(hipMemcpyDeviceToDevice); + CASE_STR(hipMemcpyDefault); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipFuncCache_t v) { + switch (v) { + CASE_STR(hipFuncCachePreferNone); + CASE_STR(hipFuncCachePreferShared); + CASE_STR(hipFuncCachePreferL1); + CASE_STR(hipFuncCachePreferEqual); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipSharedMemConfig v) { + switch (v) { + CASE_STR(hipSharedMemBankSizeDefault); + CASE_STR(hipSharedMemBankSizeFourByte); + CASE_STR(hipSharedMemBankSizeEightByte); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipError_t v) { + return ihipErrorString(v); +}; + +// Catch empty arguments case +inline std::string ToString() { return (""); } + + +//--- +// C++11 variadic template - peels off first argument, converts to string, and calls itself again to +// peel the next arg. Strings are automatically separated by comma+space. +template +inline std::string ToString(T first, Args... args) { + return ToString(first) + ", " + ToString(args...); +} +