From 2ed585ec08592975c273f138952a04f38aeebf88 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 13 Feb 2018 19:45:32 -0500 Subject: [PATCH 001/282] P4 to Git Change 1515837 by skudchad@skudchad_rocm on 2018/02/13 19:37:06 SWDEV-145570 - Initial hip api checkin Affected files ... ... //depot/stg/opencl/drivers/opencl/api/Makefile#10 edit ... //depot/stg/opencl/drivers/opencl/api/hip/Makefile#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/build/Makefile#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/build/Makefile.hip#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#1 add --- api/hip/hip_memory.cpp | 69 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 api/hip/hip_memory.cpp diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp new file mode 100644 index 0000000000..41ec466623 --- /dev/null +++ b/api/hip/hip_memory.cpp @@ -0,0 +1,69 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipMalloc(void** ptr, size_t sizeBytes) +{ + HIP_INIT_API(ptr, sizeBytes); + + amd::Context* context = as_amd(g_currentCtx); + + if (sizeBytes == 0) { + *ptr = nullptr; + return hipSuccess; + } + else if (!is_valid(context) || !ptr) { + return hipErrorInvalidValue; + } + + auto deviceHandle = as_amd(g_deviceArray[0]); + if ((deviceHandle->info().maxMemAllocSize_ < size)) { + return hipErrorOutOfMemory; + } + + amd::Memory* mem = new (*context) amd::Buffer(*context, 0, sizeBytes); + if (!mem) { + return hipErrorOutOfMemory; + } + + if (!mem->create(nullptr)) { + return hipErrorMemoryAllocation; + } + + *ptr = reinterpret_cast(as_cl(mem)); + + return hipSuccess; +} + +hipError_t hipFree(void* ptr) +{ + if (!is_valid(reinterpret_cast(ptr))) { + return hipErrorInvalidValue; + } + as_amd(reinterpret_cast(ptr))->release(); + return hipSuccess; +} + + From 37faef47ba50518ce2a972ddd619bc67440c8a56 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 14 Feb 2018 15:08:01 -0500 Subject: [PATCH 002/282] P4 to Git Change 1516173 by skudchad@skudchad_rocm on 2018/02/14 15:02:45 SWDEV-145570 - Initial hip api checkin Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#2 edit --- api/hip/hip_memory.cpp | 53 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 41ec466623..6edbe068fe 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -66,4 +66,57 @@ hipError_t hipFree(void* ptr) return hipSuccess; } +hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) +{ + HIP_INIT_API(dst, src, sizeBytes, kind); + + amd::Context* context = as_amd(g_currentCtx); + amd::Device* device = context->devices()[0]; + + // FIXME : Do we create a queue here or create at init and just reuse + amd::HostQueue* queue = new amd::HostQueue(*context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } + + amd::Buffer* srcBuffer = as_amd(reinterpret_cast(const_cast(src)))->asBuffer(); + amd::Buffer* dstBuffer = as_amd(reinterpret_cast(const_cast(dst)))->asBuffer(); + + amd::Command* command; + amd::Command::EventWaitList waitList; + + switch (kind) { + case hipMemcpyDeviceToHost: + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, + srcBuffer, 0, sizeBytes, dst); + break; + case hipMemcpyHostToDevice: + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, + dstBuffer, 0, sizeBytes, src); + break; + default: + assert(!"Shouldn't reach here"); + break; + } + if (!command) { + return hipErrorOutOfMemory; + } + + // Make sure we have memory for the command execution + if (CL_SUCCESS != command->validateMemory()) { + delete command; + return hipErrorMemoryAllocation; + } + + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + queue->release(); + + return hipSuccess; +} From 4fd1eb28eb1c3d8ccca01d2d1355ddd25fb01e9a Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 27 Feb 2018 18:38:56 -0500 Subject: [PATCH 
003/282] P4 to Git Change 1520507 by skudchad@skudchad_test2_win_opencl on 2018/02/27 18:32:09 SWDEV-145570 - Populate some HIP Device Management functions. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14310/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#1 add --- api/hip/hip_device.cpp | 377 +++++++++++++++++++++++++++++++++++++++ api/hip/hip_internal.hpp | 43 +++++ 2 files changed, 420 insertions(+) create mode 100644 api/hip/hip_device.cpp create mode 100644 api/hip/hip_internal.hpp diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp new file mode 100644 index 0000000000..557ee56643 --- /dev/null +++ b/api/hip/hip_device.cpp @@ -0,0 +1,377 @@ +/* +Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "hip_internal.hpp" + +cl_device_id* g_deviceArray = NULL; +unsigned g_deviceCnt = 0; + +hipError_t hipGetDevice(int *deviceId) { + + HIP_INIT_API(deviceId); + + if (deviceId != NULL) { + // this needs to return default device. For now return 0 always + *deviceId = 0; + } else { + return hipErrorInvalidValue; + } + + return hipSuccess; +} + +hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) +{ + HIP_INIT_API(device, deviceId); + + if (device != nullptr) { + *device = deviceId; + } else { + return hipErrorInvalidValue; + } + + return hipSuccess; +}; + +hipError_t hipDeviceCount(int* count) { + + HIP_INIT_API(count); + + if (count == NULL) { + return hipErrorInvalidValue; + } + + // Get all available devices + if (!amd::Device::getDeviceIDs(CL_DEVICE_TYPE_GPU, 0, NULL, count, false)) { + return hipErrorNoDevice; + } + + return hipSuccess; +} + +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { + + HIP_INIT_API(pi, attr, device); + + if(pi == NULL) { + return hipErrorInvalidValue; + } + + auto deviceHandle = as_amd(g_deviceArray[hipDevice]); + + if (deviceHandle == NULL) { + return hipErrorInvalidDevice; + } + + hipDeviceProp_t *prop = deviceHandle->_props; + + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + *pi = prop->maxThreadsPerBlock; + break; + case hipDeviceAttributeMaxBlockDimX: + *pi = prop->maxThreadsDim[0]; + break; + case hipDeviceAttributeMaxBlockDimY: + *pi = prop->maxThreadsDim[1]; + break; + case hipDeviceAttributeMaxBlockDimZ: + *pi = prop->maxThreadsDim[2]; + break; + case hipDeviceAttributeMaxGridDimX: + *pi = prop->maxGridSize[0]; + break; + case hipDeviceAttributeMaxGridDimY: + *pi = prop->maxGridSize[1]; + break; + case hipDeviceAttributeMaxGridDimZ: + *pi = prop->maxGridSize[2]; + break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + *pi = prop->sharedMemPerBlock; + break; + case hipDeviceAttributeTotalConstantMemory: + *pi = prop->totalConstMem; + break; + 
case hipDeviceAttributeWarpSize: + *pi = prop->warpSize; + break; + case hipDeviceAttributeMaxRegistersPerBlock: + *pi = prop->regsPerBlock; + break; + case hipDeviceAttributeClockRate: + *pi = prop->clockRate; + break; + case hipDeviceAttributeMemoryClockRate: + *pi = prop->memoryClockRate; + break; + case hipDeviceAttributeMemoryBusWidth: + *pi = prop->memoryBusWidth; + break; + case hipDeviceAttributeMultiprocessorCount: + *pi = prop->multiProcessorCount; + break; + case hipDeviceAttributeComputeMode: + *pi = prop->computeMode; + break; + case hipDeviceAttributeL2CacheSize: + *pi = prop->l2CacheSize; + break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + *pi = prop->maxThreadsPerMultiProcessor; + break; + case hipDeviceAttributeComputeCapabilityMajor: + *pi = prop->major; + break; + case hipDeviceAttributeComputeCapabilityMinor: + *pi = prop->minor; + break; + case hipDeviceAttributePciBusId: + *pi = prop->pciBusID; + break; + case hipDeviceAttributeConcurrentKernels: + *pi = prop->concurrentKernels; + break; + case hipDeviceAttributePciDeviceId: + *pi = prop->pciDeviceID; + break; + case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: + *pi = prop->maxSharedMemoryPerMultiProcessor; + break; + case hipDeviceAttributeIsMultiGpuBoard: + *pi = prop->isMultiGpuBoard; + break; + default: + return hipErrorInvalidValue; + } + + return hipSuccess; +} + +hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) { + + HIP_INIT_API(props, device); + + if (props == NULL) { + return hipErrorInvalidValue; + } + + auto deviceHandle = as_amd(g_deviceArray[device]); + if (deviceHandle == NULL) { + return hipErrorInvalidDevice; + } + + hipDeviceProp_t deviceProps = {0}; + + const auto& info = deviceHandle->info(); + ::strncpy(deviceProps.name, info.boardName_, 128); + deviceProps.totalGlobalMem = info.globalMemSize_; + deviceProps.sharedMemPerBlock = info.localMemSizePerCU_; + deviceProps.regsPerBlock = info.availableSGPRs_; + deviceProps.warpSize = 
info.wavefrontWidth_; + deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_; + deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0]; + deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1]; + deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2]; + deviceProps.maxGridSize[0] = UINT32_MAX; + deviceProps.maxGridSize[1] = UINT32_MAX; + deviceProps.maxGridSize[2] = UINT32_MAX; + deviceProps.clockRate = info.maxEngineClockFrequency_; + deviceProps.memoryClockRate = info.maxMemoryClockFrequency_; + deviceProps.memoryBusWidth = info.globalMemChannels_ * 32; + deviceProps.totalConstMem = info.maxConstantBufferSize_; + deviceProps.major = info.gfxipVersion_ / 100; + deviceProps.minor = info.gfxipVersion_ % 100; + deviceProps.multiProcessorCount = info.maxComputeUnits_; + deviceProps.l2CacheSize = info.l2CacheSize_; + deviceProps.maxThreadsPerMultiProcessor = info.simdPerCU_; + deviceProps.computeMode = 0; + deviceProps.clockInstructionRate = info.timeStampFrequency_; + deviceProps.arch.hasGlobalInt32Atomics = 1; + deviceProps.arch.hasGlobalFloatAtomicExch = 1; + deviceProps.arch.hasSharedInt32Atomics = 1; + deviceProps.arch.hasSharedFloatAtomicExch = 1; + deviceProps.arch.hasFloatAtomicAdd = 0; + deviceProps.arch.hasGlobalInt64Atomics = 1; + deviceProps.arch.hasSharedInt64Atomics = 1; + deviceProps.arch.hasDoubles = 1; + deviceProps.arch.hasWarpVote = 0; + deviceProps.arch.hasWarpBallot = 0; + deviceProps.arch.hasWarpShuffle = 0; + deviceProps.arch.hasFunnelShift = 0; + deviceProps.arch.hasThreadFenceSystem = 1; + deviceProps.arch.hasSyncThreadsExt = 0; + deviceProps.arch.hasSurfaceFuncs = 0; + deviceProps.arch.has3dGrid = 1; + deviceProps.arch.hasDynamicParallelism = 0; + deviceProps.concurrentKernels = 1; + deviceProps.pciDomainID = info.deviceTopology_.function; + deviceProps.pciBusID = info.deviceTopology_.bus; + deviceProps.pciDeviceID = info.deviceTopology_.device; + deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; + 
deviceProps.isMultiGpuBoard = info.; + deviceProps.canMapHostMemory = 1; + deviceProps.gcnArch = info.gfxipVersion_; + + *props = deviceProps; + return hipSuccess; +} + +hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) { + + HIP_INIT_API(cacheConfig); + + // No way to set cache config yet. + + return hipSuccess; +} + +hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig) { + HIP_INIT_API(cacheConfig); + + if(cacheConfig == NULL) { + return hipErrorInvalidValue; + } + + *cacheConfig = 0; + + return hipSuccess; +} + +hipError_t hipGetDeviceProperties(hipDeviceProp_t* properties, int device) { + + HIP_INIT_API(properties, device); + if ((properties == NULL) || (device < 0) || (device >= g_deviceCnt)) { + return hipErrorInvalidDevice; + } + + auto * deviceHandle = as_amd(g_deviceArray[device]); + if (deviceHandle != NULL) { + *properties = deviceHandle->_props; + return hipSuccess; + } + + return hipErrorInvalidDevice; +} + +hipError_t hipSetDeviceFlags(unsigned int flags) { + + HIP_INIT_API(flags); + + assert(0 && "Unimplemented") + + return hipSuccess; +}; + +hipError_t hipDeviceGetLimit (size_t *pValue, hipLimit_t limit) { + + HIP_INIT_API(pValue, limit); + + assert(0 && "Unimplemented") + + return hipSuccess; +} + +hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) { + + HIP_INIT_API(cacheConfig); + + assert(0 && "Not supported") + + return hipSuccess; +} + +hipError_t hipDeviceSetSharedMemConfig (hipSharedMemConfig config) { + + HIP_INIT_API(config); + + assert(0 && "Not Supported") + + return hipSuccess; +} + +hipError_t hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig) { + + HIP_INIT_API(pConfig); + + assert(0 && "Not supported") + + return hipSuccess; +} + + +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { + + HIP_INIT_API(device, properties); + + assert(0 && "Unimplemented") + + return hipSuccess; +} + + +hipError_t hipDeviceGetByPCIBusId (int* device, const char* 
pciBusId) { + + HIP_INIT_API(device,pciBusId); + + assert(0 && "Unimplemented") + + return hipSuccess; +} + + +hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) { + + HIP_INIT_API(bytes, device); + + assert(0 && "Unimplemented") + + return hipSuccess; +} + +hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { + + HIP_INIT_API(major,minor, device); + assert(0 && "Unimplemented") + + return hipSuccess; +} + +hipError_t hipDeviceGetName(char *name,int len, hipDevice_t device) { + + HIP_INIT_API((void*)name,len, device); + + assert(0 && "Unimplemented") + + return hipSuccess; +} + +hipError_t hipDeviceGetPCIBusId (char *pciBusId,int len, int device) { + + HIP_INIT_API((void*)pciBusId, len, device); + + assert(0 && "Unimplemented") + + return hipSuccess; +} \ No newline at end of file diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp new file mode 100644 index 0000000000..7e7b27c143 --- /dev/null +++ b/api/hip/hip_internal.hpp @@ -0,0 +1,43 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_SRC_HIP_INTERNAL_H +#define HIP_SRC_HIP_INTERNAL_H + +#include "cl_common.hpp" + +#define HIP_INIT()\ + amd::Thread* thread = amd::Thread::current(); \ + if (!CL_CHECK_THREAD(thread)) { \ + return hipErrorOutOfMemory; \ + } + + +// This macro should be called at the beginning of every HIP API. +#define HIP_INIT_API(...) \ + HIP_INIT() + +extern cl_device_id* g_deviceArray; +extern unsigned g_deviceCnt; +extern thread_local cl_context g_currentCtx; + +#endif // HIP_SRC_HIP_INTERNAL_H From 73cb309e2bd65be2a056c6c8dcda9f31381c953a Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 1 Mar 2018 22:57:20 -0500 Subject: [PATCH 004/282] P4 to Git Change 1521675 by lmoriche@lmoriche_opencl_dev2 on 2018/03/01 22:50:06 SWDEV-145570 - [HIP] - Hip Rearchitecture - Add initial prototype implementation Affected files ... ... //depot/stg/opencl/drivers/opencl/api/Makefile#11 edit ... //depot/stg/opencl/drivers/opencl/api/hip/fixme.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_error.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.rc#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#1 add ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#1 add --- api/hip/fixme.cpp | 33 +++++++ api/hip/hip_context.cpp | 58 ++++++++++++ api/hip/hip_device.cpp | 140 +++++++++++++--------------- api/hip/hip_error.cpp | 48 ++++++++++ api/hip/hip_event.cpp | 67 ++++++++++++++ api/hip/hip_hcc.def.in | 129 ++++++++++++++++++++++++++ api/hip/hip_hcc.map.in | 133 +++++++++++++++++++++++++++ api/hip/hip_hcc.rc | 75 +++++++++++++++ api/hip/hip_internal.hpp | 4 +- api/hip/hip_memory.cpp | 133 +++++++++++++-------------- api/hip/hip_module.cpp | 144 +++++++++++++++++++++++++++++ api/hip/hip_platform.cpp | 193 +++++++++++++++++++++++++++++++++++++++ 12 files changed, 1010 insertions(+), 147 deletions(-) create mode 100644 api/hip/fixme.cpp create mode 100644 api/hip/hip_context.cpp create mode 100644 api/hip/hip_error.cpp create mode 100644 api/hip/hip_event.cpp create mode 100644 api/hip/hip_hcc.def.in create mode 100644 api/hip/hip_hcc.map.in create mode 100644 api/hip/hip_hcc.rc create mode 100644 api/hip/hip_module.cpp create mode 100644 api/hip/hip_platform.cpp diff --git a/api/hip/fixme.cpp b/api/hip/fixme.cpp new file mode 100644 index 0000000000..3d062e2dbc --- /dev/null +++ b/api/hip/fixme.cpp @@ -0,0 +1,33 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "cl_common.hpp" + +KHRicdVendorDispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0}; +amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_}; + +RUNTIME_ENTRY(cl_int, clGetDeviceIDs, + (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + return CL_SUCCESS; +} +RUNTIME_EXIT diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp new file mode 100644 index 0000000000..0e6ff2116a --- /dev/null +++ b/api/hip/hip_context.cpp @@ -0,0 +1,58 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" +#include "platform/runtime.hpp" + + +amd::Context* g_context = nullptr; + +hipError_t hipInit(unsigned int flags) +{ + HIP_INIT_API(flags); + + if (!amd::Runtime::initialized()) { + amd::Runtime::init(); + } + + // FIXME: move the global VDI context to hipInit. + g_context = new amd::Context( + amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false), amd::Context::Info()); + if (!g_context) return hipErrorOutOfMemory; + + if (g_context && CL_SUCCESS != g_context->create(nullptr)) { + g_context->release(); + return hipErrorUnknown; + } + + return hipSuccess; +} + +hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) +{ + HIP_INIT_API(ctx, flags, device); + + return hipSuccess; +} + diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 557ee56643..7296eabb17 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -24,9 +24,6 @@ THE SOFTWARE. 
#include "hip_internal.hpp" -cl_device_id* g_deviceArray = NULL; -unsigned g_deviceCnt = 0; - hipError_t hipGetDevice(int *deviceId) { HIP_INIT_API(deviceId); @@ -54,7 +51,7 @@ hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) return hipSuccess; }; -hipError_t hipDeviceCount(int* count) { +hipError_t hipGetDeviceCount(int* count) { HIP_INIT_API(count); @@ -63,9 +60,7 @@ hipError_t hipDeviceCount(int* count) { } // Get all available devices - if (!amd::Device::getDeviceIDs(CL_DEVICE_TYPE_GPU, 0, NULL, count, false)) { - return hipErrorNoDevice; - } + *count = g_context->devices().size(); return hipSuccess; } @@ -74,93 +69,95 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) HIP_INIT_API(pi, attr, device); - if(pi == NULL) { + if (pi == nullptr) { return hipErrorInvalidValue; } - auto deviceHandle = as_amd(g_deviceArray[hipDevice]); + //if (unsigned(device) >= g_context->devices().size()) { + // return hipErrorInvalidDevice; + //} + //auto* deviceHandle = g_context->devices()[device]; - if (deviceHandle == NULL) { - return hipErrorInvalidDevice; - } - - hipDeviceProp_t *prop = deviceHandle->_props; + //FIXME: should we cache the props, or just select from deviceHandle->info_? 
+ hipDeviceProp_t prop = {0}; + hipError_t err = hipGetDeviceProperties(&prop, device); + if (err != hipSuccess) return err; switch (attr) { case hipDeviceAttributeMaxThreadsPerBlock: - *pi = prop->maxThreadsPerBlock; + *pi = prop.maxThreadsPerBlock; break; case hipDeviceAttributeMaxBlockDimX: - *pi = prop->maxThreadsDim[0]; + *pi = prop.maxThreadsDim[0]; break; case hipDeviceAttributeMaxBlockDimY: - *pi = prop->maxThreadsDim[1]; + *pi = prop.maxThreadsDim[1]; break; case hipDeviceAttributeMaxBlockDimZ: - *pi = prop->maxThreadsDim[2]; + *pi = prop.maxThreadsDim[2]; break; case hipDeviceAttributeMaxGridDimX: - *pi = prop->maxGridSize[0]; + *pi = prop.maxGridSize[0]; break; case hipDeviceAttributeMaxGridDimY: - *pi = prop->maxGridSize[1]; + *pi = prop.maxGridSize[1]; break; case hipDeviceAttributeMaxGridDimZ: - *pi = prop->maxGridSize[2]; + *pi = prop.maxGridSize[2]; break; case hipDeviceAttributeMaxSharedMemoryPerBlock: - *pi = prop->sharedMemPerBlock; + *pi = prop.sharedMemPerBlock; break; case hipDeviceAttributeTotalConstantMemory: - *pi = prop->totalConstMem; + *pi = prop.totalConstMem; break; case hipDeviceAttributeWarpSize: - *pi = prop->warpSize; + *pi = prop.warpSize; break; case hipDeviceAttributeMaxRegistersPerBlock: - *pi = prop->regsPerBlock; + *pi = prop.regsPerBlock; break; case hipDeviceAttributeClockRate: - *pi = prop->clockRate; + *pi = prop.clockRate; break; case hipDeviceAttributeMemoryClockRate: - *pi = prop->memoryClockRate; + *pi = prop.memoryClockRate; break; case hipDeviceAttributeMemoryBusWidth: - *pi = prop->memoryBusWidth; + *pi = prop.memoryBusWidth; break; case hipDeviceAttributeMultiprocessorCount: - *pi = prop->multiProcessorCount; + *pi = prop.multiProcessorCount; break; case hipDeviceAttributeComputeMode: - *pi = prop->computeMode; + *pi = prop.computeMode; break; case hipDeviceAttributeL2CacheSize: - *pi = prop->l2CacheSize; + *pi = prop.l2CacheSize; break; case hipDeviceAttributeMaxThreadsPerMultiProcessor: - *pi = 
prop->maxThreadsPerMultiProcessor; + *pi = prop.maxThreadsPerMultiProcessor; break; case hipDeviceAttributeComputeCapabilityMajor: - *pi = prop->major; + *pi = prop.major; break; case hipDeviceAttributeComputeCapabilityMinor: - *pi = prop->minor; + *pi = prop.minor; break; case hipDeviceAttributePciBusId: - *pi = prop->pciBusID; + *pi = prop.pciBusID; break; case hipDeviceAttributeConcurrentKernels: - *pi = prop->concurrentKernels; + *pi = prop.concurrentKernels; break; case hipDeviceAttributePciDeviceId: - *pi = prop->pciDeviceID; + *pi = prop.pciDeviceID; break; case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: - *pi = prop->maxSharedMemoryPerMultiProcessor; + *pi = prop.maxSharedMemoryPerMultiProcessor; break; case hipDeviceAttributeIsMultiGpuBoard: - *pi = prop->isMultiGpuBoard; + *pi = prop.isMultiGpuBoard; break; default: return hipErrorInvalidValue; @@ -177,10 +174,10 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) { return hipErrorInvalidValue; } - auto deviceHandle = as_amd(g_deviceArray[device]); - if (deviceHandle == NULL) { + if (unsigned(device) >= g_context->devices().size()) { return hipErrorInvalidDevice; } + auto* deviceHandle = g_context->devices()[device]; hipDeviceProp_t deviceProps = {0}; @@ -226,11 +223,11 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) { deviceProps.arch.has3dGrid = 1; deviceProps.arch.hasDynamicParallelism = 0; deviceProps.concurrentKernels = 1; - deviceProps.pciDomainID = info.deviceTopology_.function; - deviceProps.pciBusID = info.deviceTopology_.bus; - deviceProps.pciDeviceID = info.deviceTopology_.device; + deviceProps.pciDomainID = info.deviceTopology_.pcie.function; + deviceProps.pciBusID = info.deviceTopology_.pcie.bus; + deviceProps.pciDeviceID = info.deviceTopology_.pcie.device; deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; - deviceProps.isMultiGpuBoard = info.; + //deviceProps.isMultiGpuBoard = info.; deviceProps.canMapHostMemory = 
1; deviceProps.gcnArch = info.gfxipVersion_; @@ -254,32 +251,16 @@ hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig) { return hipErrorInvalidValue; } - *cacheConfig = 0; + *cacheConfig = hipFuncCache_t(); return hipSuccess; } -hipError_t hipGetDeviceProperties(hipDeviceProp_t* properties, int device) { - - HIP_INIT_API(properties, device); - if ((properties == NULL) || (device < 0) || (device >= g_deviceCnt)) { - return hipErrorInvalidDevice; - } - - auto * deviceHandle = as_amd(g_deviceArray[device]); - if (deviceHandle != NULL) { - *properties = deviceHandle->_props; - return hipSuccess; - } - - return hipErrorInvalidDevice; -} - hipError_t hipSetDeviceFlags(unsigned int flags) { HIP_INIT_API(flags); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; }; @@ -288,7 +269,7 @@ hipError_t hipDeviceGetLimit (size_t *pValue, hipLimit_t limit) { HIP_INIT_API(pValue, limit); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; } @@ -297,7 +278,7 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) HIP_INIT_API(cacheConfig); - assert(0 && "Not supported") + assert(0 && "Not supported"); return hipSuccess; } @@ -306,7 +287,7 @@ hipError_t hipDeviceSetSharedMemConfig (hipSharedMemConfig config) { HIP_INIT_API(config); - assert(0 && "Not Supported") + assert(0 && "Not Supported"); return hipSuccess; } @@ -315,17 +296,17 @@ hipError_t hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig) { HIP_INIT_API(pConfig); - assert(0 && "Not supported") + assert(0 && "Not supported"); return hipSuccess; } -hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { HIP_INIT_API(device, properties); - - assert(0 && "Unimplemented") + + assert(0 && "Unimplemented"); return hipSuccess; } @@ -335,7 +316,7 @@ hipError_t hipDeviceGetByPCIBusId (int* device, const char* pciBusId) { 
HIP_INIT_API(device,pciBusId); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; } @@ -345,7 +326,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) { HIP_INIT_API(bytes, device); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; } @@ -353,7 +334,8 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) { hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { HIP_INIT_API(major,minor, device); - assert(0 && "Unimplemented") + + assert(0 && "Unimplemented"); return hipSuccess; } @@ -362,7 +344,7 @@ hipError_t hipDeviceGetName(char *name,int len, hipDevice_t device) { HIP_INIT_API((void*)name,len, device); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; } @@ -371,7 +353,13 @@ hipError_t hipDeviceGetPCIBusId (char *pciBusId,int len, int device) { HIP_INIT_API((void*)pciBusId, len, device); - assert(0 && "Unimplemented") + assert(0 && "Unimplemented"); return hipSuccess; -} \ No newline at end of file +} + +hipError_t hipDeviceSynchronize(void) +{ + // FIXME: should wait on all streams + return hipSuccess; +} diff --git a/api/hip/hip_error.cpp b/api/hip/hip_error.cpp new file mode 100644 index 0000000000..2a8785c375 --- /dev/null +++ b/api/hip/hip_error.cpp @@ -0,0 +1,48 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipGetLastError() +{ + HIP_INIT_API(); + return hipErrorUnknown; +} + +hipError_t hipPeekAtLastError() +{ + HIP_INIT_API(); + return hipErrorUnknown; +} + +const char *hipGetErrorName(hipError_t hip_error) +{ + return ""; +} + +const char *hipGetErrorString(hipError_t hip_error) +{ + return ""; +} + diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp new file mode 100644 index 0000000000..117b28355e --- /dev/null +++ b/api/hip/hip_event.cpp @@ -0,0 +1,67 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) +{ + HIP_INIT_API(event, flags); + + return hipErrorUnknown; +} + +hipError_t hipEventCreate(hipEvent_t* event) +{ + HIP_INIT_API(event); + + return hipErrorUnknown; +} + +hipError_t hipEventDestroy(hipEvent_t event) +{ + HIP_INIT_API(event); + + return hipErrorUnknown; +} + +hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) +{ + HIP_INIT_API(ms, start, stop); + + return hipErrorUnknown; +} + +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) +{ + HIP_INIT_API(event, stream); + + return hipErrorUnknown; +} + +hipError_t hipEventSynchronize(hipEvent_t event) +{ + HIP_INIT_API(event); + + return hipErrorUnknown; +} diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in new file mode 100644 index 0000000000..6b4793ea94 --- /dev/null +++ b/api/hip/hip_hcc.def.in @@ -0,0 +1,129 @@ +EXPORTS +hipChooseDevice +hipCtxCreate +hipCtxDestroy +hipCtxDisablePeerAccess +hipCtxEnablePeerAccess +hipCtxGetApiVersion +hipCtxGetCacheConfig +hipCtxGetCurrent +hipCtxGetDevice +hipCtxGetFlags +hipCtxGetSharedMemConfig +hipCtxPopCurrent +hipCtxPushCurrent +hipCtxSetCacheConfig +hipCtxSetCurrent +hipCtxSetSharedMemConfig +hipCtxSynchronize +hipDeviceCanAccessPeer +hipDeviceComputeCapability +hipDeviceDisablePeerAccess +hipDeviceEnablePeerAccess +hipDeviceGet +hipDeviceGetAttribute +hipDeviceGetByPCIBusId +hipDeviceGetCacheConfig +hipDeviceGetLimit +hipDeviceGetName +hipDeviceGetPCIBusId +hipDeviceGetSharedMemConfig +hipDevicePrimaryCtxGetState +hipDevicePrimaryCtxRelease +hipDevicePrimaryCtxReset +hipDevicePrimaryCtxRetain +hipDevicePrimaryCtxSetFlags +hipDeviceReset +hipDeviceSetCacheConfig +hipDeviceSetSharedMemConfig +hipDeviceSynchronize +hipDeviceTotalMem +hipDriverGetVersion +hipEventCreate +hipEventCreateWithFlags +hipEventDestroy +hipEventElapsedTime +hipEventQuery +hipEventRecord +hipEventSynchronize +hipFree 
+hipFreeArray +hipFreeHost +hipFuncSetCacheConfig +hipGetDevice +hipGetDeviceCount +hipGetDeviceProperties +hipGetErrorName +hipGetErrorString +hipGetLastError +hipHostAlloc +hipHostFree +hipHostGetDevicePointer +hipHostGetFlags +hipHostMalloc +hipHostRegister +hipHostUnregister +hipInit +hipIpcCloseMemHandle +hipIpcGetMemHandle +hipIpcOpenMemHandle +hipMalloc +hipMalloc3DArray +hipMallocArray +hipMallocHost +hipMallocPitch +hipMemcpy +hipMemcpy2D +hipMemcpy2DAsync +hipMemcpy2DToArray +hipMemcpy3D +hipMemcpyAsync +hipMemcpyDtoD +hipMemcpyDtoDAsync +hipMemcpyDtoH +hipMemcpyDtoHAsync +hipMemcpyFromSymbol +hipMemcpyFromSymbolAsync +hipMemcpyHtoD +hipMemcpyHtoDAsync +hipMemcpyPeer +hipMemcpyPeerAsync +hipMemcpyToArray +hipMemcpyToSymbol +hipMemcpyToSymbolAsync +hipMemGetAddressRange +hipMemGetInfo +hipMemPtrGetInfo +hipMemset +hipMemset2D +hipMemsetAsync +hipMemsetD8 +hipModuleGetFunction +hipModuleGetGlobal +hipModuleLaunchKernel +hipModuleLoad +hipModuleLoadData +hipModuleLoadDataEx +hipModuleUnload +hipPeekAtLastError +hipPointerGetAttributes +hipProfilerStart +hipProfilerStop +hipRuntimeGetVersion +hipSetDevice +hipSetDeviceFlags +hipStreamAddCallback +hipStreamCreate +hipStreamCreateWithFlags +hipStreamDestroy +hipStreamGetFlags +hipStreamQuery +hipStreamSynchronize +hipStreamWaitEvent +__cudaRegisterFatBinary +__cudaRegisterFunction +__cudaRegisterVariable +__cudaUnregisterFatBinary +cudaConfigureCall +cudaSetupArgument +cudaLaunch diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in new file mode 100644 index 0000000000..e4025606bc --- /dev/null +++ b/api/hip/hip_hcc.map.in @@ -0,0 +1,133 @@ +HIP_1.0 { +global: + hipChooseDevice; + hipCtxCreate; + hipCtxDestroy; + hipCtxDisablePeerAccess; + hipCtxEnablePeerAccess; + hipCtxGetApiVersion; + hipCtxGetCacheConfig; + hipCtxGetCurrent; + hipCtxGetDevice; + hipCtxGetFlags; + hipCtxGetSharedMemConfig; + hipCtxPopCurrent; + hipCtxPushCurrent; + hipCtxSetCacheConfig; + hipCtxSetCurrent; + hipCtxSetSharedMemConfig; 
+ hipCtxSynchronize; + hipDeviceCanAccessPeer; + hipDeviceComputeCapability; + hipDeviceDisablePeerAccess; + hipDeviceEnablePeerAccess; + hipDeviceGet; + hipDeviceGetAttribute; + hipDeviceGetByPCIBusId; + hipDeviceGetCacheConfig; + hipDeviceGetLimit; + hipDeviceGetName; + hipDeviceGetPCIBusId; + hipDeviceGetSharedMemConfig; + hipDevicePrimaryCtxGetState; + hipDevicePrimaryCtxRelease; + hipDevicePrimaryCtxReset; + hipDevicePrimaryCtxRetain; + hipDevicePrimaryCtxSetFlags; + hipDeviceReset; + hipDeviceSetCacheConfig; + hipDeviceSetSharedMemConfig; + hipDeviceSynchronize; + hipDeviceTotalMem; + hipDriverGetVersion; + hipEventCreate; + hipEventCreateWithFlags; + hipEventDestroy; + hipEventElapsedTime; + hipEventQuery; + hipEventRecord; + hipEventSynchronize; + hipFree; + hipFreeArray; + hipFreeHost; + hipFuncSetCacheConfig; + hipGetDevice; + hipGetDeviceCount; + hipGetDeviceProperties; + hipGetErrorName; + hipGetErrorString; + hipGetLastError; + hipHostAlloc; + hipHostFree; + hipHostGetDevicePointer; + hipHostGetFlags; + hipHostMalloc; + hipHostRegister; + hipHostUnregister; + hipInit; + hipIpcCloseMemHandle; + hipIpcGetMemHandle; + hipIpcOpenMemHandle; + hipMalloc; + hipMalloc3DArray; + hipMallocArray; + hipMallocHost; + hipMallocPitch; + hipMemcpy; + hipMemcpy2D; + hipMemcpy2DAsync; + hipMemcpy2DToArray; + hipMemcpy3D; + hipMemcpyAsync; + hipMemcpyDtoD; + hipMemcpyDtoDAsync; + hipMemcpyDtoH; + hipMemcpyDtoHAsync; + hipMemcpyFromSymbol; + hipMemcpyFromSymbolAsync; + hipMemcpyHtoD; + hipMemcpyHtoDAsync; + hipMemcpyPeer; + hipMemcpyPeerAsync; + hipMemcpyToArray; + hipMemcpyToSymbol; + hipMemcpyToSymbolAsync; + hipMemGetAddressRange; + hipMemGetInfo; + hipMemPtrGetInfo; + hipMemset; + hipMemset2D; + hipMemsetAsync; + hipMemsetD8; + hipModuleGetFunction; + hipModuleGetGlobal; + hipModuleLaunchKernel; + hipModuleLoad; + hipModuleLoadData; + hipModuleLoadDataEx; + hipModuleUnload; + hipPeekAtLastError; + hipPointerGetAttributes; + hipProfilerStart; + hipProfilerStop; + 
hipRuntimeGetVersion; + hipSetDevice; + hipSetDeviceFlags; + hipStreamAddCallback; + hipStreamCreate; + hipStreamCreateWithFlags; + hipStreamDestroy; + hipStreamGetFlags; + hipStreamQuery; + hipStreamSynchronize; + hipStreamWaitEvent; + __cudaRegisterFatBinary; + __cudaRegisterFunction; + __cudaRegisterVariable; + __cudaUnregisterFatBinary; + cudaConfigureCall; + cudaSetupArgument; + cudaLaunch; +local: + *; +}; diff --git a/api/hip/hip_hcc.rc b/api/hip/hip_hcc.rc new file mode 100644 index 0000000000..009dc30c18 --- /dev/null +++ b/api/hip/hip_hcc.rc @@ -0,0 +1,75 @@ +#define STR(__macro__) #__macro__ +#define XSTR(__macro__) STR(__macro__) + +#if defined(_DEBUG) +#define DEBUG_ONLY(x) x +#else +#define DEBUG_ONLY(x) +#endif + +#define VERSION_PREFIX_MAJOR 2 +#define VERSION_PREFIX_MINOR 0 + + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. +// +#include "winresrc.h" +#include "utils/versions.hpp" + +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// English (U.S.) 
resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +#ifdef _WIN32 +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +#pragma code_page(1252) +#endif //_WIN32 + + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER + PRODUCTVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER + FILEFLAGSMASK 0x3fL +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x40004L + FILETYPE 0x2L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "Comments", " \0" + VALUE "CompanyName", "Advanced Micro Devices Inc.\0" + VALUE "FileDescription", AMD_PLATFORM_NAME " OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " Runtime\0" + VALUE "FileVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER) + VALUE "InternalName", "OpenCL" + VALUE "LegalCopyright", "Copyright (C) 2011 Advanced Micro Devices Inc.\0" + VALUE "OriginalFilename", "OpenCL.dll" + VALUE "ProductName", "OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " " AMD_PLATFORM_INFO "\0" + VALUE "ProductVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER) + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END + +#endif // English (U.S.) resources +///////////////////////////////////////////////////////////////////////////// diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 7e7b27c143..b1d906d870 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -36,8 +36,6 @@ THE SOFTWARE. #define HIP_INIT_API(...) 
\ HIP_INIT() -extern cl_device_id* g_deviceArray; -extern unsigned g_deviceCnt; -extern thread_local cl_context g_currentCtx; +extern amd::Context* g_context; #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 6edbe068fe..2dba003ba6 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -26,97 +26,94 @@ THE SOFTWARE. hipError_t hipMalloc(void** ptr, size_t sizeBytes) { - HIP_INIT_API(ptr, sizeBytes); - - amd::Context* context = as_amd(g_currentCtx); - - if (sizeBytes == 0) { - *ptr = nullptr; - return hipSuccess; - } - else if (!is_valid(context) || !ptr) { - return hipErrorInvalidValue; - } - - auto deviceHandle = as_amd(g_deviceArray[0]); - if ((deviceHandle->info().maxMemAllocSize_ < size)) { - return hipErrorOutOfMemory; - } - - amd::Memory* mem = new (*context) amd::Buffer(*context, 0, sizeBytes); - if (!mem) { - return hipErrorOutOfMemory; - } - - if (!mem->create(nullptr)) { - return hipErrorMemoryAllocation; - } - - *ptr = reinterpret_cast(as_cl(mem)); + HIP_INIT_API(ptr, sizeBytes); + if (sizeBytes == 0) { + *ptr = nullptr; return hipSuccess; + } + else if (!ptr) { + return hipErrorInvalidValue; + } + + if (g_context->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { + return hipErrorOutOfMemory; + } + + amd::Memory* mem = new (*g_context) amd::Buffer(*g_context, 0, sizeBytes); + if (!mem) { + return hipErrorOutOfMemory; + } + + if (!mem->create(nullptr)) { + return hipErrorMemoryAllocation; + } + + *ptr = reinterpret_cast(as_cl(mem)); + + return hipSuccess; } hipError_t hipFree(void* ptr) { - if (!is_valid(reinterpret_cast(ptr))) { - return hipErrorInvalidValue; - } - as_amd(reinterpret_cast(ptr))->release(); - return hipSuccess; + if (!is_valid(reinterpret_cast(ptr))) { + return hipErrorInvalidValue; + } + as_amd(reinterpret_cast(ptr))->release(); + return hipSuccess; } hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { - HIP_INIT_API(dst, src, 
sizeBytes, kind); + HIP_INIT_API(dst, src, sizeBytes, kind); - amd::Context* context = as_amd(g_currentCtx); - amd::Device* device = context->devices()[0]; + amd::Device* device = g_context->devices()[0]; - // FIXME : Do we create a queue here or create at init and just reuse - amd::HostQueue* queue = new amd::HostQueue(*context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); - if (!queue) { - return hipErrorOutOfMemory; - } + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } - amd::Buffer* srcBuffer = as_amd(reinterpret_cast(const_cast(src)))->asBuffer(); - amd::Buffer* dstBuffer = as_amd(reinterpret_cast(const_cast(dst)))->asBuffer(); + amd::Buffer* srcBuffer = as_amd(reinterpret_cast(const_cast(src)))->asBuffer(); + amd::Buffer* dstBuffer = as_amd(reinterpret_cast(dst))->asBuffer(); - amd::Command* command; - amd::Command::EventWaitList waitList; + amd::Command* command; + amd::Command::EventWaitList waitList; - switch (kind) { - case hipMemcpyDeviceToHost: + switch (kind) { + case hipMemcpyDeviceToHost: command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, - srcBuffer, 0, sizeBytes, dst); + *srcBuffer, 0, sizeBytes, dst); break; - case hipMemcpyHostToDevice: + case hipMemcpyHostToDevice: command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, - dstBuffer, 0, sizeBytes, src); + *dstBuffer, 0, sizeBytes, src); break; - default: - assert(!"Shouldn't reach here"); + default: + assert(!"Shouldn't reach here"); break; - } - if (!command) { - return hipErrorOutOfMemory; - } + } + if (!command) { + return hipErrorOutOfMemory; + } - // Make sure we have memory for the command execution - if (CL_SUCCESS != command->validateMemory()) { - delete command; - return hipErrorMemoryAllocation; - } +// FIXME: virtualize 
MemoryCommand::validateMemory() +#if 0 + // Make sure we have memory for the command execution + if (CL_SUCCESS != command->validateMemory()) { + delete command; + return hipErrorMemoryAllocation; + } +#endif + command->enqueue(); + command->awaitCompletion(); + command->release(); - command->enqueue(); - command->awaitCompletion(); - command->release(); + queue->release(); - queue->release(); - - return hipSuccess; + return hipSuccess; } diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp new file mode 100644 index 0000000000..dde0c4e790 --- /dev/null +++ b/api/hip/hip_module.cpp @@ -0,0 +1,144 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +#include "hip_internal.hpp" +#include "platform/program.hpp" + +static uint64_t ElfSize(const void *emi) +{ + const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi; + const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff); + + uint64_t max_offset = ehdr->e_shoff; + uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum; + + for (uint16_t i=0; i < ehdr->e_shnum; ++i){ + uint64_t cur_offset = static_cast(shdr[i].sh_offset); + if (max_offset < cur_offset) { + max_offset = cur_offset; + total_size = max_offset; + if(SHT_NOBITS != shdr[i].sh_type) { + total_size += static_cast(shdr[i].sh_size); + } + } + } + return total_size; +} + +hipError_t hipModuleLoadData(hipModule_t *module, const void *image) +{ + HIP_INIT_API(module, image); + + amd::Program* program = new amd::Program(*g_context); + if (program == NULL) { + return hipErrorOutOfMemory; + } + + if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, ElfSize(image)) || + CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) { + return hipErrorUnknown; + } + + *module = reinterpret_cast(as_cl(program)); + + return hipSuccess; +} + +hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name) +{ + HIP_INIT_API(hfunc, hmod, name); + + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + const amd::Symbol* symbol = program->findSymbol(name); + if (!symbol) { + return hipErrorNotFound; + } + + amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name); + if (!kernel) { + return hipErrorOutOfMemory; + } + + *hfunc = reinterpret_cast(as_cl(kernel)); + + return hipSuccess; +} + +hipError_t hipModuleLaunchKernel(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra) +{ + HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, + 
blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra); + + amd::Kernel* kernel = as_amd(reinterpret_cast(f)); + amd::Device* device = g_context->devices()[0]; + + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } + + size_t globalWorkOffset[3] = {0}; + size_t globalWorkSize[3] = { gridDimX, gridDimY, gridDimZ }; + size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ }; + amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); + amd::Command::EventWaitList waitList; + + assert(!kernelParams && extra && "check this code"); + const amd::KernelSignature& signature = kernel->signature(); + for (size_t i = 0; i < signature.numParameters(); ++i) { + const amd::KernelParameterDescriptor& desc = signature.at(i); + kernel->parameters().set(i, desc.size_, reinterpret_cast
(extra[1]) + desc.offset_); + } + + amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(*queue, waitList, *kernel, ndrange); + if (!command) { + return hipErrorOutOfMemory; + } + + // Make sure we have memory for the command execution + if (CL_SUCCESS != command->validateMemory()) { + delete command; + return hipErrorMemoryAllocation; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + queue->release(); + + return hipSuccess; +} + + diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp new file mode 100644 index 0000000000..aed3342483 --- /dev/null +++ b/api/hip/hip_platform.cpp @@ -0,0 +1,193 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "hip_internal.hpp" +#include "platform/program.hpp" +#include "platform/runtime.hpp" + +constexpr unsigned __cudaFatMAGIC = 0x1ee55a01; +constexpr unsigned __cudaFatMAGIC2 = 0x466243b1; +constexpr unsigned __cudaFatMAGIC3 = 0xba55ed50; + +struct __CudaFatBinaryWrapper { + unsigned int magic; + unsigned int version; + void* binary; + void* dummy1; +}; + +struct __CudaFatBinaryHeader { + unsigned int magic; + unsigned short version; + unsigned short headerSize; + unsigned long long int fatSize; +}; + +struct __CudaPartHeader{ + unsigned short type; + unsigned short dummy1; + unsigned int headerSize; + unsigned long long int partSize; + unsigned long long int dummy2; + unsigned int dummy3; + unsigned int subarch; +}; + +extern "C" hipModule_t __cudaRegisterFatBinary(void* bundle) +{ + if (!amd::Runtime::initialized()) { // FIXME: fix initialization + hipInit(0); + } + + amd::Program* program = new amd::Program(*g_context); + if (!program) return nullptr; + + struct __CudaFatBinaryWrapper* fbwrapper = (struct __CudaFatBinaryWrapper*)bundle; + if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) { + return nullptr; + } + struct __CudaFatBinaryHeader* fbheader = (struct __CudaFatBinaryHeader*)fbwrapper->binary; + if (fbheader->magic != __cudaFatMAGIC3 || fbheader->version != 1) { + return nullptr; + } + struct __CudaPartHeader* pheader = (struct __CudaPartHeader*)( + (uintptr_t)fbheader + fbheader->headerSize); + struct __CudaPartHeader* end = (struct __CudaPartHeader*)( + (uintptr_t)pheader + fbheader->fatSize); + + while (pheader < end) { + if (true/*pheader->subarch == match a device in the context*/) { + void *image = (void*)((uintptr_t)pheader + pheader->headerSize); + size_t size = pheader->partSize; + if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, size) || + CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) { + return nullptr; + } + break; + } + pheader = (struct 
__CudaPartHeader*)( + (uintptr_t)pheader + pheader->headerSize + pheader->partSize); + } + + return reinterpret_cast(as_cl(program)); +} + +std::map g_functions; + + +extern "C" void __cudaRegisterFunction( + hipModule_t module, + const void* hostFunction, + char* deviceFunction, + const char* deviceName, + unsigned int threadLimit, + uint3* tid, + uint3* bid, + dim3* blockDim, + dim3* gridDim, + int* wSize) +{ + amd::Program* program = as_amd(reinterpret_cast(module)); + + const amd::Symbol* symbol = program->findSymbol(deviceName); + if (!symbol) return; + + amd::Kernel* kernel = new amd::Kernel(*program, *symbol, deviceName); + if (!kernel) return; + + // FIXME: not thread safe + g_functions.insert(std::make_pair(hostFunction, reinterpret_cast(as_cl(kernel)))); +} + +extern "C" void __cudaRegisterVar( + hipModule_t module, + char* hostVar, + char* deviceVar, + const char* deviceName, + int ext, + int size, + int constant, + int global) +{ +} + +extern "C" void __cudaUnregisterFatBinary( + hipModule_t module +) +{ +} + +dim3 g_gridDim; // FIXME: place in execution stack +dim3 g_blockDim; // FIXME: place in execution stack +size_t g_sharedMem; // FIXME: place in execution stack +hipStream_t g_stream; // FIXME: place in execution stack + +extern "C" hipError_t cudaConfigureCall( + dim3 gridDim, + dim3 blockDim, + size_t sharedMem, + hipStream_t stream) +{ + // FIXME: should push and new entry on the execution stack + + g_gridDim = gridDim; + g_blockDim = blockDim; + g_sharedMem = sharedMem; + g_stream = stream; + + return hipSuccess; +} + +char* g_arguments[1024]; // FIXME: needs to grow + +extern "C" hipError_t cudaSetupArgument( + const void *arg, + size_t size, + size_t offset) +{ + // FIXME: should modify the top of the execution stack + + ::memcpy(g_arguments + offset, arg, size); + return hipSuccess; +} + +extern "C" hipError_t cudaLaunch(const void *hostFunction) +{ + std::map::iterator it; + if ((it = g_functions.find(hostFunction)) == g_functions.end()) + 
return hipErrorUnknown; + + // FIXME: should pop an entry from the execution stack + + void *extra[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, g_arguments, + HIP_LAUNCH_PARAM_BUFFER_SIZE, 0 /* FIXME: not needed, but should be correct*/, + HIP_LAUNCH_PARAM_END + }; + + return hipModuleLaunchKernel(it->second, + g_gridDim.x, g_gridDim.y, g_gridDim.z, + g_blockDim.x, g_blockDim.y, g_blockDim.z, + g_sharedMem, g_stream, nullptr, extra); +} From d8a344113f444c595fb543c9f9ab9d75e43f0513 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 2 Mar 2018 17:55:48 -0500 Subject: [PATCH 005/282] P4 to Git Change 1522211 by lmoriche@lmoriche_opencl_dev2 on 2018/03/02 17:41:47 SWDEV-145570 - [HIP] - Hip Rearchitecture - Rename cuda* launch functions -> hip* - Add more function prototypes to compile the HIP tests Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#2 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#1 add --- api/hip/hip_context.cpp | 13 +++++++ api/hip/hip_device.cpp | 19 ++++++++++ api/hip/hip_hcc.def.in | 14 ++++---- api/hip/hip_hcc.map.in | 14 ++++---- api/hip/hip_memory.cpp | 56 ++++++++++++++++++++++++++--- api/hip/hip_module.cpp | 19 ++++++++++ api/hip/hip_platform.cpp | 14 ++++---- api/hip/hip_stream.cpp | 76 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 199 insertions(+), 26 deletions(-) create mode 100644 api/hip/hip_stream.cpp diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 0e6ff2116a..78e65e99c5 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -24,6 +24,7 @@ THE SOFTWARE. #include "hip_internal.hpp" #include "platform/runtime.hpp" +#include "utils/versions.hpp" amd::Context* g_context = nullptr; @@ -56,3 +57,15 @@ hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) return hipSuccess; } +hipError_t hipRuntimeGetVersion(int *runtimeVersion) +{ + HIP_INIT_API(runtimeVersion); + + if (!runtimeVersion) { + return hipErrorInvalidValue; + } + + *runtimeVersion = AMD_PLATFORM_BUILD_NUMBER; + + return hipSuccess; +} diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 7296eabb17..b5da0c34b5 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -358,6 +358,25 @@ hipError_t hipDeviceGetPCIBusId (char *pciBusId,int len, int device) { return hipSuccess; } +hipError_t hipSetDevice(int deviceId) +{ + HIP_INIT_API(deviceId); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipDeviceReset(void) +{ + HIP_INIT_API(); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + hipError_t hipDeviceSynchronize(void) { // FIXME: should wait on all streams diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 6b4793ea94..10113dc2bd 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -120,10 +120,10 @@ hipStreamGetFlags hipStreamQuery 
hipStreamSynchronize hipStreamWaitEvent -__cudaRegisterFatBinary -__cudaRegisterFunction -__cudaRegisterVariable -__cudaUnregisterFatBinary -cudaConfigureCall -cudaSetupArgument -cudaLaunch +__hipRegisterFatBinary +__hipRegisterFunction +__hipRegisterVariable +__hipUnregisterFatBinary +hipConfigureCall +hipSetupArgument +hipLaunchByPtr diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index e4025606bc..a4153ee56f 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -121,13 +121,13 @@ global: hipStreamQuery; hipStreamSynchronize; hipStreamWaitEvent; - __cudaRegisterFatBinary; - __cudaRegisterFunction; - __cudaRegisterVariable; - __cudaUnregisterFatBinary; - cudaConfigureCall; - cudaSetupArgument; - cudaLaunch; + __hipRegisterFatBinary; + __hipRegisterFunction; + __hipRegisterVariable; + __hipUnregisterFatBinary; + hipConfigureCall; + hipSetupArgument; + hipLaunchByPtr; local: *; }; diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 2dba003ba6..0911f61e4c 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -54,6 +54,15 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) return hipSuccess; } +hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) +{ + HIP_INIT_API(ptr, sizeBytes, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + hipError_t hipFree(void* ptr) { if (!is_valid(reinterpret_cast(ptr))) { @@ -63,6 +72,20 @@ hipError_t hipFree(void* ptr) return hipSuccess; } +hipError_t hipMemcpyAsync(void* dst, + const void* src, + size_t sizeBytes, + hipMemcpyKind kind, + hipStream_t stream) +{ + HIP_INIT_API(dst, src, sizeBytes, kind, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { HIP_INIT_API(dst, src, sizeBytes, kind); @@ -76,20 +99,17 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind return hipErrorOutOfMemory; } - 
amd::Buffer* srcBuffer = as_amd(reinterpret_cast(const_cast(src)))->asBuffer(); - amd::Buffer* dstBuffer = as_amd(reinterpret_cast(dst))->asBuffer(); - amd::Command* command; amd::Command::EventWaitList waitList; switch (kind) { case hipMemcpyDeviceToHost: command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, - *srcBuffer, 0, sizeBytes, dst); + *as_amd(reinterpret_cast(const_cast(src)))->asBuffer(), 0, sizeBytes, dst); break; case hipMemcpyHostToDevice: command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, - *dstBuffer, 0, sizeBytes, src); + *as_amd(reinterpret_cast(dst))->asBuffer(), 0, sizeBytes, src); break; default: assert(!"Shouldn't reach here"); @@ -117,3 +137,29 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind return hipSuccess; } +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream ) +{ + HIP_INIT_API(dst, value, sizeBytes, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemset(void* dst, int value, size_t sizeBytes) +{ + HIP_INIT_API(dst, value, sizeBytes); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) +{ + HIP_INIT_API(ptr, size); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index dde0c4e790..fd7729c6e5 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -47,6 +47,25 @@ static uint64_t ElfSize(const void *emi) return total_size; } +hipError_t hipModuleLoad(hipModule_t *module, const char *fname) +{ + HIP_INIT_API(module, fname); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + +hipError_t hipModuleUnload(hipModule_t hmod) +{ + HIP_INIT_API(hmod); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + hipError_t hipModuleLoadData(hipModule_t *module, const void *image) { HIP_INIT_API(module, 
image); diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index aed3342483..0cc6a3b1c2 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -54,7 +54,7 @@ struct __CudaPartHeader{ unsigned int subarch; }; -extern "C" hipModule_t __cudaRegisterFatBinary(void* bundle) +extern "C" hipModule_t __hipRegisterFatBinary(void* bundle) { if (!amd::Runtime::initialized()) { // FIXME: fix initialization hipInit(0); @@ -96,7 +96,7 @@ extern "C" hipModule_t __cudaRegisterFatBinary(void* bundle) std::map g_functions; -extern "C" void __cudaRegisterFunction( +extern "C" void __hipRegisterFunction( hipModule_t module, const void* hostFunction, char* deviceFunction, @@ -120,7 +120,7 @@ extern "C" void __cudaRegisterFunction( g_functions.insert(std::make_pair(hostFunction, reinterpret_cast(as_cl(kernel)))); } -extern "C" void __cudaRegisterVar( +extern "C" void __hipRegisterVar( hipModule_t module, char* hostVar, char* deviceVar, @@ -132,7 +132,7 @@ extern "C" void __cudaRegisterVar( { } -extern "C" void __cudaUnregisterFatBinary( +extern "C" void __hipUnregisterFatBinary( hipModule_t module ) { @@ -143,7 +143,7 @@ dim3 g_blockDim; // FIXME: place in execution stack size_t g_sharedMem; // FIXME: place in execution stack hipStream_t g_stream; // FIXME: place in execution stack -extern "C" hipError_t cudaConfigureCall( +extern "C" hipError_t hipConfigureCall( dim3 gridDim, dim3 blockDim, size_t sharedMem, @@ -161,7 +161,7 @@ extern "C" hipError_t cudaConfigureCall( char* g_arguments[1024]; // FIXME: needs to grow -extern "C" hipError_t cudaSetupArgument( +extern "C" hipError_t hipSetupArgument( const void *arg, size_t size, size_t offset) @@ -172,7 +172,7 @@ extern "C" hipError_t cudaSetupArgument( return hipSuccess; } -extern "C" hipError_t cudaLaunch(const void *hostFunction) +extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) { std::map::iterator it; if ((it = g_functions.find(hostFunction)) == g_functions.end()) diff --git 
a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp new file mode 100644 index 0000000000..efecb5174d --- /dev/null +++ b/api/hip/hip_stream.cpp @@ -0,0 +1,76 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) +{ + HIP_INIT_API(stream, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + +hipError_t hipStreamCreate(hipStream_t *stream) +{ + HIP_INIT_API(stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) +{ + HIP_INIT_API(stream, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + +hipError_t hipStreamSynchronize(hipStream_t stream) +{ + HIP_INIT_API(stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + +hipError_t hipStreamDestroy(hipStream_t stream) +{ + HIP_INIT_API(stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + From cd4f8ccac976c9e3a108466ca1aa5df57d5966cb Mon Sep 17 00:00:00 2001 From: foreman Date: Sat, 3 Mar 2018 21:02:59 -0500 Subject: [PATCH 006/282] P4 to Git Change 1522302 by skudchad@skudchad_test2_win_opencl on 2018/03/03 20:41:25 SWDEV-145570 - [HIP] - Hip Rearchitecture - Implemented most of device* functions ReviewBoardURL = http://ocltc.amd.com/reviews/r/14340/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#4 edit --- api/hip/hip_device.cpp | 223 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 201 insertions(+), 22 deletions(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index b5da0c34b5..c719f30122 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -28,7 +28,7 @@ hipError_t hipGetDevice(int *deviceId) { HIP_INIT_API(deviceId); - if (deviceId != NULL) { + if (deviceId != nullptr) { // this needs to return default device. 
For now return 0 always *deviceId = 0; } else { @@ -55,7 +55,7 @@ hipError_t hipGetDeviceCount(int* count) { HIP_INIT_API(count); - if (count == NULL) { + if (count == nullptr) { return hipErrorInvalidValue; } @@ -170,7 +170,7 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) { HIP_INIT_API(props, device); - if (props == NULL) { + if (props == nullptr) { return hipErrorInvalidValue; } @@ -247,7 +247,7 @@ hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) { hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig) { HIP_INIT_API(cacheConfig); - if(cacheConfig == NULL) { + if(cacheConfig == nullptr) { return hipErrorInvalidValue; } @@ -269,16 +269,26 @@ hipError_t hipDeviceGetLimit (size_t *pValue, hipLimit_t limit) { HIP_INIT_API(pValue, limit); - assert(0 && "Unimplemented"); + auto* deviceHandle = g_context->devices()[0]; + const auto& info = deviceHandle->info(); + + if(pValue == nullptr) { + return hipErrorInvalidValue; + } + if(limit == hipLimitMallocHeapSize) { + *pValue = info.globalMemSize_; + return hipSuccess; + } else { + return hipErrorUnsupportedLimit; + } - return hipSuccess; } hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) { HIP_INIT_API(cacheConfig); - assert(0 && "Not supported"); + // No way to set cache config yet. return hipSuccess; } @@ -287,7 +297,7 @@ hipError_t hipDeviceSetSharedMemConfig (hipSharedMemConfig config) { HIP_INIT_API(config); - assert(0 && "Not Supported"); + // No way to set cache config yet. 
return hipSuccess; } @@ -296,7 +306,7 @@ hipError_t hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig) { HIP_INIT_API(pConfig); - assert(0 && "Not supported"); + *pConfig = hipSharedMemBankSizeFourByte; return hipSuccess; } @@ -306,54 +316,223 @@ hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { HIP_INIT_API(device, properties); - assert(0 && "Unimplemented"); + if (device == nullptr || properties == nullptr) { + return hipErrorInvalidValue; + } + + *device = 0; + cl_uint maxMatchedCount = 0; + + for (cl_uint i = 0; i< g_context->devices().size(); ++i) { + hipDeviceProp_t currentProp = {0}; + cl_uint validPropCount = 0; + cl_uint matchedCount = 0; + hipError_t err = hipGetDeviceProperties(¤tProp, i); + if (properties->major != 0) { + validPropCount++; + if(currentProp.major >= properties->major) { + matchedCount++; + } + } + if (properties->minor != 0) { + validPropCount++; + if(currentProp.minor >= properties->minor) { + matchedCount++; + } + } + if(properties->totalGlobalMem != 0) { + validPropCount++; + if(currentProp.totalGlobalMem >= properties->totalGlobalMem) { + matchedCount++; + } + } + if(properties->sharedMemPerBlock != 0) { + validPropCount++; + if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) { + matchedCount++; + } + } + if(properties->maxThreadsPerBlock != 0) { + validPropCount++; + if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) { + matchedCount++; + } + } + if(properties->totalConstMem != 0) { + validPropCount++; + if(currentProp.totalConstMem >= properties->totalConstMem ) { + matchedCount++; + } + } + if(properties->multiProcessorCount != 0) { + validPropCount++; + if(currentProp.multiProcessorCount >= + properties->multiProcessorCount ) { + matchedCount++; + } + } + if(properties->maxThreadsPerMultiProcessor != 0) { + validPropCount++; + if(currentProp.maxThreadsPerMultiProcessor >= + properties->maxThreadsPerMultiProcessor ) { + matchedCount++; + } + } + 
if(properties->memoryClockRate != 0) { + validPropCount++; + if(currentProp.memoryClockRate >= properties->memoryClockRate ) { + matchedCount++; + } + } + if(properties->memoryBusWidth != 0) { + validPropCount++; + if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) { + matchedCount++; + } + } + if(properties->l2CacheSize != 0) { + validPropCount++; + if(currentProp.l2CacheSize >= properties->l2CacheSize ) { + matchedCount++; + } + } + if(properties->regsPerBlock != 0) { + validPropCount++; + if(currentProp.regsPerBlock >= properties->regsPerBlock ) { + matchedCount++; + } + } + if(properties->maxSharedMemoryPerMultiProcessor != 0) { + validPropCount++; + if(currentProp.maxSharedMemoryPerMultiProcessor >= + properties->maxSharedMemoryPerMultiProcessor ) { + matchedCount++; + } + } + if(properties->warpSize != 0) { + validPropCount++; + if(currentProp.warpSize >= properties->warpSize ) { + matchedCount++; + } + } + if(validPropCount == matchedCount) { + *device = matchedCount > maxMatchedCount ? 
i : *device; + maxMatchedCount = std::max(matchedCount, maxMatchedCount); + } + } return hipSuccess; } -hipError_t hipDeviceGetByPCIBusId (int* device, const char* pciBusId) { +hipError_t hipDeviceGetByPCIBusId (int* device, const char* pciBusIdstr) { - HIP_INIT_API(device,pciBusId); + HIP_INIT_API(device, pciBusIdstr); - assert(0 && "Unimplemented"); + if (device == nullptr || pciBusIdstr == nullptr) { + return hipErrorInvalidValue; + } + + int pciBusID = -1; + int pciDeviceID = -1; + int pciDomainID = -1; + + if (sscanf (pciBusIdstr, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID) == 0x3) { + for (cl_uint i = 0; i < g_context->devices().size(); i++) { + auto* deviceHandle = g_context->devices()[i]; + auto& info = deviceHandle->info(); + + if (pciBusID == info.deviceTopology_.pcie.bus) { + *device = i; + break; + } + } + } return hipSuccess; } -hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) { +hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { HIP_INIT_API(bytes, device); - assert(0 && "Unimplemented"); + if (device < 0 || device > (cl_int)g_context->devices().size()) { + return hipErrorInvalidDevice; + } + + if (bytes == nullptr) { + return hipErrorInvalidValue; + } + + auto* deviceHandle = g_context->devices()[device]; + const auto& info = deviceHandle->info(); + + *bytes = info.globalMemSize_; return hipSuccess; } hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { - HIP_INIT_API(major,minor, device); + HIP_INIT_API(major, minor, device); - assert(0 && "Unimplemented"); + if (device < 0 || device > (cl_int)g_context->devices().size()) { + return hipErrorInvalidDevice; + } + + if (major == nullptr || minor == nullptr) { + return hipErrorInvalidValue; + } + + auto* deviceHandle = g_context->devices()[device]; + const auto& info = deviceHandle->info(); + *major = info.gfxipVersion_ / 100; + *minor = info.gfxipVersion_ % 100; return hipSuccess; } -hipError_t hipDeviceGetName(char *name,int 
len, hipDevice_t device) { +hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { - HIP_INIT_API((void*)name,len, device); + HIP_INIT_API((void*)name, len, device); - assert(0 && "Unimplemented"); + if (device < 0 || device > (cl_int)g_context->devices().size()) { + return hipErrorInvalidDevice; + } + + if (name == nullptr) { + return hipErrorInvalidValue; + } + + auto* deviceHandle = g_context->devices()[device]; + const auto& info = deviceHandle->info(); + + len = ((cl_uint)len < ::strlen(info.boardName_)) ? len : 128; + ::strncpy(name, info.boardName_, len); return hipSuccess; } -hipError_t hipDeviceGetPCIBusId (char *pciBusId,int len, int device) { +hipError_t hipDeviceGetPCIBusId (char *pciBusId, int len, int device) { HIP_INIT_API((void*)pciBusId, len, device); - assert(0 && "Unimplemented"); + if (device < 0 || device > (cl_int)g_context->devices().size()) { + return hipErrorInvalidDevice; + } + + if (pciBusId == nullptr || len < 0) { + return hipErrorInvalidValue; + } + + auto* deviceHandle = g_context->devices()[device]; + const auto& info = deviceHandle->info(); + snprintf (pciBusId, len, "%04x:%02x:%02x.0", + info.deviceTopology_.pcie.function, + info.deviceTopology_.pcie.bus, + info.deviceTopology_.pcie.device); + return hipSuccess; } From f99db275fe935b46bc6a87467a05903fa3e84e37 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 7 Mar 2018 20:03:36 -0500 Subject: [PATCH 007/282] P4 to Git Change 1524135 by cpaquot@cpaquot-ocl-lc-lnx on 2018/03/07 19:52:00 SWDEV-145570 - [HIP] Hip Rearchitecture Implemented hipHostAlloc Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#5 edit --- api/hip/hip_memory.cpp | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 0911f61e4c..ddebc8623c 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -58,13 +58,32 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(ptr, sizeBytes, flags); - assert(0 && "Unimplemented"); + if (sizeBytes == 0) { + *ptr = nullptr; + return hipSuccess; + } + else if (!ptr) { + return hipErrorInvalidValue; + } - return hipErrorUnknown; + if (g_context->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { + return hipErrorOutOfMemory; + } + + *ptr = amd::SvmBuffer::malloc(*g_context, 0, sizeBytes, g_context->devices()[0]->info().memBaseAddrAlign_); + if (!*ptr) { + return hipErrorOutOfMemory; + } + + return hipSuccess; } hipError_t hipFree(void* ptr) { + if (amd::SvmBuffer::malloced(ptr)) { + amd::SvmBuffer::free(*g_context, ptr); + return hipSuccess; + } if (!is_valid(reinterpret_cast(ptr))) { return hipErrorInvalidValue; } From 22aeb7f4f3fc9f7f0b79451a3301619d0129fe3b Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 13 Mar 2018 12:33:01 -0400 Subject: [PATCH 008/282] P4 to Git Change 1526407 by cpaquot@cpaquot-ocl-lc-lnx on 2018/03/13 12:24:51 SWDEV-145570 - [HIP] Separate device runtime and driver APIs Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#5 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#1 add --- api/hip/hip_device.cpp | 455 ---------------------------- api/hip/hip_device_runtime.cpp | 533 +++++++++++++++++++++++++++++++++ 2 files changed, 533 insertions(+), 455 deletions(-) create mode 100644 api/hip/hip_device_runtime.cpp diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index c719f30122..ae9cba963d 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -24,20 +24,6 @@ THE SOFTWARE. #include "hip_internal.hpp" -hipError_t hipGetDevice(int *deviceId) { - - HIP_INIT_API(deviceId); - - if (deviceId != nullptr) { - // this needs to return default device. For now return 0 always - *deviceId = 0; - } else { - return hipErrorInvalidValue; - } - - return hipSuccess; -} - hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { HIP_INIT_API(device, deviceId); @@ -51,239 +37,6 @@ hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) return hipSuccess; }; -hipError_t hipGetDeviceCount(int* count) { - - HIP_INIT_API(count); - - if (count == nullptr) { - return hipErrorInvalidValue; - } - - // Get all available devices - *count = g_context->devices().size(); - - return hipSuccess; -} - -hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { - - HIP_INIT_API(pi, attr, device); - - if (pi == nullptr) { - return hipErrorInvalidValue; - } - - //if (unsigned(device) >= g_context->devices().size()) { - // return hipErrorInvalidDevice; - //} - //auto* deviceHandle = g_context->devices()[device]; - - //FIXME: should we cache the props, or just select from deviceHandle->info_? 
- hipDeviceProp_t prop = {0}; - hipError_t err = hipGetDeviceProperties(&prop, device); - if (err != hipSuccess) return err; - - switch (attr) { - case hipDeviceAttributeMaxThreadsPerBlock: - *pi = prop.maxThreadsPerBlock; - break; - case hipDeviceAttributeMaxBlockDimX: - *pi = prop.maxThreadsDim[0]; - break; - case hipDeviceAttributeMaxBlockDimY: - *pi = prop.maxThreadsDim[1]; - break; - case hipDeviceAttributeMaxBlockDimZ: - *pi = prop.maxThreadsDim[2]; - break; - case hipDeviceAttributeMaxGridDimX: - *pi = prop.maxGridSize[0]; - break; - case hipDeviceAttributeMaxGridDimY: - *pi = prop.maxGridSize[1]; - break; - case hipDeviceAttributeMaxGridDimZ: - *pi = prop.maxGridSize[2]; - break; - case hipDeviceAttributeMaxSharedMemoryPerBlock: - *pi = prop.sharedMemPerBlock; - break; - case hipDeviceAttributeTotalConstantMemory: - *pi = prop.totalConstMem; - break; - case hipDeviceAttributeWarpSize: - *pi = prop.warpSize; - break; - case hipDeviceAttributeMaxRegistersPerBlock: - *pi = prop.regsPerBlock; - break; - case hipDeviceAttributeClockRate: - *pi = prop.clockRate; - break; - case hipDeviceAttributeMemoryClockRate: - *pi = prop.memoryClockRate; - break; - case hipDeviceAttributeMemoryBusWidth: - *pi = prop.memoryBusWidth; - break; - case hipDeviceAttributeMultiprocessorCount: - *pi = prop.multiProcessorCount; - break; - case hipDeviceAttributeComputeMode: - *pi = prop.computeMode; - break; - case hipDeviceAttributeL2CacheSize: - *pi = prop.l2CacheSize; - break; - case hipDeviceAttributeMaxThreadsPerMultiProcessor: - *pi = prop.maxThreadsPerMultiProcessor; - break; - case hipDeviceAttributeComputeCapabilityMajor: - *pi = prop.major; - break; - case hipDeviceAttributeComputeCapabilityMinor: - *pi = prop.minor; - break; - case hipDeviceAttributePciBusId: - *pi = prop.pciBusID; - break; - case hipDeviceAttributeConcurrentKernels: - *pi = prop.concurrentKernels; - break; - case hipDeviceAttributePciDeviceId: - *pi = prop.pciDeviceID; - break; - case 
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: - *pi = prop.maxSharedMemoryPerMultiProcessor; - break; - case hipDeviceAttributeIsMultiGpuBoard: - *pi = prop.isMultiGpuBoard; - break; - default: - return hipErrorInvalidValue; - } - - return hipSuccess; -} - -hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) { - - HIP_INIT_API(props, device); - - if (props == nullptr) { - return hipErrorInvalidValue; - } - - if (unsigned(device) >= g_context->devices().size()) { - return hipErrorInvalidDevice; - } - auto* deviceHandle = g_context->devices()[device]; - - hipDeviceProp_t deviceProps = {0}; - - const auto& info = deviceHandle->info(); - ::strncpy(deviceProps.name, info.boardName_, 128); - deviceProps.totalGlobalMem = info.globalMemSize_; - deviceProps.sharedMemPerBlock = info.localMemSizePerCU_; - deviceProps.regsPerBlock = info.availableSGPRs_; - deviceProps.warpSize = info.wavefrontWidth_; - deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_; - deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0]; - deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1]; - deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2]; - deviceProps.maxGridSize[0] = UINT32_MAX; - deviceProps.maxGridSize[1] = UINT32_MAX; - deviceProps.maxGridSize[2] = UINT32_MAX; - deviceProps.clockRate = info.maxEngineClockFrequency_; - deviceProps.memoryClockRate = info.maxMemoryClockFrequency_; - deviceProps.memoryBusWidth = info.globalMemChannels_ * 32; - deviceProps.totalConstMem = info.maxConstantBufferSize_; - deviceProps.major = info.gfxipVersion_ / 100; - deviceProps.minor = info.gfxipVersion_ % 100; - deviceProps.multiProcessorCount = info.maxComputeUnits_; - deviceProps.l2CacheSize = info.l2CacheSize_; - deviceProps.maxThreadsPerMultiProcessor = info.simdPerCU_; - deviceProps.computeMode = 0; - deviceProps.clockInstructionRate = info.timeStampFrequency_; - deviceProps.arch.hasGlobalInt32Atomics = 1; - deviceProps.arch.hasGlobalFloatAtomicExch = 1; - 
deviceProps.arch.hasSharedInt32Atomics = 1; - deviceProps.arch.hasSharedFloatAtomicExch = 1; - deviceProps.arch.hasFloatAtomicAdd = 0; - deviceProps.arch.hasGlobalInt64Atomics = 1; - deviceProps.arch.hasSharedInt64Atomics = 1; - deviceProps.arch.hasDoubles = 1; - deviceProps.arch.hasWarpVote = 0; - deviceProps.arch.hasWarpBallot = 0; - deviceProps.arch.hasWarpShuffle = 0; - deviceProps.arch.hasFunnelShift = 0; - deviceProps.arch.hasThreadFenceSystem = 1; - deviceProps.arch.hasSyncThreadsExt = 0; - deviceProps.arch.hasSurfaceFuncs = 0; - deviceProps.arch.has3dGrid = 1; - deviceProps.arch.hasDynamicParallelism = 0; - deviceProps.concurrentKernels = 1; - deviceProps.pciDomainID = info.deviceTopology_.pcie.function; - deviceProps.pciBusID = info.deviceTopology_.pcie.bus; - deviceProps.pciDeviceID = info.deviceTopology_.pcie.device; - deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; - //deviceProps.isMultiGpuBoard = info.; - deviceProps.canMapHostMemory = 1; - deviceProps.gcnArch = info.gfxipVersion_; - - *props = deviceProps; - return hipSuccess; -} - -hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) { - - HIP_INIT_API(cacheConfig); - - // No way to set cache config yet. 
- - return hipSuccess; -} - -hipError_t hipDeviceGetCacheConfig(hipFuncCache_t *cacheConfig) { - HIP_INIT_API(cacheConfig); - - if(cacheConfig == nullptr) { - return hipErrorInvalidValue; - } - - *cacheConfig = hipFuncCache_t(); - - return hipSuccess; -} - -hipError_t hipSetDeviceFlags(unsigned int flags) { - - HIP_INIT_API(flags); - - assert(0 && "Unimplemented"); - - return hipSuccess; -}; - -hipError_t hipDeviceGetLimit (size_t *pValue, hipLimit_t limit) { - - HIP_INIT_API(pValue, limit); - - auto* deviceHandle = g_context->devices()[0]; - const auto& info = deviceHandle->info(); - - if(pValue == nullptr) { - return hipErrorInvalidValue; - } - if(limit == hipLimitMallocHeapSize) { - *pValue = info.globalMemSize_; - return hipSuccess; - } else { - return hipErrorUnsupportedLimit; - } - -} - hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) { HIP_INIT_API(cacheConfig); @@ -293,166 +46,6 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) return hipSuccess; } -hipError_t hipDeviceSetSharedMemConfig (hipSharedMemConfig config) { - - HIP_INIT_API(config); - - // No way to set cache config yet. 
- - return hipSuccess; -} - -hipError_t hipDeviceGetSharedMemConfig (hipSharedMemConfig *pConfig) { - - HIP_INIT_API(pConfig); - - *pConfig = hipSharedMemBankSizeFourByte; - - return hipSuccess; -} - - -hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { - - HIP_INIT_API(device, properties); - - if (device == nullptr || properties == nullptr) { - return hipErrorInvalidValue; - } - - *device = 0; - cl_uint maxMatchedCount = 0; - - for (cl_uint i = 0; i< g_context->devices().size(); ++i) { - hipDeviceProp_t currentProp = {0}; - cl_uint validPropCount = 0; - cl_uint matchedCount = 0; - hipError_t err = hipGetDeviceProperties(¤tProp, i); - if (properties->major != 0) { - validPropCount++; - if(currentProp.major >= properties->major) { - matchedCount++; - } - } - if (properties->minor != 0) { - validPropCount++; - if(currentProp.minor >= properties->minor) { - matchedCount++; - } - } - if(properties->totalGlobalMem != 0) { - validPropCount++; - if(currentProp.totalGlobalMem >= properties->totalGlobalMem) { - matchedCount++; - } - } - if(properties->sharedMemPerBlock != 0) { - validPropCount++; - if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) { - matchedCount++; - } - } - if(properties->maxThreadsPerBlock != 0) { - validPropCount++; - if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) { - matchedCount++; - } - } - if(properties->totalConstMem != 0) { - validPropCount++; - if(currentProp.totalConstMem >= properties->totalConstMem ) { - matchedCount++; - } - } - if(properties->multiProcessorCount != 0) { - validPropCount++; - if(currentProp.multiProcessorCount >= - properties->multiProcessorCount ) { - matchedCount++; - } - } - if(properties->maxThreadsPerMultiProcessor != 0) { - validPropCount++; - if(currentProp.maxThreadsPerMultiProcessor >= - properties->maxThreadsPerMultiProcessor ) { - matchedCount++; - } - } - if(properties->memoryClockRate != 0) { - validPropCount++; - if(currentProp.memoryClockRate >= 
properties->memoryClockRate ) { - matchedCount++; - } - } - if(properties->memoryBusWidth != 0) { - validPropCount++; - if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) { - matchedCount++; - } - } - if(properties->l2CacheSize != 0) { - validPropCount++; - if(currentProp.l2CacheSize >= properties->l2CacheSize ) { - matchedCount++; - } - } - if(properties->regsPerBlock != 0) { - validPropCount++; - if(currentProp.regsPerBlock >= properties->regsPerBlock ) { - matchedCount++; - } - } - if(properties->maxSharedMemoryPerMultiProcessor != 0) { - validPropCount++; - if(currentProp.maxSharedMemoryPerMultiProcessor >= - properties->maxSharedMemoryPerMultiProcessor ) { - matchedCount++; - } - } - if(properties->warpSize != 0) { - validPropCount++; - if(currentProp.warpSize >= properties->warpSize ) { - matchedCount++; - } - } - if(validPropCount == matchedCount) { - *device = matchedCount > maxMatchedCount ? i : *device; - maxMatchedCount = std::max(matchedCount, maxMatchedCount); - } - } - - return hipSuccess; -} - - -hipError_t hipDeviceGetByPCIBusId (int* device, const char* pciBusIdstr) { - - HIP_INIT_API(device, pciBusIdstr); - - if (device == nullptr || pciBusIdstr == nullptr) { - return hipErrorInvalidValue; - } - - int pciBusID = -1; - int pciDeviceID = -1; - int pciDomainID = -1; - - if (sscanf (pciBusIdstr, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID) == 0x3) { - for (cl_uint i = 0; i < g_context->devices().size(); i++) { - auto* deviceHandle = g_context->devices()[i]; - auto& info = deviceHandle->info(); - - if (pciBusID == info.deviceTopology_.pcie.bus) { - *device = i; - break; - } - } - } - - return hipSuccess; -} - - hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { HIP_INIT_API(bytes, device); @@ -513,51 +106,3 @@ hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { return hipSuccess; } - -hipError_t hipDeviceGetPCIBusId (char *pciBusId, int len, int device) { - - HIP_INIT_API((void*)pciBusId, len, 
device); - - if (device < 0 || device > (cl_int)g_context->devices().size()) { - return hipErrorInvalidDevice; - } - - if (pciBusId == nullptr || len < 0) { - return hipErrorInvalidValue; - } - - auto* deviceHandle = g_context->devices()[device]; - const auto& info = deviceHandle->info(); - snprintf (pciBusId, len, "%04x:%02x:%02x.0", - info.deviceTopology_.pcie.function, - info.deviceTopology_.pcie.bus, - info.deviceTopology_.pcie.device); - - - return hipSuccess; -} - -hipError_t hipSetDevice(int deviceId) -{ - HIP_INIT_API(deviceId); - - assert(0 && "Unimplemented"); - - return hipErrorUnknown; -} - -hipError_t hipDeviceReset(void) -{ - HIP_INIT_API(); - - assert(0 && "Unimplemented"); - - return hipErrorUnknown; -} - - -hipError_t hipDeviceSynchronize(void) -{ - // FIXME: should wait on all streams - return hipSuccess; -} diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp new file mode 100644 index 0000000000..b9b6ccb68e --- /dev/null +++ b/api/hip/hip_device_runtime.cpp @@ -0,0 +1,533 @@ +/* +Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { + + HIP_INIT_API(device, properties); + + if (device == nullptr || properties == nullptr) { + return hipErrorInvalidValue; + } + + *device = 0; + cl_uint maxMatchedCount = 0; + + for (cl_uint i = 0; i< g_context->devices().size(); ++i) { + hipDeviceProp_t currentProp = {0}; + cl_uint validPropCount = 0; + cl_uint matchedCount = 0; + hipError_t err = hipGetDeviceProperties(¤tProp, i); + if (properties->major != 0) { + validPropCount++; + if(currentProp.major >= properties->major) { + matchedCount++; + } + } + if (properties->minor != 0) { + validPropCount++; + if(currentProp.minor >= properties->minor) { + matchedCount++; + } + } + if(properties->totalGlobalMem != 0) { + validPropCount++; + if(currentProp.totalGlobalMem >= properties->totalGlobalMem) { + matchedCount++; + } + } + if(properties->sharedMemPerBlock != 0) { + validPropCount++; + if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) { + matchedCount++; + } + } + if(properties->maxThreadsPerBlock != 0) { + validPropCount++; + if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) { + matchedCount++; + } + } + if(properties->totalConstMem != 0) { + validPropCount++; + if(currentProp.totalConstMem >= properties->totalConstMem ) { + matchedCount++; + } + } + if(properties->multiProcessorCount != 0) { + validPropCount++; + if(currentProp.multiProcessorCount >= + properties->multiProcessorCount ) { + matchedCount++; + } + } + if(properties->maxThreadsPerMultiProcessor != 0) { + validPropCount++; + if(currentProp.maxThreadsPerMultiProcessor >= + properties->maxThreadsPerMultiProcessor ) { + 
matchedCount++; + } + } + if(properties->memoryClockRate != 0) { + validPropCount++; + if(currentProp.memoryClockRate >= properties->memoryClockRate ) { + matchedCount++; + } + } + if(properties->memoryBusWidth != 0) { + validPropCount++; + if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) { + matchedCount++; + } + } + if(properties->l2CacheSize != 0) { + validPropCount++; + if(currentProp.l2CacheSize >= properties->l2CacheSize ) { + matchedCount++; + } + } + if(properties->regsPerBlock != 0) { + validPropCount++; + if(currentProp.regsPerBlock >= properties->regsPerBlock ) { + matchedCount++; + } + } + if(properties->maxSharedMemoryPerMultiProcessor != 0) { + validPropCount++; + if(currentProp.maxSharedMemoryPerMultiProcessor >= + properties->maxSharedMemoryPerMultiProcessor ) { + matchedCount++; + } + } + if(properties->warpSize != 0) { + validPropCount++; + if(currentProp.warpSize >= properties->warpSize ) { + matchedCount++; + } + } + if(validPropCount == matchedCount) { + *device = matchedCount > maxMatchedCount ? i : *device; + maxMatchedCount = std::max(matchedCount, maxMatchedCount); + } + } + + return hipSuccess; +} + +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { + + HIP_INIT_API(pi, attr, device); + + if (pi == nullptr) { + return hipErrorInvalidValue; + } + + //if (unsigned(device) >= g_context->devices().size()) { + // return hipErrorInvalidDevice; + //} + //auto* deviceHandle = g_context->devices()[device]; + + //FIXME: should we cache the props, or just select from deviceHandle->info_? 
+ hipDeviceProp_t prop = {0}; + hipError_t err = hipGetDeviceProperties(&prop, device); + if (err != hipSuccess) return err; + + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + *pi = prop.maxThreadsPerBlock; + break; + case hipDeviceAttributeMaxBlockDimX: + *pi = prop.maxThreadsDim[0]; + break; + case hipDeviceAttributeMaxBlockDimY: + *pi = prop.maxThreadsDim[1]; + break; + case hipDeviceAttributeMaxBlockDimZ: + *pi = prop.maxThreadsDim[2]; + break; + case hipDeviceAttributeMaxGridDimX: + *pi = prop.maxGridSize[0]; + break; + case hipDeviceAttributeMaxGridDimY: + *pi = prop.maxGridSize[1]; + break; + case hipDeviceAttributeMaxGridDimZ: + *pi = prop.maxGridSize[2]; + break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + *pi = prop.sharedMemPerBlock; + break; + case hipDeviceAttributeTotalConstantMemory: + *pi = prop.totalConstMem; + break; + case hipDeviceAttributeWarpSize: + *pi = prop.warpSize; + break; + case hipDeviceAttributeMaxRegistersPerBlock: + *pi = prop.regsPerBlock; + break; + case hipDeviceAttributeClockRate: + *pi = prop.clockRate; + break; + case hipDeviceAttributeMemoryClockRate: + *pi = prop.memoryClockRate; + break; + case hipDeviceAttributeMemoryBusWidth: + *pi = prop.memoryBusWidth; + break; + case hipDeviceAttributeMultiprocessorCount: + *pi = prop.multiProcessorCount; + break; + case hipDeviceAttributeComputeMode: + *pi = prop.computeMode; + break; + case hipDeviceAttributeL2CacheSize: + *pi = prop.l2CacheSize; + break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + *pi = prop.maxThreadsPerMultiProcessor; + break; + case hipDeviceAttributeComputeCapabilityMajor: + *pi = prop.major; + break; + case hipDeviceAttributeComputeCapabilityMinor: + *pi = prop.minor; + break; + case hipDeviceAttributePciBusId: + *pi = prop.pciBusID; + break; + case hipDeviceAttributeConcurrentKernels: + *pi = prop.concurrentKernels; + break; + case hipDeviceAttributePciDeviceId: + *pi = prop.pciDeviceID; + break; + case 
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: + *pi = prop.maxSharedMemoryPerMultiProcessor; + break; + case hipDeviceAttributeIsMultiGpuBoard: + *pi = prop.isMultiGpuBoard; + break; + default: + return hipErrorInvalidValue; + } + + return hipSuccess; +} + +hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { + + HIP_INIT_API(device, pciBusIdstr); + + if (device == nullptr || pciBusIdstr == nullptr) { + return hipErrorInvalidValue; + } + + int pciBusID = -1; + int pciDeviceID = -1; + int pciDomainID = -1; + + if (sscanf (pciBusIdstr, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID) == 0x3) { + for (cl_uint i = 0; i < g_context->devices().size(); i++) { + auto* deviceHandle = g_context->devices()[i]; + auto& info = deviceHandle->info(); + + if (pciBusID == info.deviceTopology_.pcie.bus) { + *device = i; + break; + } + } + } + + return hipSuccess; +} + +hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) { + HIP_INIT_API(cacheConfig); + + if(cacheConfig == nullptr) { + return hipErrorInvalidValue; + } + + *cacheConfig = hipFuncCache_t(); + + return hipSuccess; +} + +hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { + + HIP_INIT_API(pValue, limit); + + auto* deviceHandle = g_context->devices()[0]; + const auto& info = deviceHandle->info(); + + if(pValue == nullptr) { + return hipErrorInvalidValue; + } + if(limit == hipLimitMallocHeapSize) { + *pValue = info.globalMemSize_; + return hipSuccess; + } else { + return hipErrorUnsupportedLimit; + } +} + +/** +hipError_t hipDeviceGetP2PAttribute ( int* value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice ) { + assert(0); + return hipSuccess; +} +**/ + +hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { + + HIP_INIT_API((void*)pciBusId, len, device); + + if (device < 0 || device > (cl_int)g_context->devices().size()) { + return hipErrorInvalidDevice; + } + + if (pciBusId == nullptr || len < 0) { + return hipErrorInvalidValue; + } 
+ + auto* deviceHandle = g_context->devices()[device]; + const auto& info = deviceHandle->info(); + snprintf (pciBusId, len, "%04x:%02x:%02x.0", + info.deviceTopology_.pcie.function, + info.deviceTopology_.pcie.bus, + info.deviceTopology_.pcie.device); + + + return hipSuccess; +} + +hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { + HIP_INIT_API(pConfig); + + *pConfig = hipSharedMemBankSizeFourByte; + + return hipSuccess; +} + +hipError_t hipDeviceGetStreamPriorityRange ( int* leastPriority, int* greatestPriority ) { + assert(0); + return hipSuccess; +} + +hipError_t hipDeviceReset ( void ) { + HIP_INIT_API(); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { + HIP_INIT_API(cacheConfig); + + // No way to set cache config yet. + + return hipSuccess; +} + +hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) { + return hipErrorUnknown; +} + +hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { + HIP_INIT_API(config); + + // No way to set cache config yet. + + return hipSuccess; +} + +hipError_t hipDeviceSynchronize ( void ) { + return hipSuccess; +} + +hipError_t hipGetDevice ( int* deviceId ) { + HIP_INIT_API(deviceId); + + if (deviceId != nullptr) { + // this needs to return default device. 
For now return 0 always + *deviceId = 0; + } else { + return hipErrorInvalidValue; + } + + return hipSuccess; +} + +hipError_t hipGetDeviceCount ( int* count ) { + HIP_INIT_API(count); + + if (count == nullptr) { + return hipErrorInvalidValue; + } + + // Get all available devices + *count = g_context->devices().size(); + + return hipSuccess; +} + +hipError_t hipGetDeviceFlags ( unsigned int* flags ) { + return hipErrorUnknown; +} + +hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, int device ) { + HIP_INIT_API(props, device); + + if (props == nullptr) { + return hipErrorInvalidValue; + } + + if (unsigned(device) >= g_context->devices().size()) { + return hipErrorInvalidDevice; + } + auto* deviceHandle = g_context->devices()[device]; + + hipDeviceProp_t deviceProps = {0}; + + const auto& info = deviceHandle->info(); + ::strncpy(deviceProps.name, info.boardName_, 128); + deviceProps.totalGlobalMem = info.globalMemSize_; + deviceProps.sharedMemPerBlock = info.localMemSizePerCU_; + deviceProps.regsPerBlock = info.availableSGPRs_; + deviceProps.warpSize = info.wavefrontWidth_; + deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_; + deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0]; + deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1]; + deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2]; + deviceProps.maxGridSize[0] = UINT32_MAX; + deviceProps.maxGridSize[1] = UINT32_MAX; + deviceProps.maxGridSize[2] = UINT32_MAX; + deviceProps.clockRate = info.maxEngineClockFrequency_; + deviceProps.memoryClockRate = info.maxMemoryClockFrequency_; + deviceProps.memoryBusWidth = info.globalMemChannels_ * 32; + deviceProps.totalConstMem = info.maxConstantBufferSize_; + deviceProps.major = info.gfxipVersion_ / 100; + deviceProps.minor = info.gfxipVersion_ % 100; + deviceProps.multiProcessorCount = info.maxComputeUnits_; + deviceProps.l2CacheSize = info.l2CacheSize_; + deviceProps.maxThreadsPerMultiProcessor = info.simdPerCU_; + deviceProps.computeMode = 
0; + deviceProps.clockInstructionRate = info.timeStampFrequency_; + deviceProps.arch.hasGlobalInt32Atomics = 1; + deviceProps.arch.hasGlobalFloatAtomicExch = 1; + deviceProps.arch.hasSharedInt32Atomics = 1; + deviceProps.arch.hasSharedFloatAtomicExch = 1; + deviceProps.arch.hasFloatAtomicAdd = 0; + deviceProps.arch.hasGlobalInt64Atomics = 1; + deviceProps.arch.hasSharedInt64Atomics = 1; + deviceProps.arch.hasDoubles = 1; + deviceProps.arch.hasWarpVote = 0; + deviceProps.arch.hasWarpBallot = 0; + deviceProps.arch.hasWarpShuffle = 0; + deviceProps.arch.hasFunnelShift = 0; + deviceProps.arch.hasThreadFenceSystem = 1; + deviceProps.arch.hasSyncThreadsExt = 0; + deviceProps.arch.hasSurfaceFuncs = 0; + deviceProps.arch.has3dGrid = 1; + deviceProps.arch.hasDynamicParallelism = 0; + deviceProps.concurrentKernels = 1; + deviceProps.pciDomainID = info.deviceTopology_.pcie.function; + deviceProps.pciBusID = info.deviceTopology_.pcie.bus; + deviceProps.pciDeviceID = info.deviceTopology_.pcie.device; + deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; + //deviceProps.isMultiGpuBoard = info.; + deviceProps.canMapHostMemory = 1; + deviceProps.gcnArch = info.gfxipVersion_; + + *props = deviceProps; + return hipSuccess; +} + +hipError_t hipIpcCloseMemHandle ( void* devPtr ) { + HIP_INIT_API(devPtr); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) { + HIP_INIT_API(handle, event); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipIpcGetMemHandle ( hipIpcMemHandle_t* handle, void* devPtr ) { + HIP_INIT_API(handle, devPtr); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) { + HIP_INIT_API(event, handle); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipIpcOpenMemHandle ( void** devPtr, 
hipIpcMemHandle_t handle, unsigned int flags ) { + HIP_INIT_API(devPtr, handle, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipSetDevice ( int device ) { + HIP_INIT_API(device); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipSetDeviceFlags ( unsigned int flags ) { + HIP_INIT_API(flags); + + assert(0 && "Unimplemented"); + + return hipSuccess; +} + +hipError_t hipSetValidDevices ( int* device_arr, int len ) { + HIP_INIT_API(device_arr, len); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + From a78a9b035042303f2ae279e0811e4dc335b44373 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 14 Mar 2018 19:05:10 -0400 Subject: [PATCH 009/282] P4 to Git Change 1527320 by cpaquot@cpaquot-ocl-lc-lnx on 2018/03/14 18:54:10 SWDEV-145570 - Remove all g_context from runtime device implementation Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#2 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#3 edit --- api/hip/hip_device.cpp | 82 +++++++++++++++++++++++ api/hip/hip_device_runtime.cpp | 117 +++++++-------------------------- api/hip/hip_internal.hpp | 2 + 3 files changed, 109 insertions(+), 92 deletions(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index ae9cba963d..f646933bb9 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -86,6 +86,19 @@ hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device return hipSuccess; } +hipError_t hipDeviceGetCount(int* count) { + HIP_INIT_API(count); + + if (count == nullptr) { + return hipErrorInvalidValue; + } + + // Get all available devices + *count = g_context->devices().size(); + + return hipSuccess; +} + hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { HIP_INIT_API((void*)name, len, device); @@ -106,3 +119,72 @@ hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { return hipSuccess; } + +hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) { + HIP_INIT_API(props, device); + + if (props == nullptr) { + return hipErrorInvalidValue; + } + + if (unsigned(device) >= g_context->devices().size()) { + return hipErrorInvalidDevice; + } + auto* deviceHandle = g_context->devices()[device]; + + hipDeviceProp_t deviceProps = {0}; + + const auto& info = deviceHandle->info(); + ::strncpy(deviceProps.name, info.boardName_, 128); + deviceProps.totalGlobalMem = info.globalMemSize_; + deviceProps.sharedMemPerBlock = info.localMemSizePerCU_; + deviceProps.regsPerBlock = info.availableSGPRs_; + deviceProps.warpSize = info.wavefrontWidth_; + deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_; + deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0]; + deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1]; + deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2]; + deviceProps.maxGridSize[0] = UINT32_MAX; + deviceProps.maxGridSize[1] = 
UINT32_MAX; + deviceProps.maxGridSize[2] = UINT32_MAX; + deviceProps.clockRate = info.maxEngineClockFrequency_; + deviceProps.memoryClockRate = info.maxMemoryClockFrequency_; + deviceProps.memoryBusWidth = info.globalMemChannels_ * 32; + deviceProps.totalConstMem = info.maxConstantBufferSize_; + deviceProps.major = info.gfxipVersion_ / 100; + deviceProps.minor = info.gfxipVersion_ % 100; + deviceProps.multiProcessorCount = info.maxComputeUnits_; + deviceProps.l2CacheSize = info.l2CacheSize_; + deviceProps.maxThreadsPerMultiProcessor = info.simdPerCU_; + deviceProps.computeMode = 0; + deviceProps.clockInstructionRate = info.timeStampFrequency_; + deviceProps.arch.hasGlobalInt32Atomics = 1; + deviceProps.arch.hasGlobalFloatAtomicExch = 1; + deviceProps.arch.hasSharedInt32Atomics = 1; + deviceProps.arch.hasSharedFloatAtomicExch = 1; + deviceProps.arch.hasFloatAtomicAdd = 0; + deviceProps.arch.hasGlobalInt64Atomics = 1; + deviceProps.arch.hasSharedInt64Atomics = 1; + deviceProps.arch.hasDoubles = 1; + deviceProps.arch.hasWarpVote = 0; + deviceProps.arch.hasWarpBallot = 0; + deviceProps.arch.hasWarpShuffle = 0; + deviceProps.arch.hasFunnelShift = 0; + deviceProps.arch.hasThreadFenceSystem = 1; + deviceProps.arch.hasSyncThreadsExt = 0; + deviceProps.arch.hasSurfaceFuncs = 0; + deviceProps.arch.has3dGrid = 1; + deviceProps.arch.hasDynamicParallelism = 0; + deviceProps.concurrentKernels = 1; + deviceProps.pciDomainID = info.deviceTopology_.pcie.function; + deviceProps.pciBusID = info.deviceTopology_.pcie.bus; + deviceProps.pciDeviceID = info.deviceTopology_.pcie.device; + deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; + //deviceProps.isMultiGpuBoard = info.; + deviceProps.canMapHostMemory = 1; + deviceProps.gcnArch = info.gfxipVersion_; + + *props = deviceProps; + return hipSuccess; +} + diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index b9b6ccb68e..bbd0838f76 100644 --- a/api/hip/hip_device_runtime.cpp +++ 
b/api/hip/hip_device_runtime.cpp @@ -34,8 +34,10 @@ hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { *device = 0; cl_uint maxMatchedCount = 0; + int count = 0; + hipDeviceGetCount(&count); - for (cl_uint i = 0; i< g_context->devices().size(); ++i) { + for (cl_int i = 0; i< count; ++i) { hipDeviceProp_t currentProp = {0}; cl_uint validPropCount = 0; cl_uint matchedCount = 0; @@ -250,11 +252,15 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { int pciDomainID = -1; if (sscanf (pciBusIdstr, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID) == 0x3) { - for (cl_uint i = 0; i < g_context->devices().size(); i++) { - auto* deviceHandle = g_context->devices()[i]; - auto& info = deviceHandle->info(); + int count = 0; + hipDeviceGetCount(&count); + for (cl_int i = 0; i < count; i++) { + int pi = 0; + hipDevice_t dev; + hipDeviceGet(&dev, i); + hipDeviceGetAttribute(&pi, hipDeviceAttributePciBusId, dev); - if (pciBusID == info.deviceTopology_.pcie.bus) { + if (pciBusID == pi) { *device = i; break; } @@ -280,14 +286,14 @@ hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { HIP_INIT_API(pValue, limit); - auto* deviceHandle = g_context->devices()[0]; - const auto& info = deviceHandle->info(); - if(pValue == nullptr) { return hipErrorInvalidValue; } if(limit == hipLimitMallocHeapSize) { - *pValue = info.globalMemSize_; + hipDeviceProp_t prop; + hipGetDeviceProperties(&prop, 0); + + *pValue = prop.totalGlobalMem; return hipSuccess; } else { return hipErrorUnsupportedLimit; @@ -305,7 +311,9 @@ hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { HIP_INIT_API((void*)pciBusId, len, device); - if (device < 0 || device > (cl_int)g_context->devices().size()) { + int count; + hipDeviceGetCount(&count); + if (device < 0 || device > count) { return hipErrorInvalidDevice; } @@ -313,13 +321,13 @@ hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { return 
hipErrorInvalidValue; } - auto* deviceHandle = g_context->devices()[device]; - const auto& info = deviceHandle->info(); - snprintf (pciBusId, len, "%04x:%02x:%02x.0", - info.deviceTopology_.pcie.function, - info.deviceTopology_.pcie.bus, - info.deviceTopology_.pcie.device); + hipDeviceProp_t prop; + hipGetDeviceProperties(&prop, device); + snprintf (pciBusId, len, "%04x:%02x:%02x.0", + prop.pciDomainID, + prop.pciBusID, + prop.pciDeviceID); return hipSuccess; } @@ -385,88 +393,13 @@ hipError_t hipGetDevice ( int* deviceId ) { hipError_t hipGetDeviceCount ( int* count ) { HIP_INIT_API(count); - if (count == nullptr) { - return hipErrorInvalidValue; - } - - // Get all available devices - *count = g_context->devices().size(); - - return hipSuccess; + return hipDeviceGetCount(count); } hipError_t hipGetDeviceFlags ( unsigned int* flags ) { return hipErrorUnknown; } -hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, int device ) { - HIP_INIT_API(props, device); - - if (props == nullptr) { - return hipErrorInvalidValue; - } - - if (unsigned(device) >= g_context->devices().size()) { - return hipErrorInvalidDevice; - } - auto* deviceHandle = g_context->devices()[device]; - - hipDeviceProp_t deviceProps = {0}; - - const auto& info = deviceHandle->info(); - ::strncpy(deviceProps.name, info.boardName_, 128); - deviceProps.totalGlobalMem = info.globalMemSize_; - deviceProps.sharedMemPerBlock = info.localMemSizePerCU_; - deviceProps.regsPerBlock = info.availableSGPRs_; - deviceProps.warpSize = info.wavefrontWidth_; - deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_; - deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0]; - deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1]; - deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2]; - deviceProps.maxGridSize[0] = UINT32_MAX; - deviceProps.maxGridSize[1] = UINT32_MAX; - deviceProps.maxGridSize[2] = UINT32_MAX; - deviceProps.clockRate = info.maxEngineClockFrequency_; - deviceProps.memoryClockRate = 
info.maxMemoryClockFrequency_; - deviceProps.memoryBusWidth = info.globalMemChannels_ * 32; - deviceProps.totalConstMem = info.maxConstantBufferSize_; - deviceProps.major = info.gfxipVersion_ / 100; - deviceProps.minor = info.gfxipVersion_ % 100; - deviceProps.multiProcessorCount = info.maxComputeUnits_; - deviceProps.l2CacheSize = info.l2CacheSize_; - deviceProps.maxThreadsPerMultiProcessor = info.simdPerCU_; - deviceProps.computeMode = 0; - deviceProps.clockInstructionRate = info.timeStampFrequency_; - deviceProps.arch.hasGlobalInt32Atomics = 1; - deviceProps.arch.hasGlobalFloatAtomicExch = 1; - deviceProps.arch.hasSharedInt32Atomics = 1; - deviceProps.arch.hasSharedFloatAtomicExch = 1; - deviceProps.arch.hasFloatAtomicAdd = 0; - deviceProps.arch.hasGlobalInt64Atomics = 1; - deviceProps.arch.hasSharedInt64Atomics = 1; - deviceProps.arch.hasDoubles = 1; - deviceProps.arch.hasWarpVote = 0; - deviceProps.arch.hasWarpBallot = 0; - deviceProps.arch.hasWarpShuffle = 0; - deviceProps.arch.hasFunnelShift = 0; - deviceProps.arch.hasThreadFenceSystem = 1; - deviceProps.arch.hasSyncThreadsExt = 0; - deviceProps.arch.hasSurfaceFuncs = 0; - deviceProps.arch.has3dGrid = 1; - deviceProps.arch.hasDynamicParallelism = 0; - deviceProps.concurrentKernels = 1; - deviceProps.pciDomainID = info.deviceTopology_.pcie.function; - deviceProps.pciBusID = info.deviceTopology_.pcie.bus; - deviceProps.pciDeviceID = info.deviceTopology_.pcie.device; - deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; - //deviceProps.isMultiGpuBoard = info.; - deviceProps.canMapHostMemory = 1; - deviceProps.gcnArch = info.gfxipVersion_; - - *props = deviceProps; - return hipSuccess; -} - hipError_t hipIpcCloseMemHandle ( void* devPtr ) { HIP_INIT_API(devPtr); diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index b1d906d870..159c7671ec 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -38,4 +38,6 @@ THE SOFTWARE. 
extern amd::Context* g_context; +hipError_t hipDeviceGetCount(int* count); + #endif // HIP_SRC_HIP_INTERNAL_H From 96c3777e3084041cc9afb61bcd6f1a0a5a8e8f6b Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 19 Mar 2018 13:52:17 -0400 Subject: [PATCH 010/282] P4 to Git Change 1528961 by cpaquot@cpaquot-ocl-lc-lnx on 2018/03/19 13:43:08 SWDEV-145570 - Contexts Create one amd::Context per device g_context is now thread's current context HIP doesn't want more than one context per device so we always use the primary one Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#4 edit --- api/hip/hip_context.cpp | 45 +++++++++++++++++++++++++++++++++------- api/hip/hip_device.cpp | 18 ++++++++-------- api/hip/hip_internal.hpp | 3 ++- 3 files changed, 48 insertions(+), 18 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 78e65e99c5..2a67898bf3 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -27,7 +27,8 @@ THE SOFTWARE. #include "utils/versions.hpp" -amd::Context* g_context = nullptr; +thread_local amd::Context* g_context = nullptr; +std::vector g_devices; hipError_t hipInit(unsigned int flags) { @@ -37,14 +38,18 @@ hipError_t hipInit(unsigned int flags) amd::Runtime::init(); } - // FIXME: move the global VDI context to hipInit. 
- g_context = new amd::Context( - amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false), amd::Context::Info()); - if (!g_context) return hipErrorOutOfMemory; + const std::vector& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false); - if (g_context && CL_SUCCESS != g_context->create(nullptr)) { - g_context->release(); - return hipErrorUnknown; + for (unsigned int i=0; i device(1, devices[i]); + amd::Context* context = new amd::Context(device, amd::Context::Info()); + if (!context) return hipErrorOutOfMemory; + + if (context && CL_SUCCESS != context->create(nullptr)) { + context->release(); + } else { + g_devices.push_back(context); + } } return hipSuccess; @@ -54,6 +59,30 @@ hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) { HIP_INIT_API(ctx, flags, device); + if (static_cast(device) >= g_devices.size()) { + return hipErrorInvalidValue; + } + + *ctx = reinterpret_cast(g_devices[device]); + + return hipSuccess; +} + +hipError_t hipCtxSetCurrent(hipCtx_t ctx) +{ + HIP_INIT_API(ctx); + + g_context = reinterpret_cast(ctx); + + return hipSuccess; +} + +hipError_t hipCtxGetCurrent(hipCtx_t* ctx) +{ + HIP_INIT_API(ctx); + + *ctx = reinterpret_cast(g_context); + return hipSuccess; } diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index f646933bb9..66bae6174e 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -50,7 +50,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { HIP_INIT_API(bytes, device); - if (device < 0 || device > (cl_int)g_context->devices().size()) { + if (device < 0 || static_cast(device) >= g_devices.size()) { return hipErrorInvalidDevice; } @@ -58,7 +58,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { return hipErrorInvalidValue; } - auto* deviceHandle = g_context->devices()[device]; + auto* deviceHandle = g_devices[device]->devices()[0]; const auto& info = deviceHandle->info(); *bytes = info.globalMemSize_; @@ -70,7 +70,7 @@ hipError_t 
hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device HIP_INIT_API(major, minor, device); - if (device < 0 || device > (cl_int)g_context->devices().size()) { + if (device < 0 || static_cast(device) >= g_devices.size()) { return hipErrorInvalidDevice; } @@ -78,7 +78,7 @@ hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device return hipErrorInvalidValue; } - auto* deviceHandle = g_context->devices()[device]; + auto* deviceHandle = g_devices[device]->devices()[0]; const auto& info = deviceHandle->info(); *major = info.gfxipVersion_ / 100; *minor = info.gfxipVersion_ % 100; @@ -94,7 +94,7 @@ hipError_t hipDeviceGetCount(int* count) { } // Get all available devices - *count = g_context->devices().size(); + *count = g_devices.size(); return hipSuccess; } @@ -103,7 +103,7 @@ hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { HIP_INIT_API((void*)name, len, device); - if (device < 0 || device > (cl_int)g_context->devices().size()) { + if (device < 0 || static_cast(device) >= g_devices.size()) { return hipErrorInvalidDevice; } @@ -111,7 +111,7 @@ hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { return hipErrorInvalidValue; } - auto* deviceHandle = g_context->devices()[device]; + auto* deviceHandle = g_devices[device]->devices()[0]; const auto& info = deviceHandle->info(); len = ((cl_uint)len < ::strlen(info.boardName_)) ? 
len : 128; @@ -127,10 +127,10 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) return hipErrorInvalidValue; } - if (unsigned(device) >= g_context->devices().size()) { + if (unsigned(device) >= g_devices.size()) { return hipErrorInvalidDevice; } - auto* deviceHandle = g_context->devices()[device]; + auto* deviceHandle = g_devices[device]->devices()[0]; hipDeviceProp_t deviceProps = {0}; diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 159c7671ec..ba9446f2c1 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -36,7 +36,8 @@ THE SOFTWARE. #define HIP_INIT_API(...) \ HIP_INIT() -extern amd::Context* g_context; +extern thread_local amd::Context* g_context; +extern std::vector g_devices; hipError_t hipDeviceGetCount(int* count); From 8e99c0960b63769ff49e4d28fefe09f3a6fcfd6f Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 23 Mar 2018 00:19:22 -0400 Subject: [PATCH 011/282] P4 to Git Change 1531138 by cpaquot@cpaquot-ocl-lc-lnx on 2018/03/23 00:10:40 SWDEV-145570 - [HIP] Module Check for correct device id in hipDeviceGetAttribute Implement hipModuleLoad Handle kernelParams in hipModuleLaunchKernel Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#5 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#3 edit --- api/hip/hip_device.cpp | 4 ++++ api/hip/hip_device_runtime.cpp | 17 +++++++------- api/hip/hip_internal.hpp | 2 +- api/hip/hip_module.cpp | 41 +++++++++++++++++++++++++++++----- 4 files changed, 49 insertions(+), 15 deletions(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 66bae6174e..efb77fb7b0 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -89,6 +89,10 @@ hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device hipError_t hipDeviceGetCount(int* count) { HIP_INIT_API(count); + return ihipDeviceGetCount(count); +} + +hipError_t ihipDeviceGetCount(int* count) { if (count == nullptr) { return hipErrorInvalidValue; } diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index bbd0838f76..4d8ac9cec0 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -35,7 +35,7 @@ hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { *device = 0; cl_uint maxMatchedCount = 0; int count = 0; - hipDeviceGetCount(&count); + ihipDeviceGetCount(&count); for (cl_int i = 0; i< count; ++i) { hipDeviceProp_t currentProp = {0}; @@ -146,10 +146,11 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) return hipErrorInvalidValue; } - //if (unsigned(device) >= g_context->devices().size()) { - // return hipErrorInvalidDevice; - //} - //auto* deviceHandle = g_context->devices()[device]; + int count = 0; + ihipDeviceGetCount(&count); + if (device < 0 || device >= count) { + return hipErrorInvalidDevice; + } //FIXME: should we cache the props, or just select from deviceHandle->info_? 
hipDeviceProp_t prop = {0}; @@ -253,7 +254,7 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { if (sscanf (pciBusIdstr, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID) == 0x3) { int count = 0; - hipDeviceGetCount(&count); + ihipDeviceGetCount(&count); for (cl_int i = 0; i < count; i++) { int pi = 0; hipDevice_t dev; @@ -312,7 +313,7 @@ hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { HIP_INIT_API((void*)pciBusId, len, device); int count; - hipDeviceGetCount(&count); + ihipDeviceGetCount(&count); if (device < 0 || device > count) { return hipErrorInvalidDevice; } @@ -393,7 +394,7 @@ hipError_t hipGetDevice ( int* deviceId ) { hipError_t hipGetDeviceCount ( int* count ) { HIP_INIT_API(count); - return hipDeviceGetCount(count); + return ihipDeviceGetCount(count); } hipError_t hipGetDeviceFlags ( unsigned int* flags ) { diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index ba9446f2c1..239538e613 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -39,6 +39,6 @@ THE SOFTWARE. extern thread_local amd::Context* g_context; extern std::vector g_devices; -hipError_t hipDeviceGetCount(int* count); +hipError_t ihipDeviceGetCount(int* count); #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index fd7729c6e5..6fd0cc9ac9 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -22,10 +22,13 @@ THE SOFTWARE. 
#include #include +#include #include "hip_internal.hpp" #include "platform/program.hpp" +hipError_t ihipModuleLoadData(hipModule_t *module, const void *image); + static uint64_t ElfSize(const void *emi) { const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi; @@ -51,9 +54,19 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname) { HIP_INIT_API(module, fname); - assert(0 && "Unimplemented"); + if (!fname) { + return hipErrorInvalidValue; + } - return hipErrorUnknown; + std::ifstream file{fname}; + + if (!file.is_open()) { + return hipErrorFileNotFound; + } + + std::vector tmp{std::istreambuf_iterator{file}, std::istreambuf_iterator{}}; + + return ihipModuleLoadData(module, tmp.data()); } @@ -61,15 +74,26 @@ hipError_t hipModuleUnload(hipModule_t hmod) { HIP_INIT_API(hmod); - assert(0 && "Unimplemented"); + if (hmod == nullptr) { + return hipErrorUnknown; + } - return hipErrorUnknown; + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + program->release(); + + return hipSuccess; } hipError_t hipModuleLoadData(hipModule_t *module, const void *image) { HIP_INIT_API(module, image); + return ihipModuleLoadData(module, image); +} + +hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) +{ amd::Program* program = new amd::Program(*g_context); if (program == NULL) { return hipErrorOutOfMemory; @@ -133,11 +157,16 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); amd::Command::EventWaitList waitList; - assert(!kernelParams && extra && "check this code"); const amd::KernelSignature& signature = kernel->signature(); for (size_t i = 0; i < signature.numParameters(); ++i) { const amd::KernelParameterDescriptor& desc = signature.at(i); - kernel->parameters().set(i, desc.size_, reinterpret_cast
(extra[1]) + desc.offset_); + if (kernelParams == nullptr) { + assert(extra); + kernel->parameters().set(i, desc.size_, reinterpret_cast
(extra[1]) + desc.offset_); + } else { + assert(!extra); + kernel->parameters().set(i, desc.size_, kernelParams[i]); + } } amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(*queue, waitList, *kernel, ndrange); From a2b71e69ef4cb2fdbac4ddb9ebc24816a4c0bd54 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 23 Mar 2018 14:18:27 -0400 Subject: [PATCH 012/282] P4 to Git Change 1531535 by skudchad@skudchad_rocm on 2018/03/23 13:57:49 SWDEV-145570 - [HIP] Add some context* functions. Add context stack. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#4 edit --- api/hip/hip_context.cpp | 80 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 2a67898bf3..983dc9b13c 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -21,13 +21,14 @@ THE SOFTWARE. */ #include - #include "hip_internal.hpp" #include "platform/runtime.hpp" #include "utils/versions.hpp" - +#include thread_local amd::Context* g_context = nullptr; +thread_local std::stack g_ctxtStack; + std::vector g_devices; hipError_t hipInit(unsigned int flags) @@ -65,6 +66,9 @@ hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) *ctx = reinterpret_cast(g_devices[device]); + // Increment ref count for device primary context + g_devices[device]->retain(); + return hipSuccess; } @@ -72,7 +76,17 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) { HIP_INIT_API(ctx); - g_context = reinterpret_cast(ctx); + if (ctx == nullptr) { + if(!g_ctxtStack.empty()) { + g_ctxtStack.pop(); + } + } else { + g_context = reinterpret_cast(as_amd(ctx)); + if(!g_ctxtStack.empty()) { + g_ctxtStack.pop(); + } + g_ctxtStack.push(g_context); + } return hipSuccess; } @@ -98,3 +112,63 @@ hipError_t hipRuntimeGetVersion(int *runtimeVersion) return hipSuccess; } + +hipError_t hipCtxDestroy(hipCtx_t ctx) +{ + HIP_INIT_API(ctx); + + amd::Context* amdContext = 
reinterpret_cast(as_amd(ctx)); + if (amdContext == nullptr) { + return hipErrorInvalidValue; + } + + // Need to remove the ctx of calling thread if its the top one + if (g_context == amdContext) { + g_ctxtStack.pop(); + } + + // Remove context from global context list + for (unsigned int i = 0; i < g_devices.size(); i++) { + if (g_devices[i] == amdContext) { + // Decrement ref count for device primary context + amdContext->release(); + } + } + + return hipSuccess; +} + + +hipError_t hipCtxPopCurrent(hipCtx_t* ctx) +{ + HIP_INIT_API(ctx); + + amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); + if (amdContext == nullptr) { + return hipErrorInvalidContext; + } + + if (!g_ctxtStack.empty()) { + amdContext = g_ctxtStack.top(); + g_ctxtStack.pop(); + } else { + return hipErrorInvalidContext; + } + + return hipSuccess; +} + +hipError_t hipCtxPushCurrent(hipCtx_t ctx) +{ + HIP_INIT_API(ctx); + + amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); + if (amdContext == nullptr) { + return hipErrorInvalidContext; + } + + g_context = amdContext; + g_ctxtStack.push(g_context); + + return hipSuccess; +} \ No newline at end of file From 01c8c585f4d4a94f06bee402e5aeac6ebad3d120 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 28 Mar 2018 19:23:57 -0400 Subject: [PATCH 013/282] P4 to Git Change 1534050 by lmoriche@lmoriche_opencl_dev2 on 2018/03/28 19:09:26 SWDEV-145570 - Add support for clang offload bundles Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#6 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#3 edit --- api/hip/hip_context.cpp | 18 +++++-- api/hip/hip_internal.hpp | 20 ++++--- api/hip/hip_platform.cpp | 111 +++++++++++++++++++++++++++++++-------- 3 files changed, 116 insertions(+), 33 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 983dc9b13c..9603b938b6 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -25,16 +25,16 @@ THE SOFTWARE. #include "platform/runtime.hpp" #include "utils/versions.hpp" #include +#include thread_local amd::Context* g_context = nullptr; thread_local std::stack g_ctxtStack; std::vector g_devices; +std::once_flag g_ihipInitialized; -hipError_t hipInit(unsigned int flags) +void ihipInit() { - HIP_INIT_API(flags); - if (!amd::Runtime::initialized()) { amd::Runtime::init(); } @@ -44,18 +44,26 @@ hipError_t hipInit(unsigned int flags) for (unsigned int i=0; i device(1, devices[i]); amd::Context* context = new amd::Context(device, amd::Context::Info()); - if (!context) return hipErrorOutOfMemory; + if (!context) return; if (context && CL_SUCCESS != context->create(nullptr)) { context->release(); } else { g_devices.push_back(context); + g_context = context; } } +} + + +hipError_t hipInit(unsigned int flags) +{ + HIP_INIT_API(flags); return hipSuccess; } + hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) { HIP_INIT_API(ctx, flags, device); @@ -171,4 +179,4 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) g_ctxtStack.push(g_context); return hipSuccess; -} \ No newline at end of file +} diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 239538e613..3d334fa2ac 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -25,20 +25,26 @@ THE SOFTWARE. #include "cl_common.hpp" -#define HIP_INIT()\ +#include + +#define HIP_INIT() \ + std::call_once(g_ihipInitialized, ihipInit); + + +// This macro should be called at the beginning of every HIP API. +#define HIP_INIT_API(...) 
\ + HIP_INIT(); \ + \ amd::Thread* thread = amd::Thread::current(); \ if (!CL_CHECK_THREAD(thread)) { \ return hipErrorOutOfMemory; \ } - -// This macro should be called at the beginning of every HIP API. -#define HIP_INIT_API(...) \ - HIP_INIT() - +extern std::once_flag g_ihipInitialized; extern thread_local amd::Context* g_context; extern std::vector g_devices; -hipError_t ihipDeviceGetCount(int* count); +extern hipError_t ihipDeviceGetCount(int* count); +extern void ihipInit(); #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 0cc6a3b1c2..db7939c9e7 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -44,7 +44,7 @@ struct __CudaFatBinaryHeader { unsigned long long int fatSize; }; -struct __CudaPartHeader{ +struct __CudaPartHeader { unsigned short type; unsigned short dummy1; unsigned int headerSize; @@ -54,31 +54,20 @@ struct __CudaPartHeader{ unsigned int subarch; }; -extern "C" hipModule_t __hipRegisterFatBinary(void* bundle) +static hipModule_t registerCudaFatBinary(const __CudaFatBinaryHeader* fbheader) { - if (!amd::Runtime::initialized()) { // FIXME: fix initialization - hipInit(0); - } + const __CudaPartHeader* pheader = reinterpret_cast( + reinterpret_cast(fbheader) + fbheader->headerSize); + const __CudaPartHeader* end = reinterpret_cast( + reinterpret_cast(pheader) + fbheader->fatSize); amd::Program* program = new amd::Program(*g_context); if (!program) return nullptr; - struct __CudaFatBinaryWrapper* fbwrapper = (struct __CudaFatBinaryWrapper*)bundle; - if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) { - return nullptr; - } - struct __CudaFatBinaryHeader* fbheader = (struct __CudaFatBinaryHeader*)fbwrapper->binary; - if (fbheader->magic != __cudaFatMAGIC3 || fbheader->version != 1) { - return nullptr; - } - struct __CudaPartHeader* pheader = (struct __CudaPartHeader*)( - (uintptr_t)fbheader + fbheader->headerSize); - struct __CudaPartHeader* end = (struct 
__CudaPartHeader*)( - (uintptr_t)pheader + fbheader->fatSize); - while (pheader < end) { if (true/*pheader->subarch == match a device in the context*/) { - void *image = (void*)((uintptr_t)pheader + pheader->headerSize); + const void *image = reinterpret_cast( + reinterpret_cast(pheader) + pheader->headerSize); size_t size = pheader->partSize; if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, size) || CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) { @@ -86,13 +75,83 @@ extern "C" hipModule_t __hipRegisterFatBinary(void* bundle) } break; } - pheader = (struct __CudaPartHeader*)( - (uintptr_t)pheader + pheader->headerSize + pheader->partSize); + pheader = reinterpret_cast( + reinterpret_cast(pheader) + pheader->headerSize + pheader->partSize); } return reinterpret_cast(as_cl(program)); } +#define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" +#define AMDGCN_AMDHSA_TRIPLE "openmp-amdgcn--amdhsa" + +struct __ClangOffloadBundleDesc { + uint64_t offset; + uint64_t size; + uint64_t tripleSize; + const char triple[1]; +}; + +struct __ClangOffloadBundleHeader { + const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1]; + uint64_t numBundles; + __ClangOffloadBundleDesc desc[1]; +}; + +static hipModule_t registerOffloadBundle(const __ClangOffloadBundleHeader* obheader) +{ + amd::Program* program = new amd::Program(*g_context); + if (!program) + return nullptr; + + const __ClangOffloadBundleDesc* desc = &obheader->desc[0]; + for (uint64_t i = 0; i < obheader->numBundles; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { + + std::string triple(desc->triple, sizeof(AMDGCN_AMDHSA_TRIPLE) - 1); + if (triple.compare(AMDGCN_AMDHSA_TRIPLE)) + continue; + + std::string target(desc->triple + sizeof(AMDGCN_AMDHSA_TRIPLE), + desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)); + if (target.compare(g_context->devices()[0]->info().name_)) + continue; + + const void *image = 
reinterpret_cast( + reinterpret_cast(obheader) + desc->offset); + size_t size = desc->size; + + if (CL_SUCCESS == program->addDeviceProgram(*g_context->devices()[0], image, size) && + CL_SUCCESS == program->build(g_context->devices(), nullptr, nullptr, nullptr)) + break; + } + + return reinterpret_cast(as_cl(program)); +} + + +extern "C" hipModule_t __hipRegisterFatBinary(const void* data) +{ + HIP_INIT(); + + const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); + if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) { + return nullptr; + } + const __CudaFatBinaryHeader* fbheader = reinterpret_cast(fbwrapper->binary); + if (fbheader->magic == __cudaFatMAGIC3 && fbheader->version == 1) { + return registerCudaFatBinary(fbheader); + } + + std::string magic((char*)fbwrapper->binary, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); + if (!magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { + return registerOffloadBundle(reinterpret_cast(fbwrapper->binary)); + } + + return nullptr; +} + std::map g_functions; @@ -108,6 +167,8 @@ extern "C" void __hipRegisterFunction( dim3* gridDim, int* wSize) { + HIP_INIT(); + amd::Program* program = as_amd(reinterpret_cast(module)); const amd::Symbol* symbol = program->findSymbol(deviceName); @@ -130,12 +191,14 @@ extern "C" void __hipRegisterVar( int constant, int global) { + HIP_INIT(); } extern "C" void __hipUnregisterFatBinary( hipModule_t module ) { + HIP_INIT(); } dim3 g_gridDim; // FIXME: place in execution stack @@ -149,6 +212,8 @@ extern "C" hipError_t hipConfigureCall( size_t sharedMem, hipStream_t stream) { + HIP_INIT_API(gridDim, blockDim, sharedMem, stream); + // FIXME: should push and new entry on the execution stack g_gridDim = gridDim; @@ -166,6 +231,8 @@ extern "C" hipError_t hipSetupArgument( size_t size, size_t offset) { + HIP_INIT_API(arg, size, offset); + // FIXME: should modify the top of the execution stack ::memcpy(g_arguments + offset, arg, size); @@ -174,6 +241,8 @@ extern "C" hipError_t 
hipSetupArgument( extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) { + HIP_INIT_API(hostFunction); + std::map::iterator it; if ((it = g_functions.find(hostFunction)) == g_functions.end()) return hipErrorUnknown; From dd4b3806610eab9a7b3921aede99253eb4801b62 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 30 Mar 2018 01:06:00 -0400 Subject: [PATCH 014/282] P4 to Git Change 1534798 by cpaquot@cpaquot-ocl-lc-lnx on 2018/03/30 00:56:35 SWDEV-145570 - [HIP] Implemented hipStream create/destroy Use the provided stream in hipModuleLaunchKernel Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#2 edit --- api/hip/hip_module.cpp | 7 ++----- api/hip/hip_stream.cpp | 40 ++++++++++++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 6fd0cc9ac9..0bcae3551b 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -144,9 +144,8 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, amd::Kernel* kernel = as_amd(reinterpret_cast(f)); amd::Device* device = g_context->devices()[0]; - amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); + amd::HostQueue* queue = as_amd(reinterpret_cast(hStream))->asHostQueue(); + if (!queue) { return hipErrorOutOfMemory; } @@ -184,8 +183,6 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, command->awaitCompletion(); command->release(); - queue->release(); - return hipSuccess; } diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index efecb5174d..46014ebfef 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -24,13 +24,30 @@ THE SOFTWARE. 
#include "hip_internal.hpp" +static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) +{ + assert(flags == 0); // we don't handle flags yet + + amd::Device* device = g_context->devices()[0]; + + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + + if (queue == nullptr) { + return hipErrorOutOfMemory; + } + + *stream = reinterpret_cast(as_cl(queue)); + + return hipSuccess; +} + hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { HIP_INIT_API(stream, flags); - assert(0 && "Unimplemented"); - - return hipErrorUnknown; + return ihipStreamCreateWithFlags(stream, flags); } @@ -38,9 +55,7 @@ hipError_t hipStreamCreate(hipStream_t *stream) { HIP_INIT_API(stream); - assert(0 && "Unimplemented"); - - return hipErrorUnknown; + return ihipStreamCreateWithFlags(stream, hipStreamDefault); } @@ -58,9 +73,14 @@ hipError_t hipStreamSynchronize(hipStream_t stream) { HIP_INIT_API(stream); - assert(0 && "Unimplemented"); + amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); + if (hostQueue == nullptr) { + return hipErrorUnknown; + } - return hipErrorUnknown; + hostQueue->finish(); + + return hipSuccess; } @@ -68,9 +88,9 @@ hipError_t hipStreamDestroy(hipStream_t stream) { HIP_INIT_API(stream); - assert(0 && "Unimplemented"); + as_amd(reinterpret_cast(stream))->release(); - return hipErrorUnknown; + return hipSuccess; } From 4574e017404668dc524fcd7477127d9c8ecec85f Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 4 Apr 2018 13:24:15 -0400 Subject: [PATCH 015/282] P4 to Git Change 1536698 by skudchad@skudchad_test2_win_opencl on 2018/04/04 13:18:19 SWDEV-145570 - [HIP] - Add HIP Memory api skeletons ReviewBoardURL = http://ocltc.amd.com/reviews/r/14555/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#4 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#6 edit --- api/hip/hip_device_runtime.cpp | 24 --- api/hip/hip_memory.cpp | 371 +++++++++++++++++++++++++++++++-- 2 files changed, 357 insertions(+), 38 deletions(-) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index 4d8ac9cec0..7c5c063ea6 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -401,14 +401,6 @@ hipError_t hipGetDeviceFlags ( unsigned int* flags ) { return hipErrorUnknown; } -hipError_t hipIpcCloseMemHandle ( void* devPtr ) { - HIP_INIT_API(devPtr); - - assert(0 && "Unimplemented"); - - return hipErrorUnknown; -} - hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) { HIP_INIT_API(handle, event); @@ -417,14 +409,6 @@ hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event return hipErrorUnknown; } -hipError_t hipIpcGetMemHandle ( hipIpcMemHandle_t* handle, void* devPtr ) { - HIP_INIT_API(handle, devPtr); - - assert(0 && "Unimplemented"); - - return hipErrorUnknown; -} - hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) { HIP_INIT_API(event, handle); @@ -433,14 +417,6 @@ hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle return hipErrorUnknown; } -hipError_t hipIpcOpenMemHandle ( void** devPtr, hipIpcMemHandle_t handle, unsigned int flags ) { - HIP_INIT_API(devPtr, handle, flags); - - assert(0 && "Unimplemented"); - - return hipErrorUnknown; -} - hipError_t hipSetDevice ( int device ) { HIP_INIT_API(device); diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index ddebc8623c..1fc000bf1d 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -91,20 +91,6 @@ hipError_t hipFree(void* ptr) return hipSuccess; } -hipError_t hipMemcpyAsync(void* dst, - const void* src, - size_t sizeBytes, - hipMemcpyKind kind, - hipStream_t stream) -{ - HIP_INIT_API(dst, src, sizeBytes, kind, stream); - - assert(0 && 
"Unimplemented"); - - return hipErrorUnknown; -} - - hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { HIP_INIT_API(dst, src, sizeBytes, kind); @@ -182,3 +168,360 @@ hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) return hipErrorUnknown; } + +hipError_t hipHostFree(void* ptr) +{ + HIP_INIT_API(ptr); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipFreeArray(hipArray* array) +{ + HIP_INIT_API(array); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) +{ + HIP_INIT_API(pbase, psize, dptr); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemGetInfo(size_t* free, size_t* total) +{ + HIP_INIT_API(free, total); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) +{ + HIP_INIT_API(ptr, pitch, width, height); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) +{ + HIP_INIT_API(pitchedDevPtr, &extent); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) +{ + HIP_INIT_API(array, pAllocateArray); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, + size_t width, size_t height, unsigned int flags) +{ + HIP_INIT_API(array, desc, width, height, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc* desc, + struct hipExtent extent, unsigned int flags) +{ + HIP_INIT_API(array, desc, &extent, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t 
hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) +{ + HIP_INIT_API(flagsPtr, hostPtr); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) +{ + HIP_INIT_API(hostPtr, sizeBytes, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipHostUnregister(void* hostPtr) +{ + HIP_INIT_API(hostPtr); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t count, + size_t offset, hipMemcpyKind kind) +{ + HIP_INIT_API(symbolName, src, count, offset, kind); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, + size_t offset, hipMemcpyKind kind) +{ + HIP_INIT_API(symbolName, dst, count, offset, kind); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t count, + size_t offset, hipMemcpyKind kind, hipStream_t stream) +{ + HIP_INIT_API(symbolName, src, count, offset, kind, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t count, + size_t offset, hipMemcpyKind kind, hipStream_t stream) +{ + HIP_INIT_API(symbolName, dst, count, offset, kind, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) +{ + HIP_INIT_API(dst, src, sizeBytes); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) +{ + HIP_INIT_API(dst, src, sizeBytes); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) +{ + 
HIP_INIT_API(dst, src, sizeBytes); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) +{ + HIP_INIT_API(dst, src, sizeBytes); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream) +{ + HIP_INIT_API(dst, src, sizeBytes, kind, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + +hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, + hipStream_t stream) +{ + HIP_INIT_API(dst, src, sizeBytes, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, + hipStream_t stream) +{ + HIP_INIT_API(dst, src, sizeBytes, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, + hipStream_t stream) +{ + HIP_INIT_API(dst, src, sizeBytes, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind) +{ + HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) +{ + HIP_INIT_API(pCopy); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream) +{ + HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, 
size_t width, size_t height, hipMemcpyKind kind) +{ + HIP_INIT_API(dst, wOffset, hOffset, src, spitch, width, height, kind); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, + size_t count, hipMemcpyKind kind) +{ + HIP_INIT_API(dst, wOffset, hOffset, src, count, kind); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, + size_t count, hipMemcpyKind kind) +{ + HIP_INIT_API(dst, srcArray, wOffset, hOffset, count, kind); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) +{ + HIP_INIT_API(dstArray, dstOffset, srcHost, count); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count) +{ + HIP_INIT_API(dst, srcArray, srcOffset, count); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) +{ + HIP_INIT_API(p); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) +{ + HIP_INIT_API(dst, pitch, value, width, height); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) +{ + HIP_INIT_API(dst, value, sizeBytes); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) +{ + HIP_INIT_API(handle, devPtr); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags) +{ + HIP_INIT_API(devPtr, &handle, 
flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipIpcCloseMemHandle(void* devPtr) { + HIP_INIT_API(devPtr); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} \ No newline at end of file From 15f11c983f38805efe7414e1e18c83b6337d453f Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 4 Apr 2018 18:00:17 -0400 Subject: [PATCH 016/282] P4 to Git Change 1536925 by vsytchen@vsytchen-ocl-win10 on 2018/04/04 17:20:38 SWDEV-79445 - OCL generic changes and code clean-up 1. This change replaces the use of std::map with std::unordered_map to improve lookup/insert time. 2. Replace the use of std::make_pair and std::pair constructor with uniform initialization for cleaner code. 3. Replace the use of std::Container::iterator type with the auto keyword for cleaner code. 4. Use range based for loops where needed. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14517/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#58 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10_amd.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11_amd.hpp#13 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#34 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9_amd.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#57 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#46 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#14 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#72 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#216 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#297 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#59 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#158 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#587 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#322 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#46 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#237 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#70 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#242 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#415 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#143 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#79 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#59 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#60 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#84 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#46 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/CMakeLists.txt#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.cpp#4 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccounters.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#81 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#81 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#89 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#49 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#129 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#102 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#91 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#43 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#9 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#17 edit --- api/hip/hip_platform.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index db7939c9e7..f1c33dabde 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -243,8 +243,8 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) { HIP_INIT_API(hostFunction); - std::map::iterator it; - if ((it = g_functions.find(hostFunction)) == g_functions.end()) + const auto it = g_functions.find(hostFunction); + if (it == g_functions.cend()) return hipErrorUnknown; // FIXME: should pop an entry from the execution stack From 204ecba4b686b7491f060d2444beef18c6c84478 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 5 Apr 2018 15:02:43 -0400 Subject: [PATCH 017/282] P4 to Git Change 1537228 by skudchad@skudchad_test2_win_opencl on 2018/04/05 14:53:31 SWDEV-145570 - [HIP] - Add HIP Memory, texture, surface, context api skeletons ReviewBoardURL = http://ocltc.amd.com/reviews/r/14565/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_surface.cpp#1 add ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#1 add --- api/hip/hip_context.cpp | 126 ++++++++++++++++++++----- api/hip/hip_device.cpp | 3 +- api/hip/hip_event.cpp | 36 ++++--- api/hip/hip_memory.cpp | 172 +++++++++++++++++----------------- api/hip/hip_stream.cpp | 44 ++++++--- api/hip/hip_surface.cpp | 49 ++++++++++ api/hip/hip_texture.cpp | 202 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 496 insertions(+), 136 deletions(-) create mode 100644 api/hip/hip_surface.cpp create mode 100644 api/hip/hip_texture.cpp diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 9603b938b6..9bfe4ad45d 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -33,8 +33,7 @@ thread_local std::stack g_ctxtStack; std::vector g_devices; std::once_flag g_ihipInitialized; -void ihipInit() -{ +void ihipInit() { if (!amd::Runtime::initialized()) { amd::Runtime::init(); } @@ -55,17 +54,13 @@ void ihipInit() } } - -hipError_t hipInit(unsigned int flags) -{ +hipError_t hipInit(unsigned int flags) { HIP_INIT_API(flags); return hipSuccess; } - -hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) -{ +hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) { HIP_INIT_API(ctx, flags, device); if (static_cast(device) >= g_devices.size()) { @@ -80,8 +75,7 @@ hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) return hipSuccess; } -hipError_t hipCtxSetCurrent(hipCtx_t ctx) -{ +hipError_t hipCtxSetCurrent(hipCtx_t ctx) { HIP_INIT_API(ctx); if (ctx == nullptr) { @@ -99,8 +93,7 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) return hipSuccess; } -hipError_t hipCtxGetCurrent(hipCtx_t* ctx) -{ +hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { HIP_INIT_API(ctx); *ctx = reinterpret_cast(g_context); @@ -108,8 +101,7 @@ hipError_t hipCtxGetCurrent(hipCtx_t* ctx) return hipSuccess; } -hipError_t hipRuntimeGetVersion(int *runtimeVersion) -{ +hipError_t hipRuntimeGetVersion(int 
*runtimeVersion) { HIP_INIT_API(runtimeVersion); if (!runtimeVersion) { @@ -121,8 +113,7 @@ hipError_t hipRuntimeGetVersion(int *runtimeVersion) return hipSuccess; } -hipError_t hipCtxDestroy(hipCtx_t ctx) -{ +hipError_t hipCtxDestroy(hipCtx_t ctx) { HIP_INIT_API(ctx); amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); @@ -146,9 +137,7 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) return hipSuccess; } - -hipError_t hipCtxPopCurrent(hipCtx_t* ctx) -{ +hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { HIP_INIT_API(ctx); amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); @@ -166,8 +155,7 @@ hipError_t hipCtxPopCurrent(hipCtx_t* ctx) return hipSuccess; } -hipError_t hipCtxPushCurrent(hipCtx_t ctx) -{ +hipError_t hipCtxPushCurrent(hipCtx_t ctx) { HIP_INIT_API(ctx); amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); @@ -180,3 +168,99 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) return hipSuccess; } + +hipError_t hipCtxGetDevice(hipDevice_t* device) { + HIP_INIT_API(device); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { + HIP_INIT_API(apiVersion); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) { + HIP_INIT_API(cacheConfig); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) { + HIP_INIT_API(cacheConfig); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { + HIP_INIT_API(config); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipCtxSynchronize(void) { + HIP_INIT_API(1); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipCtxGetFlags(unsigned int* flags) { + HIP_INIT_API(flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t 
hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) { + HIP_INIT_API(dev, flags, active); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { + HIP_INIT_API(dev); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { + HIP_INIT_API(pctx, dev); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { + HIP_INIT_API(dev); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { + HIP_INIT_API(dev, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} \ No newline at end of file diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index efb77fb7b0..d067cf19a8 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -24,8 +24,7 @@ THE SOFTWARE. #include "hip_internal.hpp" -hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) -{ +hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { HIP_INIT_API(device, deviceId); if (device != nullptr) { diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 117b28355e..1fe7be9e2f 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -24,44 +24,56 @@ THE SOFTWARE. 
#include "hip_internal.hpp" -hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) -{ +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { HIP_INIT_API(event, flags); + assert(0 && "Unimplemented"); + return hipErrorUnknown; } -hipError_t hipEventCreate(hipEvent_t* event) -{ +hipError_t hipEventCreate(hipEvent_t* event) { HIP_INIT_API(event); + assert(0 && "Unimplemented"); + return hipErrorUnknown; } -hipError_t hipEventDestroy(hipEvent_t event) -{ +hipError_t hipEventDestroy(hipEvent_t event) { HIP_INIT_API(event); + assert(0 && "Unimplemented"); + return hipErrorUnknown; } -hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) -{ +hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { HIP_INIT_API(ms, start, stop); return hipErrorUnknown; } -hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) -{ +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { HIP_INIT_API(event, stream); + assert(0 && "Unimplemented"); + return hipErrorUnknown; } -hipError_t hipEventSynchronize(hipEvent_t event) -{ +hipError_t hipEventSynchronize(hipEvent_t event) { HIP_INIT_API(event); + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipEventQuery(hipEvent_t event) { + HIP_INIT_API(event); + + assert(0 && "Unimplemented"); + return hipErrorUnknown; } diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 1fc000bf1d..f2dfcf4b4a 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -24,8 +24,7 @@ THE SOFTWARE. 
#include "hip_internal.hpp" -hipError_t hipMalloc(void** ptr, size_t sizeBytes) -{ +hipError_t hipMalloc(void** ptr, size_t sizeBytes) { HIP_INIT_API(ptr, sizeBytes); if (sizeBytes == 0) { @@ -54,8 +53,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) return hipSuccess; } -hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) -{ +hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(ptr, sizeBytes, flags); if (sizeBytes == 0) { @@ -78,8 +76,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) return hipSuccess; } -hipError_t hipFree(void* ptr) -{ +hipError_t hipFree(void* ptr) { if (amd::SvmBuffer::malloced(ptr)) { amd::SvmBuffer::free(*g_context, ptr); return hipSuccess; @@ -91,8 +88,7 @@ hipError_t hipFree(void* ptr) return hipSuccess; } -hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) -{ +hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { HIP_INIT_API(dst, src, sizeBytes, kind); amd::Device* device = g_context->devices()[0]; @@ -142,8 +138,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind return hipSuccess; } -hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream ) -{ +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, value, sizeBytes, stream); assert(0 && "Unimplemented"); @@ -151,8 +146,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s return hipErrorUnknown; } -hipError_t hipMemset(void* dst, int value, size_t sizeBytes) -{ +hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); assert(0 && "Unimplemented"); @@ -160,8 +154,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) return hipErrorUnknown; } -hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) -{ +hipError_t 
hipMemPtrGetInfo(void *ptr, size_t *size) { HIP_INIT_API(ptr, size); assert(0 && "Unimplemented"); @@ -169,8 +162,7 @@ hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) return hipErrorUnknown; } -hipError_t hipHostFree(void* ptr) -{ +hipError_t hipHostFree(void* ptr) { HIP_INIT_API(ptr); assert(0 && "Unimplemented"); @@ -178,8 +170,7 @@ hipError_t hipHostFree(void* ptr) return hipErrorUnknown; } -hipError_t hipFreeArray(hipArray* array) -{ +hipError_t hipFreeArray(hipArray* array) { HIP_INIT_API(array); assert(0 && "Unimplemented"); @@ -187,8 +178,7 @@ hipError_t hipFreeArray(hipArray* array) return hipErrorUnknown; } -hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) -{ +hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) { HIP_INIT_API(pbase, psize, dptr); assert(0 && "Unimplemented"); @@ -196,8 +186,7 @@ hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDevice return hipErrorUnknown; } -hipError_t hipMemGetInfo(size_t* free, size_t* total) -{ +hipError_t hipMemGetInfo(size_t* free, size_t* total) { HIP_INIT_API(free, total); assert(0 && "Unimplemented"); @@ -205,8 +194,7 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) return hipErrorUnknown; } -hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) -{ +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { HIP_INIT_API(ptr, pitch, width, height); assert(0 && "Unimplemented"); @@ -214,8 +202,7 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height return hipErrorUnknown; } -hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) -{ +hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { HIP_INIT_API(pitchedDevPtr, &extent); assert(0 && "Unimplemented"); @@ -223,8 +210,7 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) return hipErrorUnknown; } -hipError_t 
hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) -{ +hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { HIP_INIT_API(array, pAllocateArray); assert(0 && "Unimplemented"); @@ -233,8 +219,7 @@ hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocat } hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, - size_t width, size_t height, unsigned int flags) -{ + size_t width, size_t height, unsigned int flags) { HIP_INIT_API(array, desc, width, height, flags); assert(0 && "Unimplemented"); @@ -243,8 +228,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, } hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc* desc, - struct hipExtent extent, unsigned int flags) -{ + struct hipExtent extent, unsigned int flags) { HIP_INIT_API(array, desc, &extent, flags); assert(0 && "Unimplemented"); @@ -252,8 +236,7 @@ hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc return hipErrorUnknown; } -hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) -{ +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { HIP_INIT_API(flagsPtr, hostPtr); assert(0 && "Unimplemented"); @@ -261,8 +244,7 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) return hipErrorUnknown; } -hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) -{ +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hostPtr, sizeBytes, flags); assert(0 && "Unimplemented"); @@ -270,8 +252,7 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) return hipErrorUnknown; } -hipError_t hipHostUnregister(void* hostPtr) -{ +hipError_t hipHostUnregister(void* hostPtr) { HIP_INIT_API(hostPtr); assert(0 && "Unimplemented"); @@ -280,8 +261,7 @@ hipError_t hipHostUnregister(void* hostPtr) } hipError_t 
hipMemcpyToSymbol(const void* symbolName, const void* src, size_t count, - size_t offset, hipMemcpyKind kind) -{ + size_t offset, hipMemcpyKind kind) { HIP_INIT_API(symbolName, src, count, offset, kind); assert(0 && "Unimplemented"); @@ -290,8 +270,7 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou } hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, - size_t offset, hipMemcpyKind kind) -{ + size_t offset, hipMemcpyKind kind) { HIP_INIT_API(symbolName, dst, count, offset, kind); assert(0 && "Unimplemented"); @@ -300,8 +279,7 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, } hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t count, - size_t offset, hipMemcpyKind kind, hipStream_t stream) -{ + size_t offset, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(symbolName, src, count, offset, kind, stream); assert(0 && "Unimplemented"); @@ -310,8 +288,7 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_ } hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t count, - size_t offset, hipMemcpyKind kind, hipStream_t stream) -{ + size_t offset, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(symbolName, dst, count, offset, kind, stream); assert(0 && "Unimplemented"); @@ -319,8 +296,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co return hipErrorUnknown; } -hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) -{ +hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { HIP_INIT_API(dst, src, sizeBytes); assert(0 && "Unimplemented"); @@ -328,8 +304,7 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) return hipErrorUnknown; } -hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) -{ +hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { 
HIP_INIT_API(dst, src, sizeBytes); assert(0 && "Unimplemented"); @@ -337,8 +312,7 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) return hipErrorUnknown; } -hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) -{ +hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) { HIP_INIT_API(dst, src, sizeBytes); assert(0 && "Unimplemented"); @@ -346,8 +320,7 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte return hipErrorUnknown; } -hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) -{ +hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { HIP_INIT_API(dst, src, sizeBytes); assert(0 && "Unimplemented"); @@ -356,8 +329,7 @@ hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) } hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, - hipMemcpyKind kind, hipStream_t stream) -{ + hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, kind, stream); assert(0 && "Unimplemented"); @@ -367,8 +339,7 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, - hipStream_t stream) -{ + hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); assert(0 && "Unimplemented"); @@ -377,8 +348,7 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, } hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, - hipStream_t stream) -{ + hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); assert(0 && "Unimplemented"); @@ -387,8 +357,7 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz } hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, - hipStream_t stream) -{ + hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); assert(0 && "Unimplemented"); @@ -397,8 
+366,7 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, } hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind) -{ + size_t height, hipMemcpyKind kind) { HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind); assert(0 && "Unimplemented"); @@ -406,8 +374,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, return hipErrorUnknown; } -hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) -{ +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { HIP_INIT_API(pCopy); assert(0 && "Unimplemented"); @@ -416,8 +383,7 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) } hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind, hipStream_t stream) -{ + size_t height, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind, stream); assert(0 && "Unimplemented"); @@ -426,8 +392,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp } hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, - size_t spitch, size_t width, size_t height, hipMemcpyKind kind) -{ + size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { HIP_INIT_API(dst, wOffset, hOffset, src, spitch, width, height, kind); assert(0 && "Unimplemented"); @@ -436,8 +401,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con } hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, - size_t count, hipMemcpyKind kind) -{ + size_t count, hipMemcpyKind kind) { HIP_INIT_API(dst, wOffset, hOffset, src, count, kind); assert(0 && "Unimplemented"); @@ -446,8 +410,7 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const } hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t 
srcArray, size_t wOffset, size_t hOffset, - size_t count, hipMemcpyKind kind) -{ + size_t count, hipMemcpyKind kind) { HIP_INIT_API(dst, srcArray, wOffset, hOffset, count, kind); assert(0 && "Unimplemented"); @@ -455,8 +418,7 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs return hipErrorUnknown; } -hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) -{ +hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) { HIP_INIT_API(dstArray, dstOffset, srcHost, count); assert(0 && "Unimplemented"); @@ -464,8 +426,7 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo return hipErrorUnknown; } -hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count) -{ +hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count) { HIP_INIT_API(dst, srcArray, srcOffset, count); assert(0 && "Unimplemented"); @@ -473,8 +434,7 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t return hipErrorUnknown; } -hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) -{ +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { HIP_INIT_API(p); assert(0 && "Unimplemented"); @@ -482,8 +442,7 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) return hipErrorUnknown; } -hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) -{ +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { HIP_INIT_API(dst, pitch, value, width, height); assert(0 && "Unimplemented"); @@ -491,8 +450,7 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t return hipErrorUnknown; } -hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) -{ +hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); 
assert(0 && "Unimplemented"); @@ -500,8 +458,7 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes return hipErrorUnknown; } -hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) -{ +hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) { HIP_INIT_API(handle, devPtr); assert(0 && "Unimplemented"); @@ -509,8 +466,7 @@ hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) return hipErrorUnknown; } -hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags) -{ +hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags) { HIP_INIT_API(devPtr, &handle, flags); assert(0 && "Unimplemented"); @@ -524,4 +480,42 @@ hipError_t hipIpcCloseMemHandle(void* devPtr) { assert(0 && "Unimplemented"); return hipErrorUnknown; -} \ No newline at end of file +} + +hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx, + size_t sizeBytes) { + HIP_INIT_API(dst, dstCtx, src, srcCtx, sizeBytes); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + +hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice, + size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, + size_t sizeBytes) { + HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, + size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 46014ebfef..6d0da6adfc 
100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -24,8 +24,7 @@ THE SOFTWARE. #include "hip_internal.hpp" -static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) -{ +static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { assert(flags == 0); // we don't handle flags yet amd::Device* device = g_context->devices()[0]; @@ -43,24 +42,21 @@ static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int fl return hipSuccess; } -hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) -{ +hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { HIP_INIT_API(stream, flags); return ihipStreamCreateWithFlags(stream, flags); } -hipError_t hipStreamCreate(hipStream_t *stream) -{ +hipError_t hipStreamCreate(hipStream_t *stream) { HIP_INIT_API(stream); return ihipStreamCreateWithFlags(stream, hipStreamDefault); } -hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) -{ +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { HIP_INIT_API(stream, flags); assert(0 && "Unimplemented"); @@ -69,8 +65,7 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) } -hipError_t hipStreamSynchronize(hipStream_t stream) -{ +hipError_t hipStreamSynchronize(hipStream_t stream) { HIP_INIT_API(stream); amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); @@ -84,8 +79,7 @@ hipError_t hipStreamSynchronize(hipStream_t stream) } -hipError_t hipStreamDestroy(hipStream_t stream) -{ +hipError_t hipStreamDestroy(hipStream_t stream) { HIP_INIT_API(stream); as_amd(reinterpret_cast(stream))->release(); @@ -94,3 +88,29 @@ hipError_t hipStreamDestroy(hipStream_t stream) } +hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { + HIP_INIT_API(stream, event, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipStreamQuery(hipStream_t 
stream) { + HIP_INIT_API(stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + unsigned int flags) { + HIP_INIT_API(stream, callback, userData, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + diff --git a/api/hip/hip_surface.cpp b/api/hip/hip_surface.cpp new file mode 100644 index 0000000000..ecbd9e60b9 --- /dev/null +++ b/api/hip/hip_surface.cpp @@ -0,0 +1,49 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "hip_internal.hpp" +#include + +struct hipSurface { + hipArray* array; + hipResourceDesc resDesc; +}; + +hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, + const hipResourceDesc* pResDesc) { + HIP_INIT_API(pSurfObject, pResDesc); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + +hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { + HIP_INIT_API(surfaceObject); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} \ No newline at end of file diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp new file mode 100644 index 0000000000..330f86a86f --- /dev/null +++ b/api/hip/hip_texture.cpp @@ -0,0 +1,202 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include "hip_internal.hpp" + +struct hipTexture { + hipResourceDesc resDesc; + hipTextureDesc texDesc; + hipResourceViewDesc resViewDesc; + hsa_ext_image_t image; + hsa_ext_sampler_t sampler; +}; + +hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) { + HIP_INIT_API(pTexObject, pResDesc, pTexDesc, pResViewDesc); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { + HIP_INIT_API(textureObject); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + hipTextureObject_t textureObject) { + HIP_INIT_API(pResDesc, textureObject); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, + hipTextureObject_t textureObject) { + HIP_INIT_API(pResViewDesc, textureObject); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, + hipTextureObject_t textureObject) { + HIP_INIT_API(pTexDesc, textureObject); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t size) { + HIP_INIT_API(offset, tex, devPtr, desc, size); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t width, size_t height, + size_t pitch) { + HIP_INIT_API(offset, tex, devPtr, desc, width, height, pitch); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t 
array, + const hipChannelFormatDesc* desc) { + HIP_INIT_API(tex, array, desc); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipBindTextureToMipmappedArray(textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc) { + HIP_INIT_API(tex, mipmappedArray, desc); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipUnbindTexture(const textureReference* tex) { + HIP_INIT_API(tex); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) { + HIP_INIT_API(desc, array); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex) { + HIP_INIT_API(offset, tex); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipGetTextureReference(const textureReference** tex, const void* symbol) { + HIP_INIT_API(tex, symbol); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents) { + HIP_INIT_API(tex, fmt, NumPackedComponents); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags) { + HIP_INIT_API(tex, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm) { + HIP_INIT_API(tex, fm); +} + +hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am) { + HIP_INIT_API(tex, dim, am); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags) { + HIP_INIT_API(tex, array, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + 
+hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr, + size_t size) { + HIP_INIT_API(offset, tex, devPtr, size); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t devPtr, size_t pitch) { + HIP_INIT_API(tex, desc, devPtr, pitch); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} \ No newline at end of file From ce88da9c1b7ec98c74aa4a68a1a661f4674f734b Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 5 Apr 2018 15:12:53 -0400 Subject: [PATCH 018/282] P4 to Git Change 1537232 by skudchad@skudchad_rocm on 2018/04/05 15:00:24 SWDEV-145570 - [HIP] - Fix typo and fix build. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#2 edit --- api/hip/hip_texture.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 330f86a86f..848ec1b110 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -24,13 +24,6 @@ THE SOFTWARE. 
#include #include "hip_internal.hpp" -struct hipTexture { - hipResourceDesc resDesc; - hipTextureDesc texDesc; - hipResourceViewDesc resViewDesc; - hsa_ext_image_t image; - hsa_ext_sampler_t sampler; -}; hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, const hipTextureDesc* pTexDesc, @@ -164,7 +157,11 @@ hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags) { } hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm) { - HIP_INIT_API(tex, fm); + HIP_INIT_API(tex, fm); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; } hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am) { From e7f206d249c6b040027397f97bd3d09af21b432f Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 6 Apr 2018 18:08:18 -0400 Subject: [PATCH 019/282] P4 to Git Change 1537729 by cpaquot@cpaquot-ocl-lc-lnx on 2018/04/06 17:59:29 SWDEV-145570 - [HIP] Use Svm path for both hipMalloc and hipHostMalloc Make sure hipMemCpy uses SvmBuffer to fetch the cl_mem Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#8 edit --- api/hip/hip_memory.cpp | 61 +++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 40 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index f2dfcf4b4a..ef40bdbdf6 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -24,9 +24,8 @@ THE SOFTWARE. 
#include "hip_internal.hpp" -hipError_t hipMalloc(void** ptr, size_t sizeBytes) { - HIP_INIT_API(ptr, sizeBytes); - +hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) +{ if (sizeBytes == 0) { *ptr = nullptr; return hipSuccess; @@ -39,36 +38,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) { return hipErrorOutOfMemory; } - amd::Memory* mem = new (*g_context) amd::Buffer(*g_context, 0, sizeBytes); - if (!mem) { - return hipErrorOutOfMemory; - } - - if (!mem->create(nullptr)) { - return hipErrorMemoryAllocation; - } - - *ptr = reinterpret_cast(as_cl(mem)); - - return hipSuccess; -} - -hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { - HIP_INIT_API(ptr, sizeBytes, flags); - - if (sizeBytes == 0) { - *ptr = nullptr; - return hipSuccess; - } - else if (!ptr) { - return hipErrorInvalidValue; - } - - if (g_context->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { - return hipErrorOutOfMemory; - } - - *ptr = amd::SvmBuffer::malloc(*g_context, 0, sizeBytes, g_context->devices()[0]->info().memBaseAddrAlign_); + *ptr = amd::SvmBuffer::malloc(*g_context, flags, sizeBytes, g_context->devices()[0]->info().memBaseAddrAlign_); if (!*ptr) { return hipErrorOutOfMemory; } @@ -76,16 +46,24 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { return hipSuccess; } +hipError_t hipMalloc(void** ptr, size_t sizeBytes) { + HIP_INIT_API(ptr, sizeBytes); + + return ihipMalloc(ptr, sizeBytes, 0); +} + +hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(ptr, sizeBytes, flags); + + return ihipMalloc(ptr, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER); +} + hipError_t hipFree(void* ptr) { if (amd::SvmBuffer::malloced(ptr)) { amd::SvmBuffer::free(*g_context, ptr); return hipSuccess; } - if (!is_valid(reinterpret_cast(ptr))) { - return hipErrorInvalidValue; - } - as_amd(reinterpret_cast(ptr))->release(); - return hipSuccess; + return hipErrorInvalidValue; } hipError_t 
hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { @@ -102,15 +80,18 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind amd::Command* command; amd::Command::EventWaitList waitList; + amd::Memory* memory; switch (kind) { case hipMemcpyDeviceToHost: + memory = amd::SvmManager::FindSvmBuffer(src); command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, - *as_amd(reinterpret_cast(const_cast(src)))->asBuffer(), 0, sizeBytes, dst); + *memory->asBuffer(), 0, sizeBytes, dst); break; case hipMemcpyHostToDevice: + memory = amd::SvmManager::FindSvmBuffer(dst); command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, - *as_amd(reinterpret_cast(dst))->asBuffer(), 0, sizeBytes, src); + *memory->asBuffer(), 0, sizeBytes, src); break; default: assert(!"Shouldn't reach here"); From 04decb72fcad4002caf80e6fee3236c56c5f2a44 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 10 Apr 2018 17:41:24 -0400 Subject: [PATCH 020/282] P4 to Git Change 1539198 by skudchad@skudchad_test2_win_opencl on 2018/04/10 17:32:14 SWDEV-145570 - [HIP] - Add HIP API skeletons for Peer and memory ReviewBoardURL = http://ocltc.amd.com/reviews/r/14596/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#9 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_peer.cpp#1 add --- api/hip/hip_context.cpp | 16 ++++++ api/hip/hip_memory.cpp | 40 ++++++--------- api/hip/hip_peer.cpp | 109 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+), 26 deletions(-) create mode 100644 api/hip/hip_peer.cpp diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 9bfe4ad45d..2e189f1351 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -169,6 +169,22 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) { return hipSuccess; } +hipError_t hipDriverGetVersion(int* driverVersion) { + HIP_INIT_API(driverVersion); + + auto* deviceHandle = g_devices[0]->devices()[0]; + const auto& info = deviceHandle->info(); + + if (driverVersion) { + *driverVersion = AMD_PLATFORM_BUILD_NUMBER * 100 + + AMD_PLATFORM_REVISION_NUMBER; + } else { + return hipErrorInvalidValue; + } + + return hipSuccess;; +} + hipError_t hipCtxGetDevice(hipDevice_t* device) { HIP_INIT_API(device); diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index ef40bdbdf6..8cb51be08d 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -463,40 +463,28 @@ hipError_t hipIpcCloseMemHandle(void* devPtr) { return hipErrorUnknown; } -hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx, - size_t sizeBytes) { - HIP_INIT_API(dst, dstCtx, src, srcCtx, sizeBytes); +hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { + hipChannelFormatDesc cd; + cd.x = x; + cd.y = y; + cd.z = z; + cd.w = w; + cd.f = f; + return cd; +} + +hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsigned flags) { + HIP_INIT_API(devicePointer, hostPointer, flags); assert(0 && "Unimplemented"); return hipErrorUnknown; } - -hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice, - size_t sizeBytes, hipStream_t stream) { - HIP_INIT_API(dst, dstDevice, src, srcDevice, 
sizeBytes, stream); +hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { + HIP_INIT_API(attributes, ptr); assert(0 && "Unimplemented"); return hipErrorUnknown; } - -hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, - size_t sizeBytes) { - HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes); - - assert(0 && "Unimplemented"); - - return hipErrorUnknown; -} - -hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, - size_t sizeBytes, hipStream_t stream) { - HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream); - - assert(0 && "Unimplemented"); - - return hipErrorUnknown; -} - diff --git a/api/hip/hip_peer.cpp b/api/hip/hip_peer.cpp new file mode 100644 index 0000000000..ad552e94b4 --- /dev/null +++ b/api/hip/hip_peer.cpp @@ -0,0 +1,109 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) { + HIP_INIT_API(canAccessPeer, thisCtx, peerCtx); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx, + size_t sizeBytes) { + HIP_INIT_API(dst, dstCtx, src, srcCtx, sizeBytes); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice, + size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId) { + HIP_INIT_API(canAccessPeer, deviceId, peerDeviceId); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) { + HIP_INIT_API(peerDeviceId); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) { + HIP_INIT_API(peerDeviceId, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, + size_t sizeBytes) { + HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, + size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { + HIP_INIT_API(peerCtx, flags); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t 
hipCtxDisablePeerAccess(hipCtx_t peerCtx) { + HIP_INIT_API(peerCtx); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} \ No newline at end of file From e6718d9747e115f30c973c5667732018c4ca8569 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 13 Apr 2018 18:19:28 -0400 Subject: [PATCH 021/282] P4 to Git Change 1541197 by skudchad@skudchad_test2_win_opencl on 2018/04/13 17:49:07 SWDEV-145570 - [HIP] - Add missing HIP APIs and fill sym table references. The HIP tests can now build if we do a symlink to libhip_hcc.so that we build with VDI in the install folder in github HIP. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14619/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_profile.cpp#1 add ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#3 edit --- api/hip/hip_device.cpp | 15 ++++++++++++++ api/hip/hip_hcc.def.in | 28 +++++++++++++++++++++++++++ api/hip/hip_hcc.map.in | 32 ++++++++++++++++++++++++++++++ api/hip/hip_internal.hpp | 5 +++++ api/hip/hip_platform.cpp | 17 ++++++++++++++++ api/hip/hip_profile.cpp | 42 ++++++++++++++++++++++++++++++++++++++++ api/hip/hip_texture.cpp | 9 +++++++++ 7 files changed, 148 insertions(+) create mode 100644 api/hip/hip_profile.cpp diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index d067cf19a8..531beb52f4 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -191,3 +191,18 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) return hipSuccess; } +hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) { + HIP_INIT_API(deviceId, acc); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + +hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) { + HIP_INIT_API(stream, av); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 10113dc2bd..e7fd04ad04 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -89,6 +89,7 @@ hipMemcpyHtoDAsync hipMemcpyPeer hipMemcpyPeerAsync hipMemcpyToArray +hipMemcpyFromArray hipMemcpyToSymbol hipMemcpyToSymbolAsync hipMemGetAddressRange @@ -127,3 +128,30 @@ __hipUnregisterFatBinary hipConfigureCall hipSetupArgument hipLaunchByPtr +hipCreateTextureObject +hipDestroyTextureObject +hipGetTextureObjectResourceDesc +hipGetTextureObjectResourceViewDesc +hipGetTextureObjectTextureDesc +hipBindTexture +hipBindTexture2D +hipBindTextureToArray +hipBindTextureToMipmappedArray +hipUnbindTexture +hipGetChannelDesc +hipGetTextureAlignmentOffset +hipGetTextureReference +hipTexRefSetFormat +hipTexRefSetFlags +hipTexRefSetFilterMode +hipTexRefSetAddressMode +hipTexRefSetArray +hipTexRefSetAddress 
+hipTexRefSetAddress2D +hipCreateChannelDesc +hipProfilerStart +hipProfilerStop +hipHccGetAccelerator +hipHccGetAcceleratorView +hipCreateSurfaceObject +hipDestroySurfaceObject diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index a4153ee56f..d0bc6b4618 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -90,6 +90,7 @@ global: hipMemcpyPeer; hipMemcpyPeerAsync; hipMemcpyToArray; + hipMemcpyFromArray; hipMemcpyToSymbol; hipMemcpyToSymbolAsync; hipMemGetAddressRange; @@ -128,6 +129,37 @@ global: hipConfigureCall; hipSetupArgument; hipLaunchByPtr; + hipProfilerStart; + hipProfilerStop; + extern "C++" { + hip_impl::hipLaunchKernelGGLImpl*; + hipCreateTextureObject*; + hipDestroyTextureObject*; + hipGetTextureObjectResourceDesc; + hipGetTextureObjectResourceViewDesc; + hipGetTextureObjectTextureDesc; + hipBindTexture; + hipBindTexture2D; + hipBindTextureToArray; + hipBindTextureToMipmappedArray; + hipUnbindTexture; + hipGetChannelDesc; + hipGetTextureAlignmentOffset; + hipGetTextureReference; + hipTexRefSetFormat; + hipTexRefSetFlags; + hipTexRefSetFilterMode; + hipTexRefSetAddressMode; + hipTexRefSetArray; + hipTexRefSetAddress; + hipTexRefSetAddress2D; + hipCreateChannelDesc*; + ihipBindTextureToArrayImpl*; + hipHccGetAccelerator*; + hipHccGetAcceleratorView*; + hipCreateSurfaceObject*; + hipDestroySurfaceObject*; + }; local: *; }; diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 3d334fa2ac..2512e35c98 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -40,6 +40,11 @@ THE SOFTWARE. 
return hipErrorOutOfMemory; \ } +namespace hc { +class accelerator; +class accelerator_view; +}; + extern std::once_flag g_ihipInitialized; extern thread_local amd::Context* g_context; extern std::vector g_devices; diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index f1c33dabde..d66bf0930c 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -260,3 +260,20 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) g_blockDim.x, g_blockDim.y, g_blockDim.z, g_sharedMem, g_stream, nullptr, extra); } + +#if defined(ATI_OS_LINUX) + +namespace hip_impl { + +void hipLaunchKernelGGLImpl( + uintptr_t function_address, + const dim3& numBlocks, + const dim3& dimBlocks, + uint32_t sharedMemBytes, + hipStream_t stream, + void** kernarg) { +} + +} + +#endif // defined(ATI_OS_LINUX) diff --git a/api/hip/hip_profile.cpp b/api/hip/hip_profile.cpp new file mode 100644 index 0000000000..d53d7ffd46 --- /dev/null +++ b/api/hip/hip_profile.cpp @@ -0,0 +1,42 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipProfilerStart() { + HIP_INIT_API(); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + + +hipError_t hipProfilerStop() { + HIP_INIT_API(); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} \ No newline at end of file diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 848ec1b110..59adad761a 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -98,6 +98,15 @@ hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, return hipErrorUnknown; } +hipError_t ihipBindTextureToArrayImpl(int dim, enum hipTextureReadMode readMode, + hipArray_const_t array, + const struct hipChannelFormatDesc& desc, + textureReference* tex) { + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + hipError_t hipBindTextureToMipmappedArray(textureReference* tex, hipMipmappedArray_const_t mipmappedArray, const hipChannelFormatDesc* desc) { From 250e08f31fa2f036070c9f51b486ab2675a0b8ad Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 16 Apr 2018 18:37:17 -0400 Subject: [PATCH 022/282] P4 to Git Change 1541938 by cpaquot@cpaquot-ocl-lc-lnx on 2018/04/16 18:27:17 SWDEV-145570 - [HIP] Set/GetDevice and Create/DestroyTextureObject Implemented Set/GetDevice relying on g_context (current context) Implemented create linear/2D texture object function Implemented hipDestroyTextureObject Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#5 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#4 edit --- api/hip/hip_device_runtime.cpp | 17 +++-- api/hip/hip_texture.cpp | 118 +++++++++++++++++++++++++++++++-- 2 files changed, 126 insertions(+), 9 deletions(-) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index 7c5c063ea6..d0d9c1145b 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -382,13 +382,17 @@ hipError_t hipGetDevice ( int* deviceId ) { HIP_INIT_API(deviceId); if (deviceId != nullptr) { - // this needs to return default device. For now return 0 always - *deviceId = 0; + for (unsigned int i = 0; i < g_devices.size(); i++) { + if (g_devices[i] == g_context) { + *deviceId = i; + return hipSuccess; + } + } } else { return hipErrorInvalidValue; } - return hipSuccess; + return hipErrorUnknown; } hipError_t hipGetDeviceCount ( int* count ) { @@ -420,9 +424,12 @@ hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle hipError_t hipSetDevice ( int device ) { HIP_INIT_API(device); - assert(0 && "Unimplemented"); + if (static_cast(device) < g_devices.size()) { + g_context = g_devices[device]; - return hipErrorUnknown; + return hipSuccess; + } + return hipErrorInvalidValue; } hipError_t hipSetDeviceFlags ( unsigned int flags ) { diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 59adad761a..715c39eb97 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -24,13 +24,123 @@ THE SOFTWARE. 
#include #include "hip_internal.hpp" +void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureReadMode readMode, + cl_channel_order* channelOrder, cl_channel_type* channelType) { + if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w != 0) { + *channelOrder = CL_RGBA; + } else if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w == 0) { + *channelOrder = CL_RGB; + } else if (desc.x != 0 && desc.y != 0 && desc.z == 0 && desc.w == 0) { + *channelOrder = CL_RG; + } else if (desc.x != 0 && desc.y == 0 && desc.z == 0 && desc.w == 0) { + *channelOrder = CL_R; + } else { + } + + switch (desc.f) { + case hipChannelFormatKindUnsigned: + switch (desc.x) { + case 32: + *channelType = CL_UNSIGNED_INT32; + break; + case 16: + *channelType = readMode == hipReadModeNormalizedFloat + ? CL_UNORM_INT16 + : CL_UNSIGNED_INT16; + break; + case 8: + *channelType = readMode == hipReadModeNormalizedFloat + ? CL_UNORM_INT8 + : CL_UNSIGNED_INT8; + break; + default: + *channelType = CL_UNSIGNED_INT32; + } + break; + case hipChannelFormatKindSigned: + switch (desc.x) { + case 32: + *channelType = CL_SIGNED_INT32; + break; + case 16: + *channelType = readMode == hipReadModeNormalizedFloat + ? CL_SNORM_INT16 + : CL_SIGNED_INT16; + break; + case 8: + *channelType = readMode == hipReadModeNormalizedFloat + ? 
CL_SNORM_INT8 + : CL_SIGNED_INT8; + break; + default: + *channelType = CL_SIGNED_INT32; + } + break; + case hipChannelFormatKindFloat: + switch (desc.x) { + case 32: + *channelType = CL_FLOAT; + break; + case 16: + *channelType = CL_HALF_FLOAT; + break; + case 8: + break; + default: + *channelType = CL_FLOAT; + } + break; + case hipChannelFormatKindNone: + default: + break; + } +} hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, const hipTextureDesc* pTexDesc, const hipResourceViewDesc* pResViewDesc) { HIP_INIT_API(pTexObject, pResDesc, pTexDesc, pResViewDesc); - assert(0 && "Unimplemented"); + if (!g_context->devices()[0]->info().imageSupport_) { + return hipErrorInvalidValue; + } + + amd::Image* image = nullptr; + + cl_image_format image_format; + getChannelOrderAndType(pResDesc->res.pitch2D.desc, pTexDesc->readMode, + &image_format.image_channel_order, &image_format.image_channel_data_type); + + const amd::Image::Format imageFormat(image_format); + + amd::Memory* memory = nullptr; + + switch (pResDesc->resType) { + case hipResourceTypeArray: + assert(0); + break; + case hipResourceTypeMipmappedArray: + assert(0); + break; + case hipResourceTypeLinear: + assert(pResViewDesc == nullptr); + + memory = amd::SvmManager::FindSvmBuffer(pResDesc->res.linear.devPtr); + image = new (*g_context) amd::Image(*memory->asBuffer(), CL_MEM_OBJECT_IMAGE1D, memory->getMemFlags(), imageFormat, + pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize(), 1, 1, + pResDesc->res.linear.sizeInBytes, 0); + break; + case hipResourceTypePitch2D: + assert(pResViewDesc == nullptr); + + memory = amd::SvmManager::FindSvmBuffer(pResDesc->res.pitch2D.devPtr); + image = new (*g_context) amd::Image(*memory->asBuffer(), CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat, + pResDesc->res.pitch2D.width, pResDesc->res.pitch2D.height, 1, + pResDesc->res.pitch2D.pitchInBytes, 0); + break; + default: return hipErrorInvalidValue; + } + 
*pTexObject = reinterpret_cast(as_cl(image)); return hipErrorUnknown; } @@ -38,9 +148,9 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { HIP_INIT_API(textureObject); - assert(0 && "Unimplemented"); + as_amd(reinterpret_cast(textureObject))->release(); - return hipErrorUnknown; + return hipSuccess; } hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, @@ -205,4 +315,4 @@ hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPT assert(0 && "Unimplemented"); return hipErrorUnknown; -} \ No newline at end of file +} From 63babe9e66049e7a7887196da52272ca0fb2a7df Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 19 Apr 2018 12:27:09 -0400 Subject: [PATCH 023/282] P4 to Git Change 1543751 by cpaquot@cpaquot-ocl-lc-lnx on 2018/04/19 11:17:02 SWDEV-145570 - [HIP] Implemented a bunch of texture APIs Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#5 edit --- api/hip/hip_texture.cpp | 143 ++++++++++++++++++++++++++++++++++------ 1 file changed, 124 insertions(+), 19 deletions(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 715c39eb97..fbc8c4b481 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -24,6 +24,47 @@ THE SOFTWARE. 
#include #include "hip_internal.hpp" +void getDrvChannelOrderAndType(const enum hipArray_Format Format, unsigned int NumChannels, + cl_channel_order* channelOrder, + cl_channel_type* channelType) { + switch (Format) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + *channelType = CL_UNSIGNED_INT8; + break; + case HIP_AD_FORMAT_UNSIGNED_INT16: + *channelType = CL_UNSIGNED_INT16; + break; + case HIP_AD_FORMAT_UNSIGNED_INT32: + *channelType = CL_UNSIGNED_INT32; + break; + case HIP_AD_FORMAT_SIGNED_INT8: + *channelType = CL_SIGNED_INT8; + break; + case HIP_AD_FORMAT_SIGNED_INT16: + *channelType = CL_SIGNED_INT16; + break; + case HIP_AD_FORMAT_SIGNED_INT32: + *channelType = CL_SIGNED_INT32; + break; + case HIP_AD_FORMAT_HALF: + *channelType = CL_HALF_FLOAT; + break; + case HIP_AD_FORMAT_FLOAT: + *channelType = CL_FLOAT; + break; + default: + break; + } + + if (NumChannels == 4) { + *channelOrder = CL_RGBA; + } else if (NumChannels == 2) { + *channelOrder = CL_RG; + } else if (NumChannels == 1) { + *channelOrder = CL_R; + } +} + void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureReadMode readMode, cl_channel_order* channelOrder, cl_channel_type* channelType) { if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w != 0) { @@ -180,13 +221,52 @@ hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, return hipErrorUnknown; } +hipError_t ihipBindTexture(cl_mem_object_type type, + size_t* offset, textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t width, size_t height, + size_t pitch) { + if (tex == nullptr) { + return hipErrorInvalidImage; + } + if (g_context) { + cl_image_format image_format; + + if (nullptr == desc) { + getDrvChannelOrderAndType(tex->format, tex->numChannels, + &image_format.image_channel_order, &image_format.image_channel_data_type); + } else { + getChannelOrderAndType(*desc, hipReadModeElementType, + &image_format.image_channel_order, &image_format.image_channel_data_type); + } + const 
amd::Image::Format imageFormat(image_format); + + amd::Memory* memory = amd::SvmManager::FindSvmBuffer(devPtr); + amd::Image* image = new (*g_context) amd::Image(*memory->asBuffer(), type, memory->getMemFlags(), + imageFormat, width, height, 1, pitch, 0); + + *offset = 0; + if (tex->textureObject) { + as_amd(reinterpret_cast(tex->textureObject))->release(); + } + tex->textureObject = reinterpret_cast(as_cl(image)); + return hipSuccess; + } + return hipErrorUnknown; +} + hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, const hipChannelFormatDesc* desc, size_t size) { HIP_INIT_API(offset, tex, devPtr, desc, size); - assert(0 && "Unimplemented"); + if (desc == nullptr) { + return hipErrorInvalidValue; + } + cl_image_format image_format; + getChannelOrderAndType(*desc, hipReadModeElementType, + &image_format.image_channel_order, &image_format.image_channel_data_type); + const amd::Image::Format imageFormat(image_format); - return hipErrorUnknown; + return ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, size / imageFormat.getElementSize(), 1, size); } hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, @@ -194,9 +274,7 @@ hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* d size_t pitch) { HIP_INIT_API(offset, tex, devPtr, desc, width, height, pitch); - assert(0 && "Unimplemented"); - - return hipErrorUnknown; + return ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, desc, width, height, pitch); } hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, @@ -230,9 +308,9 @@ hipError_t hipBindTextureToMipmappedArray(textureReference* tex, hipError_t hipUnbindTexture(const textureReference* tex) { HIP_INIT_API(tex); - assert(0 && "Unimplemented"); + as_amd(reinterpret_cast(tex->textureObject))->release(); - return hipErrorUnknown; + return hipSuccess; } hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, 
hipArray_const_t array) { @@ -262,33 +340,50 @@ hipError_t hipGetTextureReference(const textureReference** tex, const void* symb hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents) { HIP_INIT_API(tex, fmt, NumPackedComponents); - assert(0 && "Unimplemented"); + if (tex == nullptr) { + return hipErrorInvalidImage; + } - return hipErrorUnknown; + tex->format = fmt; + tex->numChannels = NumPackedComponents; + + return hipSuccess; } hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags) { HIP_INIT_API(tex, flags); - assert(0 && "Unimplemented"); + if (tex == nullptr) { + return hipErrorInvalidImage; + } - return hipErrorUnknown; + tex->normalized = flags; + + return hipSuccess; } hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm) { HIP_INIT_API(tex, fm); - assert(0 && "Unimplemented"); + if (tex == nullptr) { + return hipErrorInvalidImage; + } - return hipErrorUnknown; + tex->filterMode = fm; + + return hipSuccess; } hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am) { HIP_INIT_API(tex, dim, am); - assert(0 && "Unimplemented"); + if (tex == nullptr) { + return hipErrorInvalidImage; + } - return hipErrorUnknown; + tex->addressMode[dim] = am; + + return hipSuccess; } hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags) { @@ -303,16 +398,26 @@ hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDevicep size_t size) { HIP_INIT_API(offset, tex, devPtr, size); - assert(0 && "Unimplemented"); + if (tex == nullptr) { + return hipErrorInvalidImage; + } - return hipErrorUnknown; + cl_image_format image_format; + getDrvChannelOrderAndType(tex->format, tex->numChannels, + &image_format.image_channel_order, &image_format.image_channel_data_type); + const amd::Image::Format imageFormat(image_format); + + return ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, nullptr, size / 
imageFormat.getElementSize(), 1, size); } hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, hipDeviceptr_t devPtr, size_t pitch) { HIP_INIT_API(tex, desc, devPtr, pitch); - assert(0 && "Unimplemented"); + if (desc == nullptr) { + return hipErrorInvalidValue; + } - return hipErrorUnknown; + size_t offset; + return ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, nullptr, desc->width, desc->height, pitch); } From 3ce752065a6f4e354ee4add8b8ece9592dbbd2d1 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 19 Apr 2018 18:35:00 -0400 Subject: [PATCH 024/282] P4 to Git Change 1544061 by skudchad@skudchad_test2_win_opencl on 2018/04/19 18:24:45 SWDEV-145570 - [HIP] - Add some hip_mem* APIs ReviewBoardURL = http://ocltc.amd.com/reviews/r/14647/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#10 edit --- api/hip/hip_memory.cpp | 125 +++++++++++++++++++++++++++++++++++------ 1 file changed, 108 insertions(+), 17 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 8cb51be08d..c0cc21367a 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -21,8 +21,10 @@ THE SOFTWARE. 
*/ #include - #include "hip_internal.hpp" +#include "platform/context.hpp" +#include "platform/command.hpp" +#include "platform/memory.hpp" hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { @@ -119,7 +121,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind return hipSuccess; } -hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, value, sizeBytes, stream); assert(0 && "Unimplemented"); @@ -130,9 +132,36 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); - assert(0 && "Unimplemented"); + amd::Device* device = g_context->devices()[0]; - return hipErrorUnknown; + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } + + amd::Command::EventWaitList waitList; + amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); + + + amd::Coord3D fillOffset(0, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, sizeof(int), fillOffset, fillSize); + + if (!command) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + queue->release(); + + return hipSuccess; } hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { @@ -146,17 +175,21 @@ hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { hipError_t hipHostFree(void* ptr) { HIP_INIT_API(ptr); - assert(0 && "Unimplemented"); - - return hipErrorUnknown; + if (amd::SvmBuffer::malloced(ptr)) { + amd::SvmBuffer::free(*g_context, ptr); + return hipSuccess; + } + return 
hipErrorInvalidValue; } hipError_t hipFreeArray(hipArray* array) { HIP_INIT_API(array); - assert(0 && "Unimplemented"); - - return hipErrorUnknown; + if (amd::SvmBuffer::malloced(array->data)) { + amd::SvmBuffer::free(*g_context, array->data); + return hipSuccess; + } + return hipErrorInvalidValue; } hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) { @@ -170,25 +203,83 @@ hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDevice hipError_t hipMemGetInfo(size_t* free, size_t* total) { HIP_INIT_API(free, total); - assert(0 && "Unimplemented"); + size_t freeMemory[2]; + amd::Device* device = g_context->devices()[0]; + if(!device) { + return hipErrorInvalidDevice; + } - return hipErrorUnknown; + if(!device->globalFreeMemory(freeMemory)) { + return hipErrorInvalidValue; + } + + *free = freeMemory[0]; + *total = device->info().globalMemSize_; + +return hipSuccess; } +hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth, + cl_mem_object_type imageType) { + + amd::Device* device = g_context->devices()[0]; + + if ((width == 0) || (height == 0)) { + *ptr = nullptr; + return hipSuccess; + } + else if (!(device->info().image2DMaxWidth_ >= width && + device->info().image2DMaxHeight_ >= height ) || (ptr == nullptr)) { + return hipErrorInvalidValue; + } + + if (g_context->devices()[0]->info().maxMemAllocSize_ < (width * height)) { + return hipErrorOutOfMemory; + } + + const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 }; + const amd::Image::Format imageFormat(image_format); + + *pitch = width * imageFormat.getElementSize(); + + size_t sizeBytes = *pitch * height; + *ptr = amd::SvmBuffer::malloc(*g_context, CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeBytes, + g_context->devices()[0]->info().memBaseAddrAlign_); + + if (!*ptr) { + return hipErrorOutOfMemory; + } + + return hipSuccess; +} + + hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { 
HIP_INIT_API(ptr, pitch, width, height); - assert(0 && "Unimplemented"); - - return hipErrorUnknown; + return ihipMallocPitch(ptr, pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D); } hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { HIP_INIT_API(pitchedDevPtr, &extent); - assert(0 && "Unimplemented"); + size_t pitch = 0; - return hipErrorUnknown; + if (pitchedDevPtr == nullptr) { + return hipErrorInvalidValue; + } + + hipError_t status = hipSuccess; + status = ihipMallocPitch(&pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth, + CL_MEM_OBJECT_IMAGE3D); + + if (status == hipSuccess) { + pitchedDevPtr->pitch = pitch; + pitchedDevPtr->xsize = extent.width; + pitchedDevPtr->ysize = extent.height; + } + + return status; } hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { From 66640b546fa1629b0c63a8ba416ee1eda75e0109 Mon Sep 17 00:00:00 2001 From: foreman Date: Sun, 22 Apr 2018 21:17:27 -0400 Subject: [PATCH 025/282] P4 to Git Change 1544858 by skudchad@skudchad_rocm on 2018/04/22 21:07:25 SWDEV-144570 - [HIP] - Fix a few APIs and typos. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#6 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#6 edit --- api/hip/hip_device_runtime.cpp | 16 ++++++++++++---- api/hip/hip_platform.cpp | 2 ++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index d0d9c1145b..effce5974f 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -349,9 +349,9 @@ hipError_t hipDeviceGetStreamPriorityRange ( int* leastPriority, int* greatestPr hipError_t hipDeviceReset ( void ) { HIP_INIT_API(); - assert(0 && "Unimplemented"); + /* FIXME */ - return hipErrorUnknown; + return hipSuccess; } hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { @@ -429,13 +429,21 @@ hipError_t hipSetDevice ( int device ) { return hipSuccess; } - return hipErrorInvalidValue; + return hipErrorInvalidDevice; } hipError_t hipSetDeviceFlags ( unsigned int flags ) { HIP_INIT_API(flags); - assert(0 && "Unimplemented"); + /* FIXME */ + /* Not all of Ctx may be implemented */ + + unsigned supportedFlags = + hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax; + + if (flags & (~supportedFlags)) { + return hipErrorInvalidValue; + } return hipSuccess; } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index d66bf0930c..a9c52bac2e 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -272,6 +272,8 @@ void hipLaunchKernelGGLImpl( uint32_t sharedMemBytes, hipStream_t stream, void** kernarg) { + + assert(0 && "Unimplemented"); } } From 13355e5de5d8cf1c440a55be1a9ce9795d26c7ed Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 24 Apr 2018 14:05:20 -0400 Subject: [PATCH 026/282] P4 to Git Change 1545750 by skudchad@skudchad_test2_win_opencl on 2018/04/24 13:55:57 SWDEV-145570 - [HIP] - Add some hip_mem* APIs. Part 2. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14681/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#11 edit --- api/hip/hip_memory.cpp | 113 +++++++++++++++++++++++++++++++++++------ 1 file changed, 98 insertions(+), 15 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index c0cc21367a..68eb04ac70 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -26,6 +26,9 @@ THE SOFTWARE. #include "platform/command.hpp" #include "platform/memory.hpp" +extern void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureReadMode readMode, + cl_channel_order* channelOrder, cl_channel_type* channelType); + hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { if (sizeBytes == 0) { @@ -167,9 +170,15 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { HIP_INIT_API(ptr, size); - assert(0 && "Unimplemented"); + amd::Memory* svmMem = amd::SvmManager::FindSvmBuffer(ptr); - return hipErrorUnknown; + if (svmMem == nullptr) { + return hipErrorInvalidValue; + } + + *size = svmMem->getSize(); + + return hipSuccess; } hipError_t hipHostFree(void* ptr) { @@ -195,9 +204,18 @@ hipError_t hipFreeArray(hipArray* array) { hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) { HIP_INIT_API(pbase, psize, dptr); - assert(0 && "Unimplemented"); + // Since we are using SVM buffer DevicePtr and HostPtr is the same + void* ptr = dptr; + amd::Memory* svmMem = amd::SvmManager::FindSvmBuffer(ptr); - return hipErrorUnknown; + if (svmMem == nullptr) { + return hipErrorInvalidDevicePointer; + } + + *pbase = ptr; + *psize = svmMem->getSize(); + + return hipSuccess; } hipError_t hipMemGetInfo(size_t* free, size_t* total) { @@ -220,7 +238,7 @@ return hipSuccess; } hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth, - cl_mem_object_type imageType) { + cl_mem_object_type imageType, const cl_image_format* image_format) { amd::Device* 
device = g_context->devices()[0]; @@ -237,17 +255,16 @@ hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t heigh return hipErrorOutOfMemory; } - const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 }; - const amd::Image::Format imageFormat(image_format); + const amd::Image::Format imageFormat(*image_format); *pitch = width * imageFormat.getElementSize(); - size_t sizeBytes = *pitch * height; + size_t sizeBytes = *pitch * height * depth; *ptr = amd::SvmBuffer::malloc(*g_context, CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeBytes, g_context->devices()[0]->info().memBaseAddrAlign_); if (!*ptr) { - return hipErrorOutOfMemory; + return hipErrorMemoryAllocation; } return hipSuccess; @@ -257,7 +274,8 @@ hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t heigh hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { HIP_INIT_API(ptr, pitch, width, height); - return ihipMallocPitch(ptr, pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D); + const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 }; + return ihipMallocPitch(ptr, pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D, &image_format); } hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { @@ -269,9 +287,10 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { return hipErrorInvalidValue; } + const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 }; hipError_t status = hipSuccess; status = ihipMallocPitch(&pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth, - CL_MEM_OBJECT_IMAGE3D); + CL_MEM_OBJECT_IMAGE3D, &image_format); if (status == hipSuccess) { pitchedDevPtr->pitch = pitch; @@ -294,18 +313,82 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags) { HIP_INIT_API(array, desc, width, height, flags); - assert(0 && "Unimplemented"); + if (width == 0) { + return hipErrorInvalidValue; + } - return 
hipErrorUnknown; + *array = (hipArray*)malloc(sizeof(hipArray)); + array[0]->type = flags; + array[0]->width = width; + array[0]->height = height; + array[0]->depth = 1; + array[0]->desc = *desc; + array[0]->isDrv = false; + array[0]->textureType = hipTextureType2D; + void** ptr = &array[0]->data; + + cl_channel_order channelOrder; + cl_channel_type channelType; + getChannelOrderAndType(*desc, hipReadModeElementType, &channelOrder, &channelType); + + const cl_image_format image_format = { channelOrder, channelType }; + + // Dummy flags check + switch (flags) { + case hipArrayLayered: + case hipArrayCubemap: + case hipArraySurfaceLoadStore: + case hipArrayTextureGather: + assert(0 && "Unspported"); + break; + case hipArrayDefault: + default: + break; + } + size_t pitch = 0; + hipError_t status = ihipMallocPitch(ptr, &pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D, + &image_format); + + return status; } hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc* desc, struct hipExtent extent, unsigned int flags) { HIP_INIT_API(array, desc, &extent, flags); - assert(0 && "Unimplemented"); + *array = (hipArray*)malloc(sizeof(hipArray)); + array[0]->type = flags; + array[0]->width = extent.width; + array[0]->height = extent.height; + array[0]->depth = extent.depth; + array[0]->desc = *desc; + array[0]->isDrv = false; + array[0]->textureType = hipTextureType3D; + void** ptr = &array[0]->data; - return hipErrorUnknown; + cl_channel_order channelOrder; + cl_channel_type channelType; + getChannelOrderAndType(*desc, hipReadModeElementType, &channelOrder, &channelType); + + const cl_image_format image_format = { channelOrder, channelType }; + + // Dummy flags check + switch (flags) { + case hipArrayLayered: + case hipArrayCubemap: + case hipArraySurfaceLoadStore: + case hipArrayTextureGather: + assert(0 && "Unspported"); + break; + case hipArrayDefault: + default: + break; + } + size_t pitch = 0; + hipError_t status = ihipMallocPitch(ptr, &pitch, 
extent.width, extent.height, extent.depth, + CL_MEM_OBJECT_IMAGE3D, &image_format); + + return status; } hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { From d2fbde728c3227c586166dd91a6a664c08e2cd30 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 24 Apr 2018 16:49:38 -0400 Subject: [PATCH 027/282] P4 to Git Change 1545859 by cpaquot@cpaquot-ocl-lc-lnx on 2018/04/24 16:44:17 SWDEV-145570 - [HIP] Get hipCtx_simple to pass Implemented hipCtxGetDevice hipCtxCreate must push the created context onto the context stack hipCtxDestroy must check if the top of the stack is the context being destroy and not just pop the top of the stack w/o checking. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#8 edit --- api/hip/hip_context.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 2e189f1351..e25a87bde8 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -71,6 +71,7 @@ hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) // Increment ref count for device primary context g_devices[device]->retain(); + g_ctxtStack.push(g_devices[device]); return hipSuccess; } @@ -122,7 +123,7 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) { } // Need to remove the ctx of calling thread if its the top one - if (g_context == amdContext) { + if (!g_ctxtStack.empty() && g_ctxtStack.top() == amdContext) { g_ctxtStack.pop(); } @@ -188,7 +189,16 @@ hipError_t hipDriverGetVersion(int* driverVersion) { hipError_t hipCtxGetDevice(hipDevice_t* device) { HIP_INIT_API(device); - assert(0 && "Unimplemented"); + if (device != nullptr) { + for (unsigned int i = 0; i < g_devices.size(); i++) { + if (g_devices[i] == g_context) { + *device = static_cast(i); + return hipSuccess; + } + } + } else { + return hipErrorInvalidValue; + } return hipErrorUnknown; } @@ -279,4 +289,4 @@ hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, 
unsigned int flags) { assert(0 && "Unimplemented"); return hipErrorUnknown; -} \ No newline at end of file +} From 18c7c164c6a1182fc70a75d6b590b45bb8b80d48 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 25 Apr 2018 12:32:11 -0400 Subject: [PATCH 028/282] P4 to Git Change 1546224 by skudchad@skudchad_test2_win_opencl on 2018/04/25 12:24:40 SWDEV-145570 - [HIP] - Add hipMemcpyTo/FromArray APIs. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14696/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#12 edit --- api/hip/hip_memory.cpp | 84 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 6 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 68eb04ac70..23a631881f 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -555,22 +555,94 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con return hipErrorUnknown; } -hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, +hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind) { - HIP_INIT_API(dst, wOffset, hOffset, src, count, kind); + HIP_INIT_API(dstArray, wOffset, hOffset, src, count, kind); - assert(0 && "Unimplemented"); + amd::Device* device = g_context->devices()[0]; - return hipErrorUnknown; + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } + + amd::Command* command; + amd::Command::EventWaitList waitList; + amd::Memory* memory; + + amd::Coord3D dstOffset(wOffset, hOffset, 0); + + switch (kind) { + case hipMemcpyDeviceToHost: + assert(!"Invalid case"); + /* fall thru */ + case hipMemcpyHostToDevice: + memory = amd::SvmManager::FindSvmBuffer(dstArray->data); + command = new amd::WriteMemoryCommand(*queue, 
CL_COMMAND_WRITE_BUFFER, waitList, + *memory->asBuffer(), dstOffset, count, src); + break; + default: + assert(!"Shouldn't reach here"); + break; + } + if (!command) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + queue->release(); + + return hipSuccess; } hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind) { HIP_INIT_API(dst, srcArray, wOffset, hOffset, count, kind); - assert(0 && "Unimplemented"); + amd::Device* device = g_context->devices()[0]; - return hipErrorUnknown; + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } + + amd::Command* command; + amd::Command::EventWaitList waitList; + amd::Memory* memory; + + amd::Coord3D srcOffset(wOffset, hOffset, 0); + + switch (kind) { + case hipMemcpyHostToDevice: + assert(!"Invalid case"); + /* fall thru */ + case hipMemcpyDeviceToHost: + memory = amd::SvmManager::FindSvmBuffer(srcArray->data); + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, + *memory->asBuffer(), srcOffset, count, dst); + break; + default: + assert(!"Shouldn't reach here"); + break; + } + if (!command) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + queue->release(); + + return hipSuccess; } hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) { From 2605d50ce7d87e2fe98346ebc0ab6de1b2f3d8ad Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 27 Apr 2018 21:21:31 -0400 Subject: [PATCH 029/282] P4 to Git Change 1547563 by lmoriche@lmoriche_opencl_dev2 on 2018/04/27 21:10:47 SWDEV-145570 - [HIP] - Add support for HCC compiled binaries. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/build/Makefile.hip#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elf_types.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elfio.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elfio_dump.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elfio_dynamic.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elfio_header.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elfio_note.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elfio_relocation.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elfio_section.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elfio_segment.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elfio_strings.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elfio_symbols.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/elfio/elfio_utils.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#7 edit --- api/hip/hip_platform.cpp | 305 +++++++++++++++++++++++++++++---------- 1 file changed, 227 insertions(+), 78 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index a9c52bac2e..9c9c3b1ecf 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -26,9 +26,10 @@ THE SOFTWARE. 
#include "platform/program.hpp" #include "platform/runtime.hpp" -constexpr unsigned __cudaFatMAGIC = 0x1ee55a01; +#include +#include "elfio.hpp" + constexpr unsigned __cudaFatMAGIC2 = 0x466243b1; -constexpr unsigned __cudaFatMAGIC3 = 0xba55ed50; struct __CudaFatBinaryWrapper { unsigned int magic; @@ -37,53 +38,9 @@ struct __CudaFatBinaryWrapper { void* dummy1; }; -struct __CudaFatBinaryHeader { - unsigned int magic; - unsigned short version; - unsigned short headerSize; - unsigned long long int fatSize; -}; - -struct __CudaPartHeader { - unsigned short type; - unsigned short dummy1; - unsigned int headerSize; - unsigned long long int partSize; - unsigned long long int dummy2; - unsigned int dummy3; - unsigned int subarch; -}; - -static hipModule_t registerCudaFatBinary(const __CudaFatBinaryHeader* fbheader) -{ - const __CudaPartHeader* pheader = reinterpret_cast( - reinterpret_cast(fbheader) + fbheader->headerSize); - const __CudaPartHeader* end = reinterpret_cast( - reinterpret_cast(pheader) + fbheader->fatSize); - - amd::Program* program = new amd::Program(*g_context); - if (!program) return nullptr; - - while (pheader < end) { - if (true/*pheader->subarch == match a device in the context*/) { - const void *image = reinterpret_cast( - reinterpret_cast(pheader) + pheader->headerSize); - size_t size = pheader->partSize; - if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, size) || - CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) { - return nullptr; - } - break; - } - pheader = reinterpret_cast( - reinterpret_cast(pheader) + pheader->headerSize + pheader->partSize); - } - - return reinterpret_cast(as_cl(program)); -} - #define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" -#define AMDGCN_AMDHSA_TRIPLE "openmp-amdgcn--amdhsa" +#define OPENMP_AMDGCN_AMDHSA_TRIPLE "openmp-amdgcn--amdhsa" +#define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn--amdhsa" struct __ClangOffloadBundleDesc { uint64_t offset; @@ 
-98,23 +55,35 @@ struct __ClangOffloadBundleHeader { __ClangOffloadBundleDesc desc[1]; }; -static hipModule_t registerOffloadBundle(const __ClangOffloadBundleHeader* obheader) +extern "C" hipModule_t __hipRegisterFatBinary(const void* data) { + HIP_INIT(); + + const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); + if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) { + return nullptr; + } + std::string magic((char*)fbwrapper->binary, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); + if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { + return nullptr; + } + amd::Program* program = new amd::Program(*g_context); if (!program) return nullptr; - const __ClangOffloadBundleDesc* desc = &obheader->desc[0]; + const auto obheader = reinterpret_cast(fbwrapper->binary); + const auto* desc = &obheader->desc[0]; for (uint64_t i = 0; i < obheader->numBundles; ++i, desc = reinterpret_cast( reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { - std::string triple(desc->triple, sizeof(AMDGCN_AMDHSA_TRIPLE) - 1); - if (triple.compare(AMDGCN_AMDHSA_TRIPLE)) + std::string triple(desc->triple, sizeof(OPENMP_AMDGCN_AMDHSA_TRIPLE) - 1); + if (triple.compare(OPENMP_AMDGCN_AMDHSA_TRIPLE)) continue; - std::string target(desc->triple + sizeof(AMDGCN_AMDHSA_TRIPLE), - desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)); + std::string target(desc->triple + sizeof(OPENMP_AMDGCN_AMDHSA_TRIPLE), + desc->tripleSize - sizeof(OPENMP_AMDGCN_AMDHSA_TRIPLE)); if (target.compare(g_context->devices()[0]->info().name_)) continue; @@ -130,28 +99,6 @@ static hipModule_t registerOffloadBundle(const __ClangOffloadBundleHeader* obhea return reinterpret_cast(as_cl(program)); } - -extern "C" hipModule_t __hipRegisterFatBinary(const void* data) -{ - HIP_INIT(); - - const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); - if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) { - return nullptr; - } - const __CudaFatBinaryHeader* fbheader = 
reinterpret_cast(fbwrapper->binary); - if (fbheader->magic == __cudaFatMAGIC3 && fbheader->version == 1) { - return registerCudaFatBinary(fbheader); - } - - std::string magic((char*)fbwrapper->binary, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); - if (!magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { - return registerOffloadBundle(reinterpret_cast(fbwrapper->binary)); - } - - return nullptr; -} - std::map g_functions; @@ -265,15 +212,217 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) namespace hip_impl { +struct dl_phdr_info { + ELFIO::Elf64_Addr dlpi_addr; + const char *dlpi_name; + const ELFIO::Elf64_Phdr *dlpi_phdr; + ELFIO::Elf64_Half dlpi_phnum; +}; + +extern "C" int dl_iterate_phdr( + int (*callback) (struct dl_phdr_info *info, size_t size, void *data), void *data +); + +struct Symbol { + std::string name; + ELFIO::Elf64_Addr value = 0; + ELFIO::Elf_Xword size = 0; + ELFIO::Elf_Half sect_idx = 0; + uint8_t bind = 0; + uint8_t type = 0; + uint8_t other = 0; +}; + +inline Symbol read_symbol(const ELFIO::symbol_section_accessor& section, unsigned int idx) { + assert(idx < section.get_symbols_num()); + + Symbol r; + section.get_symbol(idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other); + + return r; +} + +template +inline ELFIO::section* find_section_if(ELFIO::elfio& reader, P p) { + const auto it = find_if(reader.sections.begin(), reader.sections.end(), std::move(p)); + + return it != reader.sections.end() ? 
*it : nullptr; +} + +std::vector> function_names_for(const ELFIO::elfio& reader, + ELFIO::section* symtab) { + std::vector> r; + ELFIO::symbol_section_accessor symbols{reader, symtab}; + + for (auto i = 0u; i != symbols.get_symbols_num(); ++i) { + auto tmp = read_symbol(symbols, i); + + if (tmp.type == STT_FUNC && tmp.sect_idx != SHN_UNDEF && !tmp.name.empty()) { + r.emplace_back(tmp.value, tmp.name); + } + } + + return r; +} + +const std::vector>& function_names_for_process() { + static constexpr const char self[] = "/proc/self/exe"; + + static std::vector> r; + static std::once_flag f; + + std::call_once(f, []() { + ELFIO::elfio reader; + + if (reader.load(self)) { + const auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; }); + + if (it) r = function_names_for(reader, it); + } + }); + + return r; +} + + +const std::unordered_map& function_names() +{ + static std::unordered_map r{ + function_names_for_process().cbegin(), + function_names_for_process().cend()}; + static std::once_flag f; + + std::call_once(f, []() { + dl_iterate_phdr([](dl_phdr_info* info, size_t, void*) { + ELFIO::elfio reader; + + if (reader.load(info->dlpi_name)) { + const auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; }); + + if (it) { + auto n = function_names_for(reader, it); + + for (auto&& f : n) f.first += info->dlpi_addr; + + r.insert(make_move_iterator(n.begin()), make_move_iterator(n.end())); + } + } + return 0; + }, + nullptr); + }); + + return r; +} + +std::vector bundles_for_process() { + static constexpr const char self[] = "/proc/self/exe"; + static constexpr const char kernel_section[] = ".kernel"; + std::vector r; + + ELFIO::elfio reader; + + if (reader.load(self)) { + auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_name() == kernel_section; }); + + if (it) r.insert(r.end(), it->get_data(), it->get_data() + it->get_size()); + } + + return 
r; +} + +const std::vector& modules() { + static std::vector r; + static std::once_flag f; + + std::call_once(f, []() { + static std::vector> bundles{bundles_for_process()}; + + dl_iterate_phdr( + [](dl_phdr_info* info, std::size_t, void*) { + ELFIO::elfio tmp; + if (tmp.load(info->dlpi_name)) { + const auto it = find_section_if( + tmp, [](const ELFIO::section* x) { return x->get_name() == ".kernel"; }); + + if (it) bundles.emplace_back(it->get_data(), it->get_data() + it->get_size()); + } + return 0; + }, + nullptr); + + for (auto&& bundle : bundles) { + std::string magic(&bundle[0], sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); + if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) + continue; + + const auto obheader = reinterpret_cast(&bundle[0]); + const auto* desc = &obheader->desc[0]; + for (uint64_t i = 0; i < obheader->numBundles; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { + + std::string triple(desc->triple, sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1); + if (triple.compare(HCC_AMDGCN_AMDHSA_TRIPLE)) + continue; + + std::string target(desc->triple + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE), + desc->tripleSize - sizeof(HCC_AMDGCN_AMDHSA_TRIPLE)); + + if (!target.compare(g_context->devices()[0]->info().name_)) { + hipModule_t module; + if (hipSuccess == hipModuleLoadData(&module, reinterpret_cast( + reinterpret_cast(obheader) + desc->offset))) + r.push_back(module); + break; + } + } + } + }); + + return r; +} + +const std::unordered_map& functions() +{ + static std::unordered_map r; + static std::once_flag f; + + std::call_once(f, []() { + for (auto&& function : function_names()) { + for (auto&& module : modules()) { + hipFunction_t f; + if (hipSuccess == hipModuleGetFunction(&f, module, function.second.c_str())) + r[function.first] = f; + } + } + }); + + return r; +} + + void hipLaunchKernelGGLImpl( uintptr_t function_address, const dim3& numBlocks, const dim3& dimBlocks, uint32_t sharedMemBytes, hipStream_t stream, - void** 
kernarg) { + void** kernarg) +{ + HIP_INIT(); - assert(0 && "Unimplemented"); + const auto it = functions().find(function_address); + if (it == functions().cend()) + return; + + hipModuleLaunchKernel(it->second, + numBlocks.x, numBlocks.y, numBlocks.z, + dimBlocks.x, dimBlocks.y, dimBlocks.z, + sharedMemBytes, stream, nullptr, kernarg); } } From 1cb879968b3238d7a60e26ad260f4ccc07563fdc Mon Sep 17 00:00:00 2001 From: foreman Date: Sun, 29 Apr 2018 15:04:45 -0400 Subject: [PATCH 030/282] P4 to Git Change 1547669 by lmoriche@lmoriche_opencl_dev2 on 2018/04/29 14:53:31 SWDEV-145570 - [HIP] Fix kernel disptach for HCC compiled programs. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#34 edit --- api/hip/hip_module.cpp | 7 ++++--- api/hip/hip_platform.cpp | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 0bcae3551b..268b92284b 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -151,7 +151,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, } size_t globalWorkOffset[3] = {0}; - size_t globalWorkSize[3] = { gridDimX, gridDimY, gridDimZ }; + size_t globalWorkSize[3] = { gridDimX * blockDimX, gridDimY * blockDimY, gridDimZ * blockDimZ}; size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ }; amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); amd::Command::EventWaitList waitList; @@ -161,10 +161,11 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, const amd::KernelParameterDescriptor& desc = signature.at(i); if (kernelParams == nullptr) { assert(extra); - kernel->parameters().set(i, desc.size_, reinterpret_cast
(extra[1]) + desc.offset_); + kernel->parameters().set(i, desc.size_, reinterpret_cast
(extra[1]) + desc.offset_, + desc.type_ == T_POINTER/*svmBound*/); } else { assert(!extra); - kernel->parameters().set(i, desc.size_, kernelParams[i]); + kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER/*svmBound*/); } } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 9c9c3b1ecf..7152458d38 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -417,7 +417,7 @@ void hipLaunchKernelGGLImpl( const auto it = functions().find(function_address); if (it == functions().cend()) - return; + assert(0); hipModuleLaunchKernel(it->second, numBlocks.x, numBlocks.y, numBlocks.z, From 5fc5d098a95c17d1a51afbdf777da3183e16eadb Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 30 Apr 2018 01:03:06 -0400 Subject: [PATCH 031/282] P4 to Git Change 1547686 by cpaquot@cpaquot-ocl-lc-lnx on 2018/04/30 00:52:23 SWDEV-145570 - [HIP] CreateTexture with hipArray Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#6 edit --- api/hip/hip_texture.cpp | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index fbc8c4b481..220332a57b 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -158,7 +158,26 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou switch (pResDesc->resType) { case hipResourceTypeArray: - assert(0); + { + memory = amd::SvmManager::FindSvmBuffer(pResDesc->res.array.array->data); + + getChannelOrderAndType(pResDesc->res.array.array->desc, pTexDesc->readMode, + &image_format.image_channel_order, &image_format.image_channel_data_type); + const amd::Image::Format imageFormat(image_format); + switch (pResDesc->res.array.array->type) { + case hipArrayLayered: + case hipArrayCubemap: + assert(0); + break; + case hipArraySurfaceLoadStore: + case hipArrayTextureGather: + case hipArrayDefault: + default: + image = new (*g_context) 
amd::Image(*memory->asBuffer(), CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat, + pResDesc->res.array.array->width, pResDesc->res.array.array->height, 1, 0, 0); + break; + } + } break; case hipResourceTypeMipmappedArray: assert(0); From 36d2d37804d5b997996415d651f340c02d3d50c5 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 30 Apr 2018 13:34:32 -0400 Subject: [PATCH 032/282] P4 to Git Change 1547825 by cpaquot@cpaquot-ocl-lc-lnx on 2018/04/30 11:59:32 SWDEV-145570 - [HIP] Fixed release build Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#13 edit --- api/hip/hip_memory.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 23a631881f..02c5720df4 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -83,7 +83,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind return hipErrorOutOfMemory; } - amd::Command* command; + amd::Command* command = nullptr; amd::Command::EventWaitList waitList; amd::Memory* memory; @@ -568,7 +568,7 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, return hipErrorOutOfMemory; } - amd::Command* command; + amd::Command* command = nullptr; amd::Command::EventWaitList waitList; amd::Memory* memory; @@ -613,7 +613,7 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs return hipErrorOutOfMemory; } - amd::Command* command; + amd::Command* command = nullptr; amd::Command::EventWaitList waitList; amd::Memory* memory; From 6f7e33cb91e43da7fa72756b9e7b7376aa99b56b Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 30 Apr 2018 14:55:41 -0400 Subject: [PATCH 033/282] P4 to Git Change 1547830 by skudchad@skudchad_test2_win_opencl on 2018/04/30 12:03:10 SWDEV-145570 - [HIP] - Add couple of hip_mem* APIs. Part 3. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14727/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#14 edit --- api/hip/hip_memory.cpp | 236 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 226 insertions(+), 10 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 02c5720df4..830865fb93 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -583,6 +583,8 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, *memory->asBuffer(), dstOffset, count, src); break; + case hipMemcpyDeviceToDevice: + case hipMemcpyDefault: default: assert(!"Shouldn't reach here"); break; @@ -628,6 +630,8 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, *memory->asBuffer(), srcOffset, count, dst); break; + case hipMemcpyDeviceToDevice: + case hipMemcpyDefault: default: assert(!"Shouldn't reach here"); break; @@ -648,41 +652,253 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) { HIP_INIT_API(dstArray, dstOffset, srcHost, count); - assert(0 && "Unimplemented"); + amd::Device* device = g_context->devices()[0]; - return hipErrorUnknown; + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } + + amd::Command::EventWaitList waitList; + amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dstArray->data); + amd::Command* command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, + *memory->asBuffer(), dstOffset, count, srcHost); + + if (!command) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + queue->release(); + 
+ return hipSuccess; } hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count) { HIP_INIT_API(dst, srcArray, srcOffset, count); - assert(0 && "Unimplemented"); + amd::Device* device = g_context->devices()[0]; - return hipErrorUnknown; + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } + + amd::Command::EventWaitList waitList; + amd::Memory* memory = amd::SvmManager::FindSvmBuffer(srcArray->data); + amd::Command* command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, + *memory->asBuffer(), srcOffset, count, dst); + + if (!command) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + queue->release(); + + return hipSuccess; } hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { HIP_INIT_API(p); - assert(0 && "Unimplemented"); + amd::Device* device = g_context->devices()[0]; - return hipErrorUnknown; + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } + + size_t byteSize; + size_t srcPitchInBytes; + size_t dstPitchInbytes; + void* srcPtr; + void* dstPtr; + size_t srcOrigin[3]; + size_t dstOrigin[3]; + size_t region[3]; + if (p->dstArray != nullptr) { + switch (p->dstArray->desc.f) { + case hipChannelFormatKindSigned: + byteSize = sizeof(int); + break; + case hipChannelFormatKindUnsigned: + byteSize = sizeof(unsigned int); + break; + case hipChannelFormatKindFloat: + byteSize = sizeof(float); + break; + case hipChannelFormatKindNone: + byteSize = sizeof(size_t); + break; + default: + byteSize = 1; + break; + } + region[2] = p->Depth; + region[1] = p->Height; + region[0] = p->WidthInBytes * byteSize; + srcOrigin[0] = p->srcXInBytes/byteSize; + srcOrigin[1] = 
p->srcY; + srcOrigin[2] = p->srcZ; + dstPitchInbytes = p->dstArray->width * byteSize; + srcPitchInBytes = p->srcPitch; + srcPtr = (void*)p->srcHost; + dstPtr = p->dstArray->data; + dstOrigin[0] = p->dstXInBytes/byteSize; + dstOrigin[1] = p->dstY; + dstOrigin[2] = p->dstZ; + } else { + region[2] = p->extent.depth; + region[1] = p->extent.height; + region[0] = p->extent.width; + srcOrigin[0] = p->srcXInBytes; + srcOrigin[1] = p->srcY; + srcOrigin[2] = p->srcZ; + srcPitchInBytes = p->srcPtr.pitch; + dstPitchInbytes = p->dstPtr.pitch; + srcPtr = p->srcPtr.ptr; + dstPtr = p->dstPtr.ptr; + dstOrigin[0] = p->dstXInBytes; + dstOrigin[1] = p->dstY; + dstOrigin[2] = p->dstZ; + } + + // Create buffer rectangle info structure + amd::BufferRect srcRect; + amd::BufferRect dstRect; + amd::Memory* src = amd::SvmManager::FindSvmBuffer(srcPtr); + amd::Memory* dst = amd::SvmManager::FindSvmBuffer(dstPtr); + + size_t src_slice_pitch = srcPitchInBytes * p->srcHeight; + size_t dst_slice_pitch = dstPitchInbytes * p->dstHeight; + + if (!srcRect.create(srcOrigin, region, srcPitchInBytes, src_slice_pitch) || + !dstRect.create(dstOrigin, region, dstPitchInbytes, dst_slice_pitch)) { + return hipErrorInvalidValue; + } + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D srcEnd(srcRect.end_, 1, 1); + amd::Coord3D dstEnd(dstRect.end_, 1, 1); + + if (!src->asBuffer()->validateRegion(srcStart, srcEnd) || + !dst->asBuffer()->validateRegion(dstStart, dstEnd)) { + return hipErrorInvalidValue; + } + + // Check if regions overlap each other + if ((src->asBuffer() == dst->asBuffer()) && + (std::abs(static_cast(srcOrigin[0]) - static_cast(dstOrigin[0])) < + static_cast(region[0])) && + (std::abs(static_cast(srcOrigin[1]) - static_cast(dstOrigin[1])) < + static_cast(region[1])) && + (std::abs(static_cast(srcOrigin[2]) - static_cast(dstOrigin[2])) < + static_cast(region[2]))) { + return hipErrorUnknown; + } + + amd::Command::EventWaitList waitList; 
+ amd::Coord3D size(region[0], region[1], region[2]); + + amd::CopyMemoryCommand* command = + new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *src->asBuffer(), + *dst->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); + + if (!command) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + queue->release(); + + return hipSuccess; } hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { HIP_INIT_API(dst, pitch, value, width, height); - assert(0 && "Unimplemented"); + amd::Device* device = g_context->devices()[0]; - return hipErrorUnknown; + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } + + amd::Command::EventWaitList waitList; + amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); + + amd::Coord3D fillOffset(0, 0, 0); + + size_t sizeBytes = pitch * height; + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, sizeof(int), fillOffset, fillSize); + + if (!command) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + queue->release(); + + return hipSuccess; } hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); - assert(0 && "Unimplemented"); + amd::Device* device = g_context->devices()[0]; - return hipErrorUnknown; + amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if (!queue) { + return hipErrorOutOfMemory; + } + + amd::Command::EventWaitList waitList; + amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); + + amd::Coord3D 
fillOffset(0, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, sizeof(char), fillOffset, fillSize); + + if (!command) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + queue->release(); + + return hipSuccess; } hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) { From 213f7213187d43fffe2f9061ab762bade3257d0a Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 30 Apr 2018 20:17:46 -0400 Subject: [PATCH 034/282] P4 to Git Change 1548038 by lmoriche@lmoriche_opencl_dev2 on 2018/04/30 15:55:18 SWDEV-145570 - [HIP] Fix kernel disptach for HCC compiled programs. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#6 edit --- api/hip/hip_module.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 268b92284b..f17d5ae719 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -156,12 +156,22 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); amd::Command::EventWaitList waitList; + // 'extra' is a struct that contains the following info: { + // HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs, + // HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size, + // HIP_LAUNCH_PARAM_END } + if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER || + extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE || extra[4] != HIP_LAUNCH_PARAM_END) { + return hipErrorNotInitialized; + } + address kernargs = reinterpret_cast
(extra[1]); + const amd::KernelSignature& signature = kernel->signature(); for (size_t i = 0; i < signature.numParameters(); ++i) { const amd::KernelParameterDescriptor& desc = signature.at(i); if (kernelParams == nullptr) { assert(extra); - kernel->parameters().set(i, desc.size_, reinterpret_cast
(extra[1]) + desc.offset_, + kernel->parameters().set(i, desc.size_, kernargs + desc.offset_, desc.type_ == T_POINTER/*svmBound*/); } else { assert(!extra); From dc1497535d71f8a849a0fc9abfda710b0198f16a Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 30 Apr 2018 21:20:26 -0400 Subject: [PATCH 035/282] P4 to Git Change 1548145 by cpaquot@cpaquot-ocl-lc-lnx on 2018/04/30 21:15:56 SWDEV-145570 - [HIP] Handle hStream=nullptr case Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#7 edit --- api/hip/hip_module.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index f17d5ae719..97723d52dc 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -144,8 +144,14 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, amd::Kernel* kernel = as_amd(reinterpret_cast(f)); amd::Device* device = g_context->devices()[0]; - amd::HostQueue* queue = as_amd(reinterpret_cast(hStream))->asHostQueue(); - + amd::HostQueue* queue; + if (hStream == nullptr) { + queue = new amd::HostQueue(*g_context, *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + } else { + queue = as_amd(reinterpret_cast(hStream))->asHostQueue(); + } if (!queue) { return hipErrorOutOfMemory; } @@ -194,6 +200,10 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, command->awaitCompletion(); command->release(); + if (hStream == nullptr) { + queue->release(); + } + return hipSuccess; } From 61378a359c8dcec55b853b56574cf24508357abf Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 1 May 2018 18:10:09 -0400 Subject: [PATCH 036/282] P4 to Git Change 1548476 by cpaquot@cpaquot-ocl-lc-lnx on 2018/05/01 15:50:51 SWDEV-145570 - [HIP] Added support for null stream avoiding creating/destroying dummy streams. Added basic event class for hipEvent* support. Refactored some common functionality: No more direct access to g_context. Support hipStreamSynchronize(0). 
Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.hpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#7 edit --- api/hip/hip_context.cpp | 50 +++++++++++++---- api/hip/hip_device_runtime.cpp | 4 +- api/hip/hip_event.cpp | 26 ++++++--- api/hip/hip_event.hpp | 40 ++++++++++++++ api/hip/hip_internal.hpp | 13 +++-- api/hip/hip_memory.cpp | 98 +++++++++------------------------- api/hip/hip_module.cpp | 16 ++---- api/hip/hip_platform.cpp | 10 ++-- api/hip/hip_stream.cpp | 17 ++++-- api/hip/hip_texture.cpp | 29 +++++----- 10 files changed, 177 insertions(+), 126 deletions(-) create mode 100644 api/hip/hip_event.hpp diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index e25a87bde8..46e4c864ff 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -27,13 +27,17 @@ THE SOFTWARE. 
#include #include -thread_local amd::Context* g_context = nullptr; -thread_local std::stack g_ctxtStack; - std::vector g_devices; std::once_flag g_ihipInitialized; -void ihipInit() { +namespace hip { + +thread_local amd::Context* g_context = nullptr; +thread_local std::stack g_ctxtStack; + +std::map g_nullStreams; + +void init() { if (!amd::Runtime::initialized()) { amd::Runtime::init(); } @@ -54,6 +58,32 @@ void ihipInit() { } } +amd::Context* getCurrentContext() { + return g_context; +} + +void setCurrentContext(unsigned int index) { + assert(indexdevices()[0]; + amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, 0, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + g_nullStreams[getCurrentContext()] = queue; + return queue; + } + return stream->second; +} + +}; + +using namespace hip; + hipError_t hipInit(unsigned int flags) { HIP_INIT_API(flags); @@ -84,11 +114,11 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) { g_ctxtStack.pop(); } } else { - g_context = reinterpret_cast(as_amd(ctx)); + hip::g_context = reinterpret_cast(as_amd(ctx)); if(!g_ctxtStack.empty()) { g_ctxtStack.pop(); } - g_ctxtStack.push(g_context); + g_ctxtStack.push(hip::getCurrentContext()); } return hipSuccess; @@ -97,7 +127,7 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) { hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { HIP_INIT_API(ctx); - *ctx = reinterpret_cast(g_context); + *ctx = reinterpret_cast(hip::getCurrentContext()); return hipSuccess; } @@ -164,8 +194,8 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) { return hipErrorInvalidContext; } - g_context = amdContext; - g_ctxtStack.push(g_context); + hip::g_context = amdContext; + g_ctxtStack.push(hip::getCurrentContext()); return hipSuccess; } @@ -191,7 +221,7 @@ hipError_t hipCtxGetDevice(hipDevice_t* device) { if (device != nullptr) { for (unsigned int i = 0; i < g_devices.size(); i++) { - if (g_devices[i] == g_context) { + if (g_devices[i] == hip::getCurrentContext()) { *device = 
static_cast(i); return hipSuccess; } diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index effce5974f..77d90d2cb0 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -383,7 +383,7 @@ hipError_t hipGetDevice ( int* deviceId ) { if (deviceId != nullptr) { for (unsigned int i = 0; i < g_devices.size(); i++) { - if (g_devices[i] == g_context) { + if (g_devices[i] == hip::getCurrentContext()) { *deviceId = i; return hipSuccess; } @@ -425,7 +425,7 @@ hipError_t hipSetDevice ( int device ) { HIP_INIT_API(device); if (static_cast(device) < g_devices.size()) { - g_context = g_devices[device]; + hip::setCurrentContext(device); return hipSuccess; } diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 1fe7be9e2f..b9930636bb 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -22,30 +22,42 @@ THE SOFTWARE. #include -#include "hip_internal.hpp" +#include "hip_event.hpp" hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { HIP_INIT_API(event, flags); - assert(0 && "Unimplemented"); + hip::Event* e = new hip::Event(flags); - return hipErrorUnknown; + if (e == nullptr) { + return hipErrorOutOfMemory; + } + + *event = reinterpret_cast(e); + + return hipSuccess; } hipError_t hipEventCreate(hipEvent_t* event) { HIP_INIT_API(event); - assert(0 && "Unimplemented"); + hip::Event* e = new hip::Event(0); - return hipErrorUnknown; + if (e == nullptr) { + return hipErrorOutOfMemory; + } + + *event = reinterpret_cast(e); + + return hipSuccess; } hipError_t hipEventDestroy(hipEvent_t event) { HIP_INIT_API(event); - assert(0 && "Unimplemented"); + delete reinterpret_cast(event); - return hipErrorUnknown; + return hipSuccess; } hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { diff --git a/api/hip/hip_event.hpp b/api/hip/hip_event.hpp new file mode 100644 index 0000000000..3ac1ea8bfe --- /dev/null +++ b/api/hip/hip_event.hpp @@ -0,0 +1,40 @@ +/* +Copyright (c) 
2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_EVENT_H +#define HIP_EVENT_H + +#include "hip_internal.hpp" + +namespace hip { + +class Event { +public: + Event(unsigned int flags) : flags(flags) {} + ~Event() {} + unsigned int flags; +private: +}; + +}; + +#endif // HIP_EVEMT_H diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 2512e35c98..489b8b620f 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -28,7 +28,7 @@ THE SOFTWARE. #include #define HIP_INIT() \ - std::call_once(g_ihipInitialized, ihipInit); + std::call_once(g_ihipInitialized, hip::init); // This macro should be called at the beginning of every HIP API. 
@@ -46,10 +46,17 @@ class accelerator_view; }; extern std::once_flag g_ihipInitialized; -extern thread_local amd::Context* g_context; + +namespace hip { + extern void init(); + + extern amd::Context* getCurrentContext(); + extern void setCurrentContext(unsigned int index); + + extern amd::HostQueue* getNullStream(); +}; extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); -extern void ihipInit(); #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 830865fb93..3a4af23dee 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -39,11 +39,11 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) return hipErrorInvalidValue; } - if (g_context->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { + if (hip::getCurrentContext()->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { return hipErrorOutOfMemory; } - *ptr = amd::SvmBuffer::malloc(*g_context, flags, sizeBytes, g_context->devices()[0]->info().memBaseAddrAlign_); + *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), flags, sizeBytes, hip::getCurrentContext()->devices()[0]->info().memBaseAddrAlign_); if (!*ptr) { return hipErrorOutOfMemory; } @@ -65,7 +65,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { hipError_t hipFree(void* ptr) { if (amd::SvmBuffer::malloced(ptr)) { - amd::SvmBuffer::free(*g_context, ptr); + amd::SvmBuffer::free(*hip::getCurrentContext(), ptr); return hipSuccess; } return hipErrorInvalidValue; @@ -74,11 +74,8 @@ hipError_t hipFree(void* ptr) { hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { HIP_INIT_API(dst, src, sizeBytes, kind); - amd::Device* device = g_context->devices()[0]; + amd::HostQueue* queue = hip::getNullStream(); - amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); if (!queue) { return 
hipErrorOutOfMemory; } @@ -119,8 +116,6 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind command->awaitCompletion(); command->release(); - queue->release(); - return hipSuccess; } @@ -135,11 +130,8 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); - amd::Device* device = g_context->devices()[0]; + amd::HostQueue* queue = hip::getNullStream(); - amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); if (!queue) { return hipErrorOutOfMemory; } @@ -162,8 +154,6 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { command->awaitCompletion(); command->release(); - queue->release(); - return hipSuccess; } @@ -185,7 +175,7 @@ hipError_t hipHostFree(void* ptr) { HIP_INIT_API(ptr); if (amd::SvmBuffer::malloced(ptr)) { - amd::SvmBuffer::free(*g_context, ptr); + amd::SvmBuffer::free(*hip::getCurrentContext(), ptr); return hipSuccess; } return hipErrorInvalidValue; @@ -195,7 +185,7 @@ hipError_t hipFreeArray(hipArray* array) { HIP_INIT_API(array); if (amd::SvmBuffer::malloced(array->data)) { - amd::SvmBuffer::free(*g_context, array->data); + amd::SvmBuffer::free(*hip::getCurrentContext(), array->data); return hipSuccess; } return hipErrorInvalidValue; @@ -222,7 +212,7 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) { HIP_INIT_API(free, total); size_t freeMemory[2]; - amd::Device* device = g_context->devices()[0]; + amd::Device* device = hip::getCurrentContext()->devices()[0]; if(!device) { return hipErrorInvalidDevice; } @@ -240,7 +230,7 @@ return hipSuccess; hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth, cl_mem_object_type imageType, const cl_image_format* image_format) { - amd::Device* device = g_context->devices()[0]; + amd::Device* device = 
hip::getCurrentContext()->devices()[0]; if ((width == 0) || (height == 0)) { *ptr = nullptr; @@ -251,7 +241,7 @@ hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t heigh return hipErrorInvalidValue; } - if (g_context->devices()[0]->info().maxMemAllocSize_ < (width * height)) { + if (device->info().maxMemAllocSize_ < (width * height)) { return hipErrorOutOfMemory; } @@ -260,8 +250,8 @@ hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t heigh *pitch = width * imageFormat.getElementSize(); size_t sizeBytes = *pitch * height * depth; - *ptr = amd::SvmBuffer::malloc(*g_context, CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeBytes, - g_context->devices()[0]->info().memBaseAddrAlign_); + *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeBytes, + device->info().memBaseAddrAlign_); if (!*ptr) { return hipErrorMemoryAllocation; @@ -559,11 +549,7 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind) { HIP_INIT_API(dstArray, wOffset, hOffset, src, count, kind); - amd::Device* device = g_context->devices()[0]; - - amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); + amd::HostQueue* queue = hip::getNullStream(); if (!queue) { return hipErrorOutOfMemory; } @@ -597,8 +583,6 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, command->awaitCompletion(); command->release(); - queue->release(); - return hipSuccess; } @@ -606,11 +590,7 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs size_t count, hipMemcpyKind kind) { HIP_INIT_API(dst, srcArray, wOffset, hOffset, count, kind); - amd::Device* device = g_context->devices()[0]; - - amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); + amd::HostQueue* 
queue = hip::getNullStream(); if (!queue) { return hipErrorOutOfMemory; } @@ -644,19 +624,13 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs command->awaitCompletion(); command->release(); - queue->release(); - return hipSuccess; } hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) { HIP_INIT_API(dstArray, dstOffset, srcHost, count); - amd::Device* device = g_context->devices()[0]; - - amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); + amd::HostQueue* queue = hip::getNullStream(); if (!queue) { return hipErrorOutOfMemory; } @@ -674,19 +648,13 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo command->awaitCompletion(); command->release(); - queue->release(); - return hipSuccess; } hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count) { HIP_INIT_API(dst, srcArray, srcOffset, count); - amd::Device* device = g_context->devices()[0]; - - amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); + amd::HostQueue* queue = hip::getNullStream(); if (!queue) { return hipErrorOutOfMemory; } @@ -704,19 +672,13 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t command->awaitCompletion(); command->release(); - queue->release(); - return hipSuccess; } hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { HIP_INIT_API(p); - amd::Device* device = g_context->devices()[0]; - - amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); + amd::HostQueue* queue = hip::getNullStream(); if (!queue) { return hipErrorOutOfMemory; } @@ -826,19 +788,13 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { 
command->awaitCompletion(); command->release(); - queue->release(); - return hipSuccess; } hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { HIP_INIT_API(dst, pitch, value, width, height); - amd::Device* device = g_context->devices()[0]; - - amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); + amd::HostQueue* queue = hip::getNullStream(); if (!queue) { return hipErrorOutOfMemory; } @@ -862,19 +818,13 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t command->awaitCompletion(); command->release(); - queue->release(); - return hipSuccess; } hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); - amd::Device* device = g_context->devices()[0]; - - amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); + amd::HostQueue* queue = hip::getNullStream(); if (!queue) { return hipErrorOutOfMemory; } @@ -896,8 +846,6 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes command->awaitCompletion(); command->release(); - queue->release(); - return hipSuccess; } @@ -938,9 +886,13 @@ hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannel hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsigned flags) { HIP_INIT_API(devicePointer, hostPointer, flags); - assert(0 && "Unimplemented"); + if (!amd::SvmBuffer::malloced(hostPointer)) { + return hipErrorInvalidValue; + } + // right now we have SVM + *devicePointer = hostPointer; - return hipErrorUnknown; + return hipSuccess; } hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 97723d52dc..0a5675114c 100644 --- a/api/hip/hip_module.cpp 
+++ b/api/hip/hip_module.cpp @@ -94,13 +94,13 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image) hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) { - amd::Program* program = new amd::Program(*g_context); + amd::Program* program = new amd::Program(*hip::getCurrentContext()); if (program == NULL) { return hipErrorOutOfMemory; } - if (CL_SUCCESS != program->addDeviceProgram(*g_context->devices()[0], image, ElfSize(image)) || - CL_SUCCESS != program->build(g_context->devices(), nullptr, nullptr, nullptr)) { + if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image)) || + CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) { return hipErrorUnknown; } @@ -142,13 +142,11 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, kernelParams, extra); amd::Kernel* kernel = as_amd(reinterpret_cast(f)); - amd::Device* device = g_context->devices()[0]; + amd::Device* device = hip::getCurrentContext()->devices()[0]; amd::HostQueue* queue; if (hStream == nullptr) { - queue = new amd::HostQueue(*g_context, *device, 0, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); + queue = hip::getNullStream(); } else { queue = as_amd(reinterpret_cast(hStream))->asHostQueue(); } @@ -200,10 +198,6 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, command->awaitCompletion(); command->release(); - if (hStream == nullptr) { - queue->release(); - } - return hipSuccess; } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 7152458d38..9f7ccbe658 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -68,7 +68,7 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data) return nullptr; } - amd::Program* program = new amd::Program(*g_context); + amd::Program* program = new amd::Program(*hip::getCurrentContext()); if (!program) return nullptr; @@ -84,15 +84,15 @@ extern "C" hipModule_t 
__hipRegisterFatBinary(const void* data) std::string target(desc->triple + sizeof(OPENMP_AMDGCN_AMDHSA_TRIPLE), desc->tripleSize - sizeof(OPENMP_AMDGCN_AMDHSA_TRIPLE)); - if (target.compare(g_context->devices()[0]->info().name_)) + if (target.compare(hip::getCurrentContext()->devices()[0]->info().name_)) continue; const void *image = reinterpret_cast( reinterpret_cast(obheader) + desc->offset); size_t size = desc->size; - if (CL_SUCCESS == program->addDeviceProgram(*g_context->devices()[0], image, size) && - CL_SUCCESS == program->build(g_context->devices(), nullptr, nullptr, nullptr)) + if (CL_SUCCESS == program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, size) && + CL_SUCCESS == program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) break; } @@ -372,7 +372,7 @@ const std::vector& modules() { std::string target(desc->triple + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE), desc->tripleSize - sizeof(HCC_AMDGCN_AMDHSA_TRIPLE)); - if (!target.compare(g_context->devices()[0]->info().name_)) { + if (!target.compare(hip::getCurrentContext()->devices()[0]->info().name_)) { hipModule_t module; if (hipSuccess == hipModuleLoadData(&module, reinterpret_cast( reinterpret_cast(obheader) + desc->offset))) diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 6d0da6adfc..6c2c29fb51 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -27,9 +27,9 @@ THE SOFTWARE. 
static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { assert(flags == 0); // we don't handle flags yet - amd::Device* device = g_context->devices()[0]; + amd::Device* device = hip::getCurrentContext()->devices()[0]; - amd::HostQueue* queue = new amd::HostQueue(*g_context, *device, 0, + amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, 0, amd::CommandQueue::RealTimeDisabled, amd::CommandQueue::Priority::Normal); @@ -68,7 +68,14 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { hipError_t hipStreamSynchronize(hipStream_t stream) { HIP_INIT_API(stream); - amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); + amd::HostQueue* hostQueue; + + if (stream == nullptr) { + hostQueue = hip::getNullStream(); + } else { + hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); + } + if (hostQueue == nullptr) { return hipErrorUnknown; } @@ -82,6 +89,10 @@ hipError_t hipStreamSynchronize(hipStream_t stream) { hipError_t hipStreamDestroy(hipStream_t stream) { HIP_INIT_API(stream); + if (stream == nullptr) { + return hipErrorInvalidResourceHandle; + } + as_amd(reinterpret_cast(stream))->release(); return hipSuccess; diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 220332a57b..e619065f02 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -142,7 +142,9 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou const hipResourceViewDesc* pResViewDesc) { HIP_INIT_API(pTexObject, pResDesc, pTexDesc, pResViewDesc); - if (!g_context->devices()[0]->info().imageSupport_) { + amd::Device* device = hip::getCurrentContext()->devices()[0]; + + if (!device->info().imageSupport_) { return hipErrorInvalidValue; } @@ -173,8 +175,9 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou case hipArrayTextureGather: case hipArrayDefault: default: - image = new (*g_context) 
amd::Image(*memory->asBuffer(), CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat, - pResDesc->res.array.array->width, pResDesc->res.array.array->height, 1, 0, 0); + image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat, + pResDesc->res.array.array->width, pResDesc->res.array.array->height, 1, 0, 0); break; } } @@ -186,17 +189,19 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou assert(pResViewDesc == nullptr); memory = amd::SvmManager::FindSvmBuffer(pResDesc->res.linear.devPtr); - image = new (*g_context) amd::Image(*memory->asBuffer(), CL_MEM_OBJECT_IMAGE1D, memory->getMemFlags(), imageFormat, - pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize(), 1, 1, - pResDesc->res.linear.sizeInBytes, 0); + image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + CL_MEM_OBJECT_IMAGE1D, memory->getMemFlags(), imageFormat, + pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize(), 1, 1, + pResDesc->res.linear.sizeInBytes, 0); break; case hipResourceTypePitch2D: assert(pResViewDesc == nullptr); memory = amd::SvmManager::FindSvmBuffer(pResDesc->res.pitch2D.devPtr); - image = new (*g_context) amd::Image(*memory->asBuffer(), CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat, - pResDesc->res.pitch2D.width, pResDesc->res.pitch2D.height, 1, - pResDesc->res.pitch2D.pitchInBytes, 0); + image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat, + pResDesc->res.pitch2D.width, pResDesc->res.pitch2D.height, 1, + pResDesc->res.pitch2D.pitchInBytes, 0); break; default: return hipErrorInvalidValue; } @@ -247,7 +252,7 @@ hipError_t ihipBindTexture(cl_mem_object_type type, if (tex == nullptr) { return hipErrorInvalidImage; } - if (g_context) { + if (hip::getCurrentContext()) { cl_image_format image_format; if (nullptr == desc) { @@ -260,8 +265,8 @@ hipError_t 
ihipBindTexture(cl_mem_object_type type, const amd::Image::Format imageFormat(image_format); amd::Memory* memory = amd::SvmManager::FindSvmBuffer(devPtr); - amd::Image* image = new (*g_context) amd::Image(*memory->asBuffer(), type, memory->getMemFlags(), - imageFormat, width, height, 1, pitch, 0); + amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + type, memory->getMemFlags(), imageFormat, width, height, 1, pitch, 0); *offset = 0; if (tex->textureObject) { From 9c7b32810112a96c778202878f77a0ddf41014f6 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 1 May 2018 18:46:32 -0400 Subject: [PATCH 037/282] P4 to Git Change 1548558 by skudchad@skudchad_test2_win_opencl on 2018/05/01 18:39:58 SWDEV-145570 - [HIP] - Implement some of hipMem*Async and hipMemSet2D APIs. Part 4. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14743/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#16 edit --- api/hip/hip_memory.cpp | 157 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 147 insertions(+), 10 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 3a4af23dee..10299de971 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -26,8 +26,15 @@ THE SOFTWARE. 
#include "platform/command.hpp" #include "platform/memory.hpp" -extern void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureReadMode readMode, - cl_channel_order* channelOrder, cl_channel_type* channelType); +extern void getChannelOrderAndType(const hipChannelFormatDesc& desc, + enum hipTextureReadMode readMode, + cl_channel_order* channelOrder, + cl_channel_type* channelType); + +extern void getDrvChannelOrderAndType(const enum hipArray_Format Format, + unsigned int NumChannels, + cl_channel_order* channelOrder, + cl_channel_type* channelType); hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { @@ -122,9 +129,35 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, value, sizeBytes, stream); - assert(0 && "Unimplemented"); + amd::HostQueue* queue; - return hipErrorUnknown; + if (stream == nullptr) { + queue = hip::getNullStream(); + } else { + queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + } + + if (!queue) { + return hipErrorOutOfMemory; + } + + amd::Command::EventWaitList waitList; + amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); + + amd::Coord3D fillOffset(0, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, sizeof(int), fillOffset, fillSize); + + if (!command) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->release(); + + return hipSuccess; } hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { @@ -294,9 +327,34 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { HIP_INIT_API(array, pAllocateArray); - assert(0 && "Unimplemented"); + if (array[0]->width == 0) { + return 
hipErrorInvalidValue; + } - return hipErrorUnknown; + *array = (hipArray*)malloc(sizeof(hipArray)); + array[0]->drvDesc = *pAllocateArray; + array[0]->width = pAllocateArray->width; + array[0]->height = pAllocateArray->height; + array[0]->isDrv = true; + array[0]->textureType = hipTextureType2D; + void** ptr = &array[0]->data; + + cl_channel_order channelOrder; + cl_channel_type channelType; + getDrvChannelOrderAndType(pAllocateArray->format, pAllocateArray->numChannels, + &channelOrder, &channelType); + + const cl_image_format image_format = { channelOrder, channelType }; + size_t size = pAllocateArray->width; + if (pAllocateArray->height > 0) { + size = size * pAllocateArray->height; + } + + size_t pitch = 0; + hipError_t status = ihipMallocPitch(ptr, &pitch, array[0]->width, array[0]->height, 1, CL_MEM_OBJECT_IMAGE2D, + &image_format); + + return status; } hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, @@ -477,9 +535,44 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, kind, stream); - assert(0 && "Unimplemented"); + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + amd::Memory* memory; + amd::HostQueue* queue; - return hipErrorUnknown; + if (stream == nullptr) { + queue = hip::getNullStream(); + } else { + queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + } + + if (!queue) { + return hipErrorOutOfMemory; + } + + switch (kind) { + case hipMemcpyDeviceToHost: + memory = amd::SvmManager::FindSvmBuffer(src); + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, + *memory->asBuffer(), 0, sizeBytes, dst); + break; + case hipMemcpyHostToDevice: + memory = amd::SvmManager::FindSvmBuffer(dst); + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, + *memory->asBuffer(), 0, sizeBytes, src); + break; + default: + assert(!"Shouldn't reach here"); + break; 
+ } + if (!command) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->release(); + + return hipSuccess; } @@ -514,9 +607,52 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t height, hipMemcpyKind kind) { HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind); - assert(0 && "Unimplemented"); + amd::HostQueue* queue = hip::getNullStream(); + if (!queue) { + return hipErrorOutOfMemory; + } - return hipErrorUnknown; + // Create buffer rectangle info structure + amd::BufferRect srcRect; + amd::BufferRect dstRect; + amd::Memory* srcPtr = amd::SvmManager::FindSvmBuffer(src); + amd::Memory* dstPtr = amd::SvmManager::FindSvmBuffer(dst); + size_t region[3] = {width, height, 0}; + size_t src_slice_pitch = spitch * height; + size_t dst_slice_pitch = dpitch * height; + size_t origin[3] = { }; + + if (!srcRect.create(origin, region, spitch, src_slice_pitch) || + !dstRect.create(origin, region, dpitch, dst_slice_pitch)) { + return hipErrorInvalidValue; + } + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D srcEnd(srcRect.end_, 1, 1); + amd::Coord3D dstEnd(dstRect.end_, 1, 1); + + if (!srcPtr->asBuffer()->validateRegion(srcStart, srcEnd) || + !dstPtr->asBuffer()->validateRegion(dstStart, dstEnd)) { + return hipErrorInvalidValue; + } + + amd::Command::EventWaitList waitList; + amd::Coord3D size(region[0], region[1], region[2]); + + amd::CopyMemoryCommand* command = + new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(), + *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); + + if (!command) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + + return hipSuccess; } hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { @@ -679,6 +815,7 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { HIP_INIT_API(p); amd::HostQueue* queue = 
hip::getNullStream(); + if (!queue) { return hipErrorOutOfMemory; } From 5f549b62e8ff9746199c922245c8c0ad2cbd67ae Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 1 May 2018 19:01:10 -0400 Subject: [PATCH 038/282] P4 to Git Change 1548560 by skudchad@skudchad_rocm on 2018/05/01 18:53:54 SWDEV-145570 - [HIP] - Fix hipDeviceSynchronize. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#8 edit --- api/hip/hip_device_runtime.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index 77d90d2cb0..c7a4d4913d 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -375,6 +375,13 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { } hipError_t hipDeviceSynchronize ( void ) { + amd::HostQueue* queue = hip::getNullStream(); + + if (!queue) { + return hipErrorOutOfMemory; + } + + queue->finish(); return hipSuccess; } From 0ddf28890417def0c33ec2ff33730fd66867eb9f Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 2 May 2018 19:14:52 -0400 Subject: [PATCH 039/282] P4 to Git Change 1549012 by lmoriche@lmoriche_opencl_dev2 on 2018/05/02 16:56:02 SWDEV-145570 - [HIP] - Add hipMemset2DAsync, fix the HCC triple for device binaries (make sure to update to the latest HCC). Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#4 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#4 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#17 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#10 edit --- api/hip/hip_hcc.def.in | 1 + api/hip/hip_hcc.map.in | 1 + api/hip/hip_memory.cpp | 9 +++++++++ api/hip/hip_platform.cpp | 2 +- 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index e7fd04ad04..d2916f3885 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -98,6 +98,7 @@ hipMemPtrGetInfo hipMemset hipMemset2D hipMemsetAsync +hipMemset2DAsync hipMemsetD8 hipModuleGetFunction hipModuleGetGlobal diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index d0bc6b4618..761641425e 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -99,6 +99,7 @@ global: hipMemset; hipMemset2D; hipMemsetAsync; + hipMemset2DAsync; hipMemsetD8; hipModuleGetFunction; hipModuleGetGlobal; diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 10299de971..921cfbe288 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -958,6 +958,15 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t return hipSuccess; } +hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, + hipStream_t stream) { + HIP_INIT_API(dst, pitch, value, width, height, stream); + + assert(0 && "Unimplemented"); + + return hipErrorUnknown; +} + hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 9f7ccbe658..386d711275 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -40,7 +40,7 @@ struct __CudaFatBinaryWrapper { #define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" #define OPENMP_AMDGCN_AMDHSA_TRIPLE "openmp-amdgcn--amdhsa" -#define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn--amdhsa" +#define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn-amd-amdhsa-" struct __ClangOffloadBundleDesc { uint64_t offset; From 
445d300d6326dadc32f2e99a5d3035fea23e3742 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 2 May 2018 21:08:53 -0400 Subject: [PATCH 040/282] P4 to Git Change 1549060 by skudchad@skudchad_test2_win_opencl on 2018/05/02 17:36:53 SWDEV-145570 - [HIP] - Add hipMemcpyDeviceToDevice/hipMemcpyHostToHost/hipMemcpyDefault cases to hipMemcpy/hipMemcpyAsync, hipMemset2DAsync ReviewBoardURL = http://ocltc.amd.com/reviews/r/14746/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#18 edit --- api/hip/hip_memory.cpp | 160 +++++++++++++++++++++++++++++------------ 1 file changed, 114 insertions(+), 46 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 921cfbe288..45435d2e3f 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -42,7 +42,7 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) *ptr = nullptr; return hipSuccess; } - else if (!ptr) { + else if (ptr == nullptr) { return hipErrorInvalidValue; } @@ -51,7 +51,7 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) } *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), flags, sizeBytes, hip::getCurrentContext()->devices()[0]->info().memBaseAddrAlign_); - if (!*ptr) { + if (*ptr == nullptr) { return hipErrorOutOfMemory; } @@ -83,30 +83,52 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind amd::HostQueue* queue = hip::getNullStream(); - if (!queue) { + if (queue == nullptr) { return hipErrorOutOfMemory; } amd::Command* command = nullptr; amd::Command::EventWaitList waitList; - amd::Memory* memory; + amd::Memory *srcMemory = nullptr; + amd::Memory *dstMemory = nullptr; + + srcMemory = amd::SvmManager::FindSvmBuffer(src); + dstMemory = amd::SvmManager::FindSvmBuffer(dst); + + if (kind == hipMemcpyDefault) { + // Determine kind on VA + if (srcMemory == nullptr && dstMemory != nullptr) { + kind = hipMemcpyHostToDevice; + } else if (srcMemory != nullptr && dstMemory == 
nullptr) { + kind = hipMemcpyDeviceToHost; + } else if (srcMemory != nullptr && dstMemory != nullptr) { + kind = hipMemcpyDeviceToDevice; + } else { + kind = hipMemcpyHostToHost; + } + } switch (kind) { case hipMemcpyDeviceToHost: - memory = amd::SvmManager::FindSvmBuffer(src); command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, - *memory->asBuffer(), 0, sizeBytes, dst); + *srcMemory->asBuffer(), 0, sizeBytes, dst); break; case hipMemcpyHostToDevice: - memory = amd::SvmManager::FindSvmBuffer(dst); command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, - *memory->asBuffer(), 0, sizeBytes, src); + *dstMemory->asBuffer(), 0, sizeBytes, src); break; + case hipMemcpyDeviceToDevice: + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), 0, 0, sizeBytes); + break; + case hipMemcpyHostToHost: + memcpy(dst, src, sizeBytes); + return hipSuccess; default: assert(!"Shouldn't reach here"); break; } - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -133,14 +155,13 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st if (stream == nullptr) { queue = hip::getNullStream(); + if (queue == nullptr) { + return hipErrorOutOfMemory; + } } else { queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - if (!queue) { - return hipErrorOutOfMemory; - } - amd::Command::EventWaitList waitList; amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); @@ -150,7 +171,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), &value, sizeof(int), fillOffset, fillSize); - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -165,7 +186,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { amd::HostQueue* queue = hip::getNullStream(); - if (!queue) { + if 
(queue == nullptr) { return hipErrorOutOfMemory; } @@ -179,7 +200,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), &value, sizeof(int), fillOffset, fillSize); - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -246,7 +267,7 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) { size_t freeMemory[2]; amd::Device* device = hip::getCurrentContext()->devices()[0]; - if(!device) { + if(device == nullptr) { return hipErrorInvalidDevice; } @@ -286,7 +307,7 @@ hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t heigh *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeBytes, device->info().memBaseAddrAlign_); - if (!*ptr) { + if (*ptr == nullptr) { return hipErrorMemoryAllocation; } @@ -537,35 +558,56 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, amd::Command* command = nullptr; amd::Command::EventWaitList waitList; - amd::Memory* memory; amd::HostQueue* queue; + amd::Memory *srcMemory = nullptr; + amd::Memory *dstMemory = nullptr; if (stream == nullptr) { queue = hip::getNullStream(); + if (queue == nullptr) { + return hipErrorOutOfMemory; + } } else { queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - if (!queue) { - return hipErrorOutOfMemory; + srcMemory = amd::SvmManager::FindSvmBuffer(src); + dstMemory = amd::SvmManager::FindSvmBuffer(dst); + + if (kind == hipMemcpyDefault) { + // Determine kind on VA + if (srcMemory == nullptr && dstMemory != nullptr) { + kind = hipMemcpyHostToDevice; + } else if (srcMemory != nullptr && dstMemory == nullptr) { + kind = hipMemcpyDeviceToHost; + } else if (srcMemory != nullptr && dstMemory != nullptr) { + kind = hipMemcpyDeviceToDevice; + } else { + kind = hipMemcpyHostToHost; + } } switch (kind) { case hipMemcpyDeviceToHost: - memory = amd::SvmManager::FindSvmBuffer(src); command = new 
amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, - *memory->asBuffer(), 0, sizeBytes, dst); + *srcMemory->asBuffer(), 0, sizeBytes, dst); break; case hipMemcpyHostToDevice: - memory = amd::SvmManager::FindSvmBuffer(dst); command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, - *memory->asBuffer(), 0, sizeBytes, src); + *dstMemory->asBuffer(), 0, sizeBytes, src); break; + case hipMemcpyDeviceToDevice: + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), 0, 0, sizeBytes); + break; + case hipMemcpyHostToHost: + memcpy(dst, src, sizeBytes); + return hipSuccess; default: assert(!"Shouldn't reach here"); break; } - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -608,7 +650,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind); amd::HostQueue* queue = hip::getNullStream(); - if (!queue) { + if (queue == nullptr) { return hipErrorOutOfMemory; } @@ -644,7 +686,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(), *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -686,7 +728,7 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, HIP_INIT_API(dstArray, wOffset, hOffset, src, count, kind); amd::HostQueue* queue = hip::getNullStream(); - if (!queue) { + if (queue == nullptr) { return hipErrorOutOfMemory; } @@ -711,7 +753,7 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, assert(!"Shouldn't reach here"); break; } - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -727,7 +769,7 @@ hipError_t hipMemcpyFromArray(void* dst, 
hipArray_const_t srcArray, size_t wOffs HIP_INIT_API(dst, srcArray, wOffset, hOffset, count, kind); amd::HostQueue* queue = hip::getNullStream(); - if (!queue) { + if (queue == nullptr) { return hipErrorOutOfMemory; } @@ -752,7 +794,7 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs assert(!"Shouldn't reach here"); break; } - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -767,7 +809,7 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo HIP_INIT_API(dstArray, dstOffset, srcHost, count); amd::HostQueue* queue = hip::getNullStream(); - if (!queue) { + if (queue == nullptr) { return hipErrorOutOfMemory; } @@ -776,7 +818,7 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo amd::Command* command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, *memory->asBuffer(), dstOffset, count, srcHost); - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -791,7 +833,7 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t HIP_INIT_API(dst, srcArray, srcOffset, count); amd::HostQueue* queue = hip::getNullStream(); - if (!queue) { + if (queue == nullptr) { return hipErrorOutOfMemory; } @@ -800,7 +842,7 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t amd::Command* command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, *memory->asBuffer(), srcOffset, count, dst); - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -816,7 +858,7 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { amd::HostQueue* queue = hip::getNullStream(); - if (!queue) { + if (queue == nullptr) { return hipErrorOutOfMemory; } @@ -917,7 +959,7 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *src->asBuffer(), *dst->asBuffer(), 
srcStart, dstStart, size, srcRect, dstRect); - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -932,7 +974,7 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t HIP_INIT_API(dst, pitch, value, width, height); amd::HostQueue* queue = hip::getNullStream(); - if (!queue) { + if (queue == nullptr) { return hipErrorOutOfMemory; } @@ -947,7 +989,7 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), &value, sizeof(int), fillOffset, fillSize); - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -958,20 +1000,46 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t return hipSuccess; } -hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, - hipStream_t stream) { +hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, + size_t width, size_t height, hipStream_t stream) { HIP_INIT_API(dst, pitch, value, width, height, stream); - assert(0 && "Unimplemented"); + amd::HostQueue* queue == nullptr; + if (stream == nullptr) { + queue = hip::getNullStream(); + if (queue == nullptr) { + return hipErrorOutOfMemory; + } + } else { + queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + } - return hipErrorUnknown; + amd::Command::EventWaitList waitList; + amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); + + amd::Coord3D fillOffset(0, 0, 0); + + size_t sizeBytes = pitch * height; + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, sizeof(int), fillOffset, fillSize); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->release(); + + return hipSuccess; } hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t 
sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); amd::HostQueue* queue = hip::getNullStream(); - if (!queue) { + if (queue == nullptr) { return hipErrorOutOfMemory; } @@ -984,7 +1052,7 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), &value, sizeof(char), fillOffset, fillSize); - if (!command) { + if (command == nullptr) { return hipErrorOutOfMemory; } From a10512c51028d9392ba899c83093b6d326ce5d35 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 2 May 2018 22:25:53 -0400 Subject: [PATCH 041/282] P4 to Git Change 1549067 by skudchad@skudchad_rocm on 2018/05/02 17:40:37 SWDEV-145570 - [HIP] - Fix build Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#19 edit --- api/hip/hip_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 45435d2e3f..52a6c7be2c 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1004,7 +1004,7 @@ hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream) { HIP_INIT_API(dst, pitch, value, width, height, stream); - amd::HostQueue* queue == nullptr; + amd::HostQueue* queue = nullptr; if (stream == nullptr) { queue = hip::getNullStream(); if (queue == nullptr) { From 3ad5f2a01b54165086f7559203c268f71f31477a Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 3 May 2018 18:26:45 -0400 Subject: [PATCH 042/282] P4 to Git Change 1549965 by skudchad@skudchad_test2_win_opencl on 2018/05/03 18:15:18 SWDEV-145570 - [HIP] - Minor fixes. Use coarse grain for hipMallocPitch ReviewBoardURL = http://ocltc.amd.com/reviews/r/14768/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#20 edit --- api/hip/hip_memory.cpp | 68 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 13 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 52a6c7be2c..c1d00c096f 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -89,11 +89,8 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind amd::Command* command = nullptr; amd::Command::EventWaitList waitList; - amd::Memory *srcMemory = nullptr; - amd::Memory *dstMemory = nullptr; - - srcMemory = amd::SvmManager::FindSvmBuffer(src); - dstMemory = amd::SvmManager::FindSvmBuffer(dst); + amd::Memory *srcMemory = amd::SvmManager::FindSvmBuffer(src);; + amd::Memory *dstMemory = amd::SvmManager::FindSvmBuffer(dst); if (kind == hipMemcpyDefault) { // Determine kind on VA @@ -304,7 +301,7 @@ hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t heigh *pitch = width * imageFormat.getElementSize(); size_t sizeBytes = *pitch * height * depth; - *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeBytes, + *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), CL_MEM_SVM_COARSE_GRAIN_BUFFER, sizeBytes, device->info().memBaseAddrAlign_); if (*ptr == nullptr) { @@ -559,8 +556,8 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, amd::Command* command = nullptr; amd::Command::EventWaitList waitList; amd::HostQueue* queue; - amd::Memory *srcMemory = nullptr; - amd::Memory *dstMemory = nullptr; + amd::Memory *srcMemory = amd::SvmManager::FindSvmBuffer(src);; + amd::Memory *dstMemory = amd::SvmManager::FindSvmBuffer(dst); if (stream == nullptr) { queue = hip::getNullStream(); @@ -571,9 +568,6 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - srcMemory = amd::SvmManager::FindSvmBuffer(src); - dstMemory = 
amd::SvmManager::FindSvmBuffer(dst); - if (kind == hipMemcpyDefault) { // Determine kind on VA if (srcMemory == nullptr && dstMemory != nullptr) { @@ -709,9 +703,57 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp size_t height, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind, stream); - assert(0 && "Unimplemented"); + amd::HostQueue* queue; - return hipErrorUnknown; + if (stream == nullptr) { + queue = hip::getNullStream(); + if (queue == nullptr) { + return hipErrorOutOfMemory; + } + } else { + queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + } + + // Create buffer rectangle info structure + amd::BufferRect srcRect; + amd::BufferRect dstRect; + amd::Memory* srcPtr = amd::SvmManager::FindSvmBuffer(src); + amd::Memory* dstPtr = amd::SvmManager::FindSvmBuffer(dst); + size_t region[3] = {width, height, 0}; + size_t src_slice_pitch = spitch * height; + size_t dst_slice_pitch = dpitch * height; + size_t origin[3] = { }; + + if (!srcRect.create(origin, region, spitch, src_slice_pitch) || + !dstRect.create(origin, region, dpitch, dst_slice_pitch)) { + return hipErrorInvalidValue; + } + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D srcEnd(srcRect.end_, 1, 1); + amd::Coord3D dstEnd(dstRect.end_, 1, 1); + + if (!srcPtr->asBuffer()->validateRegion(srcStart, srcEnd) || + !dstPtr->asBuffer()->validateRegion(dstStart, dstEnd)) { + return hipErrorInvalidValue; + } + + amd::Command::EventWaitList waitList; + amd::Coord3D size(region[0], region[1], region[2]); + + amd::CopyMemoryCommand* command = + new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(), + *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->release(); + + return hipSuccess; } hipError_t 
hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, From 99d3444f004abcdd6b7df03129ab1c3af2b2a874 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 3 May 2018 18:56:01 -0400 Subject: [PATCH 043/282] P4 to Git Change 1549971 by skudchad@skudchad_rocm on 2018/05/03 18:39:37 SWDEV-145570 - [HIP] - Fix build Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#21 edit --- api/hip/hip_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index c1d00c096f..9be5f5745f 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -301,7 +301,7 @@ hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t heigh *pitch = width * imageFormat.getElementSize(); size_t sizeBytes = *pitch * height * depth; - *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), CL_MEM_SVM_COARSE_GRAIN_BUFFER, sizeBytes, + *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), 0, sizeBytes, device->info().memBaseAddrAlign_); if (*ptr == nullptr) { From 0508a4ed375e599538be2678cbfaab8720b94b2b Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 3 May 2018 21:26:33 -0400 Subject: [PATCH 044/282] P4 to Git Change 1550010 by cpaquot@cpaquot-ocl-lc-lnx on 2018/05/03 21:16:06 SWDEV-145570 - [HIP] Fixed region initialization Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#22 edit --- api/hip/hip_memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 9be5f5745f..b0290ae005 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -653,7 +653,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, amd::BufferRect dstRect; amd::Memory* srcPtr = amd::SvmManager::FindSvmBuffer(src); amd::Memory* dstPtr = amd::SvmManager::FindSvmBuffer(dst); - size_t region[3] = {width, height, 0}; + size_t region[3] = {width, height, 1}; size_t src_slice_pitch = spitch * height; size_t dst_slice_pitch = dpitch * height; size_t origin[3] = { }; @@ -719,7 +719,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp amd::BufferRect dstRect; amd::Memory* srcPtr = amd::SvmManager::FindSvmBuffer(src); amd::Memory* dstPtr = amd::SvmManager::FindSvmBuffer(dst); - size_t region[3] = {width, height, 0}; + size_t region[3] = {width, height, 1}; size_t src_slice_pitch = spitch * height; size_t dst_slice_pitch = dpitch * height; size_t origin[3] = { }; From 9129aa17f6d6f6a95294aca43726027fe8746e4c Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 4 May 2018 14:13:13 -0400 Subject: [PATCH 045/282] P4 to Git Change 1550486 by cpaquot@cpaquot-ocl-lc-lnx on 2018/05/04 14:00:33 SWDEV-145570 - [HIP] Fix multithread init Make the g_ihipInitialized variable per thread And make sure to assign a default g_context Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#10 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#9 edit --- api/hip/hip_context.cpp | 3 +-- api/hip/hip_internal.hpp | 17 ++++++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 46e4c864ff..dfeacdc924 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -28,12 +28,12 @@ THE SOFTWARE. #include std::vector g_devices; -std::once_flag g_ihipInitialized; namespace hip { thread_local amd::Context* g_context = nullptr; thread_local std::stack g_ctxtStack; +std::once_flag g_ihipInitialized; std::map g_nullStreams; @@ -53,7 +53,6 @@ void init() { context->release(); } else { g_devices.push_back(context); - g_context = context; } } } diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 489b8b620f..099aa6ca25 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -28,26 +28,29 @@ THE SOFTWARE. #include #define HIP_INIT() \ - std::call_once(g_ihipInitialized, hip::init); - + std::call_once(hip::g_ihipInitialized, hip::init); \ + assert(g_devices.size() > 0); \ + if (hip::g_context == nullptr) { \ + hip::g_context = g_devices[0]; \ + } // This macro should be called at the beginning of every HIP API. #define HIP_INIT_API(...) 
\ - HIP_INIT(); \ - \ amd::Thread* thread = amd::Thread::current(); \ if (!CL_CHECK_THREAD(thread)) { \ return hipErrorOutOfMemory; \ - } + } \ + HIP_INIT(); namespace hc { class accelerator; class accelerator_view; }; -extern std::once_flag g_ihipInitialized; - namespace hip { + extern std::once_flag g_ihipInitialized; + extern thread_local amd::Context* g_context; + extern void init(); extern amd::Context* getCurrentContext(); From 65b557ecce7fbf3bdd763a08eb5814507f3d768a Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 4 May 2018 15:12:48 -0400 Subject: [PATCH 046/282] P4 to Git Change 1550526 by cpaquot@cpaquot-ocl-lc-lnx on 2018/05/04 15:00:44 SWDEV-145570 - [HIP] Missing HIP_INIT_API macro in hipDeviceSynchronize. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#9 edit --- api/hip/hip_device_runtime.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index c7a4d4913d..c84abe02da 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -375,6 +375,8 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { } hipError_t hipDeviceSynchronize ( void ) { + HIP_INIT_API(); + amd::HostQueue* queue = hip::getNullStream(); if (!queue) { From 2c5606ea4456a739e7114caafb7a01823cfe70db Mon Sep 17 00:00:00 2001 From: foreman Date: Sat, 5 May 2018 00:34:05 -0400 Subject: [PATCH 047/282] P4 to Git Change 1550648 by skudchad@skudchad_test2_win_opencl on 2018/05/04 20:56:00 SWDEV-145570 - [HIP] - Implement hipMemcpyHtoD/DtoH/DtoD/ and their Async APIs - Combine logic for hipMemset/Memcpy/Memset2D/Memcpy2D that can be shared across multiple APIs ReviewBoardURL = http://ocltc.amd.com/reviews/r/14782/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#23 edit --- api/hip/hip_memory.cpp | 458 ++++++++++++++++------------------------- 1 file changed, 173 insertions(+), 285 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index b0290ae005..09fefe4c8b 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -58,6 +58,94 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) return hipSuccess; } +hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + amd::HostQueue& queue, bool isAsync = false) { + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + amd::Memory *srcMemory = amd::SvmManager::FindSvmBuffer(src);; + amd::Memory *dstMemory = amd::SvmManager::FindSvmBuffer(dst); + + if (kind == hipMemcpyDefault) { + // Determine kind on VA + if (srcMemory == nullptr && dstMemory != nullptr) { + kind = hipMemcpyHostToDevice; + } else if (srcMemory != nullptr && dstMemory == nullptr) { + kind = hipMemcpyDeviceToHost; + } else if (srcMemory != nullptr && dstMemory != nullptr) { + kind = hipMemcpyDeviceToDevice; + } else { + kind = hipMemcpyHostToHost; + } + } + + switch (kind) { + case hipMemcpyDeviceToHost: + command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList, + *srcMemory->asBuffer(), 0, sizeBytes, dst); + break; + case hipMemcpyHostToDevice: + command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER, waitList, + *dstMemory->asBuffer(), 0, sizeBytes, src); + break; + case hipMemcpyDeviceToDevice: + command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), 0, 0, sizeBytes); + break; + case hipMemcpyHostToHost: + memcpy(dst, src, sizeBytes); + return hipSuccess; + default: + assert(!"Shouldn't reach here"); + break; + } + if (command == nullptr) { + return hipErrorOutOfMemory; + } + +// FIXME: virtualize MemoryCommand::validateMemory() +#if 0 + // 
Make sure we have memory for the command execution + if (CL_SUCCESS != command->validateMemory()) { + delete command; + return hipErrorMemoryAllocation; + } +#endif + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, amd::HostQueue& queue, + bool isAsync = false) { + + amd::Command::EventWaitList waitList; + amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); + + amd::Coord3D fillOffset(0, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, sizeof(int), fillOffset, fillSize); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + hipError_t hipMalloc(void** ptr, size_t sizeBytes) { HIP_INIT_API(ptr, sizeBytes); @@ -82,67 +170,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind HIP_INIT_API(dst, src, sizeBytes, kind); amd::HostQueue* queue = hip::getNullStream(); - - if (queue == nullptr) { - return hipErrorOutOfMemory; - } - - amd::Command* command = nullptr; - amd::Command::EventWaitList waitList; - amd::Memory *srcMemory = amd::SvmManager::FindSvmBuffer(src);; - amd::Memory *dstMemory = amd::SvmManager::FindSvmBuffer(dst); - - if (kind == hipMemcpyDefault) { - // Determine kind on VA - if (srcMemory == nullptr && dstMemory != nullptr) { - kind = hipMemcpyHostToDevice; - } else if (srcMemory != nullptr && dstMemory == nullptr) { - kind = hipMemcpyDeviceToHost; - } else if (srcMemory != nullptr && dstMemory != nullptr) { - kind = hipMemcpyDeviceToDevice; - } else { - kind = hipMemcpyHostToHost; - } - } - - switch (kind) { - case hipMemcpyDeviceToHost: - command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, 
waitList, - *srcMemory->asBuffer(), 0, sizeBytes, dst); - break; - case hipMemcpyHostToDevice: - command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, - *dstMemory->asBuffer(), 0, sizeBytes, src); - break; - case hipMemcpyDeviceToDevice: - command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, - *srcMemory->asBuffer(),*dstMemory->asBuffer(), 0, 0, sizeBytes); - break; - case hipMemcpyHostToHost: - memcpy(dst, src, sizeBytes); - return hipSuccess; - default: - assert(!"Shouldn't reach here"); - break; - } - if (command == nullptr) { - return hipErrorOutOfMemory; - } - -// FIXME: virtualize MemoryCommand::validateMemory() -#if 0 - // Make sure we have memory for the command execution - if (CL_SUCCESS != command->validateMemory()) { - delete command; - return hipErrorMemoryAllocation; - } -#endif - - command->enqueue(); - command->awaitCompletion(); - command->release(); - - return hipSuccess; + return ihipMemcpy(dst, src, sizeBytes, kind, *queue); } hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { @@ -152,30 +180,11 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st if (stream == nullptr) { queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } } else { queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - amd::Command::EventWaitList waitList; - amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); - - amd::Coord3D fillOffset(0, 0, 0); - amd::Coord3D fillSize(sizeBytes, 1, 1); - amd::FillMemoryCommand* command = - new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, sizeof(int), fillOffset, fillSize); - - if (command == nullptr) { - return hipErrorOutOfMemory; - } - - command->enqueue(); - command->release(); - - return hipSuccess; + return ihipMemset(dst, value, sizeBytes, *queue, true); } hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { 
@@ -183,29 +192,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { amd::HostQueue* queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } - - amd::Command::EventWaitList waitList; - amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); - - - amd::Coord3D fillOffset(0, 0, 0); - amd::Coord3D fillSize(sizeBytes, 1, 1); - amd::FillMemoryCommand* command = - new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, sizeof(int), fillOffset, fillSize); - - if (command == nullptr) { - return hipErrorOutOfMemory; - } - - command->enqueue(); - command->awaitCompletion(); - command->release(); - - return hipSuccess; + return ihipMemset(dst, value, sizeBytes, *queue); } hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { @@ -520,95 +507,48 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { HIP_INIT_API(dst, src, sizeBytes); - assert(0 && "Unimplemented"); + amd::HostQueue* queue = hip::getNullStream(); - return hipErrorUnknown; + return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyHostToDevice, *queue); } hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { HIP_INIT_API(dst, src, sizeBytes); - assert(0 && "Unimplemented"); + amd::HostQueue* queue = hip::getNullStream(); - return hipErrorUnknown; + return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToHost, *queue); } hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) { HIP_INIT_API(dst, src, sizeBytes); - assert(0 && "Unimplemented"); + amd::HostQueue* queue = hip::getNullStream(); - return hipErrorUnknown; + return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, *queue); } hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { HIP_INIT_API(dst, src, sizeBytes); - assert(0 && 
"Unimplemented"); + amd::HostQueue* queue = hip::getNullStream(); - return hipErrorUnknown; + return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyHostToHost, *queue); } hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, kind, stream); - amd::Command* command = nullptr; - amd::Command::EventWaitList waitList; - amd::HostQueue* queue; - amd::Memory *srcMemory = amd::SvmManager::FindSvmBuffer(src);; - amd::Memory *dstMemory = amd::SvmManager::FindSvmBuffer(dst); + amd::HostQueue* queue = nullptr; if (stream == nullptr) { queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } } else { queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - if (kind == hipMemcpyDefault) { - // Determine kind on VA - if (srcMemory == nullptr && dstMemory != nullptr) { - kind = hipMemcpyHostToDevice; - } else if (srcMemory != nullptr && dstMemory == nullptr) { - kind = hipMemcpyDeviceToHost; - } else if (srcMemory != nullptr && dstMemory != nullptr) { - kind = hipMemcpyDeviceToDevice; - } else { - kind = hipMemcpyHostToHost; - } - } - - switch (kind) { - case hipMemcpyDeviceToHost: - command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, - *srcMemory->asBuffer(), 0, sizeBytes, dst); - break; - case hipMemcpyHostToDevice: - command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, - *dstMemory->asBuffer(), 0, sizeBytes, src); - break; - case hipMemcpyDeviceToDevice: - command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, - *srcMemory->asBuffer(),*dstMemory->asBuffer(), 0, 0, sizeBytes); - break; - case hipMemcpyHostToHost: - memcpy(dst, src, sizeBytes); - return hipSuccess; - default: - assert(!"Shouldn't reach here"); - break; - } - if (command == nullptr) { - return hipErrorOutOfMemory; - } - - command->enqueue(); - command->release(); - - return hipSuccess; + 
return ihipMemcpy(dst, src, sizeBytes, kind, *queue, true); } @@ -616,79 +556,48 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); - assert(0 && "Unimplemented"); + amd::HostQueue* queue = nullptr; - return hipErrorUnknown; + if (stream == nullptr) { + queue = hip::getNullStream(); + } else { + queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + } + + return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyHostToDevice, + *queue, true); } hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); - assert(0 && "Unimplemented"); + amd::HostQueue* queue = nullptr; - return hipErrorUnknown; + if (stream == nullptr) { + queue = hip::getNullStream(); + } else { + queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + } + + return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, + *queue, true); } hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); - assert(0 && "Unimplemented"); + amd::HostQueue* queue = nullptr; - return hipErrorUnknown; -} - -hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind) { - HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind); - - amd::HostQueue* queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; + if (stream == nullptr) { + queue = hip::getNullStream(); + } else { + queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - // Create buffer rectangle info structure - amd::BufferRect srcRect; - amd::BufferRect dstRect; - amd::Memory* srcPtr = amd::SvmManager::FindSvmBuffer(src); - amd::Memory* dstPtr = amd::SvmManager::FindSvmBuffer(dst); - size_t region[3] = {width, height, 1}; - size_t 
src_slice_pitch = spitch * height; - size_t dst_slice_pitch = dpitch * height; - size_t origin[3] = { }; - - if (!srcRect.create(origin, region, spitch, src_slice_pitch) || - !dstRect.create(origin, region, dpitch, dst_slice_pitch)) { - return hipErrorInvalidValue; - } - - amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D srcEnd(srcRect.end_, 1, 1); - amd::Coord3D dstEnd(dstRect.end_, 1, 1); - - if (!srcPtr->asBuffer()->validateRegion(srcStart, srcEnd) || - !dstPtr->asBuffer()->validateRegion(dstStart, dstEnd)) { - return hipErrorInvalidValue; - } - - amd::Command::EventWaitList waitList; - amd::Coord3D size(region[0], region[1], region[2]); - - amd::CopyMemoryCommand* command = - new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(), - *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); - - if (command == nullptr) { - return hipErrorOutOfMemory; - } - - command->enqueue(); - command->awaitCompletion(); - command->release(); - - return hipSuccess; + return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToHost, + *queue, true); } hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { @@ -699,21 +608,9 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { return hipErrorUnknown; } -hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind, stream); - - amd::HostQueue* queue; - - if (stream == nullptr) { - queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } - } else { - queue = as_amd(reinterpret_cast(stream))->asHostQueue(); - } - +hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, amd::HostQueue& queue, + bool isAsync = false) { // Create buffer 
rectangle info structure amd::BufferRect srcRect; amd::BufferRect dstRect; @@ -743,7 +640,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp amd::Coord3D size(region[0], region[1], region[2]); amd::CopyMemoryCommand* command = - new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(), + new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(), *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); if (command == nullptr) { @@ -751,11 +648,40 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp } command->enqueue(); + + if (!isAsync) { + command->awaitCompletion(); + } command->release(); return hipSuccess; } +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind) { + HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind); + + amd::HostQueue* queue = hip::getNullStream(); + + return ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, *queue); +} + + +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind, stream); + + amd::HostQueue* queue; + + if (stream == nullptr) { + queue = hip::getNullStream(); + } else { + queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + } + + return ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, *queue, true); +} + hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { HIP_INIT_API(dst, wOffset, hOffset, src, spitch, width, height, kind); @@ -770,9 +696,6 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, HIP_INIT_API(dstArray, wOffset, hOffset, src, count, kind); amd::HostQueue* queue 
= hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } amd::Command* command = nullptr; amd::Command::EventWaitList waitList; @@ -783,7 +706,6 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, switch (kind) { case hipMemcpyDeviceToHost: assert(!"Invalid case"); - /* fall thru */ case hipMemcpyHostToDevice: memory = amd::SvmManager::FindSvmBuffer(dstArray->data); command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, @@ -811,9 +733,6 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs HIP_INIT_API(dst, srcArray, wOffset, hOffset, count, kind); amd::HostQueue* queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } amd::Command* command = nullptr; amd::Command::EventWaitList waitList; @@ -824,7 +743,6 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs switch (kind) { case hipMemcpyHostToDevice: assert(!"Invalid case"); - /* fall thru */ case hipMemcpyDeviceToHost: memory = amd::SvmManager::FindSvmBuffer(srcArray->data); command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, @@ -851,9 +769,6 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo HIP_INIT_API(dstArray, dstOffset, srcHost, count); amd::HostQueue* queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } amd::Command::EventWaitList waitList; amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dstArray->data); @@ -875,9 +790,6 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t HIP_INIT_API(dst, srcArray, srcOffset, count); amd::HostQueue* queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } amd::Command::EventWaitList waitList; amd::Memory* memory = amd::SvmManager::FindSvmBuffer(srcArray->data); @@ -900,10 +812,6 @@ hipError_t hipMemcpy3D(const struct 
hipMemcpy3DParms* p) { amd::HostQueue* queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } - size_t byteSize; size_t srcPitchInBytes; size_t dstPitchInbytes; @@ -1012,13 +920,8 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { return hipSuccess; } -hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { - HIP_INIT_API(dst, pitch, value, width, height); - - amd::HostQueue* queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } +hipError_t ihipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height, + amd::HostQueue& queue, bool isAsync = false) { amd::Command::EventWaitList waitList; amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); @@ -1028,7 +931,7 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t size_t sizeBytes = pitch * height; amd::Coord3D fillSize(sizeBytes, 1, 1); amd::FillMemoryCommand* command = - new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + new amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), &value, sizeof(int), fillOffset, fillSize); if (command == nullptr) { @@ -1036,12 +939,21 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t } command->enqueue(); - command->awaitCompletion(); + if(!isAsync) { + command->awaitCompletion(); + } command->release(); return hipSuccess; } +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { + HIP_INIT_API(dst, pitch, value, width, height); + + amd::HostQueue* queue = hip::getNullStream(); + return ihipMemset2D(dst, pitch, value, width, height, *queue); +} + hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream) { HIP_INIT_API(dst, pitch, value, width, height, stream); @@ -1049,41 +961,17 @@ hipError_t hipMemset2DAsync(void* dst, size_t 
pitch, int value, amd::HostQueue* queue = nullptr; if (stream == nullptr) { queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } } else { queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - amd::Command::EventWaitList waitList; - amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); - - amd::Coord3D fillOffset(0, 0, 0); - - size_t sizeBytes = pitch * height; - amd::Coord3D fillSize(sizeBytes, 1, 1); - amd::FillMemoryCommand* command = - new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, sizeof(int), fillOffset, fillSize); - - if (command == nullptr) { - return hipErrorOutOfMemory; - } - - command->enqueue(); - command->release(); - - return hipSuccess; + return ihipMemset2D(dst, pitch, value, width, height, *queue, true); } hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); amd::HostQueue* queue = hip::getNullStream(); - if (queue == nullptr) { - return hipErrorOutOfMemory; - } amd::Command::EventWaitList waitList; amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); From 2df122be02c7baefcc74d6ad32a0e48f7ff7bf1b Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 8 May 2018 15:43:13 -0400 Subject: [PATCH 048/282] P4 to Git Change 1552011 by skudchad@skudchad_test2_win_opencl on 2018/05/08 14:48:45 SWDEV-145570 - [HIP] Fix offset calculation when getting a memory object. Also include the case when the destination VA may just be a CPU host VA and not necessarily device-allocated. - Fix hipMemset* to write each byte and not a dword as per the spec ReviewBoardURL = http://ocltc.amd.com/reviews/r/14787/diff/ Affected files ... ...
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#24 edit --- api/hip/hip_memory.cpp | 262 ++++++++++++++++++++++++++++------------- 1 file changed, 178 insertions(+), 84 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 09fefe4c8b..568ea557ec 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -36,6 +36,14 @@ extern void getDrvChannelOrderAndType(const enum hipArray_Format Format, cl_channel_order* channelOrder, cl_channel_type* channelType); +inline amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { + amd::Memory *memObj = amd::SvmManager::FindSvmBuffer(ptr); + if (memObj != nullptr) { + offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getSvmPtr()); + } + return memObj; +} + hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { if (sizeBytes == 0) { @@ -63,8 +71,13 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin amd::Command* command = nullptr; amd::Command::EventWaitList waitList; - amd::Memory *srcMemory = amd::SvmManager::FindSvmBuffer(src);; - amd::Memory *dstMemory = amd::SvmManager::FindSvmBuffer(dst); + size_t sOffset = 0; + amd::Memory *srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory *dstMemory = getMemoryObject(dst, dOffset); + + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); if (kind == hipMemcpyDefault) { // Determine kind on VA @@ -82,15 +95,15 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin switch (kind) { case hipMemcpyDeviceToHost: command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList, - *srcMemory->asBuffer(), 0, sizeBytes, dst); + *srcMemory->asBuffer(), srcOffset, sizeBytes, dst); break; case hipMemcpyHostToDevice: command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER, waitList, - *dstMemory->asBuffer(), 0, sizeBytes, src); + *dstMemory->asBuffer(), dstOffset, sizeBytes, src); 
break; case hipMemcpyDeviceToDevice: command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, - *srcMemory->asBuffer(),*dstMemory->asBuffer(), 0, 0, sizeBytes); + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, sizeBytes); break; case hipMemcpyHostToHost: memcpy(dst, src, sizeBytes); @@ -124,24 +137,35 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, amd::HostQueue& queue, bool isAsync = false) { - amd::Command::EventWaitList waitList; - amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); - - amd::Coord3D fillOffset(0, 0, 0); - amd::Coord3D fillSize(sizeBytes, 1, 1); - amd::FillMemoryCommand* command = - new amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, sizeof(int), fillOffset, fillSize); - - if (command == nullptr) { - return hipErrorOutOfMemory; + if (dst == nullptr) { + return hipErrorInvalidValue; } - command->enqueue(); - if (!isAsync) { - command->awaitCompletion(); + size_t offset = 0; + amd::Memory* memory = getMemoryObject(dst, offset); + + if (memory != nullptr) { + // Device memory + amd::Command::EventWaitList waitList; + amd::Coord3D fillOffset(offset, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, sizeof(char), fillOffset, fillSize); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + } else { + // Host alloced memory + memset(dst, value, sizeBytes); } - command->release(); return hipSuccess; } @@ -198,7 +222,8 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { HIP_INIT_API(ptr, size); - amd::Memory* svmMem = amd::SvmManager::FindSvmBuffer(ptr); 
+ size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(ptr, offset); if (svmMem == nullptr) { return hipErrorInvalidValue; @@ -234,13 +259,14 @@ hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDevice // Since we are using SVM buffer DevicePtr and HostPtr is the same void* ptr = dptr; - amd::Memory* svmMem = amd::SvmManager::FindSvmBuffer(ptr); + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(ptr, offset); if (svmMem == nullptr) { return hipErrorInvalidDevicePointer; } - *pbase = ptr; + *pbase = svmMem->getSvmPtr(); *psize = svmMem->getSize(); return hipSuccess; @@ -509,7 +535,7 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { amd::HostQueue* queue = hip::getNullStream(); - return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyHostToDevice, *queue); + return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, *queue); } hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { @@ -517,7 +543,7 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { amd::HostQueue* queue = hip::getNullStream(); - return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToHost, *queue); + return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, *queue); } hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) { @@ -525,7 +551,7 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte amd::HostQueue* queue = hip::getNullStream(); - return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, *queue); + return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, *queue); } hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { @@ -533,7 +559,7 @@ hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { amd::HostQueue* queue = 
hip::getNullStream(); - return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyHostToHost, *queue); + return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToHost, *queue); } hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, @@ -564,7 +590,7 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyHostToDevice, + return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, *queue, true); } @@ -580,7 +606,7 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, + return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, *queue, true); } @@ -596,7 +622,7 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToHost, + return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, *queue, true); } @@ -614,34 +640,70 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch // Create buffer rectangle info structure amd::BufferRect srcRect; amd::BufferRect dstRect; - amd::Memory* srcPtr = amd::SvmManager::FindSvmBuffer(src); - amd::Memory* dstPtr = amd::SvmManager::FindSvmBuffer(dst); + size_t region[3] = {width, height, 1}; size_t src_slice_pitch = spitch * height; size_t dst_slice_pitch = dpitch * height; - size_t origin[3] = { }; + size_t sOrigin[3] = { }; + size_t dOrigin[3] = { }; + amd::Memory* srcPtr = getMemoryObject(src, sOrigin[0]); + amd::Memory* dstPtr = getMemoryObject(dst, 
dOrigin[0]); - if (!srcRect.create(origin, region, spitch, src_slice_pitch) || - !dstRect.create(origin, region, dpitch, dst_slice_pitch)) { - return hipErrorInvalidValue; + if (kind == hipMemcpyDefault) { + // Determine kind on VA + if (srcPtr == nullptr && dstPtr != nullptr) { + kind = hipMemcpyHostToDevice; + } else if (srcPtr != nullptr && dstPtr == nullptr) { + kind = hipMemcpyDeviceToHost; + } else if (srcPtr != nullptr && dstPtr != nullptr) { + kind = hipMemcpyDeviceToDevice; + } else { + kind = hipMemcpyHostToHost; + } } amd::Coord3D srcStart(srcRect.start_, 0, 0); amd::Coord3D dstStart(dstRect.start_, 0, 0); amd::Coord3D srcEnd(srcRect.end_, 1, 1); amd::Coord3D dstEnd(dstRect.end_, 1, 1); - - if (!srcPtr->asBuffer()->validateRegion(srcStart, srcEnd) || - !dstPtr->asBuffer()->validateRegion(dstStart, dstEnd)) { - return hipErrorInvalidValue; - } - - amd::Command::EventWaitList waitList; amd::Coord3D size(region[0], region[1], region[2]); - amd::CopyMemoryCommand* command = - new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(), - *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); + if (!srcRect.create(sOrigin, region, spitch, src_slice_pitch) || + !dstRect.create(dOrigin, region, dpitch, dst_slice_pitch)) { + return hipErrorInvalidValue; + } +/* + if (((srcPtr != nullptr) && (!srcPtr->asBuffer()->validateRegion(srcStart, srcEnd))) || + ((srcPtr != nullptr) && (!dstPtr->asBuffer()->validateRegion(dstStart, dstEnd)))) { + return hipErrorInvalidValue; + } +*/ + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + switch (kind) { + case hipMemcpyDeviceToHost: + command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcPtr->asBuffer(), srcStart, size, dst, srcRect, dstRect); + break; + case hipMemcpyHostToDevice: + command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstPtr->asBuffer(), dstStart, size, src, dstRect, 
srcRect); + break; + case hipMemcpyDeviceToDevice: + command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(), + *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); + break; + case hipMemcpyHostToHost: + for(unsigned int y = 0; y < height; y++) { + void* pDst = reinterpret_cast(reinterpret_cast(dst) + y * dpitch); + void* pSrc = reinterpret_cast(reinterpret_cast(src) + y * spitch); + memcpy(pDst, pSrc, width); + } + return hipSuccess; + default: + assert(!"Shouldn't reach here"); + break; + } if (command == nullptr) { return hipErrorOutOfMemory; @@ -700,14 +762,15 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, amd::Command* command = nullptr; amd::Command::EventWaitList waitList; amd::Memory* memory; - + size_t offset = 0; amd::Coord3D dstOffset(wOffset, hOffset, 0); switch (kind) { case hipMemcpyDeviceToHost: assert(!"Invalid case"); case hipMemcpyHostToDevice: - memory = amd::SvmManager::FindSvmBuffer(dstArray->data); + memory = getMemoryObject(dstArray->data, offset); + assert(offset == 0); command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, *memory->asBuffer(), dstOffset, count, src); break; @@ -738,13 +801,15 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs amd::Command::EventWaitList waitList; amd::Memory* memory; + size_t offset = 0; amd::Coord3D srcOffset(wOffset, hOffset, 0); switch (kind) { case hipMemcpyHostToDevice: assert(!"Invalid case"); case hipMemcpyDeviceToHost: - memory = amd::SvmManager::FindSvmBuffer(srcArray->data); + memory = getMemoryObject(srcArray->data, offset); + assert(offset == 0); command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, *memory->asBuffer(), srcOffset, count, dst); break; @@ -771,7 +836,9 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo amd::HostQueue* queue = hip::getNullStream(); 
amd::Command::EventWaitList waitList; - amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dstArray->data); + size_t offset = 0; + amd::Memory* memory = getMemoryObject(dstArray->data, offset); + assert(offset == 0); amd::Command* command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, *memory->asBuffer(), dstOffset, count, srcHost); @@ -792,7 +859,9 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t amd::HostQueue* queue = hip::getNullStream(); amd::Command::EventWaitList waitList; - amd::Memory* memory = amd::SvmManager::FindSvmBuffer(srcArray->data); + size_t offset = 0; + amd::Memory* memory = getMemoryObject(srcArray->data, offset); + assert(offset == 0); amd::Command* command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, *memory->asBuffer(), srcOffset, count, dst); @@ -870,8 +939,11 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { // Create buffer rectangle info structure amd::BufferRect srcRect; amd::BufferRect dstRect; - amd::Memory* src = amd::SvmManager::FindSvmBuffer(srcPtr); - amd::Memory* dst = amd::SvmManager::FindSvmBuffer(dstPtr); + size_t offset = 0; + amd::Memory* src = getMemoryObject(srcPtr, offset); + assert(offset == 0); + amd::Memory* dst = getMemoryObject(dstPtr, offset); + assert(offset == 0); size_t src_slice_pitch = srcPitchInBytes * p->srcHeight; size_t dst_slice_pitch = dstPitchInbytes * p->dstHeight; @@ -923,26 +995,39 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { hipError_t ihipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height, amd::HostQueue& queue, bool isAsync = false) { - amd::Command::EventWaitList waitList; - amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); - - amd::Coord3D fillOffset(0, 0, 0); + if (dst == nullptr) { + return hipErrorInvalidValue; + } size_t sizeBytes = pitch * height; - amd::Coord3D fillSize(sizeBytes, 1, 1); - amd::FillMemoryCommand* command = - new 
amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, sizeof(int), fillOffset, fillSize); + size_t offset = 0; - if (command == nullptr) { - return hipErrorOutOfMemory; - } + amd::Memory* memory = getMemoryObject(dst, offset); - command->enqueue(); - if(!isAsync) { - command->awaitCompletion(); + if (memory != nullptr) { + // Device memory + amd::Command::EventWaitList waitList; + amd::Coord3D fillOffset(offset, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + + // TODO: Byte copies are inefficient. Combine multiple writes inside runtime + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, sizeof(char), fillOffset, fillSize); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if(!isAsync) { + command->awaitCompletion(); + } + command->release(); + } else { + // Host alloced memory + memset(dst, value, sizeBytes); } - command->release(); return hipSuccess; } @@ -971,24 +1056,33 @@ hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); - amd::HostQueue* queue = hip::getNullStream(); - - amd::Command::EventWaitList waitList; - amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst); - - amd::Coord3D fillOffset(0, 0, 0); - amd::Coord3D fillSize(sizeBytes, 1, 1); - amd::FillMemoryCommand* command = - new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, sizeof(char), fillOffset, fillSize); - - if (command == nullptr) { - return hipErrorOutOfMemory; + if (dst == nullptr) { + return hipErrorInvalidValue; } - command->enqueue(); - command->awaitCompletion(); - command->release(); + amd::HostQueue* queue = hip::getNullStream(); + size_t offset = 0; + amd::Command::EventWaitList waitList; + amd::Memory* memory = getMemoryObject(dst, 
offset); + if (memory != nullptr) { + // Device memory + amd::Coord3D fillOffset(offset, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, sizeof(char), fillOffset, fillSize); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + command->awaitCompletion(); + command->release(); + } else { + // Host alloced memory + memset(dst, value, sizeBytes); + } return hipSuccess; } From 7176a5fe819dce81219400e71c056b7dd160a0ab Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 8 May 2018 19:04:35 -0400 Subject: [PATCH 049/282] P4 to Git Change 1552198 by skudchad@skudchad_rocm on 2018/05/08 18:57:32 SWDEV-145570 - [HIP] - Synchronize the legacy default stream with other blocking streams as per the spec. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14796/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#5 edit --- api/hip/hip_context.cpp | 11 ++++++++--- api/hip/hip_internal.hpp | 6 ++++-- api/hip/hip_stream.cpp | 29 +++++++++++++++++++---------- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index dfeacdc924..8559b641fc 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -24,8 +24,6 @@ THE SOFTWARE. 
#include "hip_internal.hpp" #include "platform/runtime.hpp" #include "utils/versions.hpp" -#include -#include std::vector g_devices; @@ -35,7 +33,7 @@ thread_local amd::Context* g_context = nullptr; thread_local std::stack g_ctxtStack; std::once_flag g_ihipInitialized; -std::map g_nullStreams; +std::map g_nullStreams; void init() { if (!amd::Runtime::initialized()) { @@ -66,6 +64,12 @@ void setCurrentContext(unsigned int index) { g_context = g_devices[index]; } +void syncStreams() { + for (const auto& it : streamSet) { + it->finish(); + } +} + amd::HostQueue* getNullStream() { auto stream = g_nullStreams.find(getCurrentContext()); if (stream == g_nullStreams.end()) { @@ -76,6 +80,7 @@ amd::HostQueue* getNullStream() { g_nullStreams[getCurrentContext()] = queue; return queue; } + syncStreams(); return stream->second; } diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 099aa6ca25..1f97be451a 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -24,8 +24,9 @@ THE SOFTWARE. #define HIP_SRC_HIP_INTERNAL_H #include "cl_common.hpp" - +#include #include +#include #define HIP_INIT() \ std::call_once(hip::g_ihipInitialized, hip::init); \ @@ -57,9 +58,10 @@ namespace hip { extern void setCurrentContext(unsigned int index); extern amd::HostQueue* getNullStream(); + extern void syncStreams(); }; extern std::vector g_devices; - +extern thread_local std::unordered_set streamSet; extern hipError_t ihipDeviceGetCount(int* count); #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 6c2c29fb51..de6083a856 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -21,18 +21,22 @@ THE SOFTWARE. 
*/ #include - #include "hip_internal.hpp" -static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { - assert(flags == 0); // we don't handle flags yet +thread_local std::unordered_set streamSet; +static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { amd::Device* device = hip::getCurrentContext()->devices()[0]; amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, 0, amd::CommandQueue::RealTimeDisabled, amd::CommandQueue::Priority::Normal); + if (!(flags & hipStreamNonBlocking)) { + hip::syncStreams(); + streamSet.insert(queue); + } + if (queue == nullptr) { return hipErrorOutOfMemory; } @@ -48,23 +52,27 @@ hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { return ihipStreamCreateWithFlags(stream, flags); } - hipError_t hipStreamCreate(hipStream_t *stream) { HIP_INIT_API(stream); return ihipStreamCreateWithFlags(stream, hipStreamDefault); } - hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { HIP_INIT_API(stream, flags); - assert(0 && "Unimplemented"); + amd::HostQueue* hostQueue = reinterpret_cast(stream); + auto it = streamSet.find(hostQueue); - return hipErrorUnknown; + if(flags != nullptr) { + *flags = (it != streamSet.end()) ? 
hipStreamNonBlocking : hipStreamDefault; + } else { + return hipErrorInvalidValue; + } + + return hipSuccess; } - hipError_t hipStreamSynchronize(hipStream_t stream) { HIP_INIT_API(stream); @@ -85,7 +93,6 @@ hipError_t hipStreamSynchronize(hipStream_t stream) { return hipSuccess; } - hipError_t hipStreamDestroy(hipStream_t stream) { HIP_INIT_API(stream); @@ -93,12 +100,14 @@ hipError_t hipStreamDestroy(hipStream_t stream) { return hipErrorInvalidResourceHandle; } + amd::HostQueue* hostQueue = reinterpret_cast(stream); + streamSet.erase(hostQueue); + as_amd(reinterpret_cast(stream))->release(); return hipSuccess; } - hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_API(stream, event, flags); From 50810d28b84c7951bf76c3961414e90ecb52e798 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 8 May 2018 20:30:17 -0400 Subject: [PATCH 050/282] P4 to Git Change 1552209 by cpaquot@cpaquot-ocl-lc-lnx on 2018/05/08 20:24:32 SWDEV-145570 - [HIP] Use the as_amd()->asHostQueue with streamSet Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#6 edit --- api/hip/hip_stream.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index de6083a856..1c007edf1d 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -61,7 +61,7 @@ hipError_t hipStreamCreate(hipStream_t *stream) { hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { HIP_INIT_API(stream, flags); - amd::HostQueue* hostQueue = reinterpret_cast(stream); + amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); auto it = streamSet.find(hostQueue); if(flags != nullptr) { @@ -100,10 +100,11 @@ hipError_t hipStreamDestroy(hipStream_t stream) { return hipErrorInvalidResourceHandle; } - amd::HostQueue* hostQueue = reinterpret_cast(stream); + amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); + streamSet.erase(hostQueue); - as_amd(reinterpret_cast(stream))->release(); + hostQueue->release(); return hipSuccess; } From ac011af36ba0217a6a09dcea90173d1e4430e2ce Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 9 May 2018 18:15:41 -0400 Subject: [PATCH 051/282] P4 to Git Change 1552807 by skudchad@skudchad_test2_win_opencl on 2018/05/09 18:08:07 SWDEV-145570 - [HIP] - Release a stream first before taking it off from the set. - Queue::create() needs to be called before returning a valid queue. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14830/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#7 edit --- api/hip/hip_stream.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 1c007edf1d..7cb359b1b9 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -32,15 +32,15 @@ static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int fl amd::CommandQueue::RealTimeDisabled, amd::CommandQueue::Priority::Normal); + if (queue == nullptr || !queue->create()) { + return hipErrorOutOfMemory; + } + if (!(flags & hipStreamNonBlocking)) { hip::syncStreams(); streamSet.insert(queue); } - if (queue == nullptr) { - return hipErrorOutOfMemory; - } - *stream = reinterpret_cast(as_cl(queue)); return hipSuccess; @@ -101,10 +101,8 @@ hipError_t hipStreamDestroy(hipStream_t stream) { } amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); - - streamSet.erase(hostQueue); - hostQueue->release(); + streamSet.erase(hostQueue); return hipSuccess; } From fbd86acc9f2f82901da3c7ffc6dadba31f93cd24 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 11 May 2018 11:34:01 -0400 Subject: [PATCH 052/282] P4 to Git Change 1553709 by cpaquot@cpaquot-ocl-lc-lnx on 2018/05/11 11:24:30 SWDEV-145570 - [HIP] Make streamSet global and protect it By default from the spec, streamSet should be global and not per thread. There is a flag to make it per thread but we don't handle this yet. We would just add another variable that will be thread local and use it instead. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#12 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#11 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#8 edit --- api/hip/hip_context.cpp | 6 ------ api/hip/hip_device_runtime.cpp | 2 ++ api/hip/hip_internal.hpp | 1 - api/hip/hip_stream.cpp | 24 ++++++++++++++++++++++-- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 8559b641fc..e75c8e5b5e 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -64,12 +64,6 @@ void setCurrentContext(unsigned int index) { g_context = g_devices[index]; } -void syncStreams() { - for (const auto& it : streamSet) { - it->finish(); - } -} - amd::HostQueue* getNullStream() { auto stream = g_nullStreams.find(getCurrentContext()); if (stream == g_nullStreams.end()) { diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index c84abe02da..5b60623761 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -377,6 +377,8 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { hipError_t hipDeviceSynchronize ( void ) { HIP_INIT_API(); + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); if (!queue) { diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 1f97be451a..197e607597 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -61,7 +61,6 @@ namespace hip { extern void syncStreams(); }; extern std::vector g_devices; -extern thread_local std::unordered_set streamSet; extern hipError_t ihipDeviceGetCount(int* count); #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 7cb359b1b9..f4995bcd81 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -22,8 +22,22 @@ THE SOFTWARE. 
#include #include "hip_internal.hpp" +#include "thread/monitor.hpp" -thread_local std::unordered_set streamSet; +static amd::Monitor streamSetLock("Guards global stream set"); +static std::unordered_set streamSet; + +namespace hip { + +void syncStreams() { + amd::ScopedLock lock(streamSetLock); + + for (const auto& it : streamSet) { + it->finish(); + } +} + +}; static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { amd::Device* device = hip::getCurrentContext()->devices()[0]; @@ -38,7 +52,11 @@ static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int fl if (!(flags & hipStreamNonBlocking)) { hip::syncStreams(); - streamSet.insert(queue); + + { + amd::ScopedLock lock(streamSetLock); + streamSet.insert(queue); + } } *stream = reinterpret_cast(as_cl(queue)); @@ -100,6 +118,8 @@ hipError_t hipStreamDestroy(hipStream_t stream) { return hipErrorInvalidResourceHandle; } + amd::ScopedLock lock(streamSetLock); + amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); hostQueue->release(); streamSet.erase(hostQueue); From da00b9270a05b80549eb0a3411c1d8ed83dd1f63 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 15 May 2018 16:26:16 -0400 Subject: [PATCH 053/282] P4 to Git Change 1555193 by cpaquot@cpaquot-ocl-lc-lnx on 2018/05/15 16:19:50 SWDEV-145570 - [HIP] Implemented events Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.hpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#25 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#9 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#9 edit --- api/hip/hip_event.cpp | 139 +++++++++++++++++++++++++++++++++-------- api/hip/hip_event.hpp | 21 ++++++- api/hip/hip_memory.cpp | 38 ++++++++--- api/hip/hip_module.cpp | 3 +- api/hip/hip_stream.cpp | 29 ++++++++- 5 files changed, 187 insertions(+), 43 deletions(-) diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index b9930636bb..127148e63c 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -24,37 +24,68 @@ THE SOFTWARE. #include "hip_event.hpp" +hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { + if (event == nullptr) { + return hipErrorInvalidValue; + } + + unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming | + hipEventReleaseToDevice | hipEventReleaseToSystem; + const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem); + + const bool illegalFlags = + (flags & ~supportedFlags) || // can't set any unsupported flags. + (flags & releaseFlags) == releaseFlags; // can't set both release flags + + if (!illegalFlags) { + hip::Event* e = new hip::Event(flags); + + if (e == nullptr) { + return hipErrorOutOfMemory; + } + + *event = reinterpret_cast(e); + } else { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipEventQuery(hipEvent_t event) { + if (event == nullptr) { + return hipErrorInvalidResourceHandle; + } + + hip::Event* e = reinterpret_cast(event); + + if (e->event_ == nullptr) { + return hipErrorInvalidResourceHandle; + } + + e->event_->notifyCmdQueue(); + + return (e->event_->status() == CL_COMPLETE)? 
hipSuccess : hipErrorNotReady; +} + hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { HIP_INIT_API(event, flags); - hip::Event* e = new hip::Event(flags); - - if (e == nullptr) { - return hipErrorOutOfMemory; - } - - *event = reinterpret_cast(e); - - return hipSuccess; -} + return ihipEventCreateWithFlags(event, flags); +} hipError_t hipEventCreate(hipEvent_t* event) { HIP_INIT_API(event); - hip::Event* e = new hip::Event(0); - - if (e == nullptr) { - return hipErrorOutOfMemory; - } - - *event = reinterpret_cast(e); - - return hipSuccess; + return ihipEventCreateWithFlags(event, 0); } hipError_t hipEventDestroy(hipEvent_t event) { HIP_INIT_API(event); + if (event == nullptr) { + return hipErrorInvalidResourceHandle; + } + delete reinterpret_cast(event); return hipSuccess; @@ -63,29 +94,83 @@ hipError_t hipEventDestroy(hipEvent_t event) { hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { HIP_INIT_API(ms, start, stop); - return hipErrorUnknown; + if (start == nullptr || stop == nullptr) { + return hipErrorInvalidResourceHandle; + } + + hip::Event* eStart = reinterpret_cast(start); + hip::Event* eStop = reinterpret_cast(stop); + + if (eStart->event_ == nullptr || + eStop->event_ == nullptr) { + return hipErrorInvalidResourceHandle; + } + + if ((eStart->flags | eStop->flags) & hipEventDisableTiming) { + return hipErrorInvalidResourceHandle; + } + + if (ihipEventQuery(start) == hipErrorNotReady || + ihipEventQuery(stop) == hipErrorNotReady) { + return hipErrorNotReady; + } + + if (ms == nullptr) { + return hipErrorInvalidValue; + } + + *ms = static_cast(eStop->event_->profilingInfo().submitted_ - eStart->event_->profilingInfo().submitted_)/1000000.f; + + return hipSuccess; } hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { HIP_INIT_API(event, stream); - assert(0 && "Unimplemented"); + if (event == nullptr) { + return hipErrorInvalidResourceHandle; + } - return hipErrorUnknown; + hip::Event* e = 
reinterpret_cast(event); + + if (stream == nullptr) { + e->stream_ = hip::getNullStream(); + } else { + e->stream_ = as_amd(reinterpret_cast(stream))->asHostQueue(); + } + amd::Command* command = (e->flags & hipEventDisableTiming)? new amd::Marker(*e->stream_, true) : + new hip::TimerMarker(*e->stream_); + command->enqueue(); + + if (e->event_ != nullptr) { + e->event_->release(); + } + + e->event_ = &command->event(); + + return hipSuccess; } hipError_t hipEventSynchronize(hipEvent_t event) { HIP_INIT_API(event); - assert(0 && "Unimplemented"); + if (event == nullptr) { + return hipErrorInvalidResourceHandle; + } - return hipErrorUnknown; + hip::Event* e = reinterpret_cast(event); + + if (e->event_ == nullptr) { + return hipErrorInvalidResourceHandle; + } + + e->event_->awaitCompletion(); + + return hipSuccess; } hipError_t hipEventQuery(hipEvent_t event) { HIP_INIT_API(event); - assert(0 && "Unimplemented"); - - return hipErrorUnknown; + return ihipEventQuery(event); } diff --git a/api/hip/hip_event.hpp b/api/hip/hip_event.hpp index 3ac1ea8bfe..19f93a5c27 100644 --- a/api/hip/hip_event.hpp +++ b/api/hip/hip_event.hpp @@ -27,12 +27,27 @@ THE SOFTWARE. 
namespace hip { +class TimerMarker: public amd::Marker { +public: + TimerMarker(amd::HostQueue& queue) : amd::Marker(queue, true) { + profilingInfo_.enabled_ = true; + profilingInfo_.callback_ = nullptr; + profilingInfo_.start_ = profilingInfo_.end_ = 0; + } +}; + class Event { public: - Event(unsigned int flags) : flags(flags) {} - ~Event() {} + Event(unsigned int flags) : flags(flags), stream_(nullptr), event_(nullptr) {} + ~Event() { + if (event_ != nullptr) { + event_->release(); + } + } unsigned int flags; -private: + + amd::HostQueue* stream_; + amd::Event* event_; }; }; diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 568ea557ec..a754745ebb 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -68,7 +68,6 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync = false) { - amd::Command* command = nullptr; amd::Command::EventWaitList waitList; size_t sOffset = 0; @@ -116,15 +115,6 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin return hipErrorOutOfMemory; } -// FIXME: virtualize MemoryCommand::validateMemory() -#if 0 - // Make sure we have memory for the command execution - if (CL_SUCCESS != command->validateMemory()) { - delete command; - return hipErrorMemoryAllocation; - } -#endif - command->enqueue(); if (!isAsync) { command->awaitCompletion(); @@ -193,6 +183,7 @@ hipError_t hipFree(void* ptr) { hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { HIP_INIT_API(dst, src, sizeBytes, kind); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); return ihipMemcpy(dst, src, sizeBytes, kind, *queue); } @@ -203,8 +194,10 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st amd::HostQueue* queue; if (stream == nullptr) { + hip::syncStreams(); queue = hip::getNullStream(); } 
else { + hip::getNullStream()->finish(); queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } @@ -214,6 +207,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); return ihipMemset(dst, value, sizeBytes, *queue); @@ -533,6 +527,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { HIP_INIT_API(dst, src, sizeBytes); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, *queue); @@ -541,6 +536,7 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { HIP_INIT_API(dst, src, sizeBytes); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, *queue); @@ -549,6 +545,7 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) { HIP_INIT_API(dst, src, sizeBytes); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, *queue); @@ -557,6 +554,7 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { HIP_INIT_API(dst, src, sizeBytes); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToHost, *queue); @@ -569,8 +567,10 @@ hipError_t hipMemcpyAsync(void* dst, 
const void* src, size_t sizeBytes, amd::HostQueue* queue = nullptr; if (stream == nullptr) { + hip::syncStreams(); queue = hip::getNullStream(); } else { + hip::getNullStream()->finish(); queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } @@ -585,8 +585,10 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, amd::HostQueue* queue = nullptr; if (stream == nullptr) { + hip::syncStreams(); queue = hip::getNullStream(); } else { + hip::getNullStream()->finish(); queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } @@ -601,8 +603,10 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz amd::HostQueue* queue = nullptr; if (stream == nullptr) { + hip::syncStreams(); queue = hip::getNullStream(); } else { + hip::getNullStream()->finish(); queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } @@ -617,8 +621,10 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, amd::HostQueue* queue = nullptr; if (stream == nullptr) { + hip::syncStreams(); queue = hip::getNullStream(); } else { + hip::getNullStream()->finish(); queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } @@ -723,6 +729,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t height, hipMemcpyKind kind) { HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); return ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, *queue); @@ -736,8 +743,10 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp amd::HostQueue* queue; if (stream == nullptr) { + hip::syncStreams(); queue = hip::getNullStream(); } else { + hip::getNullStream()->finish(); queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } @@ -757,6 +766,7 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind) { HIP_INIT_API(dstArray, wOffset, 
hOffset, src, count, kind); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); amd::Command* command = nullptr; @@ -795,6 +805,7 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs size_t count, hipMemcpyKind kind) { HIP_INIT_API(dst, srcArray, wOffset, hOffset, count, kind); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); amd::Command* command = nullptr; @@ -833,6 +844,7 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) { HIP_INIT_API(dstArray, dstOffset, srcHost, count); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); amd::Command::EventWaitList waitList; @@ -856,6 +868,7 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count) { HIP_INIT_API(dst, srcArray, srcOffset, count); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); amd::Command::EventWaitList waitList; @@ -879,6 +892,7 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { HIP_INIT_API(p); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); size_t byteSize; @@ -1035,6 +1049,7 @@ hipError_t ihipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { HIP_INIT_API(dst, pitch, value, width, height); + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); return ihipMemset2D(dst, pitch, value, width, height, *queue); } @@ -1045,8 +1060,10 @@ hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, amd::HostQueue* queue = nullptr; if (stream == nullptr) { + hip::syncStreams(); queue = hip::getNullStream(); } else { + 
hip::getNullStream()->finish(); queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } @@ -1060,6 +1077,7 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes return hipErrorInvalidValue; } + hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); size_t offset = 0; amd::Command::EventWaitList waitList; diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 0a5675114c..0feefad7a3 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -146,8 +146,10 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, amd::HostQueue* queue; if (hStream == nullptr) { + hip::syncStreams(); queue = hip::getNullStream(); } else { + hip::getNullStream()->finish(); queue = as_amd(reinterpret_cast(hStream))->asHostQueue(); } if (!queue) { @@ -195,7 +197,6 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, } command->enqueue(); - command->awaitCompletion(); command->release(); return hipSuccess; diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index f4995bcd81..6fc8140f6c 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -22,6 +22,7 @@ THE SOFTWARE. 
#include #include "hip_internal.hpp" +#include "hip_event.hpp" #include "thread/monitor.hpp" static amd::Monitor streamSetLock("Guards global stream set"); @@ -97,8 +98,12 @@ hipError_t hipStreamSynchronize(hipStream_t stream) { amd::HostQueue* hostQueue; if (stream == nullptr) { + hip::syncStreams(); + hostQueue = hip::getNullStream(); } else { + hip::getNullStream()->finish(); + hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); } @@ -130,9 +135,29 @@ hipError_t hipStreamDestroy(hipStream_t stream) { hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_API(stream, event, flags); - assert(0 && "Unimplemented"); + if (stream == nullptr || event == nullptr) { + return hipErrorInvalidResourceHandle; + } - return hipErrorUnknown; + amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); + hip::Event* e = reinterpret_cast(event); + cl_event clEvent = as_cl(e->event_); + + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue->context(), 1, + &clEvent); + if (err != CL_SUCCESS) { + return hipErrorUnknown; + } + + amd::Command* command = new amd::Marker(*hostQueue, true, eventWaitList); + if (command == NULL) { + return hipErrorOutOfMemory; + } + command->enqueue(); + command->release(); + + return hipSuccess; } hipError_t hipStreamQuery(hipStream_t stream) { From bb008fbf94d74e9991e5cb1cafea0c906e95e6b9 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 15 May 2018 16:36:25 -0400 Subject: [PATCH 054/282] P4 to Git Change 1555197 by cpaquot@cpaquot-ocl-lc-lnx on 2018/05/15 16:26:41 SWDEV-145570 - [HIP] Fixed a typo, hipStreamGetFlags test passes now Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#10 edit --- api/hip/hip_stream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 6fc8140f6c..9b5efde29c 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -84,7 +84,7 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { auto it = streamSet.find(hostQueue); if(flags != nullptr) { - *flags = (it != streamSet.end()) ? hipStreamNonBlocking : hipStreamDefault; + *flags = (it == streamSet.end()) ? hipStreamNonBlocking : hipStreamDefault; } else { return hipErrorInvalidValue; } From b1ab722a253601cd0ec9da42fe89d745cd8d92fb Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 16 May 2018 16:35:53 -0400 Subject: [PATCH 055/282] P4 to Git Change 1555866 by cpaquot@cpaquot-ocl-lc-lnx on 2018/05/16 16:27:00 SWDEV-145570 - [HIP] Store HIP mem flags inside amd::Buffer's flags Use the 16 upper bits of amd::Buffer's flags field instead of adding a new field. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#26 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#86 edit --- api/hip/hip_memory.cpp | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index a754745ebb..05fcfbe21a 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -169,7 +169,19 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) { hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(ptr, sizeBytes, flags); - return ihipMalloc(ptr, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER); + if (ptr == nullptr) { + return hipErrorInvalidValue; + } + *ptr = nullptr; + + const unsigned int coherentFlags = hipHostMallocCoherent | hipHostMallocNonCoherent; + + // can't have both Coherent and NonCoherent flags set at the same time + if ((flags & coherentFlags) == coherentFlags) { + return hipErrorInvalidValue; + } + + return ihipMalloc(ptr, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER | (flags << 16)); } hipError_t hipFree(void* ptr) { @@ -467,9 +479,21 @@ hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { HIP_INIT_API(flagsPtr, hostPtr); - assert(0 && "Unimplemented"); + if (flagsPtr == nullptr || + hostPtr == nullptr) { + return hipErrorInvalidValue; + } - return hipErrorUnknown; + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(hostPtr, offset); + + if (svmMem == nullptr) { + return hipErrorInvalidValue; + } + + *flagsPtr = svmMem->getMemFlags() >> 16; + + return hipSuccess; } hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { From 303df5dd2e0a9e06d1c81c07500a8104e316a43f Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 18 May 2018 14:34:14 -0400 Subject: [PATCH 056/282] P4 to Git Change 1556942 by yaxunl@yaxunl-lc8 on 2018/05/18 14:25:12 SWDEV-145570 - [HIP] Change fat binary magic number and clang-offload-bundler target name to 
match clang Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#11 edit --- api/hip/hip_platform.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 386d711275..4ffcc3da87 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -29,7 +29,7 @@ THE SOFTWARE. #include #include "elfio.hpp" -constexpr unsigned __cudaFatMAGIC2 = 0x466243b1; +constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" struct __CudaFatBinaryWrapper { unsigned int magic; @@ -39,7 +39,7 @@ struct __CudaFatBinaryWrapper { }; #define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" -#define OPENMP_AMDGCN_AMDHSA_TRIPLE "openmp-amdgcn--amdhsa" +#define HIP_AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa" #define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn-amd-amdhsa-" struct __ClangOffloadBundleDesc { @@ -60,7 +60,7 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data) HIP_INIT(); const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); - if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) { + if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { return nullptr; } std::string magic((char*)fbwrapper->binary, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); @@ -78,12 +78,12 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data) desc = reinterpret_cast( reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { - std::string triple(desc->triple, sizeof(OPENMP_AMDGCN_AMDHSA_TRIPLE) - 1); - if (triple.compare(OPENMP_AMDGCN_AMDHSA_TRIPLE)) + std::string triple(desc->triple, sizeof(HIP_AMDGCN_AMDHSA_TRIPLE) - 1); + if (triple.compare(HIP_AMDGCN_AMDHSA_TRIPLE)) continue; - std::string target(desc->triple + sizeof(OPENMP_AMDGCN_AMDHSA_TRIPLE), - desc->tripleSize - sizeof(OPENMP_AMDGCN_AMDHSA_TRIPLE)); + std::string target(desc->triple + sizeof(HIP_AMDGCN_AMDHSA_TRIPLE), + desc->tripleSize - 
sizeof(HIP_AMDGCN_AMDHSA_TRIPLE)); if (target.compare(hip::getCurrentContext()->devices()[0]->info().name_)) continue; From f165295c66081f057f10beee909c9aabc87e1487 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 21 May 2018 20:02:17 -0400 Subject: [PATCH 057/282] P4 to Git Change 1557352 by cpaquot@cpaquot-ocl-lc-lnx on 2018/05/21 19:53:00 SWDEV-145570 - [HIP] Sync streams in hipFree. hipTestHalf passes now. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#27 edit --- api/hip/hip_memory.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 05fcfbe21a..ebe630924b 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -186,6 +186,8 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { hipError_t hipFree(void* ptr) { if (amd::SvmBuffer::malloced(ptr)) { + hip::syncStreams(); + hip::getNullStream()->finish(); amd::SvmBuffer::free(*hip::getCurrentContext(), ptr); return hipSuccess; } From 0fe5f87cba8a05c2614fb910b20bb8e2fd01343d Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 23 May 2018 13:40:52 -0400 Subject: [PATCH 058/282] P4 to Git Change 1558526 by skudchad@skudchad_test2_win_opencl on 2018/05/23 13:34:33 SWDEV-145570 - [HIP] Implement hipPointerGetAttributes. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14938/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#28 edit --- api/hip/hip_memory.cpp | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index ebe630924b..04e3dc3c3a 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1180,7 +1180,32 @@ hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsi hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { HIP_INIT_API(attributes, ptr); - assert(0 && "Unimplemented"); + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(ptr, offset); + amd::Context &memObjCtx = memObj->getContext(); + int device = 0; - return hipErrorUnknown; + if (memObj != nullptr) { + attributes->memoryType = hipMemoryTypeDevice; + attributes->hostPointer = memObj->getSvmPtr(); + attributes->devicePointer = memObj->getSvmPtr(); + attributes->isManaged = 0; + attributes->allocationFlags = memObj->getMemFlags(); + for (auto& ctx : g_devices) { + ++device; + if (*ctx == memObjCtx) { + attributes->device = device; + break; + } + } + } else { + attributes->memoryType = hipMemoryTypeHost; + attributes->hostPointer = (void*)ptr; + attributes->devicePointer = 0; + attributes->device = -1; + attributes->isManaged = 0; + attributes->allocationFlags = 0; + } + + return hipSuccess; } From da7593ae4089d2d82eed69b18a365352efd2224d Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 24 May 2018 12:01:44 -0400 Subject: [PATCH 059/282] P4 to Git Change 1559149 by skudchad@skudchad_test2_win_opencl on 2018/05/24 11:54:02 SWDEV-145570 - [HIP] - Implement hipMemcpy2DToArray. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14953/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#29 edit --- api/hip/hip_memory.cpp | 130 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 118 insertions(+), 12 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 04e3dc3c3a..90c060d25b 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -694,22 +694,18 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch } } - amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D srcEnd(srcRect.end_, 1, 1); - amd::Coord3D dstEnd(dstRect.end_, 1, 1); amd::Coord3D size(region[0], region[1], region[2]); if (!srcRect.create(sOrigin, region, spitch, src_slice_pitch) || !dstRect.create(dOrigin, region, dpitch, dst_slice_pitch)) { return hipErrorInvalidValue; } -/* - if (((srcPtr != nullptr) && (!srcPtr->asBuffer()->validateRegion(srcStart, srcEnd))) || - ((srcPtr != nullptr) && (!dstPtr->asBuffer()->validateRegion(dstStart, dstEnd)))) { - return hipErrorInvalidValue; - } -*/ + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D srcEnd(srcRect.end_, 1, 1); + amd::Coord3D dstEnd(dstRect.end_, 1, 1); + amd::Command* command = nullptr; amd::Command::EventWaitList waitList; switch (kind) { @@ -783,9 +779,119 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { HIP_INIT_API(dst, wOffset, hOffset, src, spitch, width, height, kind); - assert(0 && "Unimplemented"); + if (dst->data == nullptr) { + return hipErrorUnknown; + } + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + size_t dpitch = dst->width; + + switch (dst[0].desc.f) { + case hipChannelFormatKindSigned: + dpitch *= sizeof(int); + break; + case hipChannelFormatKindUnsigned: + dpitch *= sizeof(unsigned int); + break; + case hipChannelFormatKindFloat: + dpitch *= 
sizeof(float); + break; + case hipChannelFormatKindNone: + dpitch *= sizeof(size_t); + break; + default: + dpitch *= 1; + break; + } + + if ((wOffset + width > (dpitch)) || width > spitch) { + return hipErrorUnknown; + } + + // Create buffer rectangle info structure + amd::BufferRect srcRect; + amd::BufferRect dstRect; + + size_t region[3] = {width, height, 1}; + size_t src_slice_pitch = spitch * height; + size_t dst_slice_pitch = dpitch * height; + size_t sOrigin[3] = { }; + size_t dOrigin[3] = {wOffset, hOffset, 0}; + size_t sz = 0; + amd::Memory* srcPtr = getMemoryObject(src, sz); + amd::Memory* dstPtr = getMemoryObject(dst->data, sz); + + if (kind == hipMemcpyDefault) { + // Determine kind on VA + if (srcPtr == nullptr && dstPtr != nullptr) { + kind = hipMemcpyHostToDevice; + } else if (srcPtr != nullptr && dstPtr == nullptr) { + kind = hipMemcpyDeviceToHost; + } else if (srcPtr != nullptr && dstPtr != nullptr) { + kind = hipMemcpyDeviceToDevice; + } else { + kind = hipMemcpyHostToHost; + } + } + + amd::Coord3D size(region[0], region[1], region[2]); + + if (!srcRect.create(sOrigin, region, spitch, src_slice_pitch) || + !dstRect.create(dOrigin, region, dpitch, dst_slice_pitch)) { + return hipErrorInvalidValue; + } + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D srcEnd(srcRect.end_, 1, 1); + amd::Coord3D dstEnd(dstRect.end_, 1, 1); + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + + void* newDst = nullptr; + + switch (kind) { + case hipMemcpyDeviceToHost: + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcPtr->asBuffer(), srcStart, size, dst->data, srcRect, dstRect); + break; + case hipMemcpyHostToDevice: + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstPtr->asBuffer(), dstStart, size, src, dstRect, srcRect); + break; + case hipMemcpyDeviceToDevice: + command = new 
amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(), + *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); + break; + case hipMemcpyHostToHost: + newDst = reinterpret_cast(reinterpret_cast(dst->data) + + dpitch * hOffset + wOffset); + for(unsigned int y = 0; y < height; y++) { + void* pDst = reinterpret_cast(reinterpret_cast(newDst) + y * dpitch); + void* pSrc = reinterpret_cast(reinterpret_cast(src) + y * spitch); + memcpy(pDst, pSrc, width); + } + return hipSuccess; + default: + assert(!"Shouldn't reach here"); + break; + } + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + + command->awaitCompletion(); + + command->release(); + + return hipSuccess; - return hipErrorUnknown; } hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, const void* src, From cadb15165e6c4e418f0ccb232ba19f73639b230f Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 24 May 2018 18:12:49 -0400 Subject: [PATCH 060/282] P4 to Git Change 1559366 by gandryey@gera-w8 on 2018/05/24 18:06:45 SWDEV-79445 - OCL generic changes and code clean-up - Combine validateMemory() and arguments capture() under a single function. Rename validateMemory() in NDRangeKernelCommand class to captureAndValidate() http://ocltc.amd.com/reviews/r/14964/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#87 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#90 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#28 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#22 edit --- api/hip/hip_module.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 0feefad7a3..435fe53af8 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -190,8 +190,8 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, return hipErrorOutOfMemory; } - // Make sure we have memory for the command execution - if (CL_SUCCESS != command->validateMemory()) { + // Capture the kernel arguments + if (CL_SUCCESS != command->captureAndValidate()) { delete command; return hipErrorMemoryAllocation; } From bad00db6ea16cc143b15fcce109fdbef8661d62f Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 31 May 2018 18:18:00 -0400 Subject: [PATCH 061/282] P4 to Git Change 1562507 by skudchad@skudchad_test2_win_opencl on 2018/05/31 18:10:39 SWDEV-145570 - [HIP] - Fix HIP build Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#11 edit --- api/hip/hip_stream.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 9b5efde29c..c48eca87ae 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -144,8 +144,7 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int cl_event clEvent = as_cl(e->event_); amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue->context(), 1, - &clEvent); + cl_int err = amd::clSetEventWaitList(eventWaitList, *hostQueue, 1, &clEvent); if (err != CL_SUCCESS) { return hipErrorUnknown; } From 9a59ddeed408d73cac7756edccec0980339fbea5 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 1 Jun 2018 15:01:45 -0400 Subject: [PATCH 062/282] P4 to Git Change 1562965 by skudchad@skudchad_test2_win_opencl on 2018/06/01 14:48:31 SWDEV-145570 - [HIP] - Implement hipHostRegister/Unregister, hipHostAlloc. 
ReviewBoardURL = http://ocltc.amd.com/reviews/r/15041/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#30 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#8 edit --- api/hip/hip_internal.hpp | 2 ++ api/hip/hip_memory.cpp | 40 ++++++++++++++++++++++++++++++++++------ api/hip/hip_texture.cpp | 11 +++++------ 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 197e607597..e82cb13a2a 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -62,5 +62,7 @@ namespace hip { }; extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); +extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); + #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 90c060d25b..922b8abe8f 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -37,7 +37,7 @@ extern void getDrvChannelOrderAndType(const enum hipArray_Format Format, cl_channel_type* channelType); inline amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { - amd::Memory *memObj = amd::SvmManager::FindSvmBuffer(ptr); + amd::Memory *memObj = amd::MemObjMap::FindMemObj(ptr); if (memObj != nullptr) { offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getSvmPtr()); } @@ -500,20 +500,48 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hostPtr, sizeBytes, flags); + if(hostPtr != nullptr) { + amd::Context *amdContext = hip::getCurrentContext(); + amd::Memory* mem = new (*amdContext) amd::Buffer(*amdContext, CL_MEM_USE_HOST_PTR, sizeBytes); - assert(0 && "Unimplemented"); - - return hipErrorUnknown; + if (!mem->create(hostPtr)) { + mem->release(); + return hipErrorMemoryAllocation; + } + 
amd::MemObjMap::AddMemObj(hostPtr, mem); + return hipSuccess; + } else { + return ihipMalloc(&hostPtr, sizeBytes, flags); + } } hipError_t hipHostUnregister(void* hostPtr) { HIP_INIT_API(hostPtr); - assert(0 && "Unimplemented"); + if (amd::SvmBuffer::malloced(hostPtr)) { + hip::syncStreams(); + hip::getNullStream()->finish(); + amd::SvmBuffer::free(*hip::getCurrentContext(), hostPtr); + return hipSuccess; + } else { + size_t offset = 0; + amd::Memory* mem = getMemoryObject(hostPtr, offset); - return hipErrorUnknown; + if(mem) { + mem->release(); + return hipSuccess; + } + } + + return hipErrorInvalidValue; } +// Deprecated function: +hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { + return ihipMalloc(ptr, sizeBytes, flags); +}; + + hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t count, size_t offset, hipMemcpyKind kind) { HIP_INIT_API(symbolName, src, count, offset, kind); diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index e619065f02..f8bdbe1c89 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -157,11 +157,11 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou const amd::Image::Format imageFormat(image_format); amd::Memory* memory = nullptr; - + size_t offset = 0; switch (pResDesc->resType) { case hipResourceTypeArray: { - memory = amd::SvmManager::FindSvmBuffer(pResDesc->res.array.array->data); + memory = getMemoryObject(pResDesc->res.array.array->data, offset); getChannelOrderAndType(pResDesc->res.array.array->desc, pTexDesc->readMode, &image_format.image_channel_order, &image_format.image_channel_data_type); @@ -187,8 +187,8 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou break; case hipResourceTypeLinear: assert(pResViewDesc == nullptr); + memory = getMemoryObject(pResDesc->res.linear.devPtr, offset); - memory = amd::SvmManager::FindSvmBuffer(pResDesc->res.linear.devPtr); image = new 
(*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), CL_MEM_OBJECT_IMAGE1D, memory->getMemFlags(), imageFormat, pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize(), 1, 1, @@ -196,8 +196,8 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou break; case hipResourceTypePitch2D: assert(pResViewDesc == nullptr); + memory = getMemoryObject(pResDesc->res.pitch2D.devPtr, offset); - memory = amd::SvmManager::FindSvmBuffer(pResDesc->res.pitch2D.devPtr); image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat, pResDesc->res.pitch2D.width, pResDesc->res.pitch2D.height, 1, @@ -263,8 +263,7 @@ hipError_t ihipBindTexture(cl_mem_object_type type, &image_format.image_channel_order, &image_format.image_channel_data_type); } const amd::Image::Format imageFormat(image_format); - - amd::Memory* memory = amd::SvmManager::FindSvmBuffer(devPtr); + amd::Memory* memory = getMemoryObject(devPtr, *offset); amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), type, memory->getMemFlags(), imageFormat, width, height, 1, pitch, 0); From 2f1c2532316859e5a5df1ec7a3076dd2b97701fa Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 5 Jun 2018 19:43:49 -0400 Subject: [PATCH 063/282] P4 to Git Change 1564420 by skudchad@skudchad_rocm on 2018/06/05 19:38:52 SWDEV-145570 - [HIP] - Find memobj in correct class Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#31 edit --- api/hip/hip_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 922b8abe8f..4d8e91250f 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1302,7 +1302,7 @@ hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannel hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsigned flags) { HIP_INIT_API(devicePointer, hostPointer, flags); - if (!amd::SvmBuffer::malloced(hostPointer)) { + if (!amd::MemObjMap::FindMemObj(hostPointer)) { return hipErrorInvalidValue; } // right now we have SVM From 2568c8042f55b43132e1f9261744910f382e6c4a Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 12 Jun 2018 12:11:19 -0400 Subject: [PATCH 064/282] P4 to Git Change 1567203 by skudchad@skudchad_test2_win_opencl on 2018/06/12 12:01:43 SWDEV-145570 - [HIP] - Since getMemoryObject is now used in hip_texture it shouldn't be inline Affected files ... ...
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#32 edit --- api/hip/hip_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 4d8e91250f..cd4c60c319 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -36,7 +36,7 @@ extern void getDrvChannelOrderAndType(const enum hipArray_Format Format, cl_channel_order* channelOrder, cl_channel_type* channelType); -inline amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { +amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { amd::Memory *memObj = amd::MemObjMap::FindMemObj(ptr); if (memObj != nullptr) { offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getSvmPtr()); From 7330137b0b1042e7e67f1f42f51225b6fbf2c0a5 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 13 Jun 2018 13:36:52 -0400 Subject: [PATCH 065/282] P4 to Git Change 1567831 by skudchad@skudchad_test2_win_opencl on 2018/06/13 13:20:10 SWDEV-145570 - [HIP] - Implement hipMemset3D ReviewBoardURL = http://ocltc.amd.com/reviews/r/15164/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#5 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#5 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#33 edit --- api/hip/hip_hcc.def.in | 1 + api/hip/hip_hcc.map.in | 1 + api/hip/hip_memory.cpp | 12 ++++++++++++ 3 files changed, 14 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index d2916f3885..bb9c179d3b 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -100,6 +100,7 @@ hipMemset2D hipMemsetAsync hipMemset2DAsync hipMemsetD8 +hipMemset3D hipModuleGetFunction hipModuleGetGlobal hipModuleLaunchKernel diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 761641425e..452f260534 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -101,6 +101,7 @@ global: hipMemsetAsync; hipMemset2DAsync; hipMemsetD8; + hipMemset3D; hipModuleGetFunction; hipModuleGetGlobal; hipModuleLaunchKernel; diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index cd4c60c319..376b2b155f 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -363,6 +363,18 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { return status; } +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { + HIP_INIT_API(pitchedDevPtr, value, &extent); + + void *dst = &pitchedDevPtr.ptr; + size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + + return ihipMemset(&dst, value, sizeBytes, *queue); +} + hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { HIP_INIT_API(array, pAllocateArray); From 3f0b25f6bf5239e4537630d835dc0c697dd62b75 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 25 Jun 2018 13:29:56 -0400 Subject: [PATCH 066/282] P4 to Git Change 1572348 by skudchad@skudchad_test2_win_opencl on 2018/06/25 13:21:41 SWDEV-145570 - [HIP] - Fix hipHostGetDevicePointer. hipHostRegister tests now passes ReviewBoardURL = http://ocltc.amd.com/reviews/r/15261/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#34 edit --- api/hip/hip_memory.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 376b2b155f..424fbec633 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -38,7 +38,7 @@ extern void getDrvChannelOrderAndType(const enum hipArray_Format Format, amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { amd::Memory *memObj = amd::MemObjMap::FindMemObj(ptr); - if (memObj != nullptr) { + if (memObj != nullptr && memObj->getSvmPtr() != nullptr) { offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getSvmPtr()); } return memObj; @@ -1314,11 +1314,13 @@ hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannel hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsigned flags) { HIP_INIT_API(devicePointer, hostPointer, flags); - if (!amd::MemObjMap::FindMemObj(hostPointer)) { + size_t offset = 0; + + amd::Memory* memObj = getMemoryObject(hostPointer, offset); + if (!memObj) { return hipErrorInvalidValue; } - // right now we have SVM - *devicePointer = hostPointer; + *devicePointer = reinterpret_cast(memObj->getDeviceMemory(*hip::getCurrentContext()->devices()[0])->virtualAddress() + offset); return hipSuccess; } From 3564a8ddf97e9c15611f4fcd11db181e42ae79e2 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 29 Jun 2018 01:59:22 -0400 Subject: [PATCH 067/282] P4 to Git Change 1574251 by skudchad@skudchad_rocm on 2018/06/29 01:49:57 SWDEV-145570 - [HIP] Fix hipHostUnregister. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#35 edit --- api/hip/hip_memory.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 424fbec633..98c4c50e03 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -540,6 +540,9 @@ hipError_t hipHostUnregister(void* hostPtr) { amd::Memory* mem = getMemoryObject(hostPtr, offset); if(mem) { + hip::syncStreams(); + hip::getNullStream()->finish(); + amd::MemObjMap::RemoveMemObj(hostPtr); mem->release(); return hipSuccess; } From e143868b47f65d9a5f114c32f9c6434cebc86705 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 5 Jul 2018 20:29:27 -0400 Subject: [PATCH 068/282] P4 to Git Change 1576872 by skudchad@skudchad_rocm on 2018/07/05 20:20:21 SWDEV-145570 - [HIP] - Add hipMalloc3D to linker symtable. - Add structure for hipFuncGetAttributes Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#6 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#6 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#11 edit --- api/hip/hip_hcc.def.in | 2 ++ api/hip/hip_hcc.map.in | 39 +++++++++++++++++++++------------------ api/hip/hip_module.cpp | 7 +++++++ 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index bb9c179d3b..81f51c34f7 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -68,6 +68,7 @@ hipIpcCloseMemHandle hipIpcGetMemHandle hipIpcOpenMemHandle hipMalloc +hipMalloc3D hipMalloc3DArray hipMallocArray hipMallocHost @@ -108,6 +109,7 @@ hipModuleLoad hipModuleLoadData hipModuleLoadDataEx hipModuleUnload +hipFuncGetAttributes hipPeekAtLastError hipPointerGetAttributes hipProfilerStart diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 452f260534..4114cfe1d9 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -69,6 +69,7 @@ global: hipIpcGetMemHandle; hipIpcOpenMemHandle; hipMalloc; + hipMalloc3D; hipMalloc3DArray; hipMallocArray; hipMallocHost; @@ -109,6 +110,7 @@ global: hipModuleLoadData; hipModuleLoadDataEx; hipModuleUnload; + hipFuncGetAttributes; hipPeekAtLastError; hipPointerGetAttributes; hipProfilerStart; @@ -137,25 +139,26 @@ global: hip_impl::hipLaunchKernelGGLImpl*; hipCreateTextureObject*; hipDestroyTextureObject*; - hipGetTextureObjectResourceDesc; - hipGetTextureObjectResourceViewDesc; - hipGetTextureObjectTextureDesc; - hipBindTexture; - hipBindTexture2D; - hipBindTextureToArray; - hipBindTextureToMipmappedArray; - hipUnbindTexture; - hipGetChannelDesc; - hipGetTextureAlignmentOffset; - hipGetTextureReference; - hipTexRefSetFormat; - hipTexRefSetFlags; - hipTexRefSetFilterMode; - hipTexRefSetAddressMode; - hipTexRefSetArray; - hipTexRefSetAddress; - hipTexRefSetAddress2D; + hipGetTextureObjectResourceDesc*; + hipGetTextureObjectResourceViewDesc*; + hipGetTextureObjectTextureDesc*; + hipBindTexture*; + hipBindTexture2D*; + hipBindTextureToArray*; + hipBindTextureToMipmappedArray*; + 
hipUnbindTexture*; + hipGetChannelDesc*; + hipGetTextureAlignmentOffset*; + hipGetTextureReference*; + hipTexRefSetFormat*; + hipTexRefSetFlags*; + hipTexRefSetFilterMode*; + hipTexRefSetAddressMode*; + hipTexRefSetArray*; + hipTexRefSetAddress*; + hipTexRefSetAddress2D*; hipCreateChannelDesc*; + ihipBindTextureImpl*; ihipBindTextureToArrayImpl*; hipHccGetAccelerator*; hipHccGetAcceleratorView*; diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 435fe53af8..6831711a9a 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -130,6 +130,13 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const ch return hipSuccess; } +hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) +{ + HIP_INIT_API(attr, func); + + return hipErrorInvalidDeviceFunction; +} + hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, From f2ea9a1a8b428aefa913ca3e94a2928cdb5aa63a Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 6 Jul 2018 15:13:22 -0400 Subject: [PATCH 069/282] P4 to Git Change 1577282 by skudchad@skudchad_test2_win_opencl on 2018/07/06 15:02:32 SWDEV-145570 - [HIP] Fix hipMalloc3D ReviewBoardURL = http://ocltc.amd.com/reviews/r/15358/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#36 edit --- api/hip/hip_memory.cpp | 60 +++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 98c4c50e03..0ad00cea78 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -366,13 +366,13 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { HIP_INIT_API(pitchedDevPtr, value, &extent); - void *dst = &pitchedDevPtr.ptr; + void *dst = pitchedDevPtr.ptr; size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); - return ihipMemset(&dst, value, sizeBytes, *queue); + return ihipMemset(dst, value, sizeBytes, *queue); } hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { @@ -1129,9 +1129,9 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { amd::BufferRect srcRect; amd::BufferRect dstRect; size_t offset = 0; - amd::Memory* src = getMemoryObject(srcPtr, offset); + amd::Memory* srcMemory = getMemoryObject(srcPtr, offset); assert(offset == 0); - amd::Memory* dst = getMemoryObject(dstPtr, offset); + amd::Memory* dstMemory = getMemoryObject(dstPtr, offset); assert(offset == 0); size_t src_slice_pitch = srcPitchInBytes * p->srcHeight; @@ -1147,28 +1147,46 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { amd::Coord3D srcEnd(srcRect.end_, 1, 1); amd::Coord3D dstEnd(dstRect.end_, 1, 1); - if (!src->asBuffer()->validateRegion(srcStart, srcEnd) || - !dst->asBuffer()->validateRegion(dstStart, dstEnd)) { - return hipErrorInvalidValue; - } - - // Check if regions overlap each other - if ((src->asBuffer() == dst->asBuffer()) && - (std::abs(static_cast(srcOrigin[0]) - static_cast(dstOrigin[0])) < - static_cast(region[0])) && - (std::abs(static_cast(srcOrigin[1]) - 
static_cast(dstOrigin[1])) < - static_cast(region[1])) && - (std::abs(static_cast(srcOrigin[2]) - static_cast(dstOrigin[2])) < - static_cast(region[2]))) { - return hipErrorUnknown; + hipMemcpyKind kind = p->kind; + + if (kind == hipMemcpyDefault) { + // Determine kind on VA + if (srcMemory == nullptr && dstMemory != nullptr) { + kind = hipMemcpyHostToDevice; + } else if (srcMemory != nullptr && dstMemory == nullptr) { + kind = hipMemcpyDeviceToHost; + } else if (srcMemory != nullptr && dstMemory != nullptr) { + kind = hipMemcpyDeviceToDevice; + } else { + kind = hipMemcpyHostToHost; + } } + amd::Command* command = nullptr; amd::Command::EventWaitList waitList; amd::Coord3D size(region[0], region[1], region[2]); - amd::CopyMemoryCommand* command = - new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *src->asBuffer(), - *dst->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); + switch (kind) { + case hipMemcpyDeviceToHost: + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcMemory->asBuffer(), srcStart, size, dstPtr, srcRect, dstRect); + break; + case hipMemcpyHostToDevice: + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstMemory->asBuffer(), srcStart, size, srcPtr, srcRect, dstRect); + break; + case hipMemcpyDeviceToDevice: + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, size, + srcRect, dstRect); + break; + case hipMemcpyHostToHost: + memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]); + return hipSuccess; + default: + assert(!"Shouldn't reach here"); + break; + } if (command == nullptr) { return hipErrorOutOfMemory; From 1f02d5c817c94a8d67d85ed6dc77334544165414 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 10 Jul 2018 19:21:07 -0400 Subject: [PATCH 070/282] P4 to Git Change 1578574 by skudchad@skudchad_rocm on 2018/07/10 19:15:59 
SWDEV-145570 - [HIP] - g_arguments isn't a double pointer Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#12 edit --- api/hip/hip_platform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 4ffcc3da87..3eddb7824d 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -171,7 +171,7 @@ extern "C" hipError_t hipConfigureCall( return hipSuccess; } -char* g_arguments[1024]; // FIXME: needs to grow +char g_arguments[1024]; // FIXME: needs to grow extern "C" hipError_t hipSetupArgument( const void *arg, From 7006bdb0b86759b97ee67e369890bca17fddb8ee Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 18 Jul 2018 17:51:20 -0400 Subject: [PATCH 071/282] P4 to Git Change 1582432 by cpaquot@cpaquot-ocl-lc-lnx on 2018/07/18 17:16:05 SWDEV-145570 - [HIP] Fixed __hipRegisterVariable to __hipRegisterVar Affected files ... ...
//depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#7 edit --- api/hip/hip_hcc.def.in | 2 +- api/hip/hip_hcc.map.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 81f51c34f7..478b4a277c 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -127,7 +127,7 @@ hipStreamSynchronize hipStreamWaitEvent __hipRegisterFatBinary __hipRegisterFunction -__hipRegisterVariable +__hipRegisterVar __hipUnregisterFatBinary hipConfigureCall hipSetupArgument diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 4114cfe1d9..25e9844e94 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -128,7 +128,7 @@ global: hipStreamWaitEvent; __hipRegisterFatBinary; __hipRegisterFunction; - __hipRegisterVariable; + __hipRegisterVar; __hipUnregisterFatBinary; hipConfigureCall; hipSetupArgument; From 1b054948e38b8b7e919c5ee1c0b655bfafb2cc9b Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 24 Jul 2018 17:14:35 -0400 Subject: [PATCH 072/282] P4 to Git Change 1584863 by yaxunl@yaxunl-lc10 on 2018/07/24 17:00:42 SWDEV-145570 - [HIP] - Update prototype of __hipRegisterVar Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#13 edit --- api/hip/hip_platform.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 3eddb7824d..6a02bbaff4 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -128,15 +128,20 @@ extern "C" void __hipRegisterFunction( g_functions.insert(std::make_pair(hostFunction, reinterpret_cast(as_cl(kernel)))); } +// Registers a device-side global variable. +// For each global variable in device code, there is a corresponding shadow +// global variable in host code. The shadow host variable is used to keep +// track of the value of the device side global variable between kernel +// executions. 
extern "C" void __hipRegisterVar( - hipModule_t module, - char* hostVar, - char* deviceVar, - const char* deviceName, - int ext, - int size, - int constant, - int global) + hipModule_t modules, // The device modules containing code object + char* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int ext, // Whether this variable is external + int size, // Size of the variable + int constant, // Whether this variable is constant + int global) // Unknown, always 0 { HIP_INIT(); } From 09b46cddf53bb6f510054b2642d36e21616fe34f Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 31 Jul 2018 15:07:56 -0400 Subject: [PATCH 073/282] P4 to Git Change 1587799 by vsytchen@vsytchen-win10 on 2018/07/31 14:47:31 SWDEV-145570 - [HIP] refactor hipMemcpy* functions to correctly handle copies using prepinned memory The current implementation of hipMemcpy functions picks the copy type based on a flag that the user passes. However, one can use the hipMemcpyHostToDevice/hipMemcpyDeviceToHost flag in combination with prepinned memory. By using the WriteMemoryCommand/ReadMemoryCommand in this case, we will pin the same host memory twice. This is fine on PAL/Linux, since pinning the same VA range is a noop, but this will start failing once we switch to using device memory with HIP/VDI/HSA. The solution is to ignore the hipMemcpyKind flag and let the runtime decide what kind of copy is best to do, except for the case when hipMemcpyHostToHost is passed, since both host pointers may be prepinned. ReviewBoardURL = http://ocltc.amd.com/reviews/r/15482/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#37 edit --- api/hip/hip_memory.cpp | 340 +++++++++++++++++------------------------ 1 file changed, 140 insertions(+), 200 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 0ad00cea78..7e74820852 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -70,47 +70,27 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin amd::HostQueue& queue, bool isAsync = false) { amd::Command* command = nullptr; amd::Command::EventWaitList waitList; + size_t sOffset = 0; amd::Memory *srcMemory = getMemoryObject(src, sOffset); size_t dOffset = 0; amd::Memory *dstMemory = getMemoryObject(dst, dOffset); - amd::Coord3D srcOffset(sOffset, 0, 0); - amd::Coord3D dstOffset(dOffset, 0, 0); - - if (kind == hipMemcpyDefault) { - // Determine kind on VA - if (srcMemory == nullptr && dstMemory != nullptr) { - kind = hipMemcpyHostToDevice; - } else if (srcMemory != nullptr && dstMemory == nullptr) { - kind = hipMemcpyDeviceToHost; - } else if (srcMemory != nullptr && dstMemory != nullptr) { - kind = hipMemcpyDeviceToDevice; - } else { - kind = hipMemcpyHostToHost; - } - } - - switch (kind) { - case hipMemcpyDeviceToHost: - command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList, - *srcMemory->asBuffer(), srcOffset, sizeBytes, dst); - break; - case hipMemcpyHostToDevice: - command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER, waitList, - *dstMemory->asBuffer(), dstOffset, sizeBytes, src); - break; - case hipMemcpyDeviceToDevice: - command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, - *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, sizeBytes); - break; - case hipMemcpyHostToHost: + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (kind == hipMemcpyHostToHost)) { memcpy(dst, src, sizeBytes); return hipSuccess; - default: - assert(!"Shouldn't reach here"); - break; + } else if 
((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER, waitList, + *dstMemory->asBuffer(), dOffset, sizeBytes, src); + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList, + *srcMemory->asBuffer(), sOffset, sizeBytes, dst); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, dOffset, sizeBytes); } + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -296,7 +276,7 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) { *free = freeMemory[0]; *total = device->info().globalMemSize_; -return hipSuccess; + return hipSuccess; } hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth, @@ -392,7 +372,7 @@ hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocat cl_channel_order channelOrder; cl_channel_type channelType; - getDrvChannelOrderAndType(pAllocateArray->format, pAllocateArray->numChannels, + getDrvChannelOrderAndType(pAllocateArray->format, pAllocateArray->numChannels, &channelOrder, &channelType); const cl_image_format image_format = { channelOrder, channelType }; @@ -710,8 +690,8 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { } hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind, amd::HostQueue& queue, - bool isAsync = false) { + size_t height, hipMemcpyKind kind, amd::HostQueue& queue, + bool isAsync = false) { // Create buffer rectangle info structure amd::BufferRect srcRect; amd::BufferRect dstRect; @@ -721,59 +701,38 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch size_t dst_slice_pitch = dpitch * height; size_t sOrigin[3] = { }; size_t dOrigin[3] = { 
}; - amd::Memory* srcPtr = getMemoryObject(src, sOrigin[0]); - amd::Memory* dstPtr = getMemoryObject(dst, dOrigin[0]); - - if (kind == hipMemcpyDefault) { - // Determine kind on VA - if (srcPtr == nullptr && dstPtr != nullptr) { - kind = hipMemcpyHostToDevice; - } else if (srcPtr != nullptr && dstPtr == nullptr) { - kind = hipMemcpyDeviceToHost; - } else if (srcPtr != nullptr && dstPtr != nullptr) { - kind = hipMemcpyDeviceToDevice; - } else { - kind = hipMemcpyHostToHost; - } - } - - amd::Coord3D size(region[0], region[1], region[2]); + amd::Memory* srcMemory = getMemoryObject(src, sOrigin[0]); + amd::Memory* dstMemory = getMemoryObject(dst, dOrigin[0]); if (!srcRect.create(sOrigin, region, spitch, src_slice_pitch) || !dstRect.create(dOrigin, region, dpitch, dst_slice_pitch)) { return hipErrorInvalidValue; } - amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D srcEnd(srcRect.end_, 1, 1); - amd::Coord3D dstEnd(dstRect.end_, 1, 1); - amd::Command* command = nullptr; amd::Command::EventWaitList waitList; - switch (kind) { - case hipMemcpyDeviceToHost: - command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER_RECT, waitList, - *srcPtr->asBuffer(), srcStart, size, dst, srcRect, dstRect); - break; - case hipMemcpyHostToDevice: - command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, - *dstPtr->asBuffer(), dstStart, size, src, dstRect, srcRect); - break; - case hipMemcpyDeviceToDevice: - command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(), - *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); - break; - case hipMemcpyHostToHost: + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D size(region[0], region[1], region[2]); + + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (kind == hipMemcpyHostToHost)) { for(unsigned int y = 0; y < height; y++) 
{ void* pDst = reinterpret_cast(reinterpret_cast(dst) + y * dpitch); void* pSrc = reinterpret_cast(reinterpret_cast(src) + y * spitch); memcpy(pDst, pSrc, width); } return hipSuccess; - default: - assert(!"Shouldn't reach here"); - break; + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstMemory->asBuffer(), dstStart, size, src, dstRect, srcRect); + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcMemory->asBuffer(), srcStart, size, dst, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcMemory->asBuffer(), + *dstMemory->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); } if (command == nullptr) { @@ -781,7 +740,6 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch } command->enqueue(); - if (!isAsync) { command->awaitCompletion(); } @@ -850,7 +808,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con } if ((wOffset + width > (dpitch)) || width > spitch) { - return hipErrorUnknown; + return hipErrorUnknown; } // Create buffer rectangle info structure @@ -862,65 +820,43 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con size_t dst_slice_pitch = dpitch * height; size_t sOrigin[3] = { }; size_t dOrigin[3] = {wOffset, hOffset, 0}; - size_t sz = 0; - amd::Memory* srcPtr = getMemoryObject(src, sz); - amd::Memory* dstPtr = getMemoryObject(dst->data, sz); + size_t offset = 0; + amd::Memory* srcMemory = getMemoryObject(src, offset); + amd::Memory* dstMemory = getMemoryObject(dst->data, offset); - if (kind == hipMemcpyDefault) { - // Determine kind on VA - if (srcPtr == nullptr && dstPtr != nullptr) { - kind = hipMemcpyHostToDevice; - } else if 
(srcPtr != nullptr && dstPtr == nullptr) { - kind = hipMemcpyDeviceToHost; - } else if (srcPtr != nullptr && dstPtr != nullptr) { - kind = hipMemcpyDeviceToDevice; - } else { - kind = hipMemcpyHostToHost; - } - } - - amd::Coord3D size(region[0], region[1], region[2]); + assert(offset == 0); if (!srcRect.create(sOrigin, region, spitch, src_slice_pitch) || !dstRect.create(dOrigin, region, dpitch, dst_slice_pitch)) { return hipErrorInvalidValue; } - amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D srcEnd(srcRect.end_, 1, 1); - amd::Coord3D dstEnd(dstRect.end_, 1, 1); - amd::Command* command = nullptr; amd::Command::EventWaitList waitList; - void* newDst = nullptr; + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D size(region[0], region[1], region[2]); - switch (kind) { - case hipMemcpyDeviceToHost: - command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, - *srcPtr->asBuffer(), srcStart, size, dst->data, srcRect, dstRect); - break; - case hipMemcpyHostToDevice: - command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, - *dstPtr->asBuffer(), dstStart, size, src, dstRect, srcRect); - break; - case hipMemcpyDeviceToDevice: - command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(), - *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); - break; - case hipMemcpyHostToHost: - newDst = reinterpret_cast(reinterpret_cast(dst->data) - + dpitch * hOffset + wOffset); + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (kind == hipMemcpyHostToHost)) { + void* newDst = reinterpret_cast(reinterpret_cast(dst->data) + + dpitch * hOffset + wOffset); for(unsigned int y = 0; y < height; y++) { void* pDst = reinterpret_cast(reinterpret_cast(newDst) + y * dpitch); void* pSrc = reinterpret_cast(reinterpret_cast(src) + y * spitch); memcpy(pDst, pSrc, 
width); } return hipSuccess; - default: - assert(!"Shouldn't reach here"); - break; + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstMemory->asBuffer(), dstStart, size, src, dstRect, srcRect); + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcMemory->asBuffer(), srcStart, size, dst, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcMemory->asBuffer(), + *dstMemory->asBuffer(), srcStart, dstStart, size, srcRect, dstRect); } if (command == nullptr) { @@ -928,9 +864,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con } command->enqueue(); - command->awaitCompletion(); - command->release(); return hipSuccess; @@ -946,25 +880,26 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, amd::Command* command = nullptr; amd::Command::EventWaitList waitList; - amd::Memory* memory; - size_t offset = 0; - amd::Coord3D dstOffset(wOffset, hOffset, 0); - switch (kind) { - case hipMemcpyDeviceToHost: - assert(!"Invalid case"); - case hipMemcpyHostToDevice: - memory = getMemoryObject(dstArray->data, offset); - assert(offset == 0); + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstArray->data, dOffset); + + assert(dOffset == 0); + + assert((kind == hipMemcpyHostToDevice) && "Invalid case"); + + if ((srcMemory == nullptr) && (dstMemory != nullptr)) { command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, - *memory->asBuffer(), dstOffset, count, src); - break; - case hipMemcpyDeviceToDevice: - case hipMemcpyDefault: - default: - assert(!"Shouldn't reach here"); - 
break; + *dstMemory->asBuffer(), {wOffset, hOffset}, count, src); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, {wOffset, hOffset}, count); + } else { + ShouldNotReachHere(); } + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -985,26 +920,26 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs amd::Command* command = nullptr; amd::Command::EventWaitList waitList; - amd::Memory* memory; - size_t offset = 0; - amd::Coord3D srcOffset(wOffset, hOffset, 0); + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcArray->data, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst, dOffset); - switch (kind) { - case hipMemcpyHostToDevice: - assert(!"Invalid case"); - case hipMemcpyDeviceToHost: - memory = getMemoryObject(srcArray->data, offset); - assert(offset == 0); + assert(sOffset == 0); + + assert((kind == hipMemcpyDeviceToHost) && "Invalid case"); + + if ((srcMemory != nullptr) && (dstMemory == nullptr)) { command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, - *memory->asBuffer(), srcOffset, count, dst); - break; - case hipMemcpyDeviceToDevice: - case hipMemcpyDefault: - default: - assert(!"Shouldn't reach here"); - break; + *srcMemory->asBuffer(), {wOffset, hOffset}, count, dst); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(), *dstMemory->asBuffer(), {wOffset, hOffset}, dOffset, count); + } else { + ShouldNotReachHere(); } + if (command == nullptr) { return hipErrorOutOfMemory; } @@ -1022,12 +957,25 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); + amd::Command* command = nullptr; 
amd::Command::EventWaitList waitList; - size_t offset = 0; - amd::Memory* memory = getMemoryObject(dstArray->data, offset); - assert(offset == 0); - amd::Command* command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, - *memory->asBuffer(), dstOffset, count, srcHost); + + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcHost, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstArray->data, dOffset); + + assert(dOffset == 0); + + if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList, + *dstMemory->asBuffer(), dstOffset, count, srcHost); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dstOffset, count); + } else { + ShouldNotReachHere(); + } if (command == nullptr) { return hipErrorOutOfMemory; @@ -1046,12 +994,25 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); + amd::Command* command = nullptr; amd::Command::EventWaitList waitList; - size_t offset = 0; - amd::Memory* memory = getMemoryObject(srcArray->data, offset); - assert(offset == 0); - amd::Command* command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, - *memory->asBuffer(), srcOffset, count, dst); + + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcArray->data, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst, dOffset); + + assert(sOffset == 0); + + if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList, + *srcMemory->asBuffer(), srcOffset, count, dst); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new 
amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(), *dstMemory->asBuffer(), srcOffset, dOffset, count); + } else { + ShouldNotReachHere(); + } if (command == nullptr) { return hipErrorOutOfMemory; @@ -1142,50 +1103,29 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { return hipErrorInvalidValue; } - amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D srcEnd(srcRect.end_, 1, 1); - amd::Coord3D dstEnd(dstRect.end_, 1, 1); - hipMemcpyKind kind = p->kind; - if (kind == hipMemcpyDefault) { - // Determine kind on VA - if (srcMemory == nullptr && dstMemory != nullptr) { - kind = hipMemcpyHostToDevice; - } else if (srcMemory != nullptr && dstMemory == nullptr) { - kind = hipMemcpyDeviceToHost; - } else if (srcMemory != nullptr && dstMemory != nullptr) { - kind = hipMemcpyDeviceToDevice; - } else { - kind = hipMemcpyHostToHost; - } - } - amd::Command* command = nullptr; amd::Command::EventWaitList waitList; + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); amd::Coord3D size(region[0], region[1], region[2]); - switch (kind) { - case hipMemcpyDeviceToHost: - command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, - *srcMemory->asBuffer(), srcStart, size, dstPtr, srcRect, dstRect); - break; - case hipMemcpyHostToDevice: - command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, - *dstMemory->asBuffer(), srcStart, size, srcPtr, srcRect, dstRect); - break; - case hipMemcpyDeviceToDevice: - command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, - *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, size, - srcRect, dstRect); - break; - case hipMemcpyHostToHost: + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (kind == hipMemcpyHostToHost)) { memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]); return hipSuccess; - 
default: - assert(!"Shouldn't reach here"); - break; + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstMemory->asBuffer(), srcStart, size, srcPtr, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcMemory->asBuffer(), srcStart, size, dstPtr, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, size, + srcRect, dstRect); } if (command == nullptr) { From 83ae8eb381fa6e12104da3c6c5d6ecf0fb31af11 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 1 Aug 2018 12:08:35 -0400 Subject: [PATCH 074/282] P4 to Git Change 1588185 by vsytchen@vsytchen-win10 on 2018/08/01 11:46:00 SWDEV-145570 - [HIP] Correctly handle calculating the offset when retrieving the amd::Memory object from a prepinned pointer ReviewBoardURL = http://ocltc.amd.com/reviews/r/15539/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#38 edit --- api/hip/hip_memory.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 7e74820852..d2cb3b7d8c 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -38,8 +38,16 @@ extern void getDrvChannelOrderAndType(const enum hipArray_Format Format, amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { amd::Memory *memObj = amd::MemObjMap::FindMemObj(ptr); - if (memObj != nullptr && memObj->getSvmPtr() != nullptr) { - offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getSvmPtr()); + if (memObj != nullptr) { + if (memObj->getSvmPtr() != nullptr) { + // SVM pointer + offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getSvmPtr()); + } else if (memObj->getHostMem() != nullptr) { + // Prepinned memory + offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getHostMem()); + } else { + ShouldNotReachHere(); + } } return memObj; } From f94b8d780a38f245fbfd4c74225aa760548ec8d2 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 2 Aug 2018 02:14:04 -0400 Subject: [PATCH 075/282] P4 to Git Change 1588551 by skudchad@skudchad_rocm on 2018/08/02 01:55:45 SWDEV-145570 - [HIP] - Refactor ihipModuleLaunchKernel and add hipModuleLaunchKernelExt Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#8 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#8 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#12 edit --- api/hip/hip_hcc.def.in | 2 ++ api/hip/hip_hcc.map.in | 2 ++ api/hip/hip_module.cpp | 51 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 478b4a277c..71e87d9a16 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -105,6 +105,8 @@ hipMemset3D hipModuleGetFunction hipModuleGetGlobal hipModuleLaunchKernel +hipModuleLaunchKernelExt +hipHccModuleLaunchKernel hipModuleLoad hipModuleLoadData hipModuleLoadDataEx diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 25e9844e94..7f21b0ebd7 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -106,6 +106,8 @@ global: hipModuleGetFunction; hipModuleGetGlobal; hipModuleLaunchKernel; + hipModuleLaunchKernelExt; + hipHccModuleLaunchKernel; hipModuleLoad; hipModuleLoadData; hipModuleLoadDataEx; diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 6831711a9a..fe87f93a7b 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -26,6 +26,7 @@ THE SOFTWARE. 
#include "hip_internal.hpp" #include "platform/program.hpp" +#include "hip_event.hpp" hipError_t ihipModuleLoadData(hipModule_t *module, const void *image); @@ -137,20 +138,24 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) return hipErrorInvalidDeviceFunction; } -hipError_t hipModuleLaunchKernel(hipFunction_t f, + +hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream, - void **kernelParams, void **extra) + void **kernelParams, void **extra, + hipEvent_t startEvent, hipEvent_t stopEvent) { HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, - kernelParams, extra); + kernelParams, extra, startEvent, stopEvent); amd::Kernel* kernel = as_amd(reinterpret_cast(f)); amd::Device* device = hip::getCurrentContext()->devices()[0]; + hip::Event* eStart = reinterpret_cast(startEvent); + hip::Event* eStop = reinterpret_cast(stopEvent); amd::HostQueue* queue; if (hStream == nullptr) { hip::syncStreams(); @@ -192,6 +197,17 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, } } + if(startEvent != nullptr) { + amd::Command* startCommand = new hip::TimerMarker(*eStart->stream_); + startCommand->enqueue(); + + if (eStart->event_ != nullptr) { + eStart->event_->release(); + } + + eStart->event_ = &startCommand->event(); + } + amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(*queue, waitList, *kernel, ndrange); if (!command) { return hipErrorOutOfMemory; @@ -204,9 +220,38 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, } command->enqueue(); + + if(stopEvent != nullptr) { + eStop->event_ = &command->event(); + command->retain(); + } + command->release(); return hipSuccess; } +hipError_t hipModuleLaunchKernel(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, 
uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra) +{ + return ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr); +} + +hipError_t hipModuleLaunchKernelExt(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra, + hipEvent_t startEvent, hipEvent_t stopEvent) +{ + return ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent); +} + +const auto& hipHccModuleLaunchKernel = hipModuleLaunchKernelExt; + From aeaa9b80f49aad6022a43890719efbef586491ea Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 2 Aug 2018 02:18:57 -0400 Subject: [PATCH 076/282] P4 to Git Change 1588556 by skudchad@skudchad_rocm on 2018/08/02 02:04:15 SWDEV-145570 - [HIP] - Release eStop if it is not a nullptr. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#13 edit --- api/hip/hip_module.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index fe87f93a7b..23cf47d724 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -222,6 +222,9 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, command->enqueue(); if(stopEvent != nullptr) { + if (eStop->event_ != nullptr) { + eStop->event_->release(); + } eStop->event_ = &command->event(); command->retain(); } From 732523c0ebdc10b8416bbe32c2a7e26b4760d33d Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 2 Aug 2018 12:33:55 -0400 Subject: [PATCH 077/282] P4 to Git Change 1588779 by cpaquot@cpaquot-ocl-lc-lnx on 2018/08/02 12:25:51 SWDEV-145570 - [HIP] Refactored some g_* stuff Refactored g_functions into a platform state. 
Added a _vars for registered variables. Added an execution stack similar to Hcc-clang. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#14 edit --- api/hip/hip_platform.cpp | 129 +++++++++++++++++++++++++++++++-------- 1 file changed, 103 insertions(+), 26 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 6a02bbaff4..15bd8525d9 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -99,8 +99,96 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data) return reinterpret_cast(as_cl(program)); } -std::map g_functions; +struct ihipExec_t { + dim3 _gridDim; + dim3 _blockDim; + size_t _sharedMem; + hipStream_t _hStream; + std::vector _arguments; +}; +class PlatformState { + amd::Monitor _lock; + + std::stack _execStack; + std::map _functions; + + struct RegisteredVar { + char* var; + char* hostVar; + char* deviceVar; + int size; + bool constant; + }; + + std::map _vars; + + static PlatformState* _platform; + + PlatformState() : _lock("Guards global function map") {} +public: + static PlatformState& instance() { + return *_platform; + } + + void registerVar(hipModule_t modules, + char* var, + char* hostVar, + char* deviceVar, + int size, + bool constant) { + amd::ScopedLock lock(_lock); + + const RegisteredVar rvar = { var, hostVar, deviceVar, size, constant != 0 }; + + _vars.insert(std::make_pair(modules, rvar)); + } + + void registerFunction(const void* hostFunction, hipFunction_t func) { + amd::ScopedLock lock(_lock); + + _functions.insert(std::make_pair(hostFunction, func)); + } + + hipFunction_t getFunc(const void* hostFunction) { + amd::ScopedLock lock(_lock); + const auto it = _functions.find(hostFunction); + if (it != _functions.cend()) { + return it->second; + } else { + return nullptr; + } + } + + void setupArgument(const void *arg, + size_t size, + size_t offset) { + amd::ScopedLock lock(_lock); + + auto& arguments = _execStack.top()._arguments; + + if 
(arguments.size() < offset + size) { + arguments.resize(offset + size); + } + + ::memcpy(&arguments[offset], arg, size); + } + + void configureCall(dim3 gridDim, + dim3 blockDim, + size_t sharedMem, + hipStream_t stream) { + amd::ScopedLock lock(_lock); + _execStack.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); + } + + void popExec(ihipExec_t& exec) { + amd::ScopedLock lock(_lock); + exec = std::move(_execStack.top()); + _execStack.pop(); + } +}; +PlatformState* PlatformState::_platform = new PlatformState(); extern "C" void __hipRegisterFunction( hipModule_t module, @@ -124,8 +212,7 @@ extern "C" void __hipRegisterFunction( amd::Kernel* kernel = new amd::Kernel(*program, *symbol, deviceName); if (!kernel) return; - // FIXME: not thread safe - g_functions.insert(std::make_pair(hostFunction, reinterpret_cast(as_cl(kernel)))); + PlatformState::instance().registerFunction(hostFunction, reinterpret_cast(as_cl(kernel))); } // Registers a device-side global variable. @@ -144,6 +231,8 @@ extern "C" void __hipRegisterVar( int global) // Unknown, always 0 { HIP_INIT(); + + PlatformState::instance().registerVar(modules, var, hostVar, deviceVar, size, constant != 0); } extern "C" void __hipUnregisterFatBinary( @@ -153,11 +242,6 @@ extern "C" void __hipUnregisterFatBinary( HIP_INIT(); } -dim3 g_gridDim; // FIXME: place in execution stack -dim3 g_blockDim; // FIXME: place in execution stack -size_t g_sharedMem; // FIXME: place in execution stack -hipStream_t g_stream; // FIXME: place in execution stack - extern "C" hipError_t hipConfigureCall( dim3 gridDim, dim3 blockDim, @@ -166,18 +250,11 @@ extern "C" hipError_t hipConfigureCall( { HIP_INIT_API(gridDim, blockDim, sharedMem, stream); - // FIXME: should push and new entry on the execution stack - - g_gridDim = gridDim; - g_blockDim = blockDim; - g_sharedMem = sharedMem; - g_stream = stream; + PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); return hipSuccess; } -char g_arguments[1024]; 
// FIXME: needs to grow - extern "C" hipError_t hipSetupArgument( const void *arg, size_t size, @@ -185,9 +262,8 @@ extern "C" hipError_t hipSetupArgument( { HIP_INIT_API(arg, size, offset); - // FIXME: should modify the top of the execution stack + PlatformState::instance().setupArgument(arg, size, offset); - ::memcpy(g_arguments + offset, arg, size); return hipSuccess; } @@ -195,22 +271,23 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) { HIP_INIT_API(hostFunction); - const auto it = g_functions.find(hostFunction); - if (it == g_functions.cend()) + hipFunction_t func = PlatformState::instance().getFunc(hostFunction); + if (func == nullptr) return hipErrorUnknown; - // FIXME: should pop an entry from the execution stack + ihipExec_t exec; + PlatformState::instance().popExec(exec); void *extra[] = { - HIP_LAUNCH_PARAM_BUFFER_POINTER, g_arguments, + HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0], HIP_LAUNCH_PARAM_BUFFER_SIZE, 0 /* FIXME: not needed, but should be correct*/, HIP_LAUNCH_PARAM_END }; - return hipModuleLaunchKernel(it->second, - g_gridDim.x, g_gridDim.y, g_gridDim.z, - g_blockDim.x, g_blockDim.y, g_blockDim.z, - g_sharedMem, g_stream, nullptr, extra); + return hipModuleLaunchKernel(func, + exec._gridDim.x, exec._gridDim.y, exec._gridDim.z, + exec._blockDim.x, exec._blockDim.y, exec._blockDim.z, + exec._sharedMem, exec._hStream, nullptr, extra); } #if defined(ATI_OS_LINUX) From a8f66beb288044e6624f19fc9864c2079875a2ee Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 2 Aug 2018 18:00:05 -0400 Subject: [PATCH 078/282] P4 to Git Change 1589030 by cpaquot@cpaquot-ocl-lc-lnx on 2018/08/02 17:40:15 SWDEV-145570 - [HIP] Fixed the underline Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#15 edit --- api/hip/hip_platform.cpp | 62 ++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 15bd8525d9..05e03123a8 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -100,18 +100,18 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data) } struct ihipExec_t { - dim3 _gridDim; - dim3 _blockDim; - size_t _sharedMem; - hipStream_t _hStream; - std::vector _arguments; + dim3 gridDim_; + dim3 blockDim_; + size_t sharedMem_; + hipStream_t hStream_; + std::vector arguments_; }; class PlatformState { - amd::Monitor _lock; + amd::Monitor lock_; - std::stack _execStack; - std::map _functions; + std::stack execStack_; + std::map functions_; struct RegisteredVar { char* var; @@ -121,14 +121,14 @@ class PlatformState { bool constant; }; - std::map _vars; + std::map vars_; - static PlatformState* _platform; + static PlatformState* platform_; - PlatformState() : _lock("Guards global function map") {} + PlatformState() : lock_("Guards global function map") {} public: static PlatformState& instance() { - return *_platform; + return *platform_; } void registerVar(hipModule_t modules, @@ -137,23 +137,23 @@ public: char* deviceVar, int size, bool constant) { - amd::ScopedLock lock(_lock); + amd::ScopedLock lock(lock_); const RegisteredVar rvar = { var, hostVar, deviceVar, size, constant != 0 }; - _vars.insert(std::make_pair(modules, rvar)); + vars_.insert(std::make_pair(modules, rvar)); } void registerFunction(const void* hostFunction, hipFunction_t func) { - amd::ScopedLock lock(_lock); + amd::ScopedLock lock(lock_); - _functions.insert(std::make_pair(hostFunction, func)); + functions_.insert(std::make_pair(hostFunction, func)); } hipFunction_t getFunc(const void* hostFunction) { - amd::ScopedLock lock(_lock); - const auto it = _functions.find(hostFunction); - if (it != 
_functions.cend()) { + amd::ScopedLock lock(lock_); + const auto it = functions_.find(hostFunction); + if (it != functions_.cend()) { return it->second; } else { return nullptr; @@ -163,9 +163,9 @@ public: void setupArgument(const void *arg, size_t size, size_t offset) { - amd::ScopedLock lock(_lock); + amd::ScopedLock lock(lock_); - auto& arguments = _execStack.top()._arguments; + auto& arguments = execStack_.top().arguments_; if (arguments.size() < offset + size) { arguments.resize(offset + size); @@ -178,17 +178,17 @@ public: dim3 blockDim, size_t sharedMem, hipStream_t stream) { - amd::ScopedLock lock(_lock); - _execStack.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); + amd::ScopedLock lock(lock_); + execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); } void popExec(ihipExec_t& exec) { - amd::ScopedLock lock(_lock); - exec = std::move(_execStack.top()); - _execStack.pop(); + amd::ScopedLock lock(lock_); + exec = std::move(execStack_.top()); + execStack_.pop(); } }; -PlatformState* PlatformState::_platform = new PlatformState(); +PlatformState* PlatformState::platform_ = new PlatformState(); extern "C" void __hipRegisterFunction( hipModule_t module, @@ -279,15 +279,15 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) PlatformState::instance().popExec(exec); void *extra[] = { - HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0], + HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0], HIP_LAUNCH_PARAM_BUFFER_SIZE, 0 /* FIXME: not needed, but should be correct*/, HIP_LAUNCH_PARAM_END }; return hipModuleLaunchKernel(func, - exec._gridDim.x, exec._gridDim.y, exec._gridDim.z, - exec._blockDim.x, exec._blockDim.y, exec._blockDim.z, - exec._sharedMem, exec._hStream, nullptr, extra); + exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z, + exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z, + exec.sharedMem_, exec.hStream_, nullptr, extra); } #if defined(ATI_OS_LINUX) From 8e9e9416fd42364ea33d94cc74f8e9c467757300 Mon Sep 17 
00:00:00 2001 From: foreman Date: Thu, 2 Aug 2018 18:52:37 -0400 Subject: [PATCH 079/282] P4 to Git Change 1589062 by cpaquot@cpaquot-ocl-lc-lnx on 2018/08/02 18:34:20 SWDEV-145570 - [HIP] Fix a segfault exposed by rocBlas. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#39 edit --- api/hip/hip_memory.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index d2cb3b7d8c..d4380eee0b 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1299,7 +1299,6 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void size_t offset = 0; amd::Memory* memObj = getMemoryObject(ptr, offset); - amd::Context &memObjCtx = memObj->getContext(); int device = 0; if (memObj != nullptr) { @@ -1308,6 +1307,8 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void attributes->devicePointer = memObj->getSvmPtr(); attributes->isManaged = 0; attributes->allocationFlags = memObj->getMemFlags(); + + amd::Context &memObjCtx = memObj->getContext(); for (auto& ctx : g_devices) { ++device; if (*ctx == memObjCtx) { From 1531e24327b12a6854b3e1dfa7a3bb0da3630981 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 2 Aug 2018 20:20:23 -0400 Subject: [PATCH 080/282] P4 to Git Change 1589087 by skudchad@skudchad_rocm on 2018/08/02 20:10:02 SWDEV-145570 - [HIP] - hipHccModuleLaunchKernel needs to be a c++ sym Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#9 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#14 edit --- api/hip/hip_hcc.map.in | 2 +- api/hip/hip_module.cpp | 25 ++++++++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 7f21b0ebd7..48bb8eccba 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -107,7 +107,6 @@ global: hipModuleGetGlobal; hipModuleLaunchKernel; hipModuleLaunchKernelExt; - hipHccModuleLaunchKernel; hipModuleLoad; hipModuleLoadData; hipModuleLoadDataEx; @@ -166,6 +165,7 @@ global: hipHccGetAcceleratorView*; hipCreateSurfaceObject*; hipDestroySurfaceObject*; + hipHccModuleLaunchKernel*; }; local: *; diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 23cf47d724..6b154b2d9e 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -244,17 +244,28 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr); } -hipError_t hipModuleLaunchKernelExt(hipFunction_t f, - uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, - uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, - uint32_t sharedMemBytes, hipStream_t hStream, - void **kernelParams, void **extra, - hipEvent_t startEvent, hipEvent_t stopEvent) +hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, + uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, + uint32_t blockDimZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, + hipEvent_t stopEvent) { return ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent); } -const auto& hipHccModuleLaunchKernel = hipModuleLaunchKernelExt; +hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX, + uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, + uint32_t 
blockDimZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, + hipEvent_t stopEvent) +{ + return ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent); +} From f6d89dae907e17842fb8612320a5deb35616d3dd Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 3 Aug 2018 01:15:15 -0400 Subject: [PATCH 081/282] P4 to Git Change 1589138 by cpaquot@cpaquot-ocl-lc-lnx on 2018/08/03 01:04:05 SWDEV-145570 - [HIP] Added __gnu_h2f_ieee and __gnu_f2h_ieee Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#9 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#10 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#16 edit --- api/hip/hip_hcc.def.in | 2 ++ api/hip/hip_hcc.map.in | 2 ++ api/hip/hip_platform.cpp | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 71e87d9a16..f93517c67a 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -131,6 +131,8 @@ __hipRegisterFatBinary __hipRegisterFunction __hipRegisterVar __hipUnregisterFatBinary +__gnu_f2h_ieee +__gnu_h2f_ieee hipConfigureCall hipSetupArgument hipLaunchByPtr diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 48bb8eccba..c983f83430 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -131,6 +131,8 @@ global: __hipRegisterFunction; __hipRegisterVar; __hipUnregisterFatBinary; + __gnu_h2f_ieee; + __gnu_f2h_ieee; hipConfigureCall; hipSetupArgument; hipLaunchByPtr; diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 05e03123a8..a6bb4e290c 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -509,4 +509,44 @@ void hipLaunchKernelGGLImpl( } +// conversion routines between float and half precision +static inline std::uint32_t f32_as_u32(float 
f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; } +static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; } +static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); } + +// half � float, the f16 is in the low 16 bits of the input argument �a� +static inline float __convert_half_to_float(std::uint32_t a) noexcept { + std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U; + std::uint32_t v = f32_as_u32(u32_as_f32(u) * 0x1.0p+112f) + 0x38000000U; + u = (a & 0x7fff) != 0 ? v : u; + return u32_as_f32(u) * 0x1.0p-112f; +} + +// float � half with nearest even rounding +// The lower 16 bits of the result is the bit pattern for the f16 +static inline std::uint32_t __convert_float_to_half(float a) noexcept { + std::uint32_t u = f32_as_u32(a); + int e = static_cast((u >> 23) & 0xff) - 127 + 15; + std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0); + std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0); + std::uint32_t n = ((std::uint32_t)e << 12) | m; + std::uint32_t s = (u >> 16) & 0x8000; + int b = clamp_int(1-e, 0, 13); + std::uint32_t d = (0x1000 | m) >> b; + d |= (d << b) != (0x1000 | m); + std::uint32_t v = e < 1 ? d : n; + v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5)); + v = e > 30 ? 0x7c00 : v; + v = e == 143 ? 
i : v; + return s | v; +} + +extern "C" float __gnu_h2f_ieee(unsigned short h){ + return __convert_half_to_float((std::uint32_t) h); +} + +extern "C" unsigned short __gnu_f2h_ieee(float f){ + return (unsigned short)__convert_float_to_half(f); +} + #endif // defined(ATI_OS_LINUX) From c708d8b16bcb04407a08b345b2584f4381b715f5 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 6 Aug 2018 21:24:14 -0400 Subject: [PATCH 082/282] P4 to Git Change 1590010 by skudchad@skudchad_test2_win_opencl on 2018/08/06 21:18:48 SWDEV-145570 - [HIP] - Fix some issues in hip runtime - Set stream for event - Free mem needs to be reported in bytes but runtime backends reports in Kb ReviewBoardURL = http://ocltc.amd.com/reviews/r/15586/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#40 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#15 edit --- api/hip/hip_memory.cpp | 2 +- api/hip/hip_module.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index d4380eee0b..24ed647acb 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -281,7 +281,7 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) { return hipErrorInvalidValue; } - *free = freeMemory[0]; + *free = freeMemory[0] * Ki; *total = device->info().globalMemSize_; return hipSuccess; diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 6b154b2d9e..f4a8f6aaa3 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -198,6 +198,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, } if(startEvent != nullptr) { + eStart->stream_ = queue; amd::Command* startCommand = new hip::TimerMarker(*eStart->stream_); startCommand->enqueue(); @@ -225,6 +226,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, if (eStop->event_ != nullptr) { eStop->event_->release(); } + eStop->stream_ = queue; eStop->event_ = &command->event(); command->retain(); } From 
c4e3d90ba905a852f1856aaf34b82277a88d4873 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 10 Aug 2018 14:04:43 -0400 Subject: [PATCH 083/282] P4 to Git Change 1592223 by cpaquot@cpaquot-ocl-lc-lnx on 2018/08/10 13:50:47 SWDEV-145570 - [HIP] Report clockRate in kilohertz. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#11 edit --- api/hip/hip_device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 531beb52f4..fac9eae04d 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -150,7 +150,7 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) deviceProps.maxGridSize[0] = UINT32_MAX; deviceProps.maxGridSize[1] = UINT32_MAX; deviceProps.maxGridSize[2] = UINT32_MAX; - deviceProps.clockRate = info.maxEngineClockFrequency_; + deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000; deviceProps.memoryClockRate = info.maxMemoryClockFrequency_; deviceProps.memoryBusWidth = info.globalMemChannels_ * 32; deviceProps.totalConstMem = info.maxConstantBufferSize_; From 903f8a551bcf36426f2e0c449dffa672d3c3845b Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 10 Aug 2018 14:09:04 -0400 Subject: [PATCH 084/282] P4 to Git Change 1592225 by cpaquot@cpaquot-ocl-lc-lnx on 2018/08/10 13:52:21 SWDEV-145570 - [HIP] Report memoryClockRate in kilohertz too. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#12 edit --- api/hip/hip_device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index fac9eae04d..5c8de8c03a 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -151,7 +151,7 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) deviceProps.maxGridSize[1] = UINT32_MAX; deviceProps.maxGridSize[2] = UINT32_MAX; deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000; - deviceProps.memoryClockRate = info.maxMemoryClockFrequency_; + deviceProps.memoryClockRate = info.maxMemoryClockFrequency_ * 1000; deviceProps.memoryBusWidth = info.globalMemChannels_ * 32; deviceProps.totalConstMem = info.maxConstantBufferSize_; deviceProps.major = info.gfxipVersion_ / 100; From 327e62e31a6cc92e0ed22f12c0c589c83990e976 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 10 Aug 2018 14:10:19 -0400 Subject: [PATCH 085/282] P4 to Git Change 1592228 by cpaquot@cpaquot-ocl-lc-lnx on 2018/08/10 13:55:25 SWDEV-145570 - [HIP] Report maxGridSize as INT32_MAX, not UINT32_MAX Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#13 edit --- api/hip/hip_device.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 5c8de8c03a..5c03f13cc5 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -147,9 +147,9 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0]; deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1]; deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2]; - deviceProps.maxGridSize[0] = UINT32_MAX; - deviceProps.maxGridSize[1] = UINT32_MAX; - deviceProps.maxGridSize[2] = UINT32_MAX; + deviceProps.maxGridSize[0] = INT32_MAX; + deviceProps.maxGridSize[1] = INT32_MAX; + deviceProps.maxGridSize[2] = INT32_MAX; deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000; deviceProps.memoryClockRate = info.maxMemoryClockFrequency_ * 1000; deviceProps.memoryBusWidth = info.globalMemChannels_ * 32; From a38de7d73d0122020d9a9259db9cb2f3d586b3ca Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 10 Aug 2018 17:16:53 -0400 Subject: [PATCH 086/282] P4 to Git Change 1592348 by cpaquot@cpaquot-ocl-lc-lnx on 2018/08/10 17:00:39 SWDEV-145570 - [HIP] Do not multiply block and dims for Hcc/Ext kernel module launch Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#16 edit --- api/hip/hip_module.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index f4a8f6aaa3..67c4f06c6c 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -169,7 +169,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, } size_t globalWorkOffset[3] = {0}; - size_t globalWorkSize[3] = { gridDimX * blockDimX, gridDimY * blockDimY, gridDimZ * blockDimZ}; + size_t globalWorkSize[3] = { gridDimX, gridDimY, gridDimZ }; size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ }; amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); amd::Command::EventWaitList waitList; @@ -242,7 +242,8 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra) { - return ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + return ihipModuleLaunchKernel(f, gridDimX * blockDimX, gridDimY * blockDimY, gridDimZ * blockDimZ, + blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr); } From 21559762b80a5bc8fd60e4f6ac4d167b3bdd3929 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 14 Aug 2018 18:54:13 -0400 Subject: [PATCH 087/282] P4 to Git Change 1593706 by skudchad@skudchad_test2_win_opencl on 2018/08/14 18:44:29 SWDEV-145570 - [HIP] Implement hipError* ReviewBoardURL = http://ocltc.amd.com/reviews/r/15619/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#14 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_error.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#13 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#41 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_peer.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_profile.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#12 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_surface.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#9 edit --- api/hip/hip_context.cpp | 41 ++++---- api/hip/hip_device.cpp | 36 +++---- api/hip/hip_device_runtime.cpp | 76 +++++++-------- api/hip/hip_error.cpp | 132 ++++++++++++++++++++++++- api/hip/hip_event.cpp | 34 +++---- api/hip/hip_internal.hpp | 7 +- api/hip/hip_memory.cpp | 172 ++++++++++++++++----------------- api/hip/hip_module.cpp | 32 +++--- api/hip/hip_peer.cpp | 20 ++-- api/hip/hip_platform.cpp | 17 ++-- api/hip/hip_profile.cpp | 4 +- api/hip/hip_stream.cpp | 28 +++--- api/hip/hip_surface.cpp | 4 +- api/hip/hip_texture.cpp | 58 +++++------ 14 files changed, 396 insertions(+), 265 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index e75c8e5b5e..0ac257df83 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -31,6 +31,7 @@ namespace hip { thread_local amd::Context* g_context = nullptr; thread_local std::stack g_ctxtStack; +thread_local hipError_t g_lastError = hipSuccess; std::once_flag g_ihipInitialized; std::map g_nullStreams; @@ -85,14 +86,14 @@ using namespace hip; hipError_t hipInit(unsigned int flags) { HIP_INIT_API(flags); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) { HIP_INIT_API(ctx, flags, device); if (static_cast(device) >= g_devices.size()) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *ctx = reinterpret_cast(g_devices[device]); @@ -101,7 +102,7 @@ hipError_t 
hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) g_devices[device]->retain(); g_ctxtStack.push(g_devices[device]); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipCtxSetCurrent(hipCtx_t ctx) { @@ -119,7 +120,7 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) { g_ctxtStack.push(hip::getCurrentContext()); } - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { @@ -127,19 +128,19 @@ hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { *ctx = reinterpret_cast(hip::getCurrentContext()); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipRuntimeGetVersion(int *runtimeVersion) { HIP_INIT_API(runtimeVersion); if (!runtimeVersion) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *runtimeVersion = AMD_PLATFORM_BUILD_NUMBER; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipCtxDestroy(hipCtx_t ctx) { @@ -147,7 +148,7 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) { amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); if (amdContext == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } // Need to remove the ctx of calling thread if its the top one @@ -163,7 +164,7 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) { } } - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { @@ -171,17 +172,17 @@ hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); if (amdContext == nullptr) { - return hipErrorInvalidContext; + HIP_RETURN(hipErrorInvalidContext); } if (!g_ctxtStack.empty()) { amdContext = g_ctxtStack.top(); g_ctxtStack.pop(); } else { - return hipErrorInvalidContext; + HIP_RETURN(hipErrorInvalidContext); } - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipCtxPushCurrent(hipCtx_t ctx) { @@ -189,13 +190,13 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) { amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); if (amdContext == nullptr) { - return 
hipErrorInvalidContext; + HIP_RETURN(hipErrorInvalidContext); } hip::g_context = amdContext; g_ctxtStack.push(hip::getCurrentContext()); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDriverGetVersion(int* driverVersion) { @@ -205,13 +206,13 @@ hipError_t hipDriverGetVersion(int* driverVersion) { const auto& info = deviceHandle->info(); if (driverVersion) { - *driverVersion = AMD_PLATFORM_BUILD_NUMBER * 100 + - AMD_PLATFORM_REVISION_NUMBER; + *driverVersion = AMD_PLATFORM_BUILD_NUMBER * 100 + + AMD_PLATFORM_REVISION_NUMBER; } else { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } - return hipSuccess;; + HIP_RETURN(hipSuccess); } hipError_t hipCtxGetDevice(hipDevice_t* device) { @@ -221,11 +222,11 @@ hipError_t hipCtxGetDevice(hipDevice_t* device) { for (unsigned int i = 0; i < g_devices.size(); i++) { if (g_devices[i] == hip::getCurrentContext()) { *device = static_cast(i); - return hipSuccess; + HIP_RETURN(hipSuccess); } } } else { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } return hipErrorUnknown; diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 5c03f13cc5..30ad2f3af1 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -30,10 +30,10 @@ hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { if (device != nullptr) { *device = deviceId; } else { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } - return hipSuccess; + HIP_RETURN(hipSuccess); }; hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) { @@ -42,7 +42,7 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) // No way to set cache config yet. 
- return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { @@ -50,11 +50,11 @@ hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { HIP_INIT_API(bytes, device); if (device < 0 || static_cast(device) >= g_devices.size()) { - return hipErrorInvalidDevice; + HIP_RETURN(hipErrorInvalidDevice); } if (bytes == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } auto* deviceHandle = g_devices[device]->devices()[0]; @@ -62,7 +62,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { *bytes = info.globalMemSize_; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { @@ -70,11 +70,11 @@ hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device HIP_INIT_API(major, minor, device); if (device < 0 || static_cast(device) >= g_devices.size()) { - return hipErrorInvalidDevice; + HIP_RETURN(hipErrorInvalidDevice); } if (major == nullptr || minor == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } auto* deviceHandle = g_devices[device]->devices()[0]; @@ -82,13 +82,13 @@ hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device *major = info.gfxipVersion_ / 100; *minor = info.gfxipVersion_ % 100; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceGetCount(int* count) { HIP_INIT_API(count); - return ihipDeviceGetCount(count); + HIP_RETURN(ihipDeviceGetCount(count)); } hipError_t ihipDeviceGetCount(int* count) { @@ -107,11 +107,11 @@ hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { HIP_INIT_API((void*)name, len, device); if (device < 0 || static_cast(device) >= g_devices.size()) { - return hipErrorInvalidDevice; + HIP_RETURN(hipErrorInvalidDevice); } if (name == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } auto* deviceHandle = 
g_devices[device]->devices()[0]; @@ -120,18 +120,18 @@ hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { len = ((cl_uint)len < ::strlen(info.boardName_)) ? len : 128; ::strncpy(name, info.boardName_, len); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) { HIP_INIT_API(props, device); if (props == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } if (unsigned(device) >= g_devices.size()) { - return hipErrorInvalidDevice; + HIP_RETURN(hipErrorInvalidDevice); } auto* deviceHandle = g_devices[device]->devices()[0]; @@ -188,7 +188,7 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) deviceProps.gcnArch = info.gfxipVersion_; *props = deviceProps; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) { @@ -196,7 +196,7 @@ hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) { @@ -204,5 +204,5 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** a assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index 5b60623761..87a149a10a 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -29,7 +29,7 @@ hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { HIP_INIT_API(device, properties); if (device == nullptr || properties == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *device = 0; @@ -135,7 +135,7 @@ hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { } } - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t 
hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { @@ -143,19 +143,19 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) HIP_INIT_API(pi, attr, device); if (pi == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } int count = 0; ihipDeviceGetCount(&count); if (device < 0 || device >= count) { - return hipErrorInvalidDevice; + HIP_RETURN(hipErrorInvalidDevice); } //FIXME: should we cache the props, or just select from deviceHandle->info_? hipDeviceProp_t prop = {0}; hipError_t err = hipGetDeviceProperties(&prop, device); - if (err != hipSuccess) return err; + if (err != hipSuccess) HIP_RETURN(err); switch (attr) { case hipDeviceAttributeMaxThreadsPerBlock: @@ -234,10 +234,10 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) *pi = prop.isMultiGpuBoard; break; default: - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { @@ -245,7 +245,7 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { HIP_INIT_API(device, pciBusIdstr); if (device == nullptr || pciBusIdstr == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } int pciBusID = -1; @@ -268,19 +268,19 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { } } - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) { HIP_INIT_API(cacheConfig); if(cacheConfig == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *cacheConfig = hipFuncCache_t(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { @@ -288,23 +288,23 @@ hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { HIP_INIT_API(pValue, limit); if(pValue == nullptr) { - return 
hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } if(limit == hipLimitMallocHeapSize) { hipDeviceProp_t prop; hipGetDeviceProperties(&prop, 0); *pValue = prop.totalGlobalMem; - return hipSuccess; + HIP_RETURN(hipSuccess); } else { - return hipErrorUnsupportedLimit; + HIP_RETURN(hipErrorUnsupportedLimit); } } /** hipError_t hipDeviceGetP2PAttribute ( int* value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice ) { assert(0); - return hipSuccess; + HIP_RETURN(hipSuccess); } **/ @@ -315,11 +315,11 @@ hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { int count; ihipDeviceGetCount(&count); if (device < 0 || device > count) { - return hipErrorInvalidDevice; + HIP_RETURN(hipErrorInvalidDevice); } if (pciBusId == nullptr || len < 0) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } hipDeviceProp_t prop; @@ -330,7 +330,7 @@ hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { prop.pciBusID, prop.pciDeviceID); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { @@ -338,12 +338,12 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { *pConfig = hipSharedMemBankSizeFourByte; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceGetStreamPriorityRange ( int* leastPriority, int* greatestPriority ) { assert(0); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceReset ( void ) { @@ -351,7 +351,7 @@ hipError_t hipDeviceReset ( void ) { /* FIXME */ - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { @@ -359,11 +359,11 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { // No way to set cache config yet. 
- return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) { - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { @@ -371,7 +371,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { // No way to set cache config yet. - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipDeviceSynchronize ( void ) { @@ -382,11 +382,11 @@ hipError_t hipDeviceSynchronize ( void ) { amd::HostQueue* queue = hip::getNullStream(); if (!queue) { - return hipErrorOutOfMemory; + HIP_RETURN(hipErrorOutOfMemory); } queue->finish(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipGetDevice ( int* deviceId ) { @@ -396,24 +396,24 @@ hipError_t hipGetDevice ( int* deviceId ) { for (unsigned int i = 0; i < g_devices.size(); i++) { if (g_devices[i] == hip::getCurrentContext()) { *deviceId = i; - return hipSuccess; + HIP_RETURN(hipSuccess); } } } else { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipGetDeviceCount ( int* count ) { HIP_INIT_API(count); - return ihipDeviceGetCount(count); + HIP_RETURN(ihipDeviceGetCount(count)); } hipError_t hipGetDeviceFlags ( unsigned int* flags ) { - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) { @@ -421,7 +421,7 @@ hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) { @@ -429,7 +429,7 @@ hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipSetDevice ( int device ) { @@ -438,9 
+438,9 @@ hipError_t hipSetDevice ( int device ) { if (static_cast(device) < g_devices.size()) { hip::setCurrentContext(device); - return hipSuccess; + HIP_RETURN(hipSuccess); } - return hipErrorInvalidDevice; + HIP_RETURN(hipErrorInvalidDevice); } hipError_t hipSetDeviceFlags ( unsigned int flags ) { @@ -453,10 +453,10 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) { hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax; if (flags & (~supportedFlags)) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipSetValidDevices ( int* device_arr, int len ) { @@ -464,6 +464,6 @@ hipError_t hipSetValidDevices ( int* device_arr, int len ) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } diff --git a/api/hip/hip_error.cpp b/api/hip/hip_error.cpp index 2a8785c375..5f76e560c0 100644 --- a/api/hip/hip_error.cpp +++ b/api/hip/hip_error.cpp @@ -27,22 +27,146 @@ THE SOFTWARE. 
hipError_t hipGetLastError() { HIP_INIT_API(); - return hipErrorUnknown; + hipError_t err = hip::g_lastError; + hip::g_lastError = hipSuccess; + return err; } hipError_t hipPeekAtLastError() { HIP_INIT_API(); - return hipErrorUnknown; + hipError_t err = hip::g_lastError; + HIP_RETURN(err); } const char *hipGetErrorName(hipError_t hip_error) { - return ""; + switch (hip_error) { + case hipSuccess: + return "hipSuccess"; + case hipErrorOutOfMemory: + return "hipErrorOutOfMemory"; + case hipErrorNotInitialized: + return "hipErrorNotInitialized"; + case hipErrorDeinitialized: + return "hipErrorDeinitialized"; + case hipErrorProfilerDisabled: + return "hipErrorProfilerDisabled"; + case hipErrorProfilerNotInitialized: + return "hipErrorProfilerNotInitialized"; + case hipErrorProfilerAlreadyStarted: + return "hipErrorProfilerAlreadyStarted"; + case hipErrorProfilerAlreadyStopped: + return "hipErrorProfilerAlreadyStopped"; + case hipErrorInvalidImage: + return "hipErrorInvalidImage"; + case hipErrorInvalidContext: + return "hipErrorInvalidContext"; + case hipErrorContextAlreadyCurrent: + return "hipErrorContextAlreadyCurrent"; + case hipErrorMapFailed: + return "hipErrorMapFailed"; + case hipErrorUnmapFailed: + return "hipErrorUnmapFailed"; + case hipErrorArrayIsMapped: + return "hipErrorArrayIsMapped"; + case hipErrorAlreadyMapped: + return "hipErrorAlreadyMapped"; + case hipErrorNoBinaryForGpu: + return "hipErrorNoBinaryForGpu"; + case hipErrorAlreadyAcquired: + return "hipErrorAlreadyAcquired"; + case hipErrorNotMapped: + return "hipErrorNotMapped"; + case hipErrorNotMappedAsArray: + return "hipErrorNotMappedAsArray"; + case hipErrorNotMappedAsPointer: + return "hipErrorNotMappedAsPointer"; + case hipErrorECCNotCorrectable: + return "hipErrorECCNotCorrectable"; + case hipErrorUnsupportedLimit: + return "hipErrorUnsupportedLimit"; + case hipErrorContextAlreadyInUse: + return "hipErrorContextAlreadyInUse"; + case hipErrorPeerAccessUnsupported: + return 
"hipErrorPeerAccessUnsupported"; + case hipErrorInvalidKernelFile: + return "hipErrorInvalidKernelFile"; + case hipErrorInvalidGraphicsContext: + return "hipErrorInvalidGraphicsContext"; + case hipErrorInvalidSource: + return "hipErrorInvalidSource"; + case hipErrorFileNotFound: + return "hipErrorFileNotFound"; + case hipErrorSharedObjectSymbolNotFound: + return "hipErrorSharedObjectSymbolNotFound"; + case hipErrorSharedObjectInitFailed: + return "hipErrorSharedObjectInitFailed"; + case hipErrorOperatingSystem: + return "hipErrorOperatingSystem"; + case hipErrorSetOnActiveProcess: + return "hipErrorSetOnActiveProcess"; + case hipErrorInvalidHandle: + return "hipErrorInvalidHandle"; + case hipErrorNotFound: + return "hipErrorNotFound"; + case hipErrorIllegalAddress: + return "hipErrorIllegalAddress"; + case hipErrorMissingConfiguration: + return "hipErrorMissingConfiguration"; + case hipErrorMemoryAllocation: + return "hipErrorMemoryAllocation"; + case hipErrorInitializationError: + return "hipErrorInitializationError"; + case hipErrorLaunchFailure: + return "hipErrorLaunchFailure"; + case hipErrorPriorLaunchFailure: + return "hipErrorPriorLaunchFailure"; + case hipErrorLaunchTimeOut: + return "hipErrorLaunchTimeOut"; + case hipErrorLaunchOutOfResources: + return "hipErrorLaunchOutOfResources"; + case hipErrorInvalidDeviceFunction: + return "hipErrorInvalidDeviceFunction"; + case hipErrorInvalidConfiguration: + return "hipErrorInvalidConfiguration"; + case hipErrorInvalidDevice: + return "hipErrorInvalidDevice"; + case hipErrorInvalidValue: + return "hipErrorInvalidValue"; + case hipErrorInvalidDevicePointer: + return "hipErrorInvalidDevicePointer"; + case hipErrorInvalidMemcpyDirection: + return "hipErrorInvalidMemcpyDirection"; + case hipErrorUnknown: + return "hipErrorUnknown"; + case hipErrorInvalidResourceHandle: + return "hipErrorInvalidResourceHandle"; + case hipErrorNotReady: + return "hipErrorNotReady"; + case hipErrorNoDevice: + return "hipErrorNoDevice"; 
+ case hipErrorPeerAccessAlreadyEnabled: + return "hipErrorPeerAccessAlreadyEnabled"; + case hipErrorPeerAccessNotEnabled: + return "hipErrorPeerAccessNotEnabled"; + case hipErrorRuntimeMemory: + return "hipErrorRuntimeMemory"; + case hipErrorRuntimeOther: + return "hipErrorRuntimeOther"; + case hipErrorHostMemoryAlreadyRegistered: + return "hipErrorHostMemoryAlreadyRegistered"; + case hipErrorHostMemoryNotRegistered: + return "hipErrorHostMemoryNotRegistered"; + case hipErrorTbd: + return "hipErrorTbd"; + default: + return "hipErrorUnknown"; + }; } const char *hipGetErrorString(hipError_t hip_error) { - return ""; + return hipGetErrorName(hip_error); } diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 127148e63c..197eac0579 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -64,38 +64,38 @@ hipError_t ihipEventQuery(hipEvent_t event) { e->event_->notifyCmdQueue(); - return (e->event_->status() == CL_COMPLETE)? hipSuccess : hipErrorNotReady; + return (e->event_->status() == CL_COMPLETE) ? 
hipSuccess : hipErrorNotReady; } hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { HIP_INIT_API(event, flags); - return ihipEventCreateWithFlags(event, flags); + HIP_RETURN(ihipEventCreateWithFlags(event, flags)); } hipError_t hipEventCreate(hipEvent_t* event) { HIP_INIT_API(event); - return ihipEventCreateWithFlags(event, 0); + HIP_RETURN(ihipEventCreateWithFlags(event, 0)); } hipError_t hipEventDestroy(hipEvent_t event) { HIP_INIT_API(event); if (event == nullptr) { - return hipErrorInvalidResourceHandle; + HIP_RETURN(hipErrorInvalidResourceHandle); } delete reinterpret_cast(event); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { HIP_INIT_API(ms, start, stop); if (start == nullptr || stop == nullptr) { - return hipErrorInvalidResourceHandle; + HIP_RETURN(hipErrorInvalidResourceHandle); } hip::Event* eStart = reinterpret_cast(start); @@ -103,32 +103,32 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { if (eStart->event_ == nullptr || eStop->event_ == nullptr) { - return hipErrorInvalidResourceHandle; + HIP_RETURN(hipErrorInvalidResourceHandle); } if ((eStart->flags | eStop->flags) & hipEventDisableTiming) { - return hipErrorInvalidResourceHandle; + HIP_RETURN(hipErrorInvalidResourceHandle); } if (ihipEventQuery(start) == hipErrorNotReady || ihipEventQuery(stop) == hipErrorNotReady) { - return hipErrorNotReady; + HIP_RETURN(hipErrorNotReady); } if (ms == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *ms = static_cast(eStop->event_->profilingInfo().submitted_ - eStart->event_->profilingInfo().submitted_)/1000000.f; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { HIP_INIT_API(event, stream); if (event == nullptr) { - return hipErrorInvalidResourceHandle; + HIP_RETURN(hipErrorInvalidResourceHandle); } hip::Event* e = 
reinterpret_cast(event); @@ -148,29 +148,29 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { e->event_ = &command->event(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipEventSynchronize(hipEvent_t event) { HIP_INIT_API(event); if (event == nullptr) { - return hipErrorInvalidResourceHandle; + HIP_RETURN(hipErrorInvalidResourceHandle); } hip::Event* e = reinterpret_cast(event); if (e->event_ == nullptr) { - return hipErrorInvalidResourceHandle; + HIP_RETURN(hipErrorInvalidResourceHandle); } e->event_->awaitCompletion(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipEventQuery(hipEvent_t event) { HIP_INIT_API(event); - return ihipEventQuery(event); + HIP_RETURN(ihipEventQuery(event)); } diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index e82cb13a2a..a8a9c3eeb3 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -39,7 +39,7 @@ THE SOFTWARE. #define HIP_INIT_API(...) \ amd::Thread* thread = amd::Thread::current(); \ if (!CL_CHECK_THREAD(thread)) { \ - return hipErrorOutOfMemory; \ + HIP_RETURN(hipErrorOutOfMemory); \ } \ HIP_INIT(); @@ -51,6 +51,7 @@ class accelerator_view; namespace hip { extern std::once_flag g_ihipInitialized; extern thread_local amd::Context* g_context; + extern thread_local hipError_t g_lastError; extern void init(); @@ -64,5 +65,9 @@ extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); +#define HIP_RETURN(ret) \ + hip::g_lastError = ret; \ + return ret; \ + #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 24ed647acb..b800ffe713 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -151,14 +151,14 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, amd::HostQueue& qu hipError_t hipMalloc(void** ptr, size_t sizeBytes) { HIP_INIT_API(ptr, sizeBytes); - return ihipMalloc(ptr, sizeBytes, 
0); + HIP_RETURN(ihipMalloc(ptr, sizeBytes, 0)); } hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(ptr, sizeBytes, flags); if (ptr == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *ptr = nullptr; @@ -166,10 +166,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { // can't have both Coherent and NonCoherent flags set at the same time if ((flags & coherentFlags) == coherentFlags) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } - return ihipMalloc(ptr, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER | (flags << 16)); + HIP_RETURN(ihipMalloc(ptr, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER | (flags << 16))); } hipError_t hipFree(void* ptr) { @@ -177,9 +177,9 @@ hipError_t hipFree(void* ptr) { hip::syncStreams(); hip::getNullStream()->finish(); amd::SvmBuffer::free(*hip::getCurrentContext(), ptr); - return hipSuccess; + HIP_RETURN(hipSuccess); } - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { @@ -187,7 +187,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); - return ihipMemcpy(dst, src, sizeBytes, kind, *queue); + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue)); } hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { @@ -203,7 +203,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - return ihipMemset(dst, value, sizeBytes, *queue, true); + HIP_RETURN(ihipMemset(dst, value, sizeBytes, *queue, true)); } hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { @@ -212,7 +212,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); - 
return ihipMemset(dst, value, sizeBytes, *queue); + HIP_RETURN(ihipMemset(dst, value, sizeBytes, *queue)); } hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { @@ -222,12 +222,12 @@ hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { amd::Memory* svmMem = getMemoryObject(ptr, offset); if (svmMem == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *size = svmMem->getSize(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipHostFree(void* ptr) { @@ -235,9 +235,9 @@ hipError_t hipHostFree(void* ptr) { if (amd::SvmBuffer::malloced(ptr)) { amd::SvmBuffer::free(*hip::getCurrentContext(), ptr); - return hipSuccess; + HIP_RETURN(hipSuccess); } - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } hipError_t hipFreeArray(hipArray* array) { @@ -245,9 +245,9 @@ hipError_t hipFreeArray(hipArray* array) { if (amd::SvmBuffer::malloced(array->data)) { amd::SvmBuffer::free(*hip::getCurrentContext(), array->data); - return hipSuccess; + HIP_RETURN(hipSuccess); } - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) { @@ -259,13 +259,13 @@ hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDevice amd::Memory* svmMem = getMemoryObject(ptr, offset); if (svmMem == nullptr) { - return hipErrorInvalidDevicePointer; + HIP_RETURN(hipErrorInvalidDevicePointer); } *pbase = svmMem->getSvmPtr(); *psize = svmMem->getSize(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipMemGetInfo(size_t* free, size_t* total) { @@ -274,17 +274,17 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) { size_t freeMemory[2]; amd::Device* device = hip::getCurrentContext()->devices()[0]; if(device == nullptr) { - return hipErrorInvalidDevice; + HIP_RETURN(hipErrorInvalidDevice); } if(!device->globalFreeMemory(freeMemory)) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *free = 
freeMemory[0] * Ki; *total = device->info().globalMemSize_; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth, @@ -325,7 +325,7 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height HIP_INIT_API(ptr, pitch, width, height); const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 }; - return ihipMallocPitch(ptr, pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D, &image_format); + HIP_RETURN(ihipMallocPitch(ptr, pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D, &image_format)); } hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { @@ -334,7 +334,7 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { size_t pitch = 0; if (pitchedDevPtr == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 }; @@ -348,7 +348,7 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { pitchedDevPtr->ysize = extent.height; } - return status; + HIP_RETURN(status); } hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { @@ -360,14 +360,14 @@ hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); - return ihipMemset(dst, value, sizeBytes, *queue); + HIP_RETURN(ihipMemset(dst, value, sizeBytes, *queue)); } hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { HIP_INIT_API(array, pAllocateArray); if (array[0]->width == 0) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *array = (hipArray*)malloc(sizeof(hipArray)); @@ -393,7 +393,7 @@ hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocat hipError_t status = ihipMallocPitch(ptr, &pitch, array[0]->width, array[0]->height, 1, CL_MEM_OBJECT_IMAGE2D, &image_format); - 
return status; + HIP_RETURN(status); } hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, @@ -401,7 +401,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, HIP_INIT_API(array, desc, width, height, flags); if (width == 0) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *array = (hipArray*)malloc(sizeof(hipArray)); @@ -436,7 +436,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, hipError_t status = ihipMallocPitch(ptr, &pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D, &image_format); - return status; + HIP_RETURN(status); } hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc* desc, @@ -475,7 +475,7 @@ hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc hipError_t status = ihipMallocPitch(ptr, &pitch, extent.width, extent.height, extent.depth, CL_MEM_OBJECT_IMAGE3D, &image_format); - return status; + HIP_RETURN(status); } hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { @@ -483,19 +483,19 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { if (flagsPtr == nullptr || hostPtr == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } size_t offset = 0; amd::Memory* svmMem = getMemoryObject(hostPtr, offset); if (svmMem == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *flagsPtr = svmMem->getMemFlags() >> 16; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { @@ -506,12 +506,12 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) if (!mem->create(hostPtr)) { mem->release(); - return hipErrorMemoryAllocation; + HIP_RETURN(hipErrorMemoryAllocation); } amd::MemObjMap::AddMemObj(hostPtr, mem); - return hipSuccess; + HIP_RETURN(hipSuccess); } else { - return ihipMalloc(&hostPtr, sizeBytes, flags); + 
HIP_RETURN(ihipMalloc(&hostPtr, sizeBytes, flags)); } } @@ -522,7 +522,7 @@ hipError_t hipHostUnregister(void* hostPtr) { hip::syncStreams(); hip::getNullStream()->finish(); amd::SvmBuffer::free(*hip::getCurrentContext(), hostPtr); - return hipSuccess; + HIP_RETURN(hipSuccess); } else { size_t offset = 0; amd::Memory* mem = getMemoryObject(hostPtr, offset); @@ -532,16 +532,16 @@ hipError_t hipHostUnregister(void* hostPtr) { hip::getNullStream()->finish(); amd::MemObjMap::RemoveMemObj(hostPtr); mem->release(); - return hipSuccess; + HIP_RETURN(hipSuccess); } } - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } // Deprecated function: hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { - return ihipMalloc(ptr, sizeBytes, flags); + HIP_RETURN(ihipMalloc(ptr, sizeBytes, flags)); }; @@ -551,7 +551,7 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, @@ -560,7 +560,7 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t count, @@ -569,7 +569,7 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_ assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t count, @@ -578,7 +578,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { @@ -587,7 +587,7 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, 
size_t sizeBytes) { hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); - return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, *queue); + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, *queue)); } hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { @@ -596,7 +596,7 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); - return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, *queue); + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, *queue)); } hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) { @@ -605,7 +605,7 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); - return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, *queue); + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, *queue)); } hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { @@ -614,7 +614,7 @@ hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); - return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToHost, *queue); + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToHost, *queue)); } hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, @@ -631,7 +631,7 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - return ihipMemcpy(dst, src, sizeBytes, kind, *queue, true); + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, 
*queue, true)); } @@ -649,8 +649,8 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, - *queue, true); + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, + *queue, true)); } hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, @@ -667,8 +667,8 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, - *queue, true); + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, + *queue, true)); } hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, @@ -685,8 +685,8 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, - *queue, true); + HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, + *queue, true)); } hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { @@ -694,7 +694,7 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, @@ -763,7 +763,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); - return ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, *queue); + HIP_RETURN(ihipMemcpy2D(dst, dpitch, src, spitch, width, height, 
kind, *queue)); } @@ -781,7 +781,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - return ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, *queue, true); + HIP_RETURN(ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, *queue, true)); } hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, @@ -789,7 +789,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con HIP_INIT_API(dst, wOffset, hOffset, src, spitch, width, height, kind); if (dst->data == nullptr) { - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hip::syncStreams(); @@ -816,7 +816,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con } if ((wOffset + width > (dpitch)) || width > spitch) { - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } // Create buffer rectangle info structure @@ -836,7 +836,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con if (!srcRect.create(sOrigin, region, spitch, src_slice_pitch) || !dstRect.create(dOrigin, region, dpitch, dst_slice_pitch)) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } amd::Command* command = nullptr; @@ -855,7 +855,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con void* pSrc = reinterpret_cast(reinterpret_cast(src) + y * spitch); memcpy(pDst, pSrc, width); } - return hipSuccess; + HIP_RETURN(hipSuccess); } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, *dstMemory->asBuffer(), dstStart, size, src, dstRect, srcRect); @@ -868,14 +868,14 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con } if (command == nullptr) { - return hipErrorOutOfMemory; + HIP_RETURN(hipErrorOutOfMemory); } command->enqueue(); 
command->awaitCompletion(); command->release(); - return hipSuccess; + HIP_RETURN(hipSuccess); } @@ -909,14 +909,14 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, } if (command == nullptr) { - return hipErrorOutOfMemory; + HIP_RETURN(hipErrorOutOfMemory); } command->enqueue(); command->awaitCompletion(); command->release(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, @@ -949,14 +949,14 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs } if (command == nullptr) { - return hipErrorOutOfMemory; + HIP_RETURN(hipErrorOutOfMemory); } command->enqueue(); command->awaitCompletion(); command->release(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) { @@ -986,14 +986,14 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo } if (command == nullptr) { - return hipErrorOutOfMemory; + HIP_RETURN(hipErrorOutOfMemory); } command->enqueue(); command->awaitCompletion(); command->release(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count) { @@ -1023,14 +1023,14 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t } if (command == nullptr) { - return hipErrorOutOfMemory; + HIP_RETURN(hipErrorOutOfMemory); } command->enqueue(); command->awaitCompletion(); command->release(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { @@ -1108,7 +1108,7 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { if (!srcRect.create(srcOrigin, region, srcPitchInBytes, src_slice_pitch) || !dstRect.create(dstOrigin, region, dstPitchInbytes, dst_slice_pitch)) { - return hipErrorInvalidValue; + 
HIP_RETURN(hipErrorInvalidValue); } hipMemcpyKind kind = p->kind; @@ -1123,7 +1123,7 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { if (((srcMemory == nullptr) && (dstMemory == nullptr)) || (kind == hipMemcpyHostToHost)) { memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]); - return hipSuccess; + HIP_RETURN(hipSuccess); } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, *dstMemory->asBuffer(), srcStart, size, srcPtr, srcRect, dstRect); @@ -1137,14 +1137,14 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { } if (command == nullptr) { - return hipErrorOutOfMemory; + HIP_RETURN(hipErrorOutOfMemory); } command->enqueue(); command->awaitCompletion(); command->release(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t ihipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height, @@ -1192,7 +1192,7 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); - return ihipMemset2D(dst, pitch, value, width, height, *queue); + HIP_RETURN(ihipMemset2D(dst, pitch, value, width, height, *queue)); } hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, @@ -1208,14 +1208,14 @@ hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - return ihipMemset2D(dst, pitch, value, width, height, *queue, true); + HIP_RETURN(ihipMemset2D(dst, pitch, value, width, height, *queue, true)); } hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); if (dst == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } hip::syncStreams(); @@ -1232,7 +1232,7 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes &value, sizeof(char), fillOffset, fillSize); if 
(command == nullptr) { - return hipErrorOutOfMemory; + HIP_RETURN(hipErrorOutOfMemory); } command->enqueue(); @@ -1243,7 +1243,7 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes memset(dst, value, sizeBytes); } - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) { @@ -1251,7 +1251,7 @@ hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags) { @@ -1259,7 +1259,7 @@ hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipIpcCloseMemHandle(void* devPtr) { @@ -1267,7 +1267,7 @@ hipError_t hipIpcCloseMemHandle(void* devPtr) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { @@ -1287,11 +1287,11 @@ hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsi amd::Memory* memObj = getMemoryObject(hostPointer, offset); if (!memObj) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } *devicePointer = reinterpret_cast(memObj->getDeviceMemory(*hip::getCurrentContext()->devices()[0])->virtualAddress() + offset); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { @@ -1325,5 +1325,5 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void attributes->allocationFlags = 0; } - return hipSuccess; + HIP_RETURN(hipSuccess); } diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 67c4f06c6c..9830a97dfb 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp 
@@ -56,18 +56,18 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname) HIP_INIT_API(module, fname); if (!fname) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } std::ifstream file{fname}; if (!file.is_open()) { - return hipErrorFileNotFound; + HIP_RETURN(hipErrorFileNotFound); } std::vector tmp{std::istreambuf_iterator{file}, std::istreambuf_iterator{}}; - return ihipModuleLoadData(module, tmp.data()); + HIP_RETURN(ihipModuleLoadData(module, tmp.data())); } @@ -76,21 +76,21 @@ hipError_t hipModuleUnload(hipModule_t hmod) HIP_INIT_API(hmod); if (hmod == nullptr) { - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } amd::Program* program = as_amd(reinterpret_cast(hmod)); program->release(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipModuleLoadData(hipModule_t *module, const void *image) { HIP_INIT_API(module, image); - return ihipModuleLoadData(module, image); + HIP_RETURN(ihipModuleLoadData(module, image)); } hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) @@ -118,24 +118,24 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const ch const amd::Symbol* symbol = program->findSymbol(name); if (!symbol) { - return hipErrorNotFound; + HIP_RETURN(hipErrorNotFound); } amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name); if (!kernel) { - return hipErrorOutOfMemory; + HIP_RETURN(hipErrorOutOfMemory); } *hfunc = reinterpret_cast(as_cl(kernel)); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { HIP_INIT_API(attr, func); - return hipErrorInvalidDeviceFunction; + HIP_RETURN(hipErrorInvalidDeviceFunction); } @@ -242,9 +242,9 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra) { - return ihipModuleLaunchKernel(f, gridDimX * blockDimX, gridDimY * blockDimY, gridDimZ * blockDimZ, + 
HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX * blockDimX, gridDimY * blockDimY, gridDimZ * blockDimZ, blockDimX, blockDimY, blockDimZ, - sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr); + sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr)); } hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, @@ -255,8 +255,8 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, hipEvent_t startEvent, hipEvent_t stopEvent) { - return ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, - sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent); + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); } hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX, @@ -267,8 +267,8 @@ hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX, hipEvent_t startEvent, hipEvent_t stopEvent) { - return ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, - sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent); + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); } diff --git a/api/hip/hip_peer.cpp b/api/hip/hip_peer.cpp index ad552e94b4..73919a09ea 100644 --- a/api/hip/hip_peer.cpp +++ b/api/hip/hip_peer.cpp @@ -29,7 +29,7 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx, @@ -38,7 +38,7 @@ hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t s assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } 
hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice, @@ -47,7 +47,7 @@ hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hi assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId) { @@ -55,7 +55,7 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDevi assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) { @@ -63,7 +63,7 @@ hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) { @@ -71,7 +71,7 @@ hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, @@ -80,7 +80,7 @@ hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevic assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, @@ -89,7 +89,7 @@ hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int src assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { @@ -97,7 +97,7 @@ hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { @@ -105,5 +105,5 @@ hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { assert(0 && 
"Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } \ No newline at end of file diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index a6bb4e290c..2adf74f7ce 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -252,7 +252,7 @@ extern "C" hipError_t hipConfigureCall( PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); - return hipSuccess; + HIP_RETURN(hipSuccess); } extern "C" hipError_t hipSetupArgument( @@ -264,7 +264,7 @@ extern "C" hipError_t hipSetupArgument( PlatformState::instance().setupArgument(arg, size, offset); - return hipSuccess; + HIP_RETURN(hipSuccess); } extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) @@ -272,8 +272,9 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) HIP_INIT_API(hostFunction); hipFunction_t func = PlatformState::instance().getFunc(hostFunction); - if (func == nullptr) - return hipErrorUnknown; + if (func == nullptr) { + HIP_RETURN(hipErrorUnknown); + } ihipExec_t exec; PlatformState::instance().popExec(exec); @@ -284,10 +285,10 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) HIP_LAUNCH_PARAM_END }; - return hipModuleLaunchKernel(func, + HIP_RETURN(hipModuleLaunchKernel(func, exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z, exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z, - exec.sharedMem_, exec.hStream_, nullptr, extra); + exec.sharedMem_, exec.hStream_, nullptr, extra)); } #if defined(ATI_OS_LINUX) @@ -514,7 +515,7 @@ static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; } static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); } -// half � float, the f16 is in the low 16 bits of the input argument �a� +// half float, the f16 is in the low 16 bits of the input argument static inline float 
__convert_half_to_float(std::uint32_t a) noexcept { std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U; std::uint32_t v = f32_as_u32(u32_as_f32(u) * 0x1.0p+112f) + 0x38000000U; @@ -522,7 +523,7 @@ static inline float __convert_half_to_float(std::uint32_t a) noexcept { return u32_as_f32(u) * 0x1.0p-112f; } -// float � half with nearest even rounding +// float half with nearest even rounding // The lower 16 bits of the result is the bit pattern for the f16 static inline std::uint32_t __convert_float_to_half(float a) noexcept { std::uint32_t u = f32_as_u32(a); diff --git a/api/hip/hip_profile.cpp b/api/hip/hip_profile.cpp index d53d7ffd46..8b3d42cbbc 100644 --- a/api/hip/hip_profile.cpp +++ b/api/hip/hip_profile.cpp @@ -29,7 +29,7 @@ hipError_t hipProfilerStart() { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } @@ -38,5 +38,5 @@ hipError_t hipProfilerStop() { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } \ No newline at end of file diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index c48eca87ae..a7221b0249 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -68,13 +68,13 @@ static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int fl hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { HIP_INIT_API(stream, flags); - return ihipStreamCreateWithFlags(stream, flags); + HIP_RETURN(ihipStreamCreateWithFlags(stream, flags)); } hipError_t hipStreamCreate(hipStream_t *stream) { HIP_INIT_API(stream); - return ihipStreamCreateWithFlags(stream, hipStreamDefault); + HIP_RETURN(ihipStreamCreateWithFlags(stream, hipStreamDefault)); } hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { @@ -86,10 +86,10 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { if(flags != nullptr) { *flags = (it == streamSet.end()) ? 
hipStreamNonBlocking : hipStreamDefault; } else { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipStreamSynchronize(hipStream_t stream) { @@ -108,19 +108,19 @@ hipError_t hipStreamSynchronize(hipStream_t stream) { } if (hostQueue == nullptr) { - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hostQueue->finish(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipStreamDestroy(hipStream_t stream) { HIP_INIT_API(stream); if (stream == nullptr) { - return hipErrorInvalidResourceHandle; + HIP_RETURN(hipErrorInvalidResourceHandle); } amd::ScopedLock lock(streamSetLock); @@ -129,14 +129,14 @@ hipError_t hipStreamDestroy(hipStream_t stream) { hostQueue->release(); streamSet.erase(hostQueue); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_API(stream, event, flags); if (stream == nullptr || event == nullptr) { - return hipErrorInvalidResourceHandle; + HIP_RETURN(hipErrorInvalidResourceHandle); } amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); @@ -146,17 +146,17 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int amd::Command::EventWaitList eventWaitList; cl_int err = amd::clSetEventWaitList(eventWaitList, *hostQueue, 1, &clEvent); if (err != CL_SUCCESS) { - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } amd::Command* command = new amd::Marker(*hostQueue, true, eventWaitList); if (command == NULL) { - return hipErrorOutOfMemory; + HIP_RETURN(hipErrorOutOfMemory); } command->enqueue(); command->release(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipStreamQuery(hipStream_t stream) { @@ -164,7 +164,7 @@ hipError_t hipStreamQuery(hipStream_t stream) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipStreamAddCallback(hipStream_t 
stream, hipStreamCallback_t callback, void* userData, @@ -173,7 +173,7 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } diff --git a/api/hip/hip_surface.cpp b/api/hip/hip_surface.cpp index ecbd9e60b9..8cf7e8f35d 100644 --- a/api/hip/hip_surface.cpp +++ b/api/hip/hip_surface.cpp @@ -36,7 +36,7 @@ hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } @@ -45,5 +45,5 @@ hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } \ No newline at end of file diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index f8bdbe1c89..b3b5c16c10 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -145,7 +145,7 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou amd::Device* device = hip::getCurrentContext()->devices()[0]; if (!device->info().imageSupport_) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } amd::Image* image = nullptr; @@ -203,11 +203,11 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou pResDesc->res.pitch2D.width, pResDesc->res.pitch2D.height, 1, pResDesc->res.pitch2D.pitchInBytes, 0); break; - default: return hipErrorInvalidValue; + default: HIP_RETURN(hipErrorInvalidValue); } *pTexObject = reinterpret_cast(as_cl(image)); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { @@ -215,7 +215,7 @@ hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { as_amd(reinterpret_cast(textureObject))->release(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, @@ -224,7 +224,7 @@ 
hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, @@ -233,7 +233,7 @@ hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, @@ -242,7 +242,7 @@ hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t ihipBindTexture(cl_mem_object_type type, @@ -282,14 +282,14 @@ hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* dev HIP_INIT_API(offset, tex, devPtr, desc, size); if (desc == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } cl_image_format image_format; getChannelOrderAndType(*desc, hipReadModeElementType, &image_format.image_channel_order, &image_format.image_channel_data_type); const amd::Image::Format imageFormat(image_format); - return ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, size / imageFormat.getElementSize(), 1, size); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, size / imageFormat.getElementSize(), 1, size)); } hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, @@ -297,7 +297,7 @@ hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* d size_t pitch) { HIP_INIT_API(offset, tex, devPtr, desc, width, height, pitch); - return ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, desc, width, height, pitch); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, desc, width, height, pitch)); } hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, @@ -306,7 +306,7 
@@ hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t ihipBindTextureToArrayImpl(int dim, enum hipTextureReadMode readMode, @@ -325,7 +325,7 @@ hipError_t hipBindTextureToMipmappedArray(textureReference* tex, assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipUnbindTexture(const textureReference* tex) { @@ -333,7 +333,7 @@ hipError_t hipUnbindTexture(const textureReference* tex) { as_amd(reinterpret_cast(tex->textureObject))->release(); - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) { @@ -341,7 +341,7 @@ hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex) { @@ -349,7 +349,7 @@ hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipGetTextureReference(const textureReference** tex, const void* symbol) { @@ -357,56 +357,56 @@ hipError_t hipGetTextureReference(const textureReference** tex, const void* symb assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents) { HIP_INIT_API(tex, fmt, NumPackedComponents); if (tex == nullptr) { - return hipErrorInvalidImage; + HIP_RETURN(hipErrorInvalidImage); } tex->format = fmt; tex->numChannels = NumPackedComponents; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags) { HIP_INIT_API(tex, flags); if (tex == nullptr) { - return hipErrorInvalidImage; + 
HIP_RETURN(hipErrorInvalidImage); } tex->normalized = flags; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm) { HIP_INIT_API(tex, fm); if (tex == nullptr) { - return hipErrorInvalidImage; + HIP_RETURN(hipErrorInvalidImage); } tex->filterMode = fm; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am) { HIP_INIT_API(tex, dim, am); if (tex == nullptr) { - return hipErrorInvalidImage; + HIP_RETURN(hipErrorInvalidImage); } tex->addressMode[dim] = am; - return hipSuccess; + HIP_RETURN(hipSuccess); } hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags) { @@ -414,7 +414,7 @@ hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsi assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr, @@ -422,7 +422,7 @@ hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDevicep HIP_INIT_API(offset, tex, devPtr, size); if (tex == nullptr) { - return hipErrorInvalidImage; + HIP_RETURN(hipErrorInvalidImage); } cl_image_format image_format; @@ -430,7 +430,7 @@ hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDevicep &image_format.image_channel_order, &image_format.image_channel_data_type); const amd::Image::Format imageFormat(image_format); - return ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, nullptr, size / imageFormat.getElementSize(), 1, size); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, nullptr, size / imageFormat.getElementSize(), 1, size)); } hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, @@ -438,9 +438,9 @@ hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPT 
HIP_INIT_API(tex, desc, devPtr, pitch); if (desc == nullptr) { - return hipErrorInvalidValue; + HIP_RETURN(hipErrorInvalidValue); } size_t offset; - return ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, nullptr, desc->width, desc->height, pitch); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, nullptr, desc->width, desc->height, pitch)); } From c998b3c3e279d36777e37628ea51f1404dde2e06 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 15 Aug 2018 14:13:08 -0400 Subject: [PATCH 088/282] P4 to Git Change 1594075 by skudchad@skudchad-lnx on 2018/08/15 14:00:33 SWDEV-145570 - [HIP] Fix some test failures Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#12 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#14 edit --- api/hip/hip_device_runtime.cpp | 4 +++- api/hip/hip_internal.hpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index 87a149a10a..f5e75d316b 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -155,7 +155,9 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) //FIXME: should we cache the props, or just select from deviceHandle->info_? 
hipDeviceProp_t prop = {0}; hipError_t err = hipGetDeviceProperties(&prop, device); - if (err != hipSuccess) HIP_RETURN(err); + if (err != hipSuccess) { + HIP_RETURN(err); + } switch (attr) { case hipDeviceAttributeMaxThreadsPerBlock: diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index a8a9c3eeb3..472051a379 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -67,7 +67,7 @@ extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); #define HIP_RETURN(ret) \ hip::g_lastError = ret; \ - return ret; \ + return hip::g_lastError; \ #endif // HIP_SRC_HIP_INTERNAL_H From 7f80b1c6bcbc78ee1c1cbb7ef24050d5caf25bf4 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 17 Aug 2018 14:28:54 -0400 Subject: [PATCH 089/282] P4 to Git Change 1595124 by skudchad@skudchad_test2_win_opencl on 2018/08/17 14:05:25 SWDEV-145570 - [HIP] Implement environment variables and subsequent changes for HIP. This gets hipEnvVar passing ReviewBoardURL = http://ocltc.amd.com/reviews/r/15641/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#224 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#310 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#104 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#95 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#297 edit --- api/hip/hip_device.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 30ad2f3af1..4da7e42f4c 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -96,8 +96,10 @@ hipError_t ihipDeviceGetCount(int* count) { return hipErrorInvalidValue; } + auto* deviceHandle = g_devices[0]->devices()[0]; + // Get all available devices - *count = g_devices.size(); + *count = deviceHandle->isOrdinalValid() ? 
g_devices.size() : 0; return hipSuccess; } From 095572b7d5a9a0e22133532a240a13ba36a5be76 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 20 Aug 2018 18:48:00 -0400 Subject: [PATCH 090/282] P4 to Git Change 1596023 by skudchad@skudchad_test2_win_opencl on 2018/08/20 18:40:38 SWDEV-145570 - [HIP] Fixes to HIP env var logic ReviewBoardURL = http://ocltc.amd.com/reviews/r/15653/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#15 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#18 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#225 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#311 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#105 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#96 edit --- api/hip/hip_device.cpp | 4 +--- api/hip/hip_internal.hpp | 7 +++---- api/hip/hip_platform.cpp | 3 +++ 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 4da7e42f4c..30ad2f3af1 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -96,10 +96,8 @@ hipError_t ihipDeviceGetCount(int* count) { return hipErrorInvalidValue; } - auto* deviceHandle = g_devices[0]->devices()[0]; - // Get all available devices - *count = deviceHandle->isOrdinalValid() ? g_devices.size() : 0; + *count = g_devices.size(); return hipSuccess; } diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 472051a379..325f0b7cd3 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -29,10 +29,9 @@ THE SOFTWARE. 
#include #define HIP_INIT() \ - std::call_once(hip::g_ihipInitialized, hip::init); \ - assert(g_devices.size() > 0); \ - if (hip::g_context == nullptr) { \ - hip::g_context = g_devices[0]; \ + std::call_once(hip::g_ihipInitialized, hip::init); \ + if (hip::g_context == nullptr && g_devices.size() > 0) { \ + hip::g_context = g_devices[0]; \ } // This macro should be called at the beginning of every HIP API. diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 2adf74f7ce..ba0d50865d 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -59,6 +59,9 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data) { HIP_INIT(); + if(g_devices.empty()) { + return nullptr; + } const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { return nullptr; From 28ce8a8a89ef4cc24d39e7b393bcc972bef8486a Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 21 Aug 2018 18:29:16 -0400 Subject: [PATCH 091/282] P4 to Git Change 1596634 by cpaquot@cpaquot-ocl-lc-lnx on 2018/08/21 18:07:55 SWDEV-145570 - [HIP] Implemented hipSurfaceCreate/Destroy APIs Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_surface.cpp#3 edit --- api/hip/hip_surface.cpp | 51 +++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/api/hip/hip_surface.cpp b/api/hip/hip_surface.cpp index 8cf7e8f35d..2c1f2b9132 100644 --- a/api/hip/hip_surface.cpp +++ b/api/hip/hip_surface.cpp @@ -25,25 +25,62 @@ THE SOFTWARE. 
#include "hip_internal.hpp" #include +namespace hip { + +static amd::Monitor surfaceLock("Guards surface objects"); + struct hipSurface { - hipArray* array; - hipResourceDesc resDesc; + hipSurface(const hipResourceDesc* pResDesc): array(0) + { + memcpy((void*)&resDesc, (void*)pResDesc, sizeof(hipResourceDesc)); + } + + hipArray* array; + hipResourceDesc resDesc; }; +static std::map surfaceHash; + +}; + +using namespace hip; + hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc) { HIP_INIT_API(pSurfObject, pResDesc); - assert(0 && "Unimplemented"); + hipSurface* pSurface = new hipSurface(pResDesc); + assert(pSurface != nullptr); - HIP_RETURN(hipErrorUnknown); + switch (pResDesc->resType) { + case hipResourceTypeArray: + pSurface->array = pResDesc->res.array.array; + break; + default: + break; + } + unsigned int* surfObj; + hipMalloc((void**)&surfObj, sizeof(hipArray)); + hipMemcpy(surfObj, (void*)pResDesc->res.array.array, sizeof(hipArray), + hipMemcpyHostToDevice); + *pSurfObject = (hipSurfaceObject_t)surfObj; + + amd::ScopedLock lock(surfaceLock); + surfaceHash[*pSurfObject] = pSurface; + + HIP_RETURN(hipSuccess); } hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { HIP_INIT_API(surfaceObject); - assert(0 && "Unimplemented"); + amd::ScopedLock lock(surfaceLock); + hipSurface* pSurface = surfaceHash[surfaceObject]; + if (pSurface != nullptr) { + delete pSurface; + surfaceHash.erase(surfaceObject); + } - HIP_RETURN(hipErrorUnknown); -} \ No newline at end of file + HIP_RETURN(hipSuccess); +} From e52fbca4a14068e723d400722aa0a84cd09c2774 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 22 Aug 2018 18:42:22 -0400 Subject: [PATCH 092/282] P4 to Git Change 1597160 by cpaquot@cpaquot-ocl-lc-lnx on 2018/08/22 18:31:15 SWDEV-145570 - [HIP] Implemented hipSurfaceCreate/Destroy APIs. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_surface.cpp#4 edit --- api/hip/hip_surface.cpp | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/api/hip/hip_surface.cpp b/api/hip/hip_surface.cpp index 2c1f2b9132..2323209cae 100644 --- a/api/hip/hip_surface.cpp +++ b/api/hip/hip_surface.cpp @@ -30,16 +30,16 @@ namespace hip { static amd::Monitor surfaceLock("Guards surface objects"); struct hipSurface { - hipSurface(const hipResourceDesc* pResDesc): array(0) + hipSurface(const hipResourceDesc* pResDesc): array(nullptr) { - memcpy((void*)&resDesc, (void*)pResDesc, sizeof(hipResourceDesc)); + memcpy(&resDesc, pResDesc, sizeof(hipResourceDesc)); } hipArray* array; hipResourceDesc resDesc; }; -static std::map surfaceHash; +static std::unordered_map surfaceHash; }; @@ -59,11 +59,20 @@ hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, default: break; } - unsigned int* surfObj; - hipMalloc((void**)&surfObj, sizeof(hipArray)); - hipMemcpy(surfObj, (void*)pResDesc->res.array.array, sizeof(hipArray), + hipSurfaceObject_t surfObj; + hipError_t err = hipMalloc(reinterpret_cast(&surfObj), sizeof(hipArray)); + if (err != hipSuccess) { + delete pSurface; + HIP_RETURN(hipErrorOutOfMemory); + } + err = hipMemcpy(reinterpret_cast(surfObj), reinterpret_cast(pResDesc->res.array.array), sizeof(hipArray), hipMemcpyHostToDevice); - *pSurfObject = (hipSurfaceObject_t)surfObj; + if (err != hipSuccess) { + delete pSurface; + hipFree(reinterpret_cast(surfObj)); + HIP_RETURN(err); + } + *pSurfObject = surfObj; amd::ScopedLock lock(surfaceLock); surfaceHash[*pSurfObject] = pSurface; @@ -80,7 +89,8 @@ hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { if (pSurface != nullptr) { delete pSurface; surfaceHash.erase(surfaceObject); + HIP_RETURN(hipFree(reinterpret_cast(surfaceObject))); } - HIP_RETURN(hipSuccess); + HIP_RETURN(hipErrorUnknown); } From 938481928a1bd5d6c5f672874854b24564f43480 Mon Sep 17 00:00:00 2001 From: 
foreman Date: Mon, 27 Aug 2018 18:46:34 -0400 Subject: [PATCH 093/282] P4 to Git Change 1598666 by cpaquot@cpaquot-ocl-lc-lnx on 2018/08/27 18:36:44 SWDEV-145570 - [HIP] - Multithreading issues Add a lock per function so kernel parameters don't get overwritten Make execStack_ thread local and remove global lock use for it: The compiler uses the same thread to set it up and launch the function Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#18 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#19 edit --- api/hip/hip_internal.hpp | 11 +++++++++++ api/hip/hip_module.cpp | 8 ++++++-- api/hip/hip_platform.cpp | 34 +++++++++++++++++++--------------- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 325f0b7cd3..a59ec04649 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -59,6 +59,17 @@ namespace hip { extern amd::HostQueue* getNullStream(); extern void syncStreams(); + + + struct Function { + amd::Kernel* function_; + amd::Monitor lock_; + + Function(amd::Kernel* f) : function_(f), lock_("function lock") {} + hipFunction_t asHipFunction() { return reinterpret_cast(this); } + + static Function* asFunction(hipFunction_t f) { return reinterpret_cast(f); } + }; }; extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 9830a97dfb..389119c128 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -126,7 +126,8 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const ch HIP_RETURN(hipErrorOutOfMemory); } - *hfunc = reinterpret_cast(as_cl(kernel)); + hip::Function* f = new hip::Function(kernel); + *hfunc = f->asHipFunction(); HIP_RETURN(hipSuccess); } @@ -151,9 +152,12 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, sharedMemBytes, 
hStream, kernelParams, extra, startEvent, stopEvent); - amd::Kernel* kernel = as_amd(reinterpret_cast(f)); + hip::Function* function = hip::Function::asFunction(f); + amd::Kernel* kernel = function->function_; amd::Device* device = hip::getCurrentContext()->devices()[0]; + amd::ScopedLock lock(function->lock_); + hip::Event* eStart = reinterpret_cast(startEvent); hip::Event* eStop = reinterpret_cast(stopEvent); amd::HostQueue* queue; diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index ba0d50865d..c4c2e7d1cb 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -110,11 +110,12 @@ struct ihipExec_t { std::vector arguments_; }; +thread_local std::stack execStack_; + class PlatformState { amd::Monitor lock_; - - std::stack execStack_; - std::map functions_; +private: + std::unordered_map functions_; struct RegisteredVar { char* var; @@ -124,11 +125,16 @@ class PlatformState { bool constant; }; - std::map vars_; + std::unordered_map vars_; static PlatformState* platform_; PlatformState() : lock_("Guards global function map") {} + ~PlatformState() { + for (const auto it : functions_) { + delete it.second; + } + } public: static PlatformState& instance() { return *platform_; @@ -147,13 +153,14 @@ public: vars_.insert(std::make_pair(modules, rvar)); } - void registerFunction(const void* hostFunction, hipFunction_t func) { + void registerFunction(const void* hostFunction, amd::Kernel* func) { amd::ScopedLock lock(lock_); - functions_.insert(std::make_pair(hostFunction, func)); + hip::Function* f = new hip::Function(func); + functions_.insert(std::make_pair(hostFunction, f)); } - hipFunction_t getFunc(const void* hostFunction) { + hip::Function* getFunc(const void* hostFunction) { amd::ScopedLock lock(lock_); const auto it = functions_.find(hostFunction); if (it != functions_.cend()) { @@ -166,8 +173,6 @@ public: void setupArgument(const void *arg, size_t size, size_t offset) { - amd::ScopedLock lock(lock_); - auto& arguments = 
execStack_.top().arguments_; if (arguments.size() < offset + size) { @@ -181,12 +186,10 @@ public: dim3 blockDim, size_t sharedMem, hipStream_t stream) { - amd::ScopedLock lock(lock_); execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); } void popExec(ihipExec_t& exec) { - amd::ScopedLock lock(lock_); exec = std::move(execStack_.top()); execStack_.pop(); } @@ -215,7 +218,7 @@ extern "C" void __hipRegisterFunction( amd::Kernel* kernel = new amd::Kernel(*program, *symbol, deviceName); if (!kernel) return; - PlatformState::instance().registerFunction(hostFunction, reinterpret_cast(as_cl(kernel))); + PlatformState::instance().registerFunction(hostFunction, kernel); } // Registers a device-side global variable. @@ -274,7 +277,7 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) { HIP_INIT_API(hostFunction); - hipFunction_t func = PlatformState::instance().getFunc(hostFunction); + hip::Function* func = PlatformState::instance().getFunc(hostFunction); if (func == nullptr) { HIP_RETURN(hipErrorUnknown); } @@ -282,13 +285,14 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) ihipExec_t exec; PlatformState::instance().popExec(exec); + size_t size = exec.arguments_.size(); void *extra[] = { HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0], - HIP_LAUNCH_PARAM_BUFFER_SIZE, 0 /* FIXME: not needed, but should be correct*/, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END }; - HIP_RETURN(hipModuleLaunchKernel(func, + HIP_RETURN(hipModuleLaunchKernel(func->asHipFunction(), exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z, exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z, exec.sharedMem_, exec.hStream_, nullptr, extra)); From 0d2b874266d81c9e808449c60a84316a55e953de Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 30 Aug 2018 12:17:57 -0400 Subject: [PATCH 094/282] P4 to Git Change 1600056 by skudchad@skudchad_test2_win_opencl on 2018/08/30 12:04:10 SWDEV-145570 - [HIP] Fix elapsed time calc for hipEvent 
ReviewBoardURL = http://ocltc.amd.com/reviews/r/15717/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#6 edit --- api/hip/hip_event.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 197eac0579..d01273cdb5 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -119,7 +119,8 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { HIP_RETURN(hipErrorInvalidValue); } - *ms = static_cast(eStop->event_->profilingInfo().submitted_ - eStart->event_->profilingInfo().submitted_)/1000000.f; + *ms = static_cast(static_cast(eStop->event_->profilingInfo().submitted_ - + eStart->event_->profilingInfo().submitted_))/1000000.f; HIP_RETURN(hipSuccess); } From 2a9e26c772fed48d9b7bfbb9dfb17d55745b33a5 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 6 Sep 2018 15:21:28 -0400 Subject: [PATCH 095/282] P4 to Git Change 1602601 by skudchad@skudchad_test2_win_opencl on 2018/09/06 15:01:00 SWDEV-145570 - [HIP] Implement hipStreamAddCallback and hipStreamQuery ReviewBoardURL = http://ocltc.amd.com/reviews/r/15749/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.hpp#19 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/utils/concurrent.hpp#9 edit --- api/hip/hip_stream.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index a7221b0249..d7cd42bb51 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -65,6 +65,15 @@ static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int fl return hipSuccess; } + +void ihipStreamCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData) { + //Stream synchronize + hipError_t status = hipStreamSynchronize(stream); + + // Call the callback function + callback(stream, status, userData); +} + hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { HIP_INIT_API(stream, flags); @@ -162,18 +171,22 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int hipError_t hipStreamQuery(hipStream_t stream) { HIP_INIT_API(stream); - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); + amd::HostQueue* hostQueue; + if (stream == nullptr) { + hostQueue = hip::getNullStream(); + } else { + hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); + } + HIP_RETURN(hostQueue->isEmpty() ? hipSuccess : hipErrorNotReady); } hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, unsigned int flags) { HIP_INIT_API(stream, callback, userData, flags); - assert(0 && "Unimplemented"); + std::thread (ihipStreamCallback, stream, callback, userData).detach(); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipSuccess); } From 372a9d0a553d06326aefbb6e41157230e189a4d0 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 10 Sep 2018 20:17:35 -0400 Subject: [PATCH 096/282] P4 to Git Change 1603869 by skudchad@skudchad-lnx on 2018/09/10 20:04:11 SWDEV-1 - Fix to build HIP with gcc-5.4 Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#17 edit --- api/hip/hip_internal.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index a59ec04649..3897b599bf 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -27,6 +27,7 @@ THE SOFTWARE. #include #include #include +#include #define HIP_INIT() \ std::call_once(hip::g_ihipInitialized, hip::init); \ From 18f041c1ff7cf2aa08f419f78f6f411243f09ab8 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 11 Sep 2018 12:26:34 -0400 Subject: [PATCH 097/282] P4 to Git Change 1604214 by cpaquot@cpaquot-ocl-lc-lnx on 2018/09/11 12:12:23 SWDEV-145570 - [HIP] Added empty hipModuleGetGlobal function. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#19 edit --- api/hip/hip_module.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 389119c128..592b41f9b9 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -132,6 +132,14 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const ch HIP_RETURN(hipSuccess); } + +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name) +{ + HIP_INIT_API(dptr, bytes, hmod, name); + + HIP_RETURN(hipSuccess); +} + hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { HIP_INIT_API(attr, func); From b77cd5157983b0619cc1d4f895823e910960e4d0 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 11 Sep 2018 16:08:44 -0400 Subject: [PATCH 098/282] P4 to Git Change 1604364 by cpaquot@cpaquot-ocl-lc-lnx on 2018/09/11 15:43:44 SWDEV-145570 - [HIP] Added hipCtxGetSharedMemConfig Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#14 edit --- api/hip/hip_context.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 0ac257df83..a4971d50d4 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -131,6 +131,14 @@ hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { HIP_RETURN(hipSuccess); } +hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { + HIP_INIT_API(pConfig); + + *pConfig = hipSharedMemBankSizeFourByte; + + HIP_RETURN(hipSuccess); +} + hipError_t hipRuntimeGetVersion(int *runtimeVersion) { HIP_INIT_API(runtimeVersion); From e29ba4d158f42735b1e1d143a61eb117915ee068 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 11 Sep 2018 18:34:50 -0400 Subject: [PATCH 099/282] P4 to Git Change 1604468 by skudchad@skudchad_test2_win_opencl on 2018/09/11 18:28:45 SWDEV-145570 - [HIP] - Assert functionality for error != hipSuccess with LOG_LEVEL=3 ReviewBoardURL = http://ocltc.amd.com/reviews/r/15793/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#18 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.hpp#8 edit --- api/hip/hip_internal.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 3897b599bf..ee046f4410 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -78,6 +78,7 @@ extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); #define HIP_RETURN(ret) \ hip::g_lastError = ret; \ + DebugInfoGuarantee(hip::g_lastError == hipSuccess); \ return hip::g_lastError; \ From 0a8d4ee78d9f0dbd1da42171b5769da644e9f2f3 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 25 Sep 2018 15:01:11 -0400 Subject: [PATCH 100/282] P4 to Git Change 1610499 by skudchad@skudchad-lnx on 2018/09/25 14:43:35 SWDEV-145570 - [HIP] - Map hipinfo sample from HIP mirror so that we have the same hipinfo Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_peer.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/tools/hipinfo/CMakeLists.txt#2 edit ... //depot/stg/opencl/drivers/opencl/tools/hipinfo/build/Makefile.hipinfo#2 edit ... //depot/stg/opencl/drivers/opencl/tools/hipinfo/hipinfo.cpp#2 delete --- api/hip/hip_peer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_peer.cpp b/api/hip/hip_peer.cpp index 73919a09ea..9c2da315ca 100644 --- a/api/hip/hip_peer.cpp +++ b/api/hip/hip_peer.cpp @@ -53,9 +53,9 @@ hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hi hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId) { HIP_INIT_API(canAccessPeer, deviceId, peerDeviceId); - assert(0 && "Unimplemented"); + *canAccessPeer = 0; - HIP_RETURN(hipErrorUnknown); + return hipErrorInvalidDevice; } hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) { From 133447a412996eb0d32cdb7d04cb8a3dbf6e5ec6 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 18 Oct 2018 13:41:32 -0400 Subject: [PATCH 101/282] P4 to Git Change 1620765 by skudchad@skudchad_test2_win_opencl on 2018/10/17 16:58:04 SWDEV-145570 - [HIP] Track last used event and use last enqueued command in a stream rather than creating a new event ReviewBoardURL = http://ocltc.amd.com/reviews/r/15996/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#90 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#28 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.hpp#20 edit --- api/hip/hip_context.cpp | 6 +++++- api/hip/hip_event.cpp | 5 ++--- api/hip/hip_stream.cpp | 7 ++++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index a4971d50d4..7d7fdb6743 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -69,7 +69,8 @@ amd::HostQueue* getNullStream() { auto stream = g_nullStreams.find(getCurrentContext()); if (stream == g_nullStreams.end()) { amd::Device* device = getCurrentContext()->devices()[0]; - amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, 0, + cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; + amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, properties, amd::CommandQueue::RealTimeDisabled, amd::CommandQueue::Priority::Normal); g_nullStreams[getCurrentContext()] = queue; @@ -159,6 +160,9 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) { HIP_RETURN(hipErrorInvalidValue); } + // Release last tracked command + hip::getNullStream()->setLastQueuedCommand(nullptr); + // Need to remove the ctx of calling thread if its the top one if (!g_ctxtStack.empty() && g_ctxtStack.top() == amdContext) { g_ctxtStack.pop(); diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index d01273cdb5..0bde2150f8 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -139,9 +139,8 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { } else { e->stream_ = as_amd(reinterpret_cast(stream))->asHostQueue(); } - amd::Command* command = (e->flags & hipEventDisableTiming)? 
new amd::Marker(*e->stream_, true) : - new hip::TimerMarker(*e->stream_); - command->enqueue(); + + amd::Command* command = e->stream_->getLastQueuedCommand(true); if (e->event_ != nullptr) { e->event_->release(); diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index d7cd42bb51..af00b0f104 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -43,7 +43,8 @@ void syncStreams() { static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { amd::Device* device = hip::getCurrentContext()->devices()[0]; - amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, 0, + cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; + amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, properties, amd::CommandQueue::RealTimeDisabled, amd::CommandQueue::Priority::Normal); @@ -135,6 +136,10 @@ hipError_t hipStreamDestroy(hipStream_t stream) { amd::ScopedLock lock(streamSetLock); amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); + + // Release last tracked command + hostQueue->setLastQueuedCommand(nullptr); + hostQueue->release(); streamSet.erase(hostQueue); From 6f0ef8f62802ba6553c7a36390afb1543b919fca Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 24 Oct 2018 18:00:42 -0400 Subject: [PATCH 102/282] P4 to Git Change 1623601 by skudchad@skudchad_test2_win_opencl on 2018/10/24 17:52:57 SWDEV-145570 - [HIP] Refactor hipStreamAddCallback ReviewBoardURL = http://ocltc.amd.com/reviews/r/16045/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#15 edit --- api/hip/hip_stream.cpp | 45 ++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index af00b0f104..7acd3d90dd 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -28,6 +28,20 @@ THE SOFTWARE. 
static amd::Monitor streamSetLock("Guards global stream set"); static std::unordered_set streamSet; +// Internal structure for stream callback handler +class StreamCallback { + public: + StreamCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + amd::Command* command) + : stream_(stream), callBack_(callback), + userData_(userData), command_(command) { + }; + hipStream_t stream_; + hipStreamCallback_t callBack_; + void* userData_; + amd::Command* command_; +}; + namespace hip { void syncStreams() { @@ -40,6 +54,15 @@ void syncStreams() { }; +void ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { + + hipError_t status = hipSuccess; + StreamCallback* cbo = reinterpret_cast(user_data); + cbo->callBack_(cbo->stream_, status, cbo->userData_); + cbo->command_->release(); + delete cbo; +} + static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { amd::Device* device = hip::getCurrentContext()->devices()[0]; @@ -66,15 +89,6 @@ static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int fl return hipSuccess; } - -void ihipStreamCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData) { - //Stream synchronize - hipError_t status = hipStreamSynchronize(stream); - - // Call the callback function - callback(stream, status, userData); -} - hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { HIP_INIT_API(stream, flags); @@ -189,7 +203,18 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback unsigned int flags) { HIP_INIT_API(stream, callback, userData, flags); - std::thread (ihipStreamCallback, stream, callback, userData).detach(); + amd::HostQueue* hostQueue = as_amd(reinterpret_cast + (stream))->asHostQueue(); + amd::Command* command = hostQueue->getLastQueuedCommand(true); + amd::Event& event = command->event(); + StreamCallback* cbo = new StreamCallback(stream, callback, userData, command); + + 
if(!event.setCallback(CL_COMPLETE, ihipStreamCallback, reinterpret_cast(cbo))) { + command->release(); + return hipErrorInvalidResourceHandle; + } + + event.notifyCmdQueue(); HIP_RETURN(hipSuccess); } From 26ce431eefe51b94610c1eeb4b7b2075881b40e1 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 13 Nov 2018 18:10:36 -0500 Subject: [PATCH 103/282] P4 to Git Change 1706503 by skudchad@skudchad_test2_win_opencl on 2018/11/13 18:06:02 SWDEV-145570 - [HIP] Skip empty bundles ReviewBoardURL = http://ocltc.amd.com/reviews/r/16140/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#20 edit --- api/hip/hip_platform.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index c4c2e7d1cb..da7f8beb62 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -445,6 +445,9 @@ const std::vector& modules() { nullptr); for (auto&& bundle : bundles) { + if (bundle.empty()) { + continue; + } std::string magic(&bundle[0], sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) continue; From eb406d2c918b21c230bd324c4dfbaf47593ac074 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 19 Nov 2018 15:46:09 -0500 Subject: [PATCH 104/282] P4 to Git Change 1709028 by skudchad@skudchad_test2_win_opencl on 2018/11/19 15:15:09 SWDEV-145570 - [HIP] Fix a bug in event tracking ReviewBoardURL = http://ocltc.amd.com/reviews/r/16182/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#8 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#29 edit --- api/hip/hip_event.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 0bde2150f8..317e0edc90 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -142,6 +142,11 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { amd::Command* command = e->stream_->getLastQueuedCommand(true); + if (command == nullptr) { + command = new amd::Marker(*e->stream_, true); + command->enqueue(); + } + if (e->event_ != nullptr) { e->event_->release(); } From 349a5e6d73dbe0133dbabd2ded233c418ee8fe66 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 10 Jan 2019 14:46:01 -0500 Subject: [PATCH 105/282] P4 to Git Change 1728676 by cpaquot@cpaquot-ocl-lc-lnx on 2019/01/10 14:29:52 SWDEV-145570 - [HIP] Add API tracing, enabled via LOG_LEVEL=3 Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#19 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/api/hip/trace_helper.h#1 add --- api/hip/hip_internal.hpp | 13 +++ api/hip/hip_memory.cpp | 5 +- api/hip/trace_helper.h | 228 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 api/hip/trace_helper.h diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index ee046f4410..27f7e33fc3 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -24,6 +24,8 @@ THE SOFTWARE. #define HIP_SRC_HIP_INTERNAL_H #include "cl_common.hpp" +#include "trace_helper.h" +#include "utils/debug.hpp" #include #include #include @@ -37,6 +39,7 @@ THE SOFTWARE. // This macro should be called at the beginning of every HIP API. #define HIP_INIT_API(...) 
\ + LogPrintfInfo("%s ( %s )", __func__, ToString( __VA_ARGS__ ).c_str()); \ amd::Thread* thread = amd::Thread::current(); \ if (!CL_CHECK_THREAD(thread)) { \ HIP_RETURN(hipErrorOutOfMemory); \ @@ -81,5 +84,15 @@ extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); DebugInfoGuarantee(hip::g_lastError == hipSuccess); \ return hip::g_lastError; \ +inline std::ostream& operator<<(std::ostream& os, const dim3& s) { + os << '{'; + os << s.x; + os << ','; + os << s.y; + os << ','; + os << s.z; + os << '}'; + return os; +} #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index b800ffe713..dcea3bd083 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -63,7 +63,7 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) } if (hip::getCurrentContext()->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { - return hipErrorOutOfMemory; + return hipErrorMemoryAllocation; } *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), flags, sizeBytes, hip::getCurrentContext()->devices()[0]->info().memBaseAddrAlign_); @@ -173,6 +173,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { } hipError_t hipFree(void* ptr) { + if (ptr == nullptr) { + HIP_RETURN(hipSuccess); + } if (amd::SvmBuffer::malloced(ptr)) { hip::syncStreams(); hip::getNullStream()->finish(); diff --git a/api/hip/trace_helper.h b/api/hip/trace_helper.h new file mode 100644 index 0000000000..4bb5202558 --- /dev/null +++ b/api/hip/trace_helper.h @@ -0,0 +1,228 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +//--- +// Helper functions to convert HIP function arguments into strings. +// Handles POD data types as well as enumerations (ie hipMemcpyKind). +// The implementation uses C++11 variadic templates and template specialization. +// The hipMemcpyKind example below is a good example that shows how to implement conversion for a +// new HSA type. 
+ + +// Handy macro to convert an enumeration to a stringified version of same: +#define CASE_STR(x) \ + case x: \ + return #x; + +inline const char* ihipErrorString(hipError_t hip_error) { + switch (hip_error) { + CASE_STR(hipSuccess); + CASE_STR(hipErrorOutOfMemory); + CASE_STR(hipErrorNotInitialized); + CASE_STR(hipErrorDeinitialized); + CASE_STR(hipErrorProfilerDisabled); + CASE_STR(hipErrorProfilerNotInitialized); + CASE_STR(hipErrorProfilerAlreadyStarted); + CASE_STR(hipErrorProfilerAlreadyStopped); + CASE_STR(hipErrorInvalidImage); + CASE_STR(hipErrorInvalidContext); + CASE_STR(hipErrorContextAlreadyCurrent); + CASE_STR(hipErrorMapFailed); + CASE_STR(hipErrorUnmapFailed); + CASE_STR(hipErrorArrayIsMapped); + CASE_STR(hipErrorAlreadyMapped); + CASE_STR(hipErrorNoBinaryForGpu); + CASE_STR(hipErrorAlreadyAcquired); + CASE_STR(hipErrorNotMapped); + CASE_STR(hipErrorNotMappedAsArray); + CASE_STR(hipErrorNotMappedAsPointer); + CASE_STR(hipErrorECCNotCorrectable); + CASE_STR(hipErrorUnsupportedLimit); + CASE_STR(hipErrorContextAlreadyInUse); + CASE_STR(hipErrorPeerAccessUnsupported); + CASE_STR(hipErrorInvalidKernelFile); + CASE_STR(hipErrorInvalidGraphicsContext); + CASE_STR(hipErrorInvalidSource); + CASE_STR(hipErrorFileNotFound); + CASE_STR(hipErrorSharedObjectSymbolNotFound); + CASE_STR(hipErrorSharedObjectInitFailed); + CASE_STR(hipErrorOperatingSystem); + CASE_STR(hipErrorSetOnActiveProcess); + CASE_STR(hipErrorInvalidHandle); + CASE_STR(hipErrorNotFound); + CASE_STR(hipErrorIllegalAddress); + CASE_STR(hipErrorMissingConfiguration); + CASE_STR(hipErrorMemoryAllocation); + CASE_STR(hipErrorInitializationError); + CASE_STR(hipErrorLaunchFailure); + CASE_STR(hipErrorPriorLaunchFailure); + CASE_STR(hipErrorLaunchTimeOut); + CASE_STR(hipErrorLaunchOutOfResources); + CASE_STR(hipErrorInvalidDeviceFunction); + CASE_STR(hipErrorInvalidConfiguration); + CASE_STR(hipErrorInvalidDevice); + CASE_STR(hipErrorInvalidValue); + CASE_STR(hipErrorInvalidDevicePointer); + 
CASE_STR(hipErrorInvalidMemcpyDirection); + CASE_STR(hipErrorUnknown); + CASE_STR(hipErrorInvalidResourceHandle); + CASE_STR(hipErrorNotReady); + CASE_STR(hipErrorNoDevice); + CASE_STR(hipErrorPeerAccessAlreadyEnabled); + CASE_STR(hipErrorPeerAccessNotEnabled); + CASE_STR(hipErrorRuntimeMemory); + CASE_STR(hipErrorRuntimeOther); + CASE_STR(hipErrorHostMemoryAlreadyRegistered); + CASE_STR(hipErrorHostMemoryNotRegistered); + CASE_STR(hipErrorTbd); + default: + return "hipErrorUnknown"; + }; +}; + +// Building block functions: +template +inline std::string ToHexString(T v) { + std::ostringstream ss; + ss << "0x" << std::hex << v; + return ss.str(); +}; + + +//--- +// Template overloads for ToString to handle specific types + +// This is the default which works for most types: +template +inline std::string ToString(T v) { + std::ostringstream ss; + ss << v; + return ss.str(); +}; + +template <> +inline std::string ToString(hipFunction_t v) { + std::ostringstream ss; + ss << "0x" << std::hex << static_cast(v); + return ss.str(); +}; + +// hipEvent_t specialization. TODO - maybe add an event ID for debug? 
+template <> +inline std::string ToString(hipEvent_t v) { + std::ostringstream ss; + ss << "event:" << std::hex << static_cast(v); + return ss.str(); +}; +// hipStream_t +template <> +inline std::string ToString(hipStream_t v) { + std::ostringstream ss; + if (v == NULL) { + ss << "stream:"; + } else { + ss << "stream:" << std::hex << static_cast(v); + } + + return ss.str(); +}; + +// hipCtx_t +template <> +inline std::string ToString(hipCtx_t v) { + std::ostringstream ss; + if (v == NULL) { + ss << "context:"; + } else { + ss << "context:" << std::hex << static_cast(v); + } + + return ss.str(); +}; + +// hipPitchedPtr +template <> +inline std::string ToString(hipPitchedPtr v) { + std::ostringstream ss; + ss << "pitchPtr:" << std::hex << static_cast(v.ptr); + return ss.str(); +}; + +// hipMemcpyKind specialization +template <> +inline std::string ToString(hipMemcpyKind v) { + switch (v) { + CASE_STR(hipMemcpyHostToHost); + CASE_STR(hipMemcpyHostToDevice); + CASE_STR(hipMemcpyDeviceToHost); + CASE_STR(hipMemcpyDeviceToDevice); + CASE_STR(hipMemcpyDefault); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipFuncCache_t v) { + switch (v) { + CASE_STR(hipFuncCachePreferNone); + CASE_STR(hipFuncCachePreferShared); + CASE_STR(hipFuncCachePreferL1); + CASE_STR(hipFuncCachePreferEqual); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipSharedMemConfig v) { + switch (v) { + CASE_STR(hipSharedMemBankSizeDefault); + CASE_STR(hipSharedMemBankSizeFourByte); + CASE_STR(hipSharedMemBankSizeEightByte); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipError_t v) { + return ihipErrorString(v); +}; + +// Catch empty arguments case +inline std::string ToString() { return (""); } + + +//--- +// C++11 variadic template - peels off first argument, converts to string, and calls itself again to +// peel the next arg. 
Strings are automatically separated by comma+space. +template +inline std::string ToString(T first, Args... args) { + return ToString(first) + ", " + ToString(args...); +} + From 3bef77c170ba04104f2b73e3ac7f63beb5461069 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 11 Jan 2019 22:45:33 -0500 Subject: [PATCH 106/282] P4 to Git Change 1729577 by lmoriche@lmoriche_opencl_lc on 2019/01/11 22:21:53 SWDEV-145570 - [HIP] Windows build fix Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/build/Makefile.hip#9 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#10 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#11 edit --- api/hip/hip_hcc.def.in | 5 ----- api/hip/hip_hcc.map.in | 3 --- 2 files changed, 8 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index f93517c67a..576f840695 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -48,7 +48,6 @@ hipEventRecord hipEventSynchronize hipFree hipFreeArray -hipFreeHost hipFuncSetCacheConfig hipGetDevice hipGetDeviceCount @@ -71,7 +70,6 @@ hipMalloc hipMalloc3D hipMalloc3DArray hipMallocArray -hipMallocHost hipMallocPitch hipMemcpy hipMemcpy2D @@ -109,7 +107,6 @@ hipModuleLaunchKernelExt hipHccModuleLaunchKernel hipModuleLoad hipModuleLoadData -hipModuleLoadDataEx hipModuleUnload hipFuncGetAttributes hipPeekAtLastError @@ -131,8 +128,6 @@ __hipRegisterFatBinary __hipRegisterFunction __hipRegisterVar __hipUnregisterFatBinary -__gnu_f2h_ieee -__gnu_h2f_ieee hipConfigureCall hipSetupArgument hipLaunchByPtr diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index c983f83430..8f033617be 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -49,7 +49,6 @@ global: hipEventSynchronize; hipFree; hipFreeArray; - hipFreeHost; hipFuncSetCacheConfig; hipGetDevice; hipGetDeviceCount; @@ -72,7 +71,6 @@ global: hipMalloc3D; hipMalloc3DArray; hipMallocArray; - hipMallocHost; hipMallocPitch; hipMemcpy; hipMemcpy2D; @@ -109,7 +107,6 
@@ global: hipModuleLaunchKernelExt; hipModuleLoad; hipModuleLoadData; - hipModuleLoadDataEx; hipModuleUnload; hipFuncGetAttributes; hipPeekAtLastError; From 83792452247cd218a30f8c7ba55396d9e42cd891 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 25 Jan 2019 19:18:53 -0500 Subject: [PATCH 107/282] P4 to Git Change 1736033 by kjayapra@9_HIPWS_IPCCHKIN on 2019/01/25 17:44:48 SWDEV-145570 - IPC Mem Handle Changes for HIP. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#20 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#43 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#330 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#112 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#33 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.hpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#108 edit --- api/hip/hip_internal.hpp | 10 +++++ api/hip/hip_memory.cpp | 92 ++++++++++++++++++++++++++++++++++------ 2 files changed, 90 insertions(+), 12 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 27f7e33fc3..3ca1ba61d7 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -31,6 +31,16 @@ THE SOFTWARE. #include #include +/*! 
IHIP IPC MEMORY Structure */ +#define IHIP_IPC_MEM_HANDLE_SIZE 32 +#define IHIP_IPC_MEM_RESERVED_SIZE LP64_SWITCH(28,24) + +typedef struct ihipIpcMemHandle_st { + char ipc_handle[IHIP_IPC_MEM_HANDLE_SIZE]; ///< ipc memory handle on ROCr + size_t psize; + char reserved[IHIP_IPC_MEM_RESERVED_SIZE]; +} ihipIpcMemHandle_t; + #define HIP_INIT() \ std::call_once(hip::g_ihipInitialized, hip::init); \ if (hip::g_context == nullptr && g_devices.size() > 0) { \ diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index dcea3bd083..587bbbc96e 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1249,28 +1249,96 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes HIP_RETURN(hipSuccess); } -hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) { - HIP_INIT_API(handle, devPtr); +hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* dev_ptr) { + HIP_INIT_API(handle, dev_ptr); - assert(0 && "Unimplemented"); + size_t offset = 0; + amd::Memory* amd_mem_obj = nullptr; + device::Memory* dev_mem_obj = nullptr; + ihipIpcMemHandle_t* ihandle = nullptr; - HIP_RETURN(hipErrorUnknown); + if ((handle == nullptr) || (dev_ptr == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Get AMD::Memory object corresponding to this pointer */ + amd_mem_obj = getMemoryObject(dev_ptr, offset); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Get Device::Memory object pointer */ + dev_mem_obj = amd_mem_obj->getDeviceMemory(*hip::getCurrentContext()->devices()[0],false); + if (dev_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Create an handle for IPC. 
Store the memory size inside the handle */ + ihandle = reinterpret_cast(handle); + dev_mem_obj->IpcCreate(offset, &(ihandle->psize), &(ihandle->ipc_handle)); + + HIP_RETURN(hipSuccess); } -hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags) { - HIP_INIT_API(devPtr, &handle, flags); +hipError_t hipIpcOpenMemHandle(void** dev_ptr, hipIpcMemHandle_t handle, unsigned int flags) { + HIP_INIT_API(dev_ptr, &handle, flags); - assert(0 && "Unimplemented"); + amd::Memory* amd_mem_obj = nullptr; + amd::Device* device = nullptr; + ihipIpcMemHandle_t* ihandle = nullptr; - HIP_RETURN(hipErrorUnknown); + if (dev_ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Call the IPC Attach from Device class */ + device = hip::getCurrentContext()->devices()[0]; + ihandle = reinterpret_cast(&handle); + + amd_mem_obj = device->IpcAttach(&(ihandle->ipc_handle), ihandle->psize, flags, dev_ptr); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Add the memory to the MemObjMap */ + amd::MemObjMap::AddMemObj(*dev_ptr, amd_mem_obj); + + HIP_RETURN(hipSuccess); } -hipError_t hipIpcCloseMemHandle(void* devPtr) { - HIP_INIT_API(devPtr); +hipError_t hipIpcCloseMemHandle(void* dev_ptr) { + HIP_INIT_API(dev_ptr); - assert(0 && "Unimplemented"); + size_t offset = 0; + amd::Device* device = nullptr; + amd::Memory* amd_mem_obj = nullptr; - HIP_RETURN(hipErrorUnknown); + hip::syncStreams(); + hip::getNullStream()->finish(); + + if (dev_ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Get the amd::Memory object */ + amd_mem_obj = getMemoryObject(dev_ptr, offset); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Call IPC Detach from Device class */ + device = hip::getCurrentContext()->devices()[0]; + if (device == nullptr) { + HIP_RETURN(hipErrorNoDevice); + } + + /* Remove the memory from MemObjMap */ + amd::MemObjMap::RemoveMemObj(amd_mem_obj); + + /* detach the 
memory */ + device->IpcDetach(*amd_mem_obj); + + HIP_RETURN(hipSuccess); } hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { From 72094d3281e55c4e3f544cfb19ad0a74a2099496 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 29 Jan 2019 17:33:39 -0500 Subject: [PATCH 108/282] P4 to Git Change 1737265 by skudchad@skudchad_test2_win_opencl on 2019/01/29 16:13:19 SWDEV-145570 - [HIP] - Fix maxThreadsPerStreamProcessor info parameter for HIP-VDI ReviewBoardURL = http://ocltc.amd.com/reviews/r/16626/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#331 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#601 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#123 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#113 edit --- api/hip/hip_device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 30ad2f3af1..6e103e2d40 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -158,7 +158,7 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) deviceProps.minor = info.gfxipVersion_ % 100; deviceProps.multiProcessorCount = info.maxComputeUnits_; deviceProps.l2CacheSize = info.l2CacheSize_; - deviceProps.maxThreadsPerMultiProcessor = info.simdPerCU_; + deviceProps.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_; deviceProps.computeMode = 0; deviceProps.clockInstructionRate = info.timeStampFrequency_; deviceProps.arch.hasGlobalInt32Atomics = 1; From 7e5ebb16e1d74633ee8113071297c491ce2e8d83 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 1 Feb 2019 13:43:50 -0500 Subject: [PATCH 109/282] P4 to Git Change 1738965 by cpaquot@cpaquot-ocl-lc-lnx on 2019/02/01 13:17:10 SWDEV-145570 - [HIP] Handle multi device setup Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#21 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#21 edit --- api/hip/hip_device_runtime.cpp | 22 ++++++--- api/hip/hip_internal.hpp | 1 + api/hip/hip_platform.cpp | 90 ++++++++++++++++++++-------------- 3 files changed, 68 insertions(+), 45 deletions(-) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index f5e75d316b..bfaf5558d3 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -391,21 +391,27 @@ hipError_t hipDeviceSynchronize ( void ) { HIP_RETURN(hipSuccess); } +int ihipGetDevice() { + for (unsigned int i = 0; i < g_devices.size(); i++) { + if (g_devices[i] == hip::getCurrentContext()) { + return i; + } + } + assert(0 && "Current device not found?!"); + return -1; +} + hipError_t hipGetDevice ( int* deviceId ) { HIP_INIT_API(deviceId); if (deviceId != nullptr) { - for (unsigned int i = 0; i < g_devices.size(); i++) { - if (g_devices[i] == hip::getCurrentContext()) { - *deviceId = i; - HIP_RETURN(hipSuccess); - } - } + int dev = ihipGetDevice(); + assert(dev != -1); + *deviceId = dev; + HIP_RETURN(hipSuccess); } else { HIP_RETURN(hipErrorInvalidValue); } - - HIP_RETURN(hipErrorUnknown); } hipError_t hipGetDeviceCount ( int* count ) { diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 3ca1ba61d7..8d918c608e 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -87,6 +87,7 @@ namespace hip { }; extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); +extern int ihipGetDevice(); extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); #define HIP_RETURN(ret) \ diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index da7f8beb62..795c55f87b 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -55,7 +55,7 @@ struct __ClangOffloadBundleHeader { 
__ClangOffloadBundleDesc desc[1]; }; -extern "C" hipModule_t __hipRegisterFatBinary(const void* data) +extern "C" std::vector* __hipRegisterFatBinary(const void* data) { HIP_INIT(); @@ -71,9 +71,7 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data) return nullptr; } - amd::Program* program = new amd::Program(*hip::getCurrentContext()); - if (!program) - return nullptr; + auto programs = new std::vector{g_devices.size()}; const auto obheader = reinterpret_cast(fbwrapper->binary); const auto* desc = &obheader->desc[0]; @@ -87,19 +85,30 @@ extern "C" hipModule_t __hipRegisterFatBinary(const void* data) std::string target(desc->triple + sizeof(HIP_AMDGCN_AMDHSA_TRIPLE), desc->tripleSize - sizeof(HIP_AMDGCN_AMDHSA_TRIPLE)); - if (target.compare(hip::getCurrentContext()->devices()[0]->info().name_)) - continue; const void *image = reinterpret_cast( reinterpret_cast(obheader) + desc->offset); size_t size = desc->size; - if (CL_SUCCESS == program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, size) && - CL_SUCCESS == program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) - break; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]; + + if (target.compare(ctx->devices()[0]->info().name_)) { + continue; + } + + amd::Program* program = new amd::Program(*ctx); + if (program == nullptr) { + return nullptr; + } + if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], image, size) && + CL_SUCCESS == program->build(ctx->devices(), nullptr, nullptr, nullptr)) { + programs->at(dev) = reinterpret_cast(as_cl(program)); + } + } } - return reinterpret_cast(as_cl(program)); + return programs; } struct ihipExec_t { @@ -115,7 +124,7 @@ thread_local std::stack execStack_; class PlatformState { amd::Monitor lock_; private: - std::unordered_map functions_; + std::unordered_map > functions_; struct RegisteredVar { char* var; @@ -125,22 +134,18 @@ private: bool constant; }; - 
std::unordered_map vars_; + std::unordered_map*, RegisteredVar> vars_; static PlatformState* platform_; PlatformState() : lock_("Guards global function map") {} - ~PlatformState() { - for (const auto it : functions_) { - delete it.second; - } - } + ~PlatformState() {} public: static PlatformState& instance() { return *platform_; } - void registerVar(hipModule_t modules, + void registerVar(std::vector* modules, char* var, char* hostVar, char* deviceVar, @@ -153,18 +158,17 @@ public: vars_.insert(std::make_pair(modules, rvar)); } - void registerFunction(const void* hostFunction, amd::Kernel* func) { + void registerFunction(const void* hostFunction, const std::vector& funcs) { amd::ScopedLock lock(lock_); - hip::Function* f = new hip::Function(func); - functions_.insert(std::make_pair(hostFunction, f)); + functions_.insert(std::make_pair(hostFunction, funcs)); } - hip::Function* getFunc(const void* hostFunction) { + hipFunction_t getFunc(const void* hostFunction, int deviceId) { amd::ScopedLock lock(lock_); const auto it = functions_.find(hostFunction); if (it != functions_.cend()) { - return it->second; + return it->second[deviceId]; } else { return nullptr; } @@ -197,7 +201,7 @@ public: PlatformState* PlatformState::platform_ = new PlatformState(); extern "C" void __hipRegisterFunction( - hipModule_t module, + std::vector* modules, const void* hostFunction, char* deviceFunction, const char* deviceName, @@ -210,15 +214,21 @@ extern "C" void __hipRegisterFunction( { HIP_INIT(); - amd::Program* program = as_amd(reinterpret_cast(module)); + std::vector functions{g_devices.size()}; - const amd::Symbol* symbol = program->findSymbol(deviceName); - if (!symbol) return; + for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) { + hipFunction_t function = nullptr; + if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName) && + function != nullptr) { + functions[deviceId] = function; + } + else { + // tprintf(DB_FB, "__hipRegisterFunction 
cannot find kernel %s for" + // " device %d\n", deviceName, deviceId); + } + } - amd::Kernel* kernel = new amd::Kernel(*program, *symbol, deviceName); - if (!kernel) return; - - PlatformState::instance().registerFunction(hostFunction, kernel); + PlatformState::instance().registerFunction(hostFunction, functions); } // Registers a device-side global variable. @@ -227,7 +237,7 @@ extern "C" void __hipRegisterFunction( // track of the value of the device side global variable between kernel // executions. extern "C" void __hipRegisterVar( - hipModule_t modules, // The device modules containing code object + std::vector* modules, // The device modules containing code object char* var, // The shadow variable in host code char* hostVar, // Variable name in host code char* deviceVar, // Variable name in device code @@ -241,11 +251,16 @@ extern "C" void __hipRegisterVar( PlatformState::instance().registerVar(modules, var, hostVar, deviceVar, size, constant != 0); } -extern "C" void __hipUnregisterFatBinary( - hipModule_t module -) +extern "C" void __hipUnregisterFatBinary(std::vector* modules) { HIP_INIT(); + + std::for_each(modules->begin(), modules->end(), [](hipModule_t module){ + if (module != nullptr) { + as_amd(reinterpret_cast(module))->release(); + } + }); + delete modules; } extern "C" hipError_t hipConfigureCall( @@ -277,7 +292,8 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) { HIP_INIT_API(hostFunction); - hip::Function* func = PlatformState::instance().getFunc(hostFunction); + int deviceId = ihipGetDevice(); + hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); if (func == nullptr) { HIP_RETURN(hipErrorUnknown); } @@ -292,7 +308,7 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) HIP_LAUNCH_PARAM_END }; - HIP_RETURN(hipModuleLaunchKernel(func->asHipFunction(), + HIP_RETURN(hipModuleLaunchKernel(func, exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z, exec.blockDim_.x, exec.blockDim_.y, 
exec.blockDim_.z, exec.sharedMem_, exec.hStream_, nullptr, extra)); From 38d6d7f87f882886f8cdfd2c42f5017aebcb2ebd Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 21 Feb 2019 17:42:08 -0500 Subject: [PATCH 110/282] P4 to Git Change 1746912 by cpaquot@cpaquot-ocl-lc-lnx on 2019/02/21 14:16:35 SWDEV-178453 - [HIP] Add extra parameter for sharedMemBytes Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#20 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#129 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#58 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#72 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#91 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#91 edit --- api/hip/hip_module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 592b41f9b9..43ac97d2ac 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -221,7 +221,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, eStart->event_ = &startCommand->event(); } - amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(*queue, waitList, *kernel, ndrange); + amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(*queue, waitList, *kernel, ndrange, sharedMemBytes); if (!command) { return hipErrorOutOfMemory; } From c49cb353ca5471fa26391218d1689db0d1d4c353 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 22 Feb 2019 19:43:36 -0500 Subject: [PATCH 111/282] P4 to Git Change 1747723 by cpaquot@cpaquot-ocl-lc-lnx on 2019/02/22 19:30:33 SWDEV-145570 - [HIP] Texture object implementation hipTextureObject2D test passes on PAL backend. Still needs to have sampler handled properly on ROCm backend. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#22 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#333 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#36 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.hpp#14 edit --- api/hip/hip_internal.hpp | 1 + api/hip/hip_texture.cpp | 123 +++++++++++++++++++++++++++++++++++---- 2 files changed, 113 insertions(+), 11 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 8d918c608e..ed32d90476 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -88,6 +88,7 @@ namespace hip { extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); extern int ihipGetDevice(); +extern hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags); extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); #define HIP_RETURN(ret) \ diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index b3b5c16c10..7fca3f506d 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -23,6 +23,16 @@ THE SOFTWARE. 
#include #include #include "hip_internal.hpp" +#include "platform/sampler.hpp" + +namespace hip { + struct TextureObject { + uint32_t imageSRD[HIP_IMAGE_OBJECT_SIZE_DWORD]; + uint32_t samplerSRD[HIP_SAMPLER_OBJECT_SIZE_DWORD]; + amd::Image* image; + amd::Sampler* sampler; + }; +}; void getDrvChannelOrderAndType(const enum hipArray_Format Format, unsigned int NumChannels, cl_channel_order* channelOrder, @@ -137,6 +147,68 @@ void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureRea } } +amd::Sampler* fillSamplerDescriptor(enum hipTextureAddressMode addressMode, + enum hipTextureFilterMode filterMode, int normalizedCoords) { +#ifndef CL_FILTER_NONE +#define CL_FILTER_NONE 0x1142 +#endif + uint32_t filter_mode = CL_FILTER_NONE; + switch (filterMode) { + case hipFilterModePoint: + filter_mode = CL_FILTER_NEAREST; + break; + case hipFilterModeLinear: + filter_mode = CL_FILTER_LINEAR; + break; + } + + uint32_t address_mode = CL_ADDRESS_NONE; + switch (addressMode) { + case hipAddressModeWrap: + address_mode = CL_ADDRESS_REPEAT; + break; + case hipAddressModeClamp: + address_mode = CL_ADDRESS_CLAMP; + break; + case hipAddressModeMirror: + address_mode = CL_ADDRESS_MIRRORED_REPEAT; + break; + case hipAddressModeBorder: + address_mode = CL_ADDRESS_CLAMP_TO_EDGE; + break; + } + amd::Sampler* sampler = new amd::Sampler(*hip::getCurrentContext(), + normalizedCoords == CL_TRUE, + address_mode, filter_mode, CL_FILTER_NONE, 0.f, CL_MAXFLOAT); + if (sampler == nullptr) { + return nullptr; + } + if (!sampler->create()) { + delete sampler; + return nullptr; + } + return sampler; +} + +hip::TextureObject* ihipCreateTextureObject(amd::Image& image, amd::Sampler& sampler) { + hip::TextureObject* texture; + ihipMalloc(reinterpret_cast(&texture), sizeof(hip::TextureObject), CL_MEM_SVM_FINE_GRAIN_BUFFER); + + if (texture == nullptr) { + return nullptr; + } + + device::Memory* imageMem = image.getDeviceMemory(*hip::getCurrentContext()->devices()[0]); + 
memcpy(texture->imageSRD, imageMem->cpuSrd(), sizeof(uint32_t)*HIP_IMAGE_OBJECT_SIZE_DWORD); + texture->image = &image; + + device::Sampler* devSampler = sampler.getDeviceSampler(*hip::getCurrentContext()->devices()[0]); + memcpy(texture->samplerSRD, devSampler->hwState(), sizeof(uint32_t)*HIP_SAMPLER_OBJECT_SIZE_DWORD); + texture->sampler = &sampler; + + return texture; +} + hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, const hipTextureDesc* pTexDesc, const hipResourceViewDesc* pResViewDesc) { @@ -186,13 +258,19 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou assert(0); break; case hipResourceTypeLinear: - assert(pResViewDesc == nullptr); - memory = getMemoryObject(pResDesc->res.linear.devPtr, offset); + { + assert(pResViewDesc == nullptr); + memory = getMemoryObject(pResDesc->res.linear.devPtr, offset); - image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), - CL_MEM_OBJECT_IMAGE1D, memory->getMemFlags(), imageFormat, - pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize(), 1, 1, - pResDesc->res.linear.sizeInBytes, 0); + getChannelOrderAndType(pResDesc->res.linear.desc, pTexDesc->readMode, + &image_format.image_channel_order, &image_format.image_channel_data_type); + const amd::Image::Format imageFormat(image_format); + + image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat, + pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize(), 1, 1, + pResDesc->res.linear.sizeInBytes, 0); + } break; case hipResourceTypePitch2D: assert(pResViewDesc == nullptr); @@ -205,15 +283,32 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou break; default: HIP_RETURN(hipErrorInvalidValue); } - *pTexObject = reinterpret_cast(as_cl(image)); - HIP_RETURN(hipErrorUnknown); + if (!image->create()) { + delete image; + HIP_RETURN(hipErrorUnknown); + } + + 
amd::Sampler* sampler = fillSamplerDescriptor(pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords); + + *pTexObject = reinterpret_cast(ihipCreateTextureObject(*image, *sampler)); + + HIP_RETURN(hipSuccess); +} + +void ihipDestroyTextureObject(hip::TextureObject* texture) { + texture->image->release(); + texture->sampler->release(); + + hipFree(texture); } hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { HIP_INIT_API(textureObject); - as_amd(reinterpret_cast(textureObject))->release(); + hip::TextureObject* texture = reinterpret_cast(textureObject); + + ihipDestroyTextureObject(texture); HIP_RETURN(hipSuccess); } @@ -267,11 +362,17 @@ hipError_t ihipBindTexture(cl_mem_object_type type, amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), type, memory->getMemFlags(), imageFormat, width, height, 1, pitch, 0); + if (!image->create()) { + delete image; + return hipErrorUnknown; + } + *offset = 0; if (tex->textureObject) { - as_amd(reinterpret_cast(tex->textureObject))->release(); + ihipDestroyTextureObject(reinterpret_cast(tex->textureObject)); } - tex->textureObject = reinterpret_cast(as_cl(image)); + amd::Sampler* sampler = fillSamplerDescriptor(tex->addressMode[0], tex->filterMode, tex->normalized); + tex->textureObject = reinterpret_cast(ihipCreateTextureObject(*image, *sampler)); return hipSuccess; } return hipErrorUnknown; From e66470b435d16420a20b2f41f2406eddd79b8549 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 7 Mar 2019 14:58:49 -0500 Subject: [PATCH 112/282] P4 to Git Change 1752994 by kjayapra@2_HIPWS_SL_D32 on 2019/03/07 14:06:54 SWDEV-145570 - Implementation of ihipMemsetD32 and ihipMemsetD32Async HIP-HCC Info: https://github.com/ROCm-Developer-Tools/HIP/pull/933 Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#11 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#12 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#44 edit --- api/hip/hip_hcc.def.in | 2 ++ api/hip/hip_hcc.map.in | 2 ++ api/hip/hip_memory.cpp | 45 +++++++++++++++++++++++------------------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 576f840695..f0b4560fd6 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -95,8 +95,10 @@ hipMemGetAddressRange hipMemGetInfo hipMemPtrGetInfo hipMemset +hipMemsetD32 hipMemset2D hipMemsetAsync +hipMemsetD32Async hipMemset2DAsync hipMemsetD8 hipMemset3D diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 8f033617be..b6d16035fa 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -96,8 +96,10 @@ global: hipMemGetInfo; hipMemPtrGetInfo; hipMemset; + hipMemsetD32; hipMemset2D; hipMemsetAsync; + hipMemsetD32Async; hipMemset2DAsync; hipMemsetD8; hipMemset3D; diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 587bbbc96e..2a73d98378 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -112,24 +112,33 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin return hipSuccess; } -hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, amd::HostQueue& queue, - bool isAsync = false) { +hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, + hipStream_t stream, bool isAsync = false) { if (dst == nullptr) { return hipErrorInvalidValue; } size_t offset = 0; + amd::HostQueue* queue = nullptr; amd::Memory* memory = getMemoryObject(dst, offset); + if (stream == nullptr) { + hip::syncStreams(); + queue = hip::getNullStream(); + } else { + hip::getNullStream()->finish(); + queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + } + if (memory != nullptr) { // Device memory amd::Command::EventWaitList waitList; amd::Coord3D fillOffset(offset, 0, 0); amd::Coord3D fillSize(sizeBytes, 1, 1); amd::FillMemoryCommand* command = - new 
amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, sizeof(char), fillOffset, fillSize); + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, valueSize, fillOffset, fillSize); if (command == nullptr) { return hipErrorOutOfMemory; @@ -196,26 +205,25 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, value, sizeBytes, stream); - amd::HostQueue* queue; + HIP_RETURN(ihipMemset(dst, value, sizeof(char), sizeBytes, stream, true)); +} - if (stream == nullptr) { - hip::syncStreams(); - queue = hip::getNullStream(); - } else { - hip::getNullStream()->finish(); - queue = as_amd(reinterpret_cast(stream))->asHostQueue(); - } +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream) { + HIP_INIT_API(dst, value, count, stream); - HIP_RETURN(ihipMemset(dst, value, sizeBytes, *queue, true)); + HIP_RETURN(ihipMemset(dst, value, sizeof(int), count * sizeof(int), stream, true)); } hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { HIP_INIT_API(dst, value, sizeBytes); - hip::syncStreams(); - amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemset(dst, value, sizeof(char), sizeBytes, nullptr)); +} - HIP_RETURN(ihipMemset(dst, value, sizeBytes, *queue)); +hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { + HIP_INIT_API(dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int), count * sizeof(int), nullptr)); } hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { @@ -360,10 +368,7 @@ hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) void *dst = pitchedDevPtr.ptr; size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; - hip::syncStreams(); - amd::HostQueue* queue = hip::getNullStream(); - - HIP_RETURN(ihipMemset(dst, 
value, sizeBytes, *queue)); + HIP_RETURN(ihipMemset(dst, value, sizeof(char), sizeBytes, nullptr)); } hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { From e461d71afa83f9f14ddd4f8301fa186b0dd46111 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 7 Mar 2019 17:24:23 -0500 Subject: [PATCH 113/282] P4 to Git Change 1753099 by cpaquot@cpaquot-ocl-lc-lnx on 2019/03/07 16:43:54 SWDEV-145570 - [HIP] Change IS_HIP into a dynamic variable set by HIP layer Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#127 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#69 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#18 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#303 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/macros.hpp#11 edit --- api/hip/hip_context.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 7d7fdb6743..94699fa38d 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -23,6 +23,7 @@ THE SOFTWARE. #include #include "hip_internal.hpp" #include "platform/runtime.hpp" +#include "utils/flags.hpp" #include "utils/versions.hpp" std::vector g_devices; @@ -38,6 +39,7 @@ std::map g_nullStreams; void init() { if (!amd::Runtime::initialized()) { + amd::IS_HIP = true; amd::Runtime::init(); } From a35c12208d3c196e846913f9e2467a99ccd15b7d Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 18 Mar 2019 18:44:55 -0400 Subject: [PATCH 114/282] P4 to Git Change 1757948 by kjayapra@1_HIPWS_SL_IPC on 2019/03/18 18:29:24 SWDEV-144570 - Implementation of hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync, hipMemcpyFromSymbolAsync, hipGetSymbolAddress, hipModuleGetGlobal Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#12 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#13 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#23 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#45 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#21 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#20 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#101 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#45 edit --- api/hip/hip_hcc.def.in | 2 + api/hip/hip_hcc.map.in | 2 + api/hip/hip_internal.hpp | 53 ++++++++++++ api/hip/hip_memory.cpp | 76 ++++++++++++++-- api/hip/hip_module.cpp | 16 ++++ api/hip/hip_platform.cpp | 182 ++++++++++++++++++++++----------------- 6 files changed, 245 insertions(+), 86 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index f0b4560fd6..01e58aef73 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -92,6 +92,8 @@ hipMemcpyFromArray hipMemcpyToSymbol hipMemcpyToSymbolAsync hipMemGetAddressRange +hipGetSymbolAddress +hipGetSymbolSize hipMemGetInfo hipMemPtrGetInfo hipMemset diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index b6d16035fa..64d55460a9 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -93,6 +93,8 @@ global: hipMemcpyToSymbol; hipMemcpyToSymbolAsync; hipMemGetAddressRange; + hipGetSymbolAddress; + hipGetSymbolSize; hipMemGetInfo; hipMemPtrGetInfo; hipMemset; diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index ed32d90476..c5ba11b68a 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -85,6 +85,59 @@ namespace hip { static Function* asFunction(hipFunction_t f) { return reinterpret_cast(f); } }; }; + +struct ihipExec_t { + dim3 gridDim_; + dim3 blockDim_; + size_t sharedMem_; + hipStream_t hStream_; + std::vector arguments_; +}; + +class PlatformState { + 
amd::Monitor lock_; + +public: + struct RegisteredVar { + public: + RegisteredVar(): hostVar_(nullptr), size_(0), devicePtr_(nullptr) {} + RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr); + ~RegisteredVar() {} + + hipDeviceptr_t getdeviceptr() const { return devicePtr_; }; + size_t getvarsize() const { return size_; }; + + private: + char* hostVar_; // Variable name in host code + size_t size_; // Size of the variable + hipDeviceptr_t devicePtr_; //Device Memory Address of the variable. + }; + +private: + std::unordered_map > functions_; + std::unordered_map > vars_; + + static PlatformState* platform_; + + PlatformState() : lock_("Guards global function map") {} + ~PlatformState() {} +public: + static PlatformState& instance() { + return *platform_; + } + + void registerVar(const char* hostvar, const std::vector& rvar); + void registerFunction(const void* hostFunction, const std::vector& funcs); + + hipFunction_t getFunc(const void* hostFunction, int deviceId); + bool getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, + size_t* size_ptr); + void setupArgument(const void *arg, size_t size, size_t offset); + void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); + + void popExec(ihipExec_t& exec); +}; + extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); extern int ihipGetDevice(); diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 2a73d98378..f45b4944d3 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -557,36 +557,96 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou size_t offset, hipMemcpyKind kind) { HIP_INIT_API(symbolName, src, count, offset, kind); - assert(0 && "Unimplemented"); + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; - HIP_RETURN(hipErrorUnknown); + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, 
ihipGetDevice(), &device_ptr, + &sym_size)) { + HIP_RETURN(hipErrorUnknown); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorUnknown); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpy(device_ptr, src, count, kind)); } hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, size_t offset, hipMemcpyKind kind) { HIP_INIT_API(symbolName, dst, count, offset, kind); - assert(0 && "Unimplemented"); + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; - HIP_RETURN(hipErrorUnknown); + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, + &sym_size)) { + HIP_RETURN(hipErrorUnknown); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorUnknown); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpy(dst, device_ptr, count, kind)); } hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t count, size_t offset, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(symbolName, src, count, offset, kind, stream); - assert(0 && "Unimplemented"); + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; - HIP_RETURN(hipErrorUnknown); + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, + &sym_size)) { + HIP_RETURN(hipErrorUnknown); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorUnknown); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpyAsync(device_ptr, src, count, kind, stream)); } hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t count, size_t offset, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(symbolName, dst, count, offset, kind, stream); - assert(0 && "Unimplemented"); + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; - HIP_RETURN(hipErrorUnknown); + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, + &sym_size)) { + HIP_RETURN(hipErrorUnknown); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorUnknown); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpyAsync(dst, device_ptr, count, kind, stream)); } hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 43ac97d2ac..62f676dfe0 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -137,6 +137,22 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h { HIP_INIT_API(dptr, bytes, hmod, name); + amd::Program* program = nullptr; + const device::Program* dev_program = nullptr; + + /* Get Device Program pointer*/ + program = as_amd(reinterpret_cast(hmod)); + dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + if (dev_program == nullptr) { + HIP_RETURN(hipErrorUnknown); + } + + /* Find the global Symbols */ + if(!dev_program->findGlobalSymbols(dptr, bytes, name)) { + HIP_RETURN(hipErrorUnknown); + } + HIP_RETURN(hipSuccess); } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 795c55f87b..c6b731af58 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -31,6 +31,9 @@ THE SOFTWARE. 
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" +thread_local std::stack execStack_; +PlatformState* PlatformState::platform_ = new PlatformState(); + struct __CudaFatBinaryWrapper { unsigned int magic; unsigned int version; @@ -111,94 +114,82 @@ extern "C" std::vector* __hipRegisterFatBinary(const void* data) return programs; } -struct ihipExec_t { - dim3 gridDim_; - dim3 blockDim_; - size_t sharedMem_; - hipStream_t hStream_; - std::vector arguments_; -}; +PlatformState::RegisteredVar::RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr) + : hostVar_(hostVar), size_(size), devicePtr_(devicePtr) { + amd::Memory* amd_mem_obj = nullptr; + uint32_t flags = 0; -thread_local std::stack execStack_; + /* Create an amd Memory object for the pointer */ + amd_mem_obj + = new (*hip::getCurrentContext()) amd::Buffer(*hip::getCurrentContext(), flags, size, devicePtr_); -class PlatformState { - amd::Monitor lock_; -private: - std::unordered_map > functions_; - - struct RegisteredVar { - char* var; - char* hostVar; - char* deviceVar; - int size; - bool constant; - }; - - std::unordered_map*, RegisteredVar> vars_; - - static PlatformState* platform_; - - PlatformState() : lock_("Guards global function map") {} - ~PlatformState() {} -public: - static PlatformState& instance() { - return *platform_; + if (amd_mem_obj == nullptr) { + LogError("[OCL] failed to create a mem object!"); } - void registerVar(std::vector* modules, - char* var, - char* hostVar, - char* deviceVar, - int size, - bool constant) { - amd::ScopedLock lock(lock_); - - const RegisteredVar rvar = { var, hostVar, deviceVar, size, constant != 0 }; - - vars_.insert(std::make_pair(modules, rvar)); + if (!amd_mem_obj->create(nullptr)) { + LogError("[OCL] failed to create a svm hidden buffer!"); + amd_mem_obj->release(); } - void registerFunction(const void* hostFunction, const std::vector& funcs) { - amd::ScopedLock lock(lock_); + /* Add the memory to the MemObjMap */ + 
amd::MemObjMap::AddMemObj(devicePtr_, amd_mem_obj); +} - functions_.insert(std::make_pair(hostFunction, funcs)); +void PlatformState::registerVar(const char* hostvar, + const std::vector& rvar) { + amd::ScopedLock lock(lock_); + vars_.insert(std::make_pair(hostvar, rvar)); +} + +void PlatformState::registerFunction(const void* hostFunction, + const std::vector& funcs) { + amd::ScopedLock lock(lock_); + functions_.insert(std::make_pair(hostFunction, funcs)); +} + +hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { + amd::ScopedLock lock(lock_); + const auto it = functions_.find(hostFunction); + if (it != functions_.cend()) { + return it->second[deviceId]; + } else { + return nullptr; + } +} + +bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, + hipDeviceptr_t* dev_ptr, size_t* size_ptr) { + amd::ScopedLock lock(lock_); + const auto it = vars_.find(hostVar); + if (it != vars_.cend()) { + *size_ptr = it->second[deviceId].getvarsize(); + *dev_ptr = it->second[deviceId].getdeviceptr(); + return true; + } else { + return false; + } +} + +void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) { + auto& arguments = execStack_.top().arguments_; + + if (arguments.size() < offset + size) { + arguments.resize(offset + size); } - hipFunction_t getFunc(const void* hostFunction, int deviceId) { - amd::ScopedLock lock(lock_); - const auto it = functions_.find(hostFunction); - if (it != functions_.cend()) { - return it->second[deviceId]; - } else { - return nullptr; - } - } + ::memcpy(&arguments[offset], arg, size); +} - void setupArgument(const void *arg, - size_t size, - size_t offset) { - auto& arguments = execStack_.top().arguments_; +void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream) { + execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); +} - if (arguments.size() < offset + size) { - arguments.resize(offset + size); - } - - 
::memcpy(&arguments[offset], arg, size); - } - - void configureCall(dim3 gridDim, - dim3 blockDim, - size_t sharedMem, - hipStream_t stream) { - execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); - } - - void popExec(ihipExec_t& exec) { - exec = std::move(execStack_.top()); - execStack_.pop(); - } -}; -PlatformState* PlatformState::platform_ = new PlatformState(); +void PlatformState::popExec(ihipExec_t& exec) { + exec = std::move(execStack_.top()); + execStack_.pop(); +} extern "C" void __hipRegisterFunction( std::vector* modules, @@ -248,7 +239,26 @@ extern "C" void __hipRegisterVar( { HIP_INIT(); - PlatformState::instance().registerVar(modules, var, hostVar, deviceVar, size, constant != 0); + size_t sym_size = 0; + std::vector global_vars{g_devices.size()}; + + for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) { + hipDeviceptr_t device_ptr = nullptr; + if((hipSuccess == hipModuleGetGlobal(&device_ptr, &sym_size, modules->at(deviceId), + hostVar)) && (device_ptr != nullptr)) { + + if (static_cast(size) != sym_size) { + LogError("[OCL] Size Mismatch with the HSA Symbol retrieved \n"); + } + + global_vars[deviceId] = PlatformState::RegisteredVar(hostVar, sym_size, device_ptr); + + } else { + LogError("[OCL] __hipRegisterVar cannot find kernel for device \n"); + } + } + + PlatformState::instance().registerVar(hostVar, global_vars); } extern "C" void __hipUnregisterFatBinary(std::vector* modules) @@ -314,6 +324,22 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) exec.sharedMem_, exec.hStream_, nullptr, extra)); } +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { + size_t size = 0; + if(!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), devPtr, &size)) { + HIP_RETURN(hipErrorUnknown); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbolName) { + hipDeviceptr_t devPtr = nullptr; + if (!PlatformState::instance().getGlobalVar(symbolName, 
ihipGetDevice(), &devPtr, sizePtr)) { + HIP_RETURN(hipErrorUnknown); + } + HIP_RETURN(hipSuccess); +} + #if defined(ATI_OS_LINUX) namespace hip_impl { From 2bd7f75ba9ccdc441670a02d1a10b21d4e58cc33 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 19 Mar 2019 11:31:24 -0400 Subject: [PATCH 115/282] P4 to Git Change 1758323 by yaxunl@yaxunl-lc10 on 2019/03/19 11:22:17 SWDEV-145570 - Forward declare hipModuleGetGlobal to fix build failure. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#23 edit --- api/hip/hip_platform.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index c6b731af58..efc8caffa6 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -58,6 +58,9 @@ struct __ClangOffloadBundleHeader { __ClangOffloadBundleDesc desc[1]; }; +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, + hipModule_t hmod, const char* name); + extern "C" std::vector* __hipRegisterFatBinary(const void* data) { HIP_INIT(); From 950104532220aef01f3ac3a9ac0c5bf691c47321 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 20 Mar 2019 12:24:45 -0400 Subject: [PATCH 116/282] P4 to Git Change 1758971 by yaxunl@yaxunl-lc10 on 2019/03/20 12:00:49 SWDEV-145570 - Fix build failure due to type mismatch of amd::Event::CallBackFunction Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#16 edit --- api/hip/hip_stream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 7acd3d90dd..3331688424 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -54,7 +54,7 @@ void syncStreams() { }; -void ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { +void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { hipError_t status = hipSuccess; StreamCallback* cbo = reinterpret_cast(user_data); From eea83a2c5e459af76966387b902998d5063da2d1 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 20 Mar 2019 18:43:07 -0400 Subject: [PATCH 117/282] P4 to Git Change 1759224 by cpaquot@cpaquot-ocl-lc-lnx on 2019/03/20 17:47:43 SWDEV-183452 - [HIP] PyTorch test_pin_memory fails hipPointerGetAttributes should return hipMemoryTypeHost for hipHostMalloc memories. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#46 edit --- api/hip/hip_memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index f45b4944d3..ab6b788921 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1438,11 +1438,11 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void int device = 0; if (memObj != nullptr) { - attributes->memoryType = hipMemoryTypeDevice; + attributes->memoryType = (CL_MEM_SVM_FINE_GRAIN_BUFFER | memObj->getMemFlags())? 
hipMemoryTypeHost : hipMemoryTypeDevice; attributes->hostPointer = memObj->getSvmPtr(); attributes->devicePointer = memObj->getSvmPtr(); attributes->isManaged = 0; - attributes->allocationFlags = memObj->getMemFlags(); + attributes->allocationFlags = memObj->getMemFlags() >> 16; amd::Context &memObjCtx = memObj->getContext(); for (auto& ctx : g_devices) { From 636dadc1aaaa02f42cb9ee3c9e3d7797b51ac513 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 20 Mar 2019 19:18:10 -0400 Subject: [PATCH 118/282] P4 to Git Change 1759248 by cpaquot@cpaquot-ocl-lc-lnx on 2019/03/20 18:32:19 SWDEV-183452 - [HIP] Typo of | instead of & Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#47 edit --- api/hip/hip_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index ab6b788921..f20b26aaa0 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1438,7 +1438,7 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void int device = 0; if (memObj != nullptr) { - attributes->memoryType = (CL_MEM_SVM_FINE_GRAIN_BUFFER | memObj->getMemFlags())? hipMemoryTypeHost : hipMemoryTypeDevice; + attributes->memoryType = (CL_MEM_SVM_FINE_GRAIN_BUFFER & memObj->getMemFlags())? hipMemoryTypeHost : hipMemoryTypeDevice; attributes->hostPointer = memObj->getSvmPtr(); attributes->devicePointer = memObj->getSvmPtr(); attributes->isManaged = 0; From eb852bdb53ba79e2b0c511ffbf642ddbb9d447f0 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 21 Mar 2019 12:43:25 -0400 Subject: [PATCH 119/282] P4 to Git Change 1759609 by kjayapra@1_HIPWS_SL_IPC on 2019/03/21 12:09:27 SWDEV-144570 - Disbaling __hipHostRegister implementation to unblock pytorch tests in PAL. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#24 edit --- api/hip/hip_platform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index efc8caffa6..09143adcf3 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -240,6 +240,7 @@ extern "C" void __hipRegisterVar( int constant, // Whether this variable is constant int global) // Unknown, always 0 { +#if 0 HIP_INIT(); size_t sym_size = 0; @@ -262,6 +263,7 @@ extern "C" void __hipRegisterVar( } PlatformState::instance().registerVar(hostVar, global_vars); +#endif } extern "C" void __hipUnregisterFatBinary(std::vector* modules) From 292e93bf114139d520ed4497ccfb5a486b4900ed Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 27 Mar 2019 12:53:17 -0400 Subject: [PATCH 120/282] P4 to Git Change 1762349 by yaxunl@yaxunl-lc10 on 2019/03/27 11:50:12 SWDEV-145570 - Workaround for mismatch of device name and bundle id for gfx906. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#25 edit --- api/hip/hip_platform.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 09143adcf3..f2602babb1 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -100,7 +100,12 @@ extern "C" std::vector* __hipRegisterFatBinary(const void* data) amd::Context* ctx = g_devices[dev]; if (target.compare(ctx->devices()[0]->info().name_)) { - continue; + // Workaround for gfx906 device name mismatch. + // If bundle target id starts with gfx906 and device name starts with + // gfx906, treat them as match. 
+ if (target.find("gfx906") != 0 || + std::string(ctx->devices()[0]->info().name_).find("gfx906") != 0) + continue; } amd::Program* program = new amd::Program(*ctx); From b392004e71d0d402870b79ce5c70cdaf8def3037 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 1 Apr 2019 11:51:06 -0400 Subject: [PATCH 121/282] P4 to Git Change 1764069 by michliao@hliao-dev-00-hip-workspace on 2019/04/01 11:23:34 SWDEV-144570 - Handle zero-byte memset & memcpy - Properly handle zero-byte memset & memcpy by skipping the real stuff. RB: http://ocltc.amd.com/reviews/r/17062/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#48 edit --- api/hip/hip_memory.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index f20b26aaa0..ae052d6288 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -76,6 +76,11 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync = false) { + if (sizeBytes == 0) { + // Skip if nothing needs writing. + return hipSuccess; + } + amd::Command* command = nullptr; amd::Command::EventWaitList waitList; @@ -114,6 +119,10 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, hipStream_t stream, bool isAsync = false) { + if (sizeBytes == 0) { + // Skip if nothing needs filling. + return hipSuccess; + } if (dst == nullptr) { return hipErrorInvalidValue; From 41c75b2f8df96ba4831c563bddc84225fe2ec363 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 4 Apr 2019 17:50:08 -0400 Subject: [PATCH 122/282] P4 to Git Change 1766076 by michliao@hliao-dev-00-hip.rocm-workspace on 2019/04/04 17:35:28 SWDEV-144570 - Fix pointer attribute query. - Device should be numbered from zero. 
RB: http://ocltc.amd.com/reviews/r/17090/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#49 edit --- api/hip/hip_memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index ae052d6288..2c8c30b588 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1452,14 +1452,14 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void attributes->devicePointer = memObj->getSvmPtr(); attributes->isManaged = 0; attributes->allocationFlags = memObj->getMemFlags() >> 16; - + amd::Context &memObjCtx = memObj->getContext(); for (auto& ctx : g_devices) { - ++device; if (*ctx == memObjCtx) { attributes->device = device; break; } + ++device; } } else { attributes->memoryType = hipMemoryTypeHost; From e3910b720c20e336e409eeab76b7de32ac143573 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 4 Apr 2019 18:22:40 -0400 Subject: [PATCH 123/282] P4 to Git Change 1766106 by kjayapra@99_HIPWS_SLV_CHECKIN on 2019/04/04 18:07:26 SWDEV-144570 - Implementation of hipMemcpyToSymbol and simillar fns for PAL. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#24 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#89 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#37 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#102 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#46 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#135 edit --- api/hip/hip_internal.hpp | 5 ++-- api/hip/hip_module.cpp | 17 ++--------- api/hip/hip_platform.cpp | 61 +++++++++++++++++++++++++--------------- 3 files changed, 44 insertions(+), 39 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index c5ba11b68a..de9de24a32 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -100,8 +100,8 @@ class PlatformState { public: struct RegisteredVar { public: - RegisteredVar(): hostVar_(nullptr), size_(0), devicePtr_(nullptr) {} - RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr); + RegisteredVar(): hostVar_(nullptr), size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {} + RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr, amd::Memory* amd_mem_obj); ~RegisteredVar() {} hipDeviceptr_t getdeviceptr() const { return devicePtr_; }; @@ -111,6 +111,7 @@ public: char* hostVar_; // Variable name in host code size_t size_; // Size of the variable hipDeviceptr_t devicePtr_; //Device Memory Address of the variable. 
+ amd::Memory* amd_mem_obj_; }; private: diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 62f676dfe0..b482f39b77 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -132,24 +132,13 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const ch HIP_RETURN(hipSuccess); } - hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name) { HIP_INIT_API(dptr, bytes, hmod, name); - amd::Program* program = nullptr; - const device::Program* dev_program = nullptr; - - /* Get Device Program pointer*/ - program = as_amd(reinterpret_cast(hmod)); - dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); - - if (dev_program == nullptr) { - HIP_RETURN(hipErrorUnknown); - } - - /* Find the global Symbols */ - if(!dev_program->findGlobalSymbols(dptr, bytes, name)) { + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), dptr, + bytes)) { HIP_RETURN(hipErrorUnknown); } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index f2602babb1..36a79cf35f 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -61,6 +61,9 @@ struct __ClangOffloadBundleHeader { hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name); +hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, + hipDeviceptr_t* dptr, size_t* bytes); + extern "C" std::vector* __hipRegisterFatBinary(const void* data) { HIP_INIT(); @@ -122,26 +125,13 @@ extern "C" std::vector* __hipRegisterFatBinary(const void* data) return programs; } -PlatformState::RegisteredVar::RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr) - : hostVar_(hostVar), size_(size), devicePtr_(devicePtr) { - amd::Memory* amd_mem_obj = nullptr; - uint32_t flags = 0; - - /* Create an amd Memory object for the pointer */ - amd_mem_obj - = new 
(*hip::getCurrentContext()) amd::Buffer(*hip::getCurrentContext(), flags, size, devicePtr_); - - if (amd_mem_obj == nullptr) { - LogError("[OCL] failed to create a mem object!"); - } - - if (!amd_mem_obj->create(nullptr)) { - LogError("[OCL] failed to create a svm hidden buffer!"); - amd_mem_obj->release(); - } +PlatformState::RegisteredVar::RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr, + amd::Memory* amd_mem_obj) : hostVar_(hostVar), + size_(size), devicePtr_(devicePtr), + amd_mem_obj_(amd_mem_obj) { /* Add the memory to the MemObjMap */ - amd::MemObjMap::AddMemObj(devicePtr_, amd_mem_obj); + amd::MemObjMap::AddMemObj(devicePtr_, amd_mem_obj_); } void PlatformState::registerVar(const char* hostvar, @@ -245,7 +235,6 @@ extern "C" void __hipRegisterVar( int constant, // Whether this variable is constant int global) // Unknown, always 0 { -#if 0 HIP_INIT(); size_t sym_size = 0; @@ -253,14 +242,17 @@ extern "C" void __hipRegisterVar( for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) { hipDeviceptr_t device_ptr = nullptr; - if((hipSuccess == hipModuleGetGlobal(&device_ptr, &sym_size, modules->at(deviceId), - hostVar)) && (device_ptr != nullptr)) { + amd::Memory* amd_mem_obj = nullptr; + + if((hipSuccess == ihipCreateGlobalVarObj(hostVar, modules->at(deviceId), &amd_mem_obj, + &device_ptr, &sym_size)) + && (device_ptr != nullptr)) { if (static_cast(size) != sym_size) { LogError("[OCL] Size Mismatch with the HSA Symbol retrieved \n"); } - global_vars[deviceId] = PlatformState::RegisteredVar(hostVar, sym_size, device_ptr); + global_vars[deviceId] = PlatformState::RegisteredVar(hostVar, sym_size, device_ptr, amd_mem_obj); } else { LogError("[OCL] __hipRegisterVar cannot find kernel for device \n"); @@ -268,7 +260,6 @@ extern "C" void __hipRegisterVar( } PlatformState::instance().registerVar(hostVar, global_vars); -#endif } extern "C" void __hipUnregisterFatBinary(std::vector* modules) @@ -350,6 +341,30 @@ hipError_t 
hipGetSymbolSize(size_t* sizePtr, const void* symbolName) { HIP_RETURN(hipSuccess); } +hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, hipDeviceptr_t* dptr, size_t* bytes) +{ + HIP_INIT(); + + amd::Program* program = nullptr; + device::Program* dev_program = nullptr; + + /* Get Device Program pointer*/ + program = as_amd(reinterpret_cast(hmod)); + dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + if (dev_program == nullptr) { + HIP_RETURN(hipErrorUnknown); + } + + /* Find the global Symbols */ + if(!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) { + HIP_RETURN(hipErrorUnknown); + } + + HIP_RETURN(hipSuccess); +} + + #if defined(ATI_OS_LINUX) namespace hip_impl { From 22c104f84f3e997201ea51613aec3718ff8690d9 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 4 Apr 2019 18:41:09 -0400 Subject: [PATCH 124/282] P4 to Git Change 1766115 by cpaquot@cpaquot-ocl-lc-lnx on 2019/04/04 18:29:12 SWDEV-145570 - Don't assert when LOG_LEVEL=3 and return value isn't HIP_SUCCESS. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#25 edit --- api/hip/hip_internal.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index de9de24a32..ceff284dda 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -147,7 +147,6 @@ extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); #define HIP_RETURN(ret) \ hip::g_lastError = ret; \ - DebugInfoGuarantee(hip::g_lastError == hipSuccess); \ return hip::g_lastError; \ inline std::ostream& operator<<(std::ostream& os, const dim3& s) { From 8753616c9727ebf74f4141b93fdcb45e35cb0753 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 5 Apr 2019 11:58:25 -0400 Subject: [PATCH 125/282] P4 to Git Change 1766264 by cpaquot@cpaquot-ocl-lc-lnx on 2019/04/05 11:38:18 SWDEV-145570 - [HIP] Use a context with all devices in system for host register hipHostRegister and hipMemcpy 0x10 and 0x20 fail in mGPU systems because we only register the memory on the current device. But in HIP, the registering needs to happen on all devices. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#26 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#50 edit --- api/hip/hip_context.cpp | 8 ++++++++ api/hip/hip_internal.hpp | 1 + api/hip/hip_memory.cpp | 12 +++++++----- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 94699fa38d..02b57f0316 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -34,6 +34,7 @@ thread_local amd::Context* g_context = nullptr; thread_local std::stack g_ctxtStack; thread_local hipError_t g_lastError = hipSuccess; std::once_flag g_ihipInitialized; +amd::Context* host_context = nullptr; std::map g_nullStreams; @@ -56,6 +57,13 @@ void init() { g_devices.push_back(context); } } + + host_context = new amd::Context(devices, amd::Context::Info()); + if (!host_context) return; + + if (host_context && CL_SUCCESS != host_context->create(nullptr)) { + host_context->release(); + } } amd::Context* getCurrentContext() { diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index ceff284dda..f7266ba82f 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -65,6 +65,7 @@ namespace hip { extern std::once_flag g_ihipInitialized; extern thread_local amd::Context* g_context; extern thread_local hipError_t g_lastError; + extern amd::Context* host_context; extern void init(); diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 2c8c30b588..75f45414f0 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -62,11 +62,14 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) return hipErrorInvalidValue; } - if (hip::getCurrentContext()->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { + amd::Context* amdContext = ((flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0)? 
+ hip::host_context : hip::getCurrentContext(); + + if (amdContext->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { return hipErrorMemoryAllocation; } - *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), flags, sizeBytes, hip::getCurrentContext()->devices()[0]->info().memBaseAddrAlign_); + *ptr = amd::SvmBuffer::malloc(*amdContext, flags, sizeBytes, amdContext->devices()[0]->info().memBaseAddrAlign_); if (*ptr == nullptr) { return hipErrorOutOfMemory; } @@ -518,8 +521,7 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hostPtr, sizeBytes, flags); if(hostPtr != nullptr) { - amd::Context *amdContext = hip::getCurrentContext(); - amd::Memory* mem = new (*amdContext) amd::Buffer(*amdContext, CL_MEM_USE_HOST_PTR, sizeBytes); + amd::Memory* mem = new (*hip::host_context) amd::Buffer(*hip::host_context, CL_MEM_USE_HOST_PTR, sizeBytes); if (!mem->create(hostPtr)) { mem->release(); @@ -538,7 +540,7 @@ hipError_t hipHostUnregister(void* hostPtr) { if (amd::SvmBuffer::malloced(hostPtr)) { hip::syncStreams(); hip::getNullStream()->finish(); - amd::SvmBuffer::free(*hip::getCurrentContext(), hostPtr); + amd::SvmBuffer::free(*hip::host_context, hostPtr); HIP_RETURN(hipSuccess); } else { size_t offset = 0; From 0f67004f12d17c941b30695076d2e935f5cb8809 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 5 Apr 2019 14:51:37 -0400 Subject: [PATCH 126/282] P4 to Git Change 1766349 by michliao@hliao-dev-00-hip.rocm-workspace on 2019/04/05 14:24:45 SWDEV-144570 - Fix pointer attribute query. - For memory not registered with runtime, return `hipErrorInvalidValue`. That's the behavior expected to check whether a host buffer is pinned. - Return `hipErrorInvalidDevice` in case a registered memory object cannot find its matching device. RB: http://ocltc.amd.com/reviews/r/17094/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#51 edit --- api/hip/hip_memory.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 75f45414f0..9dfa4d4ddd 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1459,18 +1459,12 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void for (auto& ctx : g_devices) { if (*ctx == memObjCtx) { attributes->device = device; - break; + HIP_RETURN(hipSuccess); } ++device; } - } else { - attributes->memoryType = hipMemoryTypeHost; - attributes->hostPointer = (void*)ptr; - attributes->devicePointer = 0; - attributes->device = -1; - attributes->isManaged = 0; - attributes->allocationFlags = 0; + HIP_RETURN(hipErrorInvalidDevice); } - HIP_RETURN(hipSuccess); + HIP_RETURN(hipErrorInvalidValue); } From ac884527f4fff2e9755d91bf4534a45a2d202671 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 8 Apr 2019 15:23:35 -0400 Subject: [PATCH 127/282] P4 to Git Change 1766904 by michliao@hliao-dev-00-hip.rocm-workspace on 2019/04/08 14:42:29 SWDEV-145570 - Check host_context when matching GPU device. - In CL#1766264, `host_context` is introduced for mGPU support. Need to match that context specially when trying to match GPU device context. The following tests passed: $ python test_dataloader.py TestDictDataLoader.test_pin_memory . ---------------------------------------------------------------------- Ran 1 test in 0.004s OK $ python test_dataloader.py TestDataLoader.test_sequential_pin_memory . ---------------------------------------------------------------------- Ran 1 test in 0.063s OK $ python test_dataloader.py TestDataLoader.test_shuffle_pin_memory . ---------------------------------------------------------------------- Ran 1 test in 0.174s OK $ python test_dataloader.py TestStringDataLoader.test_shuffle_pin_memory . 
---------------------------------------------------------------------- Ran 1 test in 0.104s OK $ python test_torch.py TestTorch.test_pin_memory . ---------------------------------------------------------------------- Ran 1 test in 0.124s OK Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#52 edit --- api/hip/hip_memory.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 9dfa4d4ddd..ab33573d72 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1456,6 +1456,8 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void attributes->allocationFlags = memObj->getMemFlags() >> 16; amd::Context &memObjCtx = memObj->getContext(); + if (*hip::host_context == memObjCtx) + HIP_RETURN(hipSuccess); for (auto& ctx : g_devices) { if (*ctx == memObjCtx) { attributes->device = device; From 881618fc1b09e87efaea6705eaa947114ef5fc44 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 17 Apr 2019 18:38:30 -0400 Subject: [PATCH 128/282] P4 to Git Change 1771336 by cpaquot@cpaquot-ocl-lc-lnx on 2019/04/17 18:19:42 SWDEV-145570 - [HIP] Use staging buffer to copy peer to peer. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#18 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#27 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#53 edit --- api/hip/hip_context.cpp | 14 +++++++----- api/hip/hip_internal.hpp | 1 + api/hip/hip_memory.cpp | 47 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 02b57f0316..47bf651b8c 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -75,21 +75,25 @@ void setCurrentContext(unsigned int index) { g_context = g_devices[index]; } -amd::HostQueue* getNullStream() { - auto stream = g_nullStreams.find(getCurrentContext()); +amd::HostQueue* getNullStream(amd::Context& context) { + auto stream = g_nullStreams.find(&context); if (stream == g_nullStreams.end()) { - amd::Device* device = getCurrentContext()->devices()[0]; + amd::Device* device = context.devices()[0]; cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; - amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, properties, + amd::HostQueue* queue = new amd::HostQueue(context, *device, properties, amd::CommandQueue::RealTimeDisabled, amd::CommandQueue::Priority::Normal); - g_nullStreams[getCurrentContext()] = queue; + g_nullStreams[&context] = queue; return queue; } syncStreams(); return stream->second; } +amd::HostQueue* getNullStream() { + return getNullStream(*getCurrentContext()); +} + }; using namespace hip; diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index f7266ba82f..ff5dc1673b 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -72,6 +72,7 @@ namespace hip { extern amd::Context* getCurrentContext(); extern void setCurrentContext(unsigned int index); + extern amd::HostQueue* getNullStream(amd::Context&); extern amd::HostQueue* getNullStream(); extern void syncStreams(); diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index ab33573d72..0ae9df3fc3 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -103,6 +103,53 @@ 
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList, *srcMemory->asBuffer(), sOffset, sizeBytes, dst); } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + static const uint hostMem = CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR; + if ((kind == hipMemcpyDeviceToDevice || + kind == hipMemcpyDefault) && + ((srcMemory->getMemFlags() & hostMem) == 0) && + ((dstMemory->getMemFlags() & hostMem) == 0)) { + amd::Device* queueDevice = &queue.device(); + if (queueDevice != srcMemory->getContext().devices()[0]) { + void* staging = nullptr; + ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER); + ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, *hip::getNullStream(srcMemory->getContext())); + ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, queue); + hipFree(staging); +#if 0 + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); +#endif + return hipSuccess; + } + if (queueDevice != dstMemory->getContext().devices()[0]) { + void* staging = nullptr; + ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER); + ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, queue); + ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, *hip::getNullStream(dstMemory->getContext())); + hipFree(staging); +#if 0 + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + 
command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); +#endif + return hipSuccess; + } + } command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, *srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, dOffset, sizeBytes); } From 17b5ecc6dc417683de8a65968c3a22dfbc72f845 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 18 Apr 2019 12:15:25 -0400 Subject: [PATCH 129/282] P4 to Git Change 1771752 by cpaquot@cpaquot-ocl-lc-lnx on 2019/04/18 11:54:18 SWDEV-145570 - [HIP] Use isAsync flag for async cases. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#54 edit --- api/hip/hip_memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 0ae9df3fc3..9bf326386d 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -113,7 +113,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin void* staging = nullptr; ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER); ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, *hip::getNullStream(srcMemory->getContext())); - ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, queue); + ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, queue, isAsync); hipFree(staging); #if 0 amd::Coord3D srcOffset(sOffset, 0, 0); @@ -133,7 +133,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin void* staging = nullptr; ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER); ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, queue); - ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, *hip::getNullStream(dstMemory->getContext())); + ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, *hip::getNullStream(dstMemory->getContext()), isAsync); hipFree(staging); #if 0 amd::Coord3D srcOffset(sOffset, 0, 0); From 7a1d02c8a1071850b227fb5a509870a5508d8333 Mon Sep 
17 00:00:00 2001 From: foreman Date: Fri, 19 Apr 2019 09:49:37 -0400 Subject: [PATCH 130/282] P4 to Git Change 1772193 by mshivama@mshivama_tf on 2019/04/19 09:39:39 SWDEV-187020 - Basic "Hello World" Tensorflow program fails to execute on HIP/VDI/PAL/LINUX the API hipDeviceGetName() should pass the length of device name as argument to strncpy() instead of total (dest) memory size which is being passed to it by its caller. Also, bit of a code clean-up. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#18 edit --- api/hip/hip_device.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 6e103e2d40..ae4ee89ccf 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -110,15 +110,19 @@ hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { HIP_RETURN(hipErrorInvalidDevice); } - if (name == nullptr) { + if (name == nullptr || len <= 0) { HIP_RETURN(hipErrorInvalidValue); } auto* deviceHandle = g_devices[device]->devices()[0]; const auto& info = deviceHandle->info(); + const auto nameLen = ::strlen(info.boardName_); - len = ((cl_uint)len < ::strlen(info.boardName_)) ? len : 128; - ::strncpy(name, info.boardName_, len); + if (nameLen > (cl_uint)len) { + HIP_RETURN(hipErrorInvalidValue); + } + + ::strncpy(name, info.boardName_, nameLen); HIP_RETURN(hipSuccess); } From 84679ca20eb1e5780561ccc12a68fff921a27e95 Mon Sep 17 00:00:00 2001 From: foreman Date: Sun, 21 Apr 2019 21:50:04 -0400 Subject: [PATCH 131/282] P4 to Git Change 1772349 by mshivama@mshivama_tf on 2019/04/20 07:35:24 SWDEV-187020 - Basic "Hello World" Tensorflow program fails to execute on HIP/VDI/PAL/LINUX Make sure that the size of `dest` is big enough to hold `src` including trailing zero byte Affected files ... ...
//depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#19 edit --- api/hip/hip_device.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index ae4ee89ccf..a3019cc7d7 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -118,11 +118,13 @@ hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { const auto& info = deviceHandle->info(); const auto nameLen = ::strlen(info.boardName_); - if (nameLen > (cl_uint)len) { + // Make sure that the size of `dest` is big enough to hold `src` including + // trailing zero byte + if (nameLen > (cl_uint)(len - 1)) { HIP_RETURN(hipErrorInvalidValue); } - ::strncpy(name, info.boardName_, nameLen); + ::strncpy(name, info.boardName_, (nameLen + 1)); HIP_RETURN(hipSuccess); } From 80fb2806434cc4c5d36c38e85687e8942f0a4e6e Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 22 Apr 2019 15:19:24 -0400 Subject: [PATCH 132/282] P4 to Git Change 1772785 by cpaquot@cpaquot-ocl-lc-lnx on 2019/04/22 14:31:18 SWDEV-144570 - [HIP] Lazy build kernels to avoid overfilling dev memory. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#28 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#27 edit --- api/hip/hip_internal.hpp | 23 ++++--- api/hip/hip_platform.cpp | 133 ++++++++++++++++++++------------------- 2 files changed, 82 insertions(+), 74 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index ff5dc1673b..9606cd47d1 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -102,23 +102,30 @@ class PlatformState { public: struct RegisteredVar { public: - RegisteredVar(): hostVar_(nullptr), size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {} - RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr, amd::Memory* amd_mem_obj); + RegisteredVar(): size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {} ~RegisteredVar() {} hipDeviceptr_t getdeviceptr() const { return devicePtr_; }; size_t getvarsize() const { return size_; }; - private: - char* hostVar_; // Variable name in host code size_t size_; // Size of the variable hipDeviceptr_t devicePtr_; //Device Memory Address of the variable. 
amd::Memory* amd_mem_obj_; }; + struct DeviceFunction { + std::string deviceName; + std::vector< std::pair< hipModule_t, bool > >* modules; + std::vector functions; + }; + struct DeviceVar { + std::string hostVar; + std::vector< std::pair< hipModule_t, bool > >* modules; + std::vector rvars; + }; private: - std::unordered_map > functions_; - std::unordered_map > vars_; + std::unordered_map functions_; + std::unordered_map vars_; static PlatformState* platform_; @@ -129,8 +136,8 @@ public: return *platform_; } - void registerVar(const char* hostvar, const std::vector& rvar); - void registerFunction(const void* hostFunction, const std::vector& funcs); + void registerVar(const void* hostvar, const DeviceVar& var); + void registerFunction(const void* hostFunction, const DeviceFunction& func); hipFunction_t getFunc(const void* hostFunction, int deviceId); bool getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 36a79cf35f..54a8806d54 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -64,7 +64,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, hipDeviceptr_t* dptr, size_t* bytes); -extern "C" std::vector* __hipRegisterFatBinary(const void* data) +extern "C" std::vector< std::pair >* __hipRegisterFatBinary(const void* data) { HIP_INIT(); @@ -80,7 +80,7 @@ extern "C" std::vector* __hipRegisterFatBinary(const void* data) return nullptr; } - auto programs = new std::vector{g_devices.size()}; + auto programs = new std::vector< std::pair >{g_devices.size()}; const auto obheader = reinterpret_cast(fbwrapper->binary); const auto* desc = &obheader->desc[0]; @@ -115,9 +115,8 @@ extern "C" std::vector* __hipRegisterFatBinary(const void* data) if (program == nullptr) { return nullptr; } - if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], 
image, size) && - CL_SUCCESS == program->build(ctx->devices(), nullptr, nullptr, nullptr)) { - programs->at(dev) = reinterpret_cast(as_cl(program)); + if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], image, size)) { + programs->at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); } } } @@ -125,35 +124,45 @@ extern "C" std::vector* __hipRegisterFatBinary(const void* data) return programs; } -PlatformState::RegisteredVar::RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr, - amd::Memory* amd_mem_obj) : hostVar_(hostVar), - size_(size), devicePtr_(devicePtr), - amd_mem_obj_(amd_mem_obj) { - - /* Add the memory to the MemObjMap */ - amd::MemObjMap::AddMemObj(devicePtr_, amd_mem_obj_); -} - -void PlatformState::registerVar(const char* hostvar, - const std::vector& rvar) { +void PlatformState::registerVar(const void* hostvar, + const DeviceVar& rvar) { amd::ScopedLock lock(lock_); vars_.insert(std::make_pair(hostvar, rvar)); } void PlatformState::registerFunction(const void* hostFunction, - const std::vector& funcs) { + const DeviceFunction& func) { amd::ScopedLock lock(lock_); - functions_.insert(std::make_pair(hostFunction, funcs)); + functions_.insert(std::make_pair(hostFunction, func)); } hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { amd::ScopedLock lock(lock_); const auto it = functions_.find(hostFunction); if (it != functions_.cend()) { - return it->second[deviceId]; - } else { - return nullptr; + PlatformState::DeviceFunction& devFunc = it->second; + if (devFunc.functions[deviceId] == 0) { + hipModule_t module = (*devFunc.modules)[deviceId].first; + if (!(*devFunc.modules)[deviceId].second) { + amd::Program* program = as_amd(reinterpret_cast(module)); + if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + return nullptr; + } + (*devFunc.modules)[deviceId].second = true; + } + hipFunction_t function = nullptr; + if (hipSuccess == 
hipModuleGetFunction(&function, module, devFunc.deviceName.c_str()) && + function != nullptr) { + devFunc.functions[deviceId] = function; + } + else { + // tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for" + // " device %d\n", deviceName, deviceId); + } + } + return devFunc.functions[deviceId]; } + return nullptr; } bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, @@ -161,8 +170,32 @@ bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, amd::ScopedLock lock(lock_); const auto it = vars_.find(hostVar); if (it != vars_.cend()) { - *size_ptr = it->second[deviceId].getvarsize(); - *dev_ptr = it->second[deviceId].getdeviceptr(); + DeviceVar& dvar = it->second; + if (dvar.rvars[deviceId].getdeviceptr() == nullptr) { + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + amd::Memory* amd_mem_obj = nullptr; + + if (!(*dvar.modules)[deviceId].second) { + amd::Program* program = as_amd(reinterpret_cast((*dvar.modules)[deviceId].first)); + if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + return false; + } + (*dvar.modules)[deviceId].second = true; + } + if((hipSuccess == ihipCreateGlobalVarObj(dvar.hostVar.c_str(), (*dvar.modules)[deviceId].first, + &amd_mem_obj, &device_ptr, &sym_size)) + && (device_ptr != nullptr)) { + dvar.rvars[deviceId].size_ = sym_size; + dvar.rvars[deviceId].devicePtr_ = device_ptr; + dvar.rvars[deviceId].amd_mem_obj_ = amd_mem_obj; + amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj); + } else { + LogError("[HIP] __hipRegisterVar cannot find kernel for device \n"); + } + } + *size_ptr = dvar.rvars[deviceId].getvarsize(); + *dev_ptr = dvar.rvars[deviceId].getdeviceptr(); return true; } else { return false; @@ -190,7 +223,7 @@ void PlatformState::popExec(ihipExec_t& exec) { } extern "C" void __hipRegisterFunction( - std::vector* modules, + std::vector >* modules, const void* hostFunction, char* deviceFunction, const char* deviceName, @@ -203,21 
+236,9 @@ extern "C" void __hipRegisterFunction( { HIP_INIT(); - std::vector functions{g_devices.size()}; + PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{ g_devices.size() }}; - for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) { - hipFunction_t function = nullptr; - if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName) && - function != nullptr) { - functions[deviceId] = function; - } - else { - // tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for" - // " device %d\n", deviceName, deviceId); - } - } - - PlatformState::instance().registerFunction(hostFunction, functions); + PlatformState::instance().registerFunction(hostFunction, func); } // Registers a device-side global variable. @@ -226,7 +247,7 @@ extern "C" void __hipRegisterFunction( // track of the value of the device side global variable between kernel // executions. extern "C" void __hipRegisterVar( - std::vector* modules, // The device modules containing code object + std::vector >* modules, // The device modules containing code object char* var, // The shadow variable in host code char* hostVar, // Variable name in host code char* deviceVar, // Variable name in device code @@ -237,38 +258,19 @@ extern "C" void __hipRegisterVar( { HIP_INIT(); - size_t sym_size = 0; - std::vector global_vars{g_devices.size()}; + PlatformState::DeviceVar dvar{ std::string{ hostVar }, modules, + std::vector{ g_devices.size() } }; - for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) { - hipDeviceptr_t device_ptr = nullptr; - amd::Memory* amd_mem_obj = nullptr; - - if((hipSuccess == ihipCreateGlobalVarObj(hostVar, modules->at(deviceId), &amd_mem_obj, - &device_ptr, &sym_size)) - && (device_ptr != nullptr)) { - - if (static_cast(size) != sym_size) { - LogError("[OCL] Size Mismatch with the HSA Symbol retrieved \n"); - } - - global_vars[deviceId] = PlatformState::RegisteredVar(hostVar, sym_size, device_ptr, amd_mem_obj); 
- - } else { - LogError("[OCL] __hipRegisterVar cannot find kernel for device \n"); - } - } - - PlatformState::instance().registerVar(hostVar, global_vars); + PlatformState::instance().registerVar(hostVar, dvar); } -extern "C" void __hipUnregisterFatBinary(std::vector* modules) +extern "C" void __hipUnregisterFatBinary(std::vector< std::pair >* modules) { HIP_INIT(); - std::for_each(modules->begin(), modules->end(), [](hipModule_t module){ - if (module != nullptr) { - as_amd(reinterpret_cast(module))->release(); + std::for_each(modules->begin(), modules->end(), [](std::pair module){ + if (module.first != nullptr) { + as_amd(reinterpret_cast(module.first))->release(); } }); delete modules; @@ -355,7 +357,6 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor if (dev_program == nullptr) { HIP_RETURN(hipErrorUnknown); } - /* Find the global Symbols */ if(!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) { HIP_RETURN(hipErrorUnknown); From 9011aa9282b5bb19edd16a42bf9d112cca3f2ce2 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 26 Apr 2019 15:15:48 -0400 Subject: [PATCH 133/282] P4 to Git Change 1775366 by kjayapra@99_HIPWS_SLV_CHECKIN on 2019/04/26 14:48:23 SWDEV-144570 - hipFuncGetAttributes() API changes. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#29 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#23 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#28 edit --- api/hip/hip_internal.hpp | 1 + api/hip/hip_module.cpp | 6 ++++- api/hip/hip_platform.cpp | 50 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 9606cd47d1..25fe6f9537 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -140,6 +140,7 @@ public: void registerFunction(const void* hostFunction, const DeviceFunction& func); hipFunction_t getFunc(const void* hostFunction, int deviceId); + bool getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr); bool getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, size_t* size_ptr); void setupArgument(const void *arg, size_t size, size_t offset); diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index b482f39b77..79c5733c74 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -149,7 +149,11 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { HIP_INIT_API(attr, func); - HIP_RETURN(hipErrorInvalidDeviceFunction); + if (!PlatformState::instance().getFuncAttr(func, attr)) { + HIP_RETURN(hipErrorUnknown); + } + + HIP_RETURN(hipSuccess); } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 54a8806d54..ec34db8c0a 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -136,6 +136,24 @@ void PlatformState::registerFunction(const void* hostFunction, functions_.insert(std::make_pair(hostFunction, func)); } +bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFuncAttributes* func_attr) { + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + const auto it = dev_program->kernels().find(std::string(func_name)); + if (it == dev_program->kernels().cend()) { + return false; + } + + const device::Kernel::WorkGroupInfo* wginfo = 
it->second->workGroupInfo(); + func_attr->localSizeBytes = wginfo->localMemSize_; + func_attr->sharedSizeBytes = wginfo->size_; + func_attr->maxThreadsPerBlock = wginfo->wavefrontSize_; + func_attr->numRegs = wginfo->usedVGPRs_; + + return true; +} + hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { amd::ScopedLock lock(lock_); const auto it = functions_.find(hostFunction); @@ -165,6 +183,36 @@ hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { return nullptr; } +bool PlatformState::getFuncAttr(const void* hostFunction, + hipFuncAttributes* func_attr) { + + if (func_attr == nullptr) { + return false; + } + + const auto it = functions_.find(hostFunction); + if (it == functions_.cend()) { + return false; + } + + PlatformState::DeviceFunction& devFunc = it->second; + int deviceId = ihipGetDevice(); + + /* If module has not been initialized yet, build the kernel now*/ + if (!(*devFunc.modules)[deviceId].second) { + if (nullptr == PlatformState::instance().getFunc(hostFunction, deviceId)) { + return false; + } + } + + amd::Program* program = as_amd(reinterpret_cast((*devFunc.modules)[deviceId].first)); + if (!ihipGetFuncAttributes(devFunc.deviceName.c_str(), program, func_attr)) { + return false; + } + return true; +} + + bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, size_t* size_ptr) { amd::ScopedLock lock(lock_); @@ -235,9 +283,7 @@ extern "C" void __hipRegisterFunction( int* wSize) { HIP_INIT(); - PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{ g_devices.size() }}; - PlatformState::instance().registerFunction(hostFunction, func); } From 345f3f989a996093f01821c001056c653f0e4b24 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 29 Apr 2019 13:58:26 -0400 Subject: [PATCH 134/282] P4 to Git Change 1775995 by gandryey@gera-w8 on 2019/04/29 13:46:53 SWDEV-79445 - OCL generic changes and code clean-up - Enable P2P extension for PAL path, 
currently it's staging copy only - Fix P2P staging copy Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#55 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#243 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#336 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#130 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#77 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#131 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#59 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#124 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#36 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#74 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#92 edit --- api/hip/hip_memory.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 9bf326386d..5190bd4fdb 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -110,12 +110,6 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin ((dstMemory->getMemFlags() & hostMem) == 0)) { amd::Device* queueDevice = &queue.device(); if (queueDevice != srcMemory->getContext().devices()[0]) { - void* staging = nullptr; - ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER); - ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, *hip::getNullStream(srcMemory->getContext())); - ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, queue, isAsync); - hipFree(staging); -#if 0 amd::Coord3D srcOffset(sOffset, 0, 0); amd::Coord3D dstOffset(dOffset, 0, 0); amd::Coord3D copySize(sizeBytes, 1, 1); @@ -126,16 +120,9 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin command->awaitCompletion(); } command->release(); -#endif 
return hipSuccess; } if (queueDevice != dstMemory->getContext().devices()[0]) { - void* staging = nullptr; - ihipMalloc(&staging, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER); - ihipMemcpy(staging, src, sizeBytes, hipMemcpyDeviceToHost, queue); - ihipMemcpy(dst, staging, sizeBytes, hipMemcpyHostToDevice, *hip::getNullStream(dstMemory->getContext()), isAsync); - hipFree(staging); -#if 0 amd::Coord3D srcOffset(sOffset, 0, 0); amd::Coord3D dstOffset(dOffset, 0, 0); amd::Coord3D copySize(sizeBytes, 1, 1); @@ -146,7 +133,6 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin command->awaitCompletion(); } command->release(); -#endif return hipSuccess; } } From ba93e1ae21c49e0e5deb8dfd6a1cb91045f2d5eb Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 1 May 2019 18:43:47 -0400 Subject: [PATCH 135/282] P4 to Git Change 1777158 by cpaquot@cpaquot-ocl-lc-lnx on 2019/05/01 17:12:01 SWDEV-188219 - [HIP] Add hipStreamCreateWithPriority. NOP right now. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#14 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#13 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#14 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#17 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.hpp#21 edit --- api/hip/hip_device_runtime.cpp | 5 ----- api/hip/hip_hcc.def.in | 2 ++ api/hip/hip_hcc.map.in | 2 ++ api/hip/hip_stream.cpp | 33 +++++++++++++++++++++++++++++---- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index bfaf5558d3..6bb4261964 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -343,11 +343,6 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { HIP_RETURN(hipSuccess); } -hipError_t hipDeviceGetStreamPriorityRange ( int* leastPriority, int* greatestPriority ) { - assert(0); - HIP_RETURN(hipSuccess); -} - hipError_t hipDeviceReset ( void ) { HIP_INIT_API(); diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 01e58aef73..6f2295dbf7 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -24,6 +24,7 @@ hipDeviceGet hipDeviceGetAttribute hipDeviceGetByPCIBusId hipDeviceGetCacheConfig +hipDeviceGetStreamPriorityRange hipDeviceGetLimit hipDeviceGetName hipDeviceGetPCIBusId @@ -123,6 +124,7 @@ hipSetDeviceFlags hipStreamAddCallback hipStreamCreate hipStreamCreateWithFlags +hipStreamCreateWithPriority hipStreamDestroy hipStreamGetFlags hipStreamQuery diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 64d55460a9..b29d81f2f6 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -25,6 +25,7 @@ global: hipDeviceGetAttribute; hipDeviceGetByPCIBusId; hipDeviceGetCacheConfig; + hipDeviceGetStreamPriorityRange; hipDeviceGetLimit; hipDeviceGetName; hipDeviceGetPCIBusId; @@ -123,6 +124,7 @@ global: hipStreamAddCallback; hipStreamCreate; hipStreamCreateWithFlags; + hipStreamCreateWithPriority; hipStreamDestroy; hipStreamGetFlags; hipStreamQuery; diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 3331688424..1e4c820a44 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ 
-63,13 +63,13 @@ void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, delete cbo; } -static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { +static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd::CommandQueue::Priority priority) { amd::Device* device = hip::getCurrentContext()->devices()[0]; cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentContext(), *device, properties, amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); + priority); if (queue == nullptr || !queue->create()) { return hipErrorOutOfMemory; @@ -92,13 +92,38 @@ static hipError_t ihipStreamCreateWithFlags(hipStream_t *stream, unsigned int fl hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { HIP_INIT_API(stream, flags); - HIP_RETURN(ihipStreamCreateWithFlags(stream, flags)); + HIP_RETURN(ihipStreamCreate(stream, flags, amd::CommandQueue::Priority::Normal)); } hipError_t hipStreamCreate(hipStream_t *stream) { HIP_INIT_API(stream); - HIP_RETURN(ihipStreamCreateWithFlags(stream, hipStreamDefault)); + HIP_RETURN(ihipStreamCreate(stream, hipStreamDefault, amd::CommandQueue::Priority::Normal)); +} + +hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { + HIP_INIT_API(stream, flags, priority); + + if (priority > static_cast(amd::CommandQueue::Priority::High)) { + priority = static_cast(amd::CommandQueue::Priority::High); + } else if (priority < static_cast(amd::CommandQueue::Priority::Normal)) { + priority = static_cast(amd::CommandQueue::Priority::Normal); + } + + return HIP_RETURN(ihipStreamCreate(stream, flags, static_cast(priority))); +} + +hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { + HIP_INIT_API(leastPriority, greatestPriority); + + if (leastPriority != nullptr) { + *leastPriority = 
static_cast(amd::CommandQueue::Priority::Normal); + } + if (greatestPriority != nullptr) { + // Only report one kind of priority for now. + *greatestPriority = static_cast(amd::CommandQueue::Priority::Normal); + } + return HIP_RETURN(hipSuccess); } hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { From 3ad9c3f98420ba322d8d7b9278f98fe19f810ddd Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 1 May 2019 19:53:29 -0400 Subject: [PATCH 136/282] P4 to Git Change 1777225 by cpaquot@cpaquot-ocl-lc-lnx on 2019/05/01 18:47:25 SWDEV-188233 - [HIP] Implemented hipGetChannelDesc. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#11 edit --- api/hip/hip_texture.cpp | 46 +++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 7fca3f506d..f1ed3580b3 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -31,6 +31,7 @@ namespace hip { uint32_t samplerSRD[HIP_SAMPLER_OBJECT_SIZE_DWORD]; amd::Image* image; amd::Sampler* sampler; + hipResourceDesc resDesc; }; }; @@ -190,7 +191,7 @@ amd::Sampler* fillSamplerDescriptor(enum hipTextureAddressMode addressMode, return sampler; } -hip::TextureObject* ihipCreateTextureObject(amd::Image& image, amd::Sampler& sampler) { +hip::TextureObject* ihipCreateTextureObject(const hipResourceDesc& resDesc, amd::Image& image, amd::Sampler& sampler) { hip::TextureObject* texture; ihipMalloc(reinterpret_cast(&texture), sizeof(hip::TextureObject), CL_MEM_SVM_FINE_GRAIN_BUFFER); @@ -206,6 +207,8 @@ hip::TextureObject* ihipCreateTextureObject(amd::Image& image, amd::Sampler& sam memcpy(texture->samplerSRD, devSampler->hwState(), sizeof(uint32_t)*HIP_SAMPLER_OBJECT_SIZE_DWORD); texture->sampler = &sampler; + memcpy(&texture->resDesc, &resDesc, sizeof(hipResourceDesc)); + return texture; } @@ -291,7 +294,7 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou 
amd::Sampler* sampler = fillSamplerDescriptor(pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords); - *pTexObject = reinterpret_cast(ihipCreateTextureObject(*image, *sampler)); + *pTexObject = reinterpret_cast(ihipCreateTextureObject(*pResDesc, *image, *sampler)); HIP_RETURN(hipSuccess); } @@ -317,7 +320,11 @@ hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, hipTextureObject_t textureObject) { HIP_INIT_API(pResDesc, textureObject); - assert(0 && "Unimplemented"); + hip::TextureObject* texture = reinterpret_cast(textureObject); + + if (pResDesc != nullptr && texture != nullptr) { + memcpy(pResDesc, &(texture->resDesc), sizeof(hipResourceDesc)); + } HIP_RETURN(hipErrorUnknown); } @@ -372,7 +379,32 @@ hipError_t ihipBindTexture(cl_mem_object_type type, ihipDestroyTextureObject(reinterpret_cast(tex->textureObject)); } amd::Sampler* sampler = fillSamplerDescriptor(tex->addressMode[0], tex->filterMode, tex->normalized); - tex->textureObject = reinterpret_cast(ihipCreateTextureObject(*image, *sampler)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(hipResourceDesc)); + switch (type) { + case CL_MEM_OBJECT_IMAGE1D: + resDesc.resType = hipResourceTypeLinear; + resDesc.res.linear.devPtr = const_cast(devPtr); + resDesc.res.linear.desc = *desc; + resDesc.res.linear.sizeInBytes = image->getSize(); + break; + case CL_MEM_OBJECT_IMAGE2D: + resDesc.resType = hipResourceTypePitch2D; + resDesc.res.pitch2D.devPtr = const_cast(devPtr); + resDesc.res.pitch2D.desc = *desc; + resDesc.res.pitch2D.width = width; + resDesc.res.pitch2D.height = height; + resDesc.res.pitch2D.pitchInBytes = pitch; + break; + default: + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = nullptr; + break; + } + + tex->textureObject = reinterpret_cast(ihipCreateTextureObject(resDesc, *image, *sampler)); + return hipSuccess; } return hipErrorUnknown; @@ -440,9 +472,11 @@ hipError_t hipUnbindTexture(const textureReference* tex) { hipError_t 
hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) { HIP_INIT_API(desc, array); - assert(0 && "Unimplemented"); + if (desc != nullptr) { + *desc = array->desc; + } - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipSuccess); } hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex) { From 09920e602470645cbfe29b931bd7208c10ce6321 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 6 May 2019 17:20:00 -0400 Subject: [PATCH 137/282] P4 to Git Change 1778949 by michliao@hliao-dev-00-hip.rocm-workspace on 2019/05/06 16:36:53 SWDEV-144570 - Fix build failure after switching to gcc-7 - Hex representation of float needs gnu++11. We'd better not relying on that. Change the float in hex format into alternative representation. RBT: http://ocltc.amd.com/reviews/r/17300/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#29 edit --- api/hip/hip_platform.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index ec34db8c0a..5203c8123a 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -642,9 +642,9 @@ static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l // half float, the f16 is in the low 16 bits of the input argument static inline float __convert_half_to_float(std::uint32_t a) noexcept { std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U; - std::uint32_t v = f32_as_u32(u32_as_f32(u) * 0x1.0p+112f) + 0x38000000U; + std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U; u = (a & 0x7fff) != 0 ? 
v : u; - return u32_as_f32(u) * 0x1.0p-112f; + return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/; } // float half with nearest even rounding From d41643c39f136d98ff1cb9dfafb89c28012c6e19 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 6 May 2019 17:43:06 -0400 Subject: [PATCH 138/282] P4 to Git Change 1778971 by cpaquot@cpaquot-ocl-lc-lnx on 2019/05/06 17:03:12 SWDEV-187125 - [HIP] Protect hip::Event with lock and add threadId to logs. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#30 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#18 edit --- api/hip/hip_event.cpp | 146 +++++++++++++++++++++++++++------------ api/hip/hip_event.hpp | 14 +++- api/hip/hip_internal.hpp | 2 +- api/hip/hip_module.cpp | 16 +---- api/hip/hip_stream.cpp | 17 +---- 5 files changed, 121 insertions(+), 74 deletions(-) diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 317e0edc90..aa61443d2f 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -24,6 +24,98 @@ THE SOFTWARE. #include "hip_event.hpp" +namespace hip { + +bool Event::ready() { + event_->notifyCmdQueue(); + + return (event_->status() == CL_COMPLETE); +} + +hipError_t Event::query() { + amd::ScopedLock lock(lock_); + + if (event_ == nullptr) { + return hipErrorInvalidResourceHandle; + } + + return ready() ? 
hipSuccess : hipErrorNotReady; +} + +hipError_t Event::synchronize() { + amd::ScopedLock lock(lock_); + + if (event_ == nullptr) { + return hipErrorInvalidResourceHandle; + } + + event_->awaitCompletion(); + + return hipSuccess; +} + +hipError_t Event::elapsedTime(Event& eStop, float& ms) { + amd::ScopedLock startLock(lock_); + amd::ScopedLock stopLock(eStop.lock_); + + if (event_ == nullptr || + eStop.event_ == nullptr) { + return hipErrorInvalidResourceHandle; + } + + if ((flags | eStop.flags) & hipEventDisableTiming) { + return hipErrorInvalidResourceHandle; + } + + if (!ready() || !eStop.ready()) { + return hipErrorNotReady; + } + + ms = static_cast(static_cast(eStop.event_->profilingInfo().submitted_ - + event_->profilingInfo().submitted_))/1000000.f; + + return hipSuccess; +} + +hipError_t Event::streamWait(hipStream_t stream, uint flags) { + amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); + + if (stream_ == hostQueue) return hipSuccess; + + amd::ScopedLock lock(lock_); + + cl_event clEvent = as_cl(event_); + + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, *hostQueue, 1, &clEvent); + if (err != CL_SUCCESS) { + return hipErrorUnknown; + } + + amd::Command* command = new amd::Marker(*hostQueue, true, eventWaitList); + if (command == NULL) { + return hipErrorOutOfMemory; + } + command->enqueue(); + command->release(); + + return hipSuccess; +} + +void Event::addMarker(amd::HostQueue* queue, amd::Command* command) { + amd::ScopedLock lock(lock_); + + stream_ = queue; + + if (event_ != nullptr) { + event_->release(); + } + + event_ = &command->event(); +} + +} + hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { if (event == nullptr) { return hipErrorInvalidValue; @@ -58,13 +150,7 @@ hipError_t ihipEventQuery(hipEvent_t event) { hip::Event* e = reinterpret_cast(event); - if (e->event_ == nullptr) { - return hipErrorInvalidResourceHandle; - } - - 
e->event_->notifyCmdQueue(); - - return (e->event_->status() == CL_COMPLETE) ? hipSuccess : hipErrorNotReady; + return e->query(); } hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { @@ -98,31 +184,14 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { HIP_RETURN(hipErrorInvalidResourceHandle); } - hip::Event* eStart = reinterpret_cast(start); - hip::Event* eStop = reinterpret_cast(stop); - - if (eStart->event_ == nullptr || - eStop->event_ == nullptr) { - HIP_RETURN(hipErrorInvalidResourceHandle); - } - - if ((eStart->flags | eStop->flags) & hipEventDisableTiming) { - HIP_RETURN(hipErrorInvalidResourceHandle); - } - - if (ihipEventQuery(start) == hipErrorNotReady || - ihipEventQuery(stop) == hipErrorNotReady) { - HIP_RETURN(hipErrorNotReady); - } - if (ms == nullptr) { HIP_RETURN(hipErrorInvalidValue); } - *ms = static_cast(static_cast(eStop->event_->profilingInfo().submitted_ - - eStart->event_->profilingInfo().submitted_))/1000000.f; + hip::Event* eStart = reinterpret_cast(start); + hip::Event* eStop = reinterpret_cast(stop); - HIP_RETURN(hipSuccess); + return HIP_RETURN(eStart->elapsedTime(*eStop, *ms)); } hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { @@ -134,24 +203,21 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { hip::Event* e = reinterpret_cast(event); + amd::HostQueue* queue; if (stream == nullptr) { - e->stream_ = hip::getNullStream(); + queue = hip::getNullStream(); } else { - e->stream_ = as_amd(reinterpret_cast(stream))->asHostQueue(); + queue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - amd::Command* command = e->stream_->getLastQueuedCommand(true); + amd::Command* command = queue->getLastQueuedCommand(true); if (command == nullptr) { - command = new amd::Marker(*e->stream_, true); + command = new amd::Marker(*queue, true); command->enqueue(); } - if (e->event_ != nullptr) { - e->event_->release(); - } - - e->event_ = &command->event(); + 
e->addMarker(queue, command); HIP_RETURN(hipSuccess); } @@ -165,13 +231,7 @@ hipError_t hipEventSynchronize(hipEvent_t event) { hip::Event* e = reinterpret_cast(event); - if (e->event_ == nullptr) { - HIP_RETURN(hipErrorInvalidResourceHandle); - } - - e->event_->awaitCompletion(); - - HIP_RETURN(hipSuccess); + HIP_RETURN(e->synchronize()); } hipError_t hipEventQuery(hipEvent_t event) { diff --git a/api/hip/hip_event.hpp b/api/hip/hip_event.hpp index 19f93a5c27..953665ed5b 100644 --- a/api/hip/hip_event.hpp +++ b/api/hip/hip_event.hpp @@ -24,6 +24,7 @@ THE SOFTWARE. #define HIP_EVENT_H #include "hip_internal.hpp" +#include "thread/monitor.hpp" namespace hip { @@ -38,7 +39,7 @@ public: class Event { public: - Event(unsigned int flags) : flags(flags), stream_(nullptr), event_(nullptr) {} + Event(unsigned int flags) : flags(flags), lock_("hipEvent_t"), stream_(nullptr), event_(nullptr) {} ~Event() { if (event_ != nullptr) { event_->release(); @@ -46,8 +47,19 @@ public: } unsigned int flags; + hipError_t query(); + hipError_t synchronize(); + hipError_t elapsedTime(Event& stop, float& ms); + hipError_t streamWait(hipStream_t stream, uint flags); + + void addMarker(amd::HostQueue* queue, amd::Command* command); + +private: + amd::Monitor lock_; amd::HostQueue* stream_; amd::Event* event_; + + bool ready(); }; }; diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 25fe6f9537..2787e2b187 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -49,7 +49,7 @@ typedef struct ihipIpcMemHandle_st { // This macro should be called at the beginning of every HIP API. #define HIP_INIT_API(...) 
\ - LogPrintfInfo("%s ( %s )", __func__, ToString( __VA_ARGS__ ).c_str()); \ + LogPrintfInfo("[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ amd::Thread* thread = amd::Thread::current(); \ if (!CL_CHECK_THREAD(thread)) { \ HIP_RETURN(hipErrorOutOfMemory); \ diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 79c5733c74..ad13ab5ff2 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -219,15 +219,9 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, } if(startEvent != nullptr) { - eStart->stream_ = queue; - amd::Command* startCommand = new hip::TimerMarker(*eStart->stream_); + amd::Command* startCommand = new hip::TimerMarker(*queue); startCommand->enqueue(); - - if (eStart->event_ != nullptr) { - eStart->event_->release(); - } - - eStart->event_ = &startCommand->event(); + eStart->addMarker(queue, startCommand); } amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(*queue, waitList, *kernel, ndrange, sharedMemBytes); @@ -244,11 +238,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, command->enqueue(); if(stopEvent != nullptr) { - if (eStop->event_ != nullptr) { - eStop->event_->release(); - } - eStop->stream_ = queue; - eStop->event_ = &command->event(); + eStop->addMarker(queue, command); command->retain(); } diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 1e4c820a44..72ede63ecb 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -192,24 +192,9 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int HIP_RETURN(hipErrorInvalidResourceHandle); } - amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); hip::Event* e = reinterpret_cast(event); - cl_event clEvent = as_cl(e->event_); - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, *hostQueue, 1, &clEvent); - if (err != CL_SUCCESS) { - HIP_RETURN(hipErrorUnknown); - } - - amd::Command* 
command = new amd::Marker(*hostQueue, true, eventWaitList); - if (command == NULL) { - HIP_RETURN(hipErrorOutOfMemory); - } - command->enqueue(); - command->release(); - - HIP_RETURN(hipSuccess); + return HIP_RETURN(e->streamWait(stream, flags)); } hipError_t hipStreamQuery(hipStream_t stream) { From 1e856beba96305fa434722ec36bac39b54a16c1e Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 6 May 2019 18:02:54 -0400 Subject: [PATCH 139/282] P4 to Git Change 1778985 by cpaquot@cpaquot-ocl-lc-lnx on 2019/05/06 17:21:57 SWDEV-188668 - [HIP] Implemented hipExtModuleLaunchKernel. Ignoring the flag for now. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#14 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#15 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#25 edit --- api/hip/hip_hcc.def.in | 1 + api/hip/hip_hcc.map.in | 1 + api/hip/hip_module.cpp | 17 +++++++++++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 6f2295dbf7..44dd6edea7 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -47,6 +47,7 @@ hipEventElapsedTime hipEventQuery hipEventRecord hipEventSynchronize +hipExtModuleLaunchKernel hipFree hipFreeArray hipFuncSetCacheConfig diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index b29d81f2f6..49318d6e15 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -48,6 +48,7 @@ global: hipEventQuery; hipEventRecord; hipEventSynchronize; + hipExtModuleLaunchKernel; hipFree; hipFreeArray; hipFuncSetCacheConfig; diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index ad13ab5ff2..58fa5e3680 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -162,12 +162,12 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra, - hipEvent_t 
startEvent, hipEvent_t stopEvent) + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0) { HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, - kernelParams, extra, startEvent, stopEvent); + kernelParams, extra, startEvent, stopEvent, flags); hip::Function* function = hip::Function::asFunction(f); amd::Kernel* kernel = function->function_; @@ -258,6 +258,19 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr)); } +hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) +{ + HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, + localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags)); +} + + + hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY, From 1d60371d01a80250154e65f2de1ae575405c9b9f Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 9 May 2019 14:19:54 -0400 Subject: [PATCH 140/282] P4 to Git Change 1780704 by yaxunl@yaxunl-lc10 on 2019/05/09 13:26:13 SWDEV-145570 - Fix device name mismatch for gfx906. For now hip-clang can only emits gfx906 ISA with conservative configurations, i.e. with ecc on and xnack on, therefore it is always gfx906. It is still under discussion how to encode the target id for xnack off or ecc off. Therefore, the reasonable solution for now is just allow code object marked as gfx906 to be loaded on any device name that starts with gfx906. 
We will have more detailed control once hip-clang is able to emit code object for gfx906 with ecc off or xnack off. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#30 edit --- api/hip/hip_platform.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 5203c8123a..73055a1cb9 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -64,6 +64,16 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, hipDeviceptr_t* dptr, size_t* bytes); +static bool isCompatibleCodeObject(const std::string& codeobj_target_id, + const char* device_name) { + // Workaround for gfx906 device name mismatch. + // If bundle target id starts with gfx906 and device name starts with + // gfx906, treat them as match. + return codeobj_target_id.compare(device_name) == 0 || + (codeobj_target_id.find("gfx906") == 0 && + std::string(device_name).find("gfx906") == 0); +} + extern "C" std::vector< std::pair >* __hipRegisterFatBinary(const void* data) { HIP_INIT(); @@ -102,12 +112,7 @@ extern "C" std::vector< std::pair >* __hipRegisterFatBinary(c for (size_t dev = 0; dev < g_devices.size(); ++dev) { amd::Context* ctx = g_devices[dev]; - if (target.compare(ctx->devices()[0]->info().name_)) { - // Workaround for gfx906 device name mismatch. - // If bundle target id starts with gfx906 and device name starts with - // gfx906, treat them as match. 
- if (target.find("gfx906") != 0 || - std::string(ctx->devices()[0]->info().name_).find("gfx906") != 0) + if (!isCompatibleCodeObject(target, ctx->devices()[0]->info().name_)) { continue; } @@ -579,7 +584,7 @@ const std::vector& modules() { std::string target(desc->triple + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE), desc->tripleSize - sizeof(HCC_AMDGCN_AMDHSA_TRIPLE)); - if (!target.compare(hip::getCurrentContext()->devices()[0]->info().name_)) { + if (isCompatibleCodeObject(target, hip::getCurrentContext()->devices()[0]->info().name_)) { hipModule_t module; if (hipSuccess == hipModuleLoadData(&module, reinterpret_cast( reinterpret_cast(obheader) + desc->offset))) From 0949beeeb2776308eb396d5c742e86875c796420 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 10 May 2019 15:50:37 -0400 Subject: [PATCH 141/282] P4 to Git Change 1781384 by michliao@hliao-dev-00-hip.rocm-workspace on 2019/05/10 14:00:04 SWDEV-189216 - Export `hipExtModuleLaunchKernel` RB: http://ocltc.amd.com/reviews/r/17332/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#16 edit --- api/hip/hip_hcc.map.in | 1 + 1 file changed, 1 insertion(+) diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 49318d6e15..d0b862eaa3 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -172,6 +172,7 @@ global: hipCreateSurfaceObject*; hipDestroySurfaceObject*; hipHccModuleLaunchKernel*; + hipExtModuleLaunchKernel*; }; local: *; From 680e1a6552c21c6a018949c94eca4db309b70988 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 10 May 2019 19:19:11 -0400 Subject: [PATCH 142/282] P4 to Git Change 1781540 by kpyzhov@hip-vdi-pal-win on 2019/05/10 17:10:30 SWDEV-188234 - Corrected reading binary module file in hipModuleLoad(). Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#26 edit --- api/hip/hip_module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 58fa5e3680..3837a35daf 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -59,7 +59,7 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname) HIP_RETURN(hipErrorInvalidValue); } - std::ifstream file{fname}; + std::ifstream file(fname, std::ios::binary); if (!file.is_open()) { HIP_RETURN(hipErrorFileNotFound); From 15d1df9255a9c28dc495fe74b67263debcede1a9 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 15 May 2019 12:22:50 -0400 Subject: [PATCH 143/282] P4 to Git Change 1783301 by cpaquot@cpaquot-ocl-lc-lnx on 2019/05/15 11:57:57 SWDEV-189488 - [HIP] Caffe2 TensorTest.TensorSerializationMultiDevices fails 1. Make sure to set attributes->device to current device for host malloc'd 2. Return hipSuccess for hipDeviceCanAccessPeer Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#56 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_peer.cpp#4 edit --- api/hip/hip_memory.cpp | 4 +++- api/hip/hip_peer.cpp | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 5190bd4fdb..f34b9e99c0 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1489,8 +1489,10 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void attributes->allocationFlags = memObj->getMemFlags() >> 16; amd::Context &memObjCtx = memObj->getContext(); - if (*hip::host_context == memObjCtx) + if (*hip::host_context == memObjCtx) { + attributes->device = ihipGetDevice(); HIP_RETURN(hipSuccess); + } for (auto& ctx : g_devices) { if (*ctx == memObjCtx) { attributes->device = device; diff --git a/api/hip/hip_peer.cpp b/api/hip/hip_peer.cpp index 9c2da315ca..14a41f9953 100644 --- a/api/hip/hip_peer.cpp +++ b/api/hip/hip_peer.cpp @@ -55,7 +55,7 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDevi *canAccessPeer = 0; - return hipErrorInvalidDevice; + return HIP_RETURN(hipSuccess); } hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) { @@ -106,4 +106,4 @@ hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { assert(0 && "Unimplemented"); HIP_RETURN(hipErrorUnknown); -} \ No newline at end of file +} From 37cbce4df78e2fe474f8a15c4b001c5193949339 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 27 May 2019 20:11:08 -0400 Subject: [PATCH 144/282] P4 to Git Change 1787806 by yaxunl@yaxunl-lc10 on 2019/05/27 10:53:56 SWDEV-145570 - Support loading fat binary generated through --genco by hipModuleLoad. hip-clang --genco generates fat binary instead of code object. To support that we need to extract code object from fat binary in hipModuleLoadData. This is needed for hipRTC since multiple GPU archs may be passed. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#27 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#31 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#308 edit --- api/hip/hip_module.cpp | 8 +++++ api/hip/hip_platform.cpp | 77 ++++++++++++++++++++++++++++------------ 2 files changed, 62 insertions(+), 23 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 3837a35daf..2163ef3da4 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -93,8 +93,16 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image) HIP_RETURN(ihipModuleLoadData(module, image)); } +extern bool __hipExtractCodeObjectFromFatBinary(const void* data, + const std::vector& devices, + std::vector>& code_objs); + hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) { + std::vector> code_objs; + if (__hipExtractCodeObjectFromFatBinary(image, {hip::getCurrentContext()->devices()[0]->info().name_}, code_objs)) + image = code_objs[0].first; + amd::Program* program = new amd::Program(*hip::getCurrentContext()); if (program == NULL) { return hipErrorOutOfMemory; diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 73055a1cb9..f09bc1dea9 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -74,26 +74,23 @@ static bool isCompatibleCodeObject(const std::string& codeobj_target_id, std::string(device_name).find("gfx906") == 0); } -extern "C" std::vector< std::pair >* __hipRegisterFatBinary(const void* data) +// Extracts code objects from fat binary in data for device names given in devices. +// Returns true if code objects are extracted successfully. 
+bool __hipExtractCodeObjectFromFatBinary(const void* data, + const std::vector& devices, + std::vector>& code_objs) { HIP_INIT(); - if(g_devices.empty()) { - return nullptr; - } - const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); - if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { - return nullptr; - } - std::string magic((char*)fbwrapper->binary, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); + std::string magic((const char*)data, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { - return nullptr; + return false; } - auto programs = new std::vector< std::pair >{g_devices.size()}; - - const auto obheader = reinterpret_cast(fbwrapper->binary); + code_objs.resize(devices.size()); + const auto obheader = reinterpret_cast(data); const auto* desc = &obheader->desc[0]; + unsigned num_code_objs = 0; for (uint64_t i = 0; i < obheader->numBundles; ++i, desc = reinterpret_cast( reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { @@ -109,20 +106,54 @@ extern "C" std::vector< std::pair >* __hipRegisterFatBinary(c reinterpret_cast(obheader) + desc->offset); size_t size = desc->size; - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - amd::Context* ctx = g_devices[dev]; + for (size_t dev = 0; dev < devices.size(); ++dev) { + const char* name = devices[dev]; - if (!isCompatibleCodeObject(target, ctx->devices()[0]->info().name_)) { + if (!isCompatibleCodeObject(target, name)) { continue; } + code_objs[dev] = std::make_pair(image, size); + num_code_objs++; + } + } + if (num_code_objs == devices.size()) + return true; + else + return false; +} - amd::Program* program = new amd::Program(*ctx); - if (program == nullptr) { - return nullptr; - } - if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], image, size)) { - programs->at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); - } +extern "C" std::vector< std::pair >* __hipRegisterFatBinary(const void* data) +{ + 
HIP_INIT(); + + if(g_devices.empty()) { + return nullptr; + } + const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); + if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { + return nullptr; + } + + std::vector devices; + std::vector> code_objs; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]; + devices.push_back(ctx->devices()[0]->info().name_); + } + + if (!__hipExtractCodeObjectFromFatBinary((char*)fbwrapper->binary, devices, code_objs)) { + return nullptr; + } + + auto programs = new std::vector< std::pair >{g_devices.size()}; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]; + amd::Program* program = new amd::Program(*ctx); + if (program == nullptr) { + return nullptr; + } + if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) { + programs->at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); } } From 0f9c66c7d9c2325cc35ac35e23f40ac236e166dc Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 28 May 2019 15:08:10 -0400 Subject: [PATCH 145/282] P4 to Git Change 1788476 by kjayapra@1_HIPWS_TRT2_PAL on 2019/05/28 14:44:38 SWDEV-190190 - Implemented API hipCanAccessPeerDevice Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_peer.cpp#5 edit --- api/hip/hip_peer.cpp | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/api/hip/hip_peer.cpp b/api/hip/hip_peer.cpp index 14a41f9953..e986cfeac0 100644 --- a/api/hip/hip_peer.cpp +++ b/api/hip/hip_peer.cpp @@ -53,7 +53,31 @@ hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hi hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId) { HIP_INIT_API(canAccessPeer, deviceId, peerDeviceId); - *canAccessPeer = 0; + amd::Device* device = nullptr; + amd::Device* peer_device = nullptr; + + if (canAccessPeer == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Peer cannot be self */ + if (deviceId == peerDeviceId) { + *canAccessPeer = 0; + return HIP_RETURN(hipSuccess); + } + + /* Cannot exceed the max number of devices */ + if (static_cast(deviceId) >= g_devices.size() + || static_cast(peerDeviceId) >= g_devices.size()) { + return HIP_RETURN(hipErrorInvalidValue); + } + + device = g_devices[deviceId]->devices()[0]; + peer_device = g_devices[peerDeviceId]->devices()[0]; + + *canAccessPeer = static_cast(std::find(device->p2pDevices_.begin(), + device->p2pDevices_.end(), as_cl(peer_device)) + != device->p2pDevices_.end()); return HIP_RETURN(hipSuccess); } @@ -61,17 +85,13 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDevi hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) { HIP_INIT_API(peerDeviceId); - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipSuccess); } hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) { HIP_INIT_API(peerDeviceId, flags); - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipSuccess); } hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, @@ -95,15 +115,11 @@ hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, 
const void* src, int src hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { HIP_INIT_API(peerCtx, flags); - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipSuccess); } hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { HIP_INIT_API(peerCtx); - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipSuccess); } From 3330d5e7a4092e30fa098c7928fbaca5c1719d98 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 28 May 2019 19:06:58 -0400 Subject: [PATCH 146/282] P4 to Git Change 1788615 by cpaquot@cpaquot-ocl-lc-lnx on 2019/05/28 18:54:55 SWDEV-190565 - [HIP] Allow null stream as argument to hipStreamWaitEvent. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#19 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#19 edit --- api/hip/hip_context.cpp | 24 ++++++++++++------------ api/hip/hip_stream.cpp | 6 +++++- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 47bf651b8c..d519370971 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -255,7 +255,7 @@ hipError_t hipCtxGetDevice(hipDevice_t* device) { HIP_RETURN(hipErrorInvalidValue); } - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { @@ -263,7 +263,7 @@ hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) { @@ -271,7 +271,7 @@ hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) { @@ -279,7 +279,7 @@ hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + 
HIP_RETURN(hipErrorUnknown); } hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { @@ -287,7 +287,7 @@ hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipCtxSynchronize(void) { @@ -295,7 +295,7 @@ hipError_t hipCtxSynchronize(void) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipCtxGetFlags(unsigned int* flags) { @@ -303,7 +303,7 @@ hipError_t hipCtxGetFlags(unsigned int* flags) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) { @@ -311,7 +311,7 @@ hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { @@ -319,7 +319,7 @@ hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { @@ -327,7 +327,7 @@ hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { @@ -335,7 +335,7 @@ hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { @@ -343,5 +343,5 @@ hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { assert(0 && "Unimplemented"); - return hipErrorUnknown; + HIP_RETURN(hipErrorUnknown); } diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 72ede63ecb..59456303fe 100644 
--- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -188,7 +188,11 @@ hipError_t hipStreamDestroy(hipStream_t stream) { hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_API(stream, event, flags); - if (stream == nullptr || event == nullptr) { + if (stream == nullptr) { + stream = reinterpret_cast(as_cl(hip::getNullStream())); + } + + if (event == nullptr) { HIP_RETURN(hipErrorInvalidResourceHandle); } From 78905ef7e9ba67343c3fd3de388dbd3e56232e16 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 29 May 2019 14:38:26 -0400 Subject: [PATCH 147/282] P4 to Git Change 1789054 by cpaquot@cpaquot-ocl-lc-lnx on 2019/05/29 14:02:50 SWDEV-190565 - [HIP] Don't use clSetEventWaitList and just add the event to the list in HIP. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_common.hpp#24 edit --- api/hip/hip_event.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index aa61443d2f..b7929461cb 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -84,13 +84,11 @@ hipError_t Event::streamWait(hipStream_t stream, uint flags) { amd::ScopedLock lock(lock_); - cl_event clEvent = as_cl(event_); - - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, *hostQueue, 1, &clEvent); - if (err != CL_SUCCESS) { + if (!event_->notifyCmdQueue()) { return hipErrorUnknown; } + amd::Command::EventWaitList eventWaitList; + eventWaitList.push_back(event_); amd::Command* command = new amd::Marker(*hostQueue, true, eventWaitList); if (command == NULL) { From 0e11975282aedd2cad9e342de0cebc551989f849 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 4 Jun 2019 10:24:11 -0400 Subject: [PATCH 148/282] P4 to Git Change 1791519 by vsytchen@vsytchen-remote-ocl-win10 on 2019/06/04 10:06:43 SWDEV-189383 - [HIP CQE][HIPonPAL][WIN] 
hipDeviceMalloc, hip_test_ldg, hipHostRegister, hipModule, hipStreamSync2 tests failed on VEGA10. 1. For pinned memory allocations add the host pointer and all of its respective device pointers to the memory object map. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#57 edit --- api/hip/hip_memory.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index f34b9e99c0..18097e725a 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -560,6 +560,22 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) mem->release(); HIP_RETURN(hipErrorMemoryAllocation); } + + std::vector devPtrs; + for (const auto& device: hip::getCurrentContext()->devices()) { + const device::Memory* devMem = mem->getDeviceMemory(*device); + if (devMem != nullptr) { + devPtrs.emplace_back(reinterpret_cast(devMem->virtualAddress())); + } else { + mem->release(); + HIP_RETURN(hipErrorMemoryAllocation); + } + } + // Since the amd::Memory object is shared between all devices + // it's fine to have multiple addresses mapped to it + for (const auto& devPtr: devPtrs) { + amd::MemObjMap::AddMemObj(devPtr, mem); + } amd::MemObjMap::AddMemObj(hostPtr, mem); HIP_RETURN(hipSuccess); } else { @@ -582,6 +598,10 @@ hipError_t hipHostUnregister(void* hostPtr) { if(mem) { hip::syncStreams(); hip::getNullStream()->finish(); + for (const auto& device: hip::getCurrentContext()->devices()) { + const device::Memory* devMem = mem->getDeviceMemory(*device); + amd::MemObjMap::RemoveMemObj(reinterpret_cast(devMem->virtualAddress())); + } amd::MemObjMap::RemoveMemObj(hostPtr); mem->release(); HIP_RETURN(hipSuccess); From 21b1c1a1836f5fe57c4799b66a26137a24bc3c81 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 6 Jun 2019 11:51:22 -0400 Subject: [PATCH 149/282] P4 to Git Change 1792745 by vsytchen@vsytchen-remote-ocl-win10 on 2019/06/06 11:18:13 SWDEV-145570 - Simplify pinned memory 
allocation logic ReviewBoardURL = http://ocltc.amd.com/reviews/r/17467/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#58 edit --- api/hip/hip_memory.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 18097e725a..d932c88705 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -556,26 +556,21 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) if(hostPtr != nullptr) { amd::Memory* mem = new (*hip::host_context) amd::Buffer(*hip::host_context, CL_MEM_USE_HOST_PTR, sizeBytes); - if (!mem->create(hostPtr)) { + constexpr bool sysMemAlloc = false; + constexpr bool skipAlloc = false; + constexpr bool forceAlloc = true; + if (!mem->create(hostPtr, sysMemAlloc, skipAlloc, forceAlloc)) { mem->release(); HIP_RETURN(hipErrorMemoryAllocation); } - std::vector devPtrs; for (const auto& device: hip::getCurrentContext()->devices()) { + // Since the amd::Memory object is shared between all devices + // it's fine to have multiple addresses mapped to it const device::Memory* devMem = mem->getDeviceMemory(*device); - if (devMem != nullptr) { - devPtrs.emplace_back(reinterpret_cast(devMem->virtualAddress())); - } else { - mem->release(); - HIP_RETURN(hipErrorMemoryAllocation); - } - } - // Since the amd::Memory object is shared between all devices - // it's fine to have multiple addresses mapped to it - for (const auto& devPtr: devPtrs) { - amd::MemObjMap::AddMemObj(devPtr, mem); + amd::MemObjMap::AddMemObj(reinterpret_cast(devMem->virtualAddress()), mem); } + amd::MemObjMap::AddMemObj(hostPtr, mem); HIP_RETURN(hipSuccess); } else { From 17943639e4e2f686c7776d631fa5d672ceb224fb Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 12 Jun 2019 10:00:38 -0400 Subject: [PATCH 150/282] P4 to Git Change 1809277 by gandryey@gera-win10 on 2019/06/11 17:34:13 SWDEV-180872 - Runtime support changes for Cooperative Group 
Features - Initial implementation of the core functionality. Disabled by default. Use GPU_ENABLE_COOP_GROUPS=1 to enable the feature. - Runtime uses device queue for cooperative executions with a synchronization on the launched queue. - The current implementation is pure runtime change and it can work if only one app uses this feature. No ROCr/KFD support was added or tested - Only inline assembler was tested Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#20 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#15 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#17 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#28 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#338 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#606 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#171 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#31 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#142 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#39 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palschedcl.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#135 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#61 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#127 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#37 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocschedcl.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#75 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#94 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#92 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#311 edit --- api/hip/hip_device.cpp | 2 + api/hip/hip_device_runtime.cpp | 8 +++- api/hip/hip_hcc.def.in | 4 ++ api/hip/hip_hcc.map.in | 4 ++ api/hip/hip_module.cpp | 69 ++++++++++++++++++++++++++---- api/hip/hip_platform.cpp | 77 ++++++++++++++++++++++++++++++++++ 6 files changed, 155 insertions(+), 9 deletions(-) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index a3019cc7d7..622ad150a1 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -192,6 +192,8 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) //deviceProps.isMultiGpuBoard = info.; deviceProps.canMapHostMemory = 1; deviceProps.gcnArch = info.gfxipVersion_; + deviceProps.cooperativeLaunch = info.cooperativeGroups_; + deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_; *props = deviceProps; HIP_RETURN(hipSuccess); diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index 6bb4261964..fea9fb4dd9 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -235,6 +235,12 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) case hipDeviceAttributeIsMultiGpuBoard: *pi = prop.isMultiGpuBoard; break; + case hipDeviceAttributeCooperativeLaunch: + *pi = prop.cooperativeLaunch; + break; + case hipDeviceAttributeCooperativeMultiDeviceLaunch: + *pi = prop.cooperativeMultiDeviceLaunch; + break; default: HIP_RETURN(hipErrorInvalidValue); } @@ -401,7 +407,7 @@ hipError_t hipGetDevice ( int* deviceId ) { if 
(deviceId != nullptr) { int dev = ihipGetDevice(); - assert(dev != -1); + assert(dev != -1); *deviceId = dev; HIP_RETURN(hipSuccess); } else { diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 44dd6edea7..662bfd04c3 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -110,10 +110,14 @@ hipModuleGetFunction hipModuleGetGlobal hipModuleLaunchKernel hipModuleLaunchKernelExt +hipLaunchCooperativeKernel +hipLaunchCooperativeMultiDeviceKernel hipHccModuleLaunchKernel hipModuleLoad hipModuleLoadData hipModuleUnload +hipOccupancyMaxActiveBlocksPerMultiprocessor +hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags hipFuncGetAttributes hipPeekAtLastError hipPointerGetAttributes diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index d0b862eaa3..f6ab5a533b 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -111,9 +111,13 @@ global: hipModuleGetGlobal; hipModuleLaunchKernel; hipModuleLaunchKernelExt; + hipLaunchCooperativeKernel; + hipLaunchCooperativeMultiDeviceKernel; hipModuleLoad; hipModuleLoadData; hipModuleUnload; + hipOccupancyMaxActiveBlocksPerMultiprocessor; + hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; hipFuncGetAttributes; hipPeekAtLastError; hipPointerGetAttributes; diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 2163ef3da4..edc3ba4384 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -170,7 +170,8 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra, - hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0) + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0, + uint32_t params = 0) { HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, @@ -193,6 +194,14 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, hip::getNullStream()->finish(); queue = 
as_amd(reinterpret_cast(hStream))->asHostQueue(); } + if ((params & amd::NDRangeKernelCommand::CooperativeGroups) && + !device->info().cooperativeGroups_) { + return hipErrorLaunchFailure; + } + if ((params & amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups) && + !device->info().cooperativeMultiDeviceGroups_) { + return hipErrorLaunchFailure; + } if (!queue) { return hipErrorOutOfMemory; } @@ -203,25 +212,29 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); amd::Command::EventWaitList waitList; + address kernargs = nullptr; + // 'extra' is a struct that contains the following info: { // HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs, // HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size, // HIP_LAUNCH_PARAM_END } - if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER || - extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE || extra[4] != HIP_LAUNCH_PARAM_END) { - return hipErrorNotInitialized; + if (extra != nullptr) { + if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER || + extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE || extra[4] != HIP_LAUNCH_PARAM_END) { + return hipErrorNotInitialized; + } + kernargs = reinterpret_cast
(extra[1]); } - address kernargs = reinterpret_cast
(extra[1]); const amd::KernelSignature& signature = kernel->signature(); for (size_t i = 0; i < signature.numParameters(); ++i) { const amd::KernelParameterDescriptor& desc = signature.at(i); if (kernelParams == nullptr) { - assert(extra); + assert(kernargs != nullptr); kernel->parameters().set(i, desc.size_, kernargs + desc.offset_, desc.type_ == T_POINTER/*svmBound*/); } else { - assert(!extra); + assert(extra == nullptr); kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER/*svmBound*/); } } @@ -232,7 +245,8 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, eStart->addMarker(queue, startCommand); } - amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(*queue, waitList, *kernel, ndrange, sharedMemBytes); + amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand( + *queue, waitList, *kernel, ndrange, sharedMemBytes, params); if (!command) { return hipErrorOutOfMemory; } @@ -303,4 +317,43 @@ hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); } +hipError_t hipLaunchCooperativeKernel(const void* f, + dim3 gridDim, dim3 blockDim, + void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream) +{ + int deviceId = ihipGetDevice(); + hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); + if (func == nullptr) { + HIP_RETURN(hipErrorUnknown); + } + HIP_RETURN(ihipModuleLaunchKernel(func, gridDim.x * blockDim.x, gridDim.y * blockDim.y, gridDim.z * blockDim.z, + blockDim.x, blockDim.y, blockDim.z, + sharedMemBytes, hStream, kernelParams, nullptr, nullptr, nullptr, 0, + amd::NDRangeKernelCommand::CooperativeGroups)); +} + +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) +{ + int deviceId = ihipGetDevice(); + + hipError_t result = hipErrorUnknown; + for (int i = 0; i < numDevices; ++i) { + const hipLaunchParams& launch = 
launchParamsList[i]; + amd::HostQueue* queue = as_amd(reinterpret_cast(launch.stream))->asHostQueue(); + hipFunction_t func = PlatformState::instance().getFunc(launch.func, deviceId); + if (func == nullptr) { + HIP_RETURN(result); + } + result = ihipModuleLaunchKernel(func, + launch.gridDim.x * launch.blockDim.x, + launch.gridDim.y * launch.blockDim.y, + launch.gridDim.z * launch.blockDim.z, + launch.blockDim.x, launch.blockDim.y, launch.blockDim.z, + launch.sharedMem, launch.stream, + launch.args, nullptr, nullptr, nullptr, flags, + (amd::NDRangeKernelCommand::CooperativeGroups | amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups)); + } + return result; +} diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index f09bc1dea9..07a68faccf 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -448,6 +448,69 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor } +namespace hip_impl { + +hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + const void* f, + int blockSize, + size_t dynamicSMemSize) +{ + HIP_INIT_API(f, blockSize, dynamicSMemSize); + int deviceId = ihipGetDevice(); + hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); + if (func == nullptr) { + HIP_RETURN(hipErrorUnknown); + } + + hip::Function* function = hip::Function::asFunction(func); + amd::Kernel* kernel = function->function_; + if (!kernel) { + HIP_RETURN(hipErrorOutOfMemory); + } + if (blockSize == 0) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::Device* device = hip::getCurrentContext()->devices()[0]; + const device::Kernel::WorkGroupInfo* wrkGrpInfo = kernel->getDeviceKernel(*device)->workGroupInfo(); + + // Find threads accupancy per CU => simd_per_cu * GPR usage + constexpr size_t MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD + size_t alu_accupancy = device->info().simdPerCU_ * + std::min(MaxWavesPerSimd, (wrkGrpInfo->availableVGPRs_ / 
amd::alignUp(wrkGrpInfo->usedVGPRs_, 4))); + + alu_accupancy *= wrkGrpInfo->wavefrontSize_; + // Calculate blocks occupancy per CU + *numBlocks = alu_accupancy / amd::alignUp(blockSize, wrkGrpInfo->wavefrontSize_); + + size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSize; + if (total_used_lds != 0) { + // Calculate LDS occupacy per CU. lds_per_cu / (static_lsd + dynamic_lds) + int lds_occupancy = static_cast(device->info().localMemSize_ / total_used_lds); + *numBlocks = std::min(*numBlocks, lds_occupancy); + } + + HIP_RETURN(hipSuccess); +} +} + +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + const void* f, + int blockSize, + size_t dynamicSMemSize) +{ + HIP_RETURN(hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f, blockSize, dynamicSMemSize)); +} + +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + const void* f, + int blockSize, + size_t dynamicSMemSize, + unsigned int flags) +{ + HIP_RETURN(hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f, blockSize, dynamicSMemSize)); +} + + #if defined(ATI_OS_LINUX) namespace hip_impl { @@ -668,6 +731,20 @@ void hipLaunchKernelGGLImpl( sharedMemBytes, stream, nullptr, kernarg); } +void hipLaunchCooperativeKernelGGLImpl( + uintptr_t function_address, + const dim3& numBlocks, + const dim3& dimBlocks, + uint32_t sharedMemBytes, + hipStream_t stream, + void** kernarg) +{ + HIP_INIT(); + + hipLaunchCooperativeKernel(reinterpret_cast(function_address), + numBlocks, dimBlocks, kernarg, sharedMemBytes, stream); +} + } // conversion routines between float and half precision From c2b13cce1d5c54a7d3d61cb4957ded9ddf2f372a Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 12 Jun 2019 10:48:40 -0400 Subject: [PATCH 151/282] P4 to Git Change 1809454 by kjayapra@0_HIPWS_TRT2_ROCM on 2019/06/11 17:45:36 SWDEV-191936 - Implementation of hipMemcpyPeer & hipMemcpyPeerAsync + fix to check source P2PAgents for P2P. RocM Verified. 
Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_peer.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#76 edit --- api/hip/hip_peer.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/api/hip/hip_peer.cpp b/api/hip/hip_peer.cpp index e986cfeac0..cdfec74f0e 100644 --- a/api/hip/hip_peer.cpp +++ b/api/hip/hip_peer.cpp @@ -98,18 +98,14 @@ hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevic size_t sizeBytes) { HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes); - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice)); } hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream); - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream)); } hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { From b15b82ffb978b7797fb7e47cecc8970b5ed8056a Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 13 Jun 2019 12:47:17 -0400 Subject: [PATCH 152/282] P4 to Git Change 1815146 by cpaquot@cpaquot-ocl-lc-lnx on 2019/06/12 13:43:33 SWDEV-192333 - [HIP] Implemented hipExtMallocWithFlags and hipExtGetLinkTypeAndHopCount Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#16 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#18 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#59 edit --- api/hip/hip_device_runtime.cpp | 19 +++++++++++++++++++ api/hip/hip_hcc.def.in | 2 ++ api/hip/hip_hcc.map.in | 2 ++ api/hip/hip_memory.cpp | 11 +++++++++++ 4 files changed, 34 insertions(+) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index fea9fb4dd9..eabf622f28 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -476,3 +476,22 @@ hipError_t hipSetValidDevices ( int* device_arr, int len ) { HIP_RETURN(hipErrorUnknown); } +hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount) { + HIP_INIT_API(device1, device2, linktype, hopcount); + + const int numDevices = static_cast(g_devices.size()); + + if ((device1 < 0) || (device1 >= numDevices) || (device2 < 0) || (device2 >= numDevices)) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (linktype != nullptr) { + *linktype = 0; + } + if (hopcount != nullptr) { + *hopcount = 1; + } + + HIP_RETURN(hipSuccess); +} + diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 662bfd04c3..f33515ec18 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -47,6 +47,8 @@ hipEventElapsedTime hipEventQuery hipEventRecord hipEventSynchronize +hipExtGetLinkTypeAndHopCount +hipExtMallocWithFlags hipExtModuleLaunchKernel hipFree hipFreeArray diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index f6ab5a533b..fc8a302b2a 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -48,6 +48,8 @@ global: hipEventQuery; hipEventRecord; hipEventSynchronize; + hipExtGetLinkTypeAndHopCount; + hipExtMallocWithFlags; hipExtModuleLaunchKernel; hipFree; hipFreeArray; diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index d932c88705..0c0cdcd6ba 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -202,6 +202,17 @@ hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, return 
hipSuccess; } +hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(ptr, sizeBytes, flags); + + if (flags != hipDeviceMallocDefault && + flags != hipDeviceMallocFinegrained) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipMalloc(ptr, sizeBytes, (flags & hipDeviceMallocFinegrained)? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0)); +} + hipError_t hipMalloc(void** ptr, size_t sizeBytes) { HIP_INIT_API(ptr, sizeBytes); From 4e8767e7f5064bd6c565bca9599ef01da9a2173f Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 13 Jun 2019 13:00:48 -0400 Subject: [PATCH 153/282] P4 to Git Change 1815283 by gandryey@gera-win10 on 2019/06/12 15:26:45 SWDEV-180872 - Runtime support changes for Cooperative Group Features - Correct the name in the def files Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#17 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#19 edit --- api/hip/hip_hcc.def.in | 2 +- api/hip/hip_hcc.map.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index f33515ec18..3c1a05716f 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -113,7 +113,7 @@ hipModuleGetGlobal hipModuleLaunchKernel hipModuleLaunchKernelExt hipLaunchCooperativeKernel -hipLaunchCooperativeMultiDeviceKernel +hipLaunchCooperativeKernelMultiDevice hipHccModuleLaunchKernel hipModuleLoad hipModuleLoadData diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index fc8a302b2a..624761944d 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -114,7 +114,7 @@ global: hipModuleLaunchKernel; hipModuleLaunchKernelExt; hipLaunchCooperativeKernel; - hipLaunchCooperativeMultiDeviceKernel; + hipLaunchCooperativeKernelMultiDevice; hipModuleLoad; hipModuleLoadData; hipModuleUnload; From 975d4b53f776e6521079a657d2090b8f6a4aca57 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 13 Jun 2019 15:40:39 -0400 Subject: [PATCH 
154/282] P4 to Git Change 1815357 by cpaquot@cpaquot-ocl-lc-lnx on 2019/06/12 16:24:09 SWDEV-192384 - [HIP] Fixed case where start and stop events are the same for ElapsedTime. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#11 edit --- api/hip/hip_event.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index b7929461cb..1507629321 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -56,6 +56,23 @@ hipError_t Event::synchronize() { hipError_t Event::elapsedTime(Event& eStop, float& ms) { amd::ScopedLock startLock(lock_); + + if (this == &eStop) { + if (event_ == nullptr) { + return hipErrorInvalidResourceHandle; + } + + if (flags & hipEventDisableTiming) { + return hipErrorInvalidResourceHandle; + } + + if (!ready()) { + return hipErrorNotReady; + } + + ms = 0.f; + return hipSuccess; + } amd::ScopedLock stopLock(eStop.lock_); if (event_ == nullptr || @@ -189,7 +206,7 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { hip::Event* eStart = reinterpret_cast(start); hip::Event* eStop = reinterpret_cast(stop); - return HIP_RETURN(eStart->elapsedTime(*eStop, *ms)); + HIP_RETURN(eStart->elapsedTime(*eStop, *ms)); } hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { From fd634d58f13bae0c44a981ca954e3aa6d3bca814 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 20 Jun 2019 18:13:20 -0400 Subject: [PATCH 155/282] P4 to Git Change 1879857 by gandryey@gera-win10 on 2019/06/20 18:06:07 SWDEV-184710 - Support hipLaunchCooperativeKernelMultiDevice() - Clean-up the loop for the launch on each device - Add hipExtLaunchMultiKernelMultiDevice() http://ocltc.amd.com/reviews/r/17573/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#29 edit --- api/hip/hip_module.cpp | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index edc3ba4384..eb6aec8e08 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -333,16 +333,23 @@ hipError_t hipLaunchCooperativeKernel(const void* f, amd::NDRangeKernelCommand::CooperativeGroups)); } -hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, - int numDevices, unsigned int flags) +hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags, uint32_t extFlags) { - int deviceId = ihipGetDevice(); + int currentDevice = ihipGetDevice(); + int numActiveGPUs = 0; + ihipDeviceGetCount(&numActiveGPUs); + if ((numDevices > numActiveGPUs) || (launchParamsList == nullptr)) { + return hipErrorInvalidValue; + } + hipError_t result = hipErrorUnknown; for (int i = 0; i < numDevices; ++i) { + hipSetDevice(i); const hipLaunchParams& launch = launchParamsList[i]; amd::HostQueue* queue = as_amd(reinterpret_cast(launch.stream))->asHostQueue(); - hipFunction_t func = PlatformState::instance().getFunc(launch.func, deviceId); + hipFunction_t func = PlatformState::instance().getFunc(launch.func, i); if (func == nullptr) { HIP_RETURN(result); } @@ -352,8 +359,22 @@ hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsLi launch.gridDim.z * launch.blockDim.z, launch.blockDim.x, launch.blockDim.y, launch.blockDim.z, launch.sharedMem, launch.stream, - launch.args, nullptr, nullptr, nullptr, flags, - (amd::NDRangeKernelCommand::CooperativeGroups | amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups)); + launch.args, nullptr, nullptr, nullptr, flags, extFlags); } + + hipSetDevice(currentDevice); return result; } + +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int 
numDevices, unsigned int flags) +{ + return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, + (amd::NDRangeKernelCommand::CooperativeGroups | + amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups)); +} + +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) { + return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0); +} From 97a079826eb7be971e6b9a6ada47079e8d608333 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 28 Jun 2019 08:06:23 -0400 Subject: [PATCH 156/282] P4 to Git Change 1917620 by yaxunl@yaxunl-lc10 on 2019/06/28 07:57:46 SWDEV-145570 - Fix device name mismatch. Not only gfx906 can have device name with +xnack etc. Other devices e.g. gfx900 could have that too. Make the previous fix more generic. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#33 edit --- api/hip/hip_platform.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 07a68faccf..c358f8c4b3 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -66,12 +66,17 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor static bool isCompatibleCodeObject(const std::string& codeobj_target_id, const char* device_name) { - // Workaround for gfx906 device name mismatch. - // If bundle target id starts with gfx906 and device name starts with - // gfx906, treat them as match. - return codeobj_target_id.compare(device_name) == 0 || - (codeobj_target_id.find("gfx906") == 0 && - std::string(device_name).find("gfx906") == 0); + // Workaround for device name mismatch. + // Device name may contain feature strings delimited by '+', e.g. + // gfx900+xnack. Currently HIP-Clang does not include feature strings + // in code object target id in fat binary. 
Therefore drop the feature + // strings from device name before comparing it with code object target id. + std::string short_name(device_name); + auto feature_loc = short_name.find('+'); + if (feature_loc != std::string::npos) { + short_name.erase(feature_loc); + } + return codeobj_target_id == short_name; } // Extracts code objects from fat binary in data for device names given in devices. From e9f58233e10bb473158bd8b58e6d87de618771c6 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 28 Jun 2019 12:09:09 -0400 Subject: [PATCH 157/282] P4 to Git Change 1917740 by cpaquot@cpaquot-ocl-lc-lnx on 2019/06/28 12:03:07 SWDEV-193938 - [HIP] RCCL test fails Set default stream to null-stream In hipStreamWaitEvent if event_ is null, get the last queued command instead. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#12 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.hpp#4 edit --- api/hip/hip_event.cpp | 11 +++++++++++ api/hip/hip_event.hpp | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 1507629321..4d2eea3985 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -100,6 +100,12 @@ hipError_t Event::streamWait(hipStream_t stream, uint flags) { if (stream_ == hostQueue) return hipSuccess; amd::ScopedLock lock(lock_); + bool retain = false; + + if (event_ == nullptr) { + event_ = stream_->getLastQueuedCommand(true); + retain = true; + } if (!event_->notifyCmdQueue()) { return hipErrorUnknown; @@ -114,6 +120,11 @@ hipError_t Event::streamWait(hipStream_t stream, uint flags) { command->enqueue(); command->release(); + if (retain) { + event_->release(); + event_ = nullptr; + } + return hipSuccess; } diff --git a/api/hip/hip_event.hpp b/api/hip/hip_event.hpp index 953665ed5b..4c6fb132e8 100644 --- a/api/hip/hip_event.hpp +++ b/api/hip/hip_event.hpp @@ -39,7 +39,7 @@ public: class Event { public: - Event(unsigned int flags) : flags(flags), 
lock_("hipEvent_t"), stream_(nullptr), event_(nullptr) {} + Event(unsigned int flags) : flags(flags), lock_("hipEvent_t"), stream_(getNullStream()), event_(nullptr) {} ~Event() { if (event_ != nullptr) { event_->release(); From 4a26d801fb0565d0374b979691cc5f7653435ace Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 3 Jul 2019 14:51:15 -0400 Subject: [PATCH 158/282] P4 to Git Change 1940447 by cpaquot@cpaquot-ocl-lc-lnx on 2019/07/03 14:44:49 SWDEV-194872 - [HIP] CUDA and HCC sync after a DeviceToHost async copy. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#60 edit --- api/hip/hip_memory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 0c0cdcd6ba..510368d57b 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -102,6 +102,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList, *srcMemory->asBuffer(), sOffset, sizeBytes, dst); + isAsync = false; } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { static const uint hostMem = CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR; if ((kind == hipMemcpyDeviceToDevice || From ff4b99311a0d4c2e8e8f340244e4ce5051a67d6d Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 8 Jul 2019 18:14:01 -0400 Subject: [PATCH 159/282] P4 to Git Change 1960615 by gandryey@gera-win10 on 2019/07/08 18:05:10 SWDEV-79445 - HIP generic changes and code clean-up - Correct elapsed time calculation. Use event start and end. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#13 edit --- api/hip/hip_event.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 4d2eea3985..746d93a8b2 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -88,8 +88,12 @@ hipError_t Event::elapsedTime(Event& eStop, float& ms) { return hipErrorNotReady; } - ms = static_cast(static_cast(eStop.event_->profilingInfo().submitted_ - - event_->profilingInfo().submitted_))/1000000.f; + if (event_ != eStop.event_) { + ms = static_cast(static_cast(eStop.event_->profilingInfo().end_ - + event_->profilingInfo().start_))/1000000.f; + } else { + ms = 0.f; + } return hipSuccess; } From 763c2944417c832da02f73b2ee34301e9d7073d7 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 11 Jul 2019 19:13:06 -0400 Subject: [PATCH 160/282] P4 to Git Change 1966049 by cpaquot@cpaquot-ocl-lc-lnx on 2019/07/11 19:05:18 SWDEV-189500 - [HIP] Have to force async=false for host to device case as well Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#61 edit --- api/hip/hip_memory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 510368d57b..f638381b39 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -99,6 +99,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER, waitList, *dstMemory->asBuffer(), dOffset, sizeBytes, src); + isAsync = false; } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList, *srcMemory->asBuffer(), sOffset, sizeBytes, dst); From fd89a62ac3f505d7418415ee254f8624db1b2f72 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 16 Jul 2019 15:54:22 -0400 Subject: [PATCH 161/282] P4 to Git Change 1968576 by cpaquot@cpaquot-ocl-lc-lnx on 2019/07/16 15:48:18 SWDEV-145570 - [HIP] update code after header change Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#62 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#12 edit --- api/hip/hip_memory.cpp | 13 ++++++------- api/hip/hip_texture.cpp | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index f638381b39..a591c9ecd3 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -437,22 +437,21 @@ hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocat } *array = (hipArray*)malloc(sizeof(hipArray)); - array[0]->drvDesc = *pAllocateArray; - array[0]->width = pAllocateArray->width; - array[0]->height = pAllocateArray->height; + array[0]->width = pAllocateArray->Width; + array[0]->height = pAllocateArray->Height; array[0]->isDrv = true; array[0]->textureType = hipTextureType2D; void** ptr = &array[0]->data; cl_channel_order channelOrder; cl_channel_type channelType; - getDrvChannelOrderAndType(pAllocateArray->format, pAllocateArray->numChannels, + getDrvChannelOrderAndType(pAllocateArray->Format, pAllocateArray->NumChannels, &channelOrder, &channelType); const cl_image_format image_format = { channelOrder, channelType }; - size_t size = pAllocateArray->width; - if (pAllocateArray->height > 0) { - size = size * pAllocateArray->height; + size_t size = pAllocateArray->Width; + if (pAllocateArray->Height > 0) { + size = size * pAllocateArray->Height; } size_t pitch = 0; diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index f1ed3580b3..e8fd421470 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -577,5 +577,5 @@ hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPT } size_t offset; - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, nullptr, desc->width, desc->height, pitch)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, nullptr, desc->Width, desc->Height, pitch)); } From 6987b1896ba80ac9af93277267bcd8042fb4b188 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 23 Jul 
2019 15:18:51 -0400 Subject: [PATCH 162/282] P4 to Git Change 1972329 by cpaquot@cpaquot-ocl-lc-lnx on 2019/07/23 15:16:26 SWDEV-197168 - [HIP] handle width or height or src or dst being 0 Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#63 edit --- api/hip/hip_memory.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index a591c9ecd3..c3755cf028 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -851,6 +851,13 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch amd::Memory* srcMemory = getMemoryObject(src, sOrigin[0]); amd::Memory* dstMemory = getMemoryObject(dst, dOrigin[0]); + if (src_slice_pitch == 0 || + dst_slice_pitch == 0 || + dst == nullptr || + src == nullptr) { + return hipSuccess; + } + if (!srcRect.create(sOrigin, region, spitch, src_slice_pitch) || !dstRect.create(dOrigin, region, dpitch, dst_slice_pitch)) { return hipErrorInvalidValue; From c837c6e7692bc4a6a63561295bf679327bae7ec8 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 23 Jul 2019 20:09:59 -0400 Subject: [PATCH 163/282] P4 to Git Change 1972494 by kjayapra@0_HIPWS_P2P1_ROCM on 2019/07/23 20:01:13 SWDEV-144570 - Handling variable register during hipModuleLoad. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#31 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#30 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#34 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#51 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#30 edit --- api/hip/hip_internal.hpp | 4 +++- api/hip/hip_module.cpp | 39 +++++++++++++++++++++++++++++++++++++++ api/hip/hip_platform.cpp | 18 +++++++++++++++--- 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 2787e2b187..eae35ecae9 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -125,7 +125,7 @@ public: }; private: std::unordered_map functions_; - std::unordered_map vars_; + std::unordered_map vars_; static PlatformState* platform_; @@ -136,6 +136,8 @@ public: return *platform_; } + void unregisterVar(hipModule_t hmod); + void registerVar(const void* hostvar, const DeviceVar& var); void registerFunction(const void* hostFunction, const DeviceFunction& func); diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index eb6aec8e08..f9c5a85137 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -70,6 +70,10 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname) HIP_RETURN(ihipModuleLoadData(module, tmp.data())); } +bool ihipModuleUnregisterGlobal(hipModule_t hmod) { + PlatformState::instance().unregisterVar(hmod); + return true; +} hipError_t hipModuleUnload(hipModule_t hmod) { @@ -81,6 +85,10 @@ hipError_t hipModuleUnload(hipModule_t hmod) amd::Program* program = as_amd(reinterpret_cast(hmod)); + if(!ihipModuleUnregisterGlobal(hmod)) { + HIP_RETURN(hipErrorUnknown); + } + program->release(); HIP_RETURN(hipSuccess); @@ -97,6 +105,33 @@ extern bool __hipExtractCodeObjectFromFatBinary(const void* data, const std::vector& devices, std::vector>& code_objs); +bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) { + + size_t var_size = 0; + hipDeviceptr_t device_ptr = nullptr; + std::vector var_names; + + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + if 
(!dev_program->getGlobalSymbolsFromCodeObj(&var_names)) { + return false; + } + + for (auto it = var_names.begin(); it != var_names.end(); ++it) { + auto modules = new std::vector >{g_devices.size()}; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, false); + } + + PlatformState::DeviceVar dvar{it->c_str(), modules, + std::vector{ g_devices.size()}}; + PlatformState::instance().registerVar(it->c_str(), dvar); + } + + return true; +} + hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) { std::vector> code_objs; @@ -115,6 +150,10 @@ hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) *module = reinterpret_cast(as_cl(program)); + if (!ihipModuleRegisterGlobal(program, module)) { + return hipErrorUnknown; + } + return hipSuccess; } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index c358f8c4b3..d284570322 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -165,10 +165,23 @@ extern "C" std::vector< std::pair >* __hipRegisterFatBinary(c return programs; } +void PlatformState::unregisterVar(hipModule_t hmod) { + auto it = vars_.begin(); + while (it != vars_.end()) { + DeviceVar& dvar = it->second; + if ((*dvar.modules)[0].first == hmod) { + delete dvar.modules; + vars_.erase(it++); + } else { + ++it; + } + } +} + void PlatformState::registerVar(const void* hostvar, const DeviceVar& rvar) { amd::ScopedLock lock(lock_); - vars_.insert(std::make_pair(hostvar, rvar)); + vars_.insert(std::make_pair(std::string(reinterpret_cast(hostvar)), rvar)); } void PlatformState::registerFunction(const void* hostFunction, @@ -253,11 +266,10 @@ bool PlatformState::getFuncAttr(const void* hostFunction, return true; } - bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, size_t* size_ptr) { amd::ScopedLock lock(lock_); - const auto it = vars_.find(hostVar); + const auto it = vars_.find(std::string(reinterpret_cast(hostVar))); 
if (it != vars_.cend()) { DeviceVar& dvar = it->second; if (dvar.rvars[deviceId].getdeviceptr() == nullptr) { From 49bdfeb9bea5e101d4c5e04c7f3d934c09631dae Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 24 Jul 2019 05:06:40 -0400 Subject: [PATCH 164/282] P4 to Git Change 1972736 by pghafari@pghafari-gera-win10 on 2019/07/24 04:51:12 SWDEV-197122 - Initial CMake file for hip-vdi-rocr path Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/CMakeFiles.txt#1 add --- api/hip/CMakeFiles.txt | 98 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 api/hip/CMakeFiles.txt diff --git a/api/hip/CMakeFiles.txt b/api/hip/CMakeFiles.txt new file mode 100644 index 0000000000..c3ef0c2559 --- /dev/null +++ b/api/hip/CMakeFiles.txt @@ -0,0 +1,98 @@ +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + +set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-keep-memory -Wl,-Bsymbolic -Wl,--unresolved-symbols=report-all -Wl,--version-script=${CMAKE_SOURCE_DIR}/api/hip/hip_hcc.map.in") + +if(CMAKE_CXX_FLAGS MATCHES "fsanitize=address") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") +endif() + +set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) + +add_definitions(-D__HIP_VDI__ -D__HIP_PLATFORM_HCC__ -DLINUX -D__x86_64__ -D__AMD64__ -DUNIX_OS -DqLittleEndian -DOPENCL_MAJOR=2 -DOPENCL_MINOR=0 -DWITH_AQL -DWITH_ONLINE_COMPILER -DATI_OS_LINUX -DATI_ARCH_X86 -DLITTLEENDIAN_CPU -DATI_BITS_64 -DATI_COMP_GCC -DWITH_HSA_DEVICE -DWITH_TARGET_AMDGCN -DOPENCL_EXPORTS -DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DVEGA10_ONLY=false -DWITH_LIGHTNING_COMPILER) + +if(CMAKE_BUILD_TYPE MATCHES "^Debug$") + add_definitions(-DDEBUG) +endif() + +include_directories(${CMAKE_SOURCE_DIR}/api/hip/include) +include_directories(${CMAKE_SOURCE_DIR}/api/hip/elfio) +include_directories(${CMAKE_SOURCE_DIR}/runtime) 
+include_directories(${CMAKE_SOURCE_DIR}/api/opencl) +include_directories(${CMAKE_SOURCE_DIR}/api/opencl/amdocl) +include_directories(${CMAKE_SOURCE_DIR}/api/opencl/khronos) +include_directories(${CMAKE_SOURCE_DIR}/api/opencl/khronos/headers) +include_directories(${CMAKE_SOURCE_DIR}/api/opencl/khronos/headers/opencl2.2) +include_directories(${CMAKE_SOURCE_DIR}/compiler/lib) +include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/backends/common) +include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/include) +include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/loaders) +include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/loaders/elf/utils/libelf) +include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/loaders/elf/utils/common) +include_directories(${CMAKE_SOURCE_DIR}/compiler/driver/src) +include_directories(${CMAKE_SOURCE_DIR}/compiler/tools) + + +include_directories(${CMAKE_BINARY_DIR}/runtime/device/rocm) +add_definitions(-DBSD_LIBELF) +include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/loaders/elf/utils/common) +include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/loaders/elf/utils/libelf) +#find_package( Threads REQUIRED CONFIG) +#find_package( LibElf REQUIRED CONFIG) + +add_library(hipamd64 SHARED + hip_context.cpp + hip_device.cpp + hip_device_runtime.cpp + hip_error.cpp + hip_event.cpp + hip_memory.cpp + hip_module.cpp + hip_peer.cpp + hip_platform.cpp + hip_profile.cpp + hip_stream.cpp + hip_surface.cpp + hip_texture.cpp + # $ +$ + ${CMAKE_CURRENT_BINARY_DIR}/../opencl/amdocl/CMakeFiles/amdocl64.dir/cl_icd.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../opencl/amdocl/CMakeFiles/amdocl64.dir/cl_gl.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../opencl/amdocl/CMakeFiles/amdocl64.dir/cl_program.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../opencl/amdocl/CMakeFiles/amdocl64.dir/cl_lqdflash_amd.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/thread/thread.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/thread/monitor.cpp.o + 
${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/thread/semaphore.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/utils/flags.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/utils/debug.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/appprofile.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/device.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/hwdebug.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/blitcl.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/blit.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/devkernel.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/devwavelimiter.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/devprogram.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/kernel.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/context.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/command.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/ndrange.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/runtime.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/object.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/memory.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/program.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/commandqueue.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/agent.cpp.o + 
${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/os/os_win32.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/os/alloc.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/os/os.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/os/os_posix.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/__/compiler/lib/utils/options.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/__/compiler/lib/loaders/elf/elf.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/__/compiler/lib/loaders/elf/elf_utils.cpp.o + ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/comgrctx.cpp.o + + ) + +target_link_libraries(hipamd64 amdocl64 oclelf pthread) + +install(TARGETS hipamd64 LIBRARY DESTINATION lib/x86_64 COMPONENT applications) \ No newline at end of file From 01a2cf16e094d8e1f966ffd9a6063ec603e56300 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 24 Jul 2019 12:05:47 -0400 Subject: [PATCH 165/282] P4 to Git Change 1972922 by cpaquot@cpaquot-ocl-lc-lnx on 2019/07/24 11:59:16 SWDEV-196881 - [HIP] Fix hipStreamQuery implementation. Using isEmpty isn't correct. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#32 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#20 edit --- api/hip/hip_internal.hpp | 1 + api/hip/hip_stream.cpp | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index eae35ecae9..5d32f1049f 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -159,6 +159,7 @@ extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); #define HIP_RETURN(ret) \ hip::g_lastError = ret; \ + LogPrintfInfo("[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, hipGetErrorName(ret)); \ return hip::g_lastError; \ inline std::ostream& operator<<(std::ostream& os, const dim3& s) { diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 59456303fe..cbba339efb 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -210,7 +210,18 @@ hipError_t hipStreamQuery(hipStream_t stream) { } else { hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); } - HIP_RETURN(hostQueue->isEmpty() ? hipSuccess : hipErrorNotReady); + + amd::Command* command = hostQueue->getLastQueuedCommand(false); + if (command == nullptr) { + HIP_RETURN(hipSuccess); + } + + amd::Event& event = command->event(); + + if (command->type() != CL_COMMAND_MARKER) { + event.notifyCmdQueue(); + } + HIP_RETURN((command->status() == CL_COMPLETE) ? hipSuccess : hipErrorNotReady); } hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, From e4072ca0ec87663b90fd2247f0315796dd69fe2b Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 24 Jul 2019 12:29:10 -0400 Subject: [PATCH 166/282] P4 to Git Change 1972936 by pghafari@pghafari-gera-win10 on 2019/07/24 12:22:38 SWDEV-197122 - Renamed filename typo Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/CMakeFiles.txt#2 move/delete ... 
//depot/stg/opencl/drivers/opencl/api/hip/CMakeLists.txt#1 move/add --- api/hip/CMakeFiles.txt | 98 ------------------------------------------ 1 file changed, 98 deletions(-) delete mode 100644 api/hip/CMakeFiles.txt diff --git a/api/hip/CMakeFiles.txt b/api/hip/CMakeFiles.txt deleted file mode 100644 index c3ef0c2559..0000000000 --- a/api/hip/CMakeFiles.txt +++ /dev/null @@ -1,98 +0,0 @@ -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") - -set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-keep-memory -Wl,-Bsymbolic -Wl,--unresolved-symbols=report-all -Wl,--version-script=${CMAKE_SOURCE_DIR}/api/hip/hip_hcc.map.in") - -if(CMAKE_CXX_FLAGS MATCHES "fsanitize=address") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") -endif() - -set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) - -add_definitions(-D__HIP_VDI__ -D__HIP_PLATFORM_HCC__ -DLINUX -D__x86_64__ -D__AMD64__ -DUNIX_OS -DqLittleEndian -DOPENCL_MAJOR=2 -DOPENCL_MINOR=0 -DWITH_AQL -DWITH_ONLINE_COMPILER -DATI_OS_LINUX -DATI_ARCH_X86 -DLITTLEENDIAN_CPU -DATI_BITS_64 -DATI_COMP_GCC -DWITH_HSA_DEVICE -DWITH_TARGET_AMDGCN -DOPENCL_EXPORTS -DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DVEGA10_ONLY=false -DWITH_LIGHTNING_COMPILER) - -if(CMAKE_BUILD_TYPE MATCHES "^Debug$") - add_definitions(-DDEBUG) -endif() - -include_directories(${CMAKE_SOURCE_DIR}/api/hip/include) -include_directories(${CMAKE_SOURCE_DIR}/api/hip/elfio) -include_directories(${CMAKE_SOURCE_DIR}/runtime) -include_directories(${CMAKE_SOURCE_DIR}/api/opencl) -include_directories(${CMAKE_SOURCE_DIR}/api/opencl/amdocl) -include_directories(${CMAKE_SOURCE_DIR}/api/opencl/khronos) -include_directories(${CMAKE_SOURCE_DIR}/api/opencl/khronos/headers) -include_directories(${CMAKE_SOURCE_DIR}/api/opencl/khronos/headers/opencl2.2) -include_directories(${CMAKE_SOURCE_DIR}/compiler/lib) 
-include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/backends/common) -include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/include) -include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/loaders) -include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/loaders/elf/utils/libelf) -include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/loaders/elf/utils/common) -include_directories(${CMAKE_SOURCE_DIR}/compiler/driver/src) -include_directories(${CMAKE_SOURCE_DIR}/compiler/tools) - - -include_directories(${CMAKE_BINARY_DIR}/runtime/device/rocm) -add_definitions(-DBSD_LIBELF) -include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/loaders/elf/utils/common) -include_directories(${CMAKE_SOURCE_DIR}/compiler/lib/loaders/elf/utils/libelf) -#find_package( Threads REQUIRED CONFIG) -#find_package( LibElf REQUIRED CONFIG) - -add_library(hipamd64 SHARED - hip_context.cpp - hip_device.cpp - hip_device_runtime.cpp - hip_error.cpp - hip_event.cpp - hip_memory.cpp - hip_module.cpp - hip_peer.cpp - hip_platform.cpp - hip_profile.cpp - hip_stream.cpp - hip_surface.cpp - hip_texture.cpp - # $ -$ - ${CMAKE_CURRENT_BINARY_DIR}/../opencl/amdocl/CMakeFiles/amdocl64.dir/cl_icd.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../opencl/amdocl/CMakeFiles/amdocl64.dir/cl_gl.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../opencl/amdocl/CMakeFiles/amdocl64.dir/cl_program.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../opencl/amdocl/CMakeFiles/amdocl64.dir/cl_lqdflash_amd.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/thread/thread.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/thread/monitor.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/thread/semaphore.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/utils/flags.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/utils/debug.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/appprofile.cpp.o - 
${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/device.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/hwdebug.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/blitcl.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/blit.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/devkernel.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/devwavelimiter.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/devprogram.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/kernel.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/context.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/command.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/ndrange.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/runtime.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/object.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/memory.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/program.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/commandqueue.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/platform/agent.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/os/os_win32.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/os/alloc.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/os/os.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/os/os_posix.cpp.o - 
${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/__/compiler/lib/utils/options.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/__/compiler/lib/loaders/elf/elf.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/__/compiler/lib/loaders/elf/elf_utils.cpp.o - ${CMAKE_CURRENT_BINARY_DIR}/../../runtime/CMakeFiles/oclruntime.dir/device/comgrctx.cpp.o - - ) - -target_link_libraries(hipamd64 amdocl64 oclelf pthread) - -install(TARGETS hipamd64 LIBRARY DESTINATION lib/x86_64 COMPONENT applications) \ No newline at end of file From 00abf507c3b440ccf74ca30670eb919fc7f45062 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 30 Jul 2019 11:26:23 -0400 Subject: [PATCH 167/282] P4 to Git Change 1975700 by jatang@jatang_win_pal_lc on 2019/07/30 11:21:33 SWDEV-1 - Fix HIP build. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/fixme.cpp#2 edit --- api/hip/fixme.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/api/hip/fixme.cpp b/api/hip/fixme.cpp index 3d062e2dbc..5d7f8144f7 100644 --- a/api/hip/fixme.cpp +++ b/api/hip/fixme.cpp @@ -21,6 +21,7 @@ THE SOFTWARE. */ #include "cl_common.hpp" +#include KHRicdVendorDispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0}; amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_}; From ed497989b90d05cebd8728f20a6f0579232eaa70 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 1 Aug 2019 12:06:04 -0400 Subject: [PATCH 168/282] P4 to Git Change 1977018 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/01 11:58:59 SWDEV-197462 - [HIP] Add HIP_INIT_API to hipFree to initialize current thread. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#64 edit --- api/hip/hip_memory.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index c3755cf028..46d84b8520 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -240,6 +240,8 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { } hipError_t hipFree(void* ptr) { + HIP_INIT_API(ptr); + if (ptr == nullptr) { HIP_RETURN(hipSuccess); } From 99d54cb7bd613ce09f22e9a088bd93efc651b7a9 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 1 Aug 2019 16:40:24 -0400 Subject: [PATCH 169/282] P4 to Git Change 1977240 by wchau@wc_hip_vdi on 2019/08/01 16:35:47 SWDEV-180872 - Runtime support changes for Cooperative Group Features - Taking into account of SGPRs usage to determine the block size Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#35 edit --- api/hip/hip_platform.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index d284570322..1b8a7b5c96 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -492,9 +492,19 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, // Find threads accupancy per CU => simd_per_cu * GPR usage constexpr size_t MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD - size_t alu_accupancy = device->info().simdPerCU_ * - std::min(MaxWavesPerSimd, (wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4))); + size_t VgprWaves = wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4); + size_t GprWaves; + if (wrkGrpInfo->usedSGPRs_ > 0) { + const size_t maxSGPRs = (device->info().gfxipVersion_ < 800) ? 
512 : 800; + size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16); + GprWaves = std::min(VgprWaves, SgprWaves); + } + else { + GprWaves = VgprWaves; + } + + size_t alu_accupancy = device->info().simdPerCU_ * std::min(MaxWavesPerSimd, GprWaves); alu_accupancy *= wrkGrpInfo->wavefrontSize_; // Calculate blocks occupancy per CU *numBlocks = alu_accupancy / amd::alignUp(blockSize, wrkGrpInfo->wavefrontSize_); From 44813761ba24569f8532e18498ff1821cb2bcf85 Mon Sep 17 00:00:00 2001 From: foreman Date: Sat, 3 Aug 2019 12:23:25 -0400 Subject: [PATCH 170/282] P4 to Git Change 1978068 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/03 12:16:57 SWDEV-198546 - [HIP] Implemented basic Ctx APIs that are deprecated. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#20 edit --- api/hip/hip_context.cpp | 43 +++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index d519370971..d2c37c1b13 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -309,39 +309,58 @@ hipError_t hipCtxGetFlags(unsigned int* flags) { hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) { HIP_INIT_API(dev, flags, active); - assert(0 && "Unimplemented"); + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } - HIP_RETURN(hipErrorUnknown); + if (flags != nullptr) { + *flags = 0; + } + + if (active != nullptr) { + *active = (g_devices[dev] == hip::getCurrentContext())? 
1 : 0; + } + + HIP_RETURN(hipSuccess); } hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { HIP_INIT_API(dev); - assert(0 && "Unimplemented"); + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipSuccess); } hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { HIP_INIT_API(pctx, dev); - assert(0 && "Unimplemented"); + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + if (pctx == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } - HIP_RETURN(hipErrorUnknown); + *pctx = reinterpret_cast(g_devices[dev]); + + HIP_RETURN(hipSuccess); } hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { HIP_INIT_API(dev); - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipSuccess); } hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { HIP_INIT_API(dev, flags); - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } else { + HIP_RETURN(hipErrorContextAlreadyInUse); + } } From adf865621c35919e8168c4efebc8df2c280ae6a1 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 6 Aug 2019 17:19:28 -0400 Subject: [PATCH 171/282] P4 to Git Change 1979123 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/06 17:15:10 SWDEV-198556 - [HIP] Gnarly bug due to macros: HIP_RETURN(ret) duplicates ret twice first by setting the last error then via LogDebugInfo. So if HIP_RETURN has a function as a parameter, the function would get called twice. So ihipMalloc and ihipMemcpy were being called twice (and perhaps more functions). Also logging the pointer returned by ihipMalloc so we can track memory in logs more easily. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#33 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#65 edit --- api/hip/hip_internal.hpp | 2 +- api/hip/hip_memory.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 5d32f1049f..86221b197e 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -159,7 +159,7 @@ extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); #define HIP_RETURN(ret) \ hip::g_lastError = ret; \ - LogPrintfInfo("[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, hipGetErrorName(ret)); \ + LogPrintfInfo("[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, hipGetErrorName(hip::g_lastError)); \ return hip::g_lastError; \ inline std::ostream& operator<<(std::ostream& os, const dim3& s) { diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 46d84b8520..807874dc34 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -73,7 +73,7 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) if (*ptr == nullptr) { return hipErrorOutOfMemory; } - + LogPrintfInfo("ihipMalloc ptr=0x%zx", *ptr); return hipSuccess; } From fa2d5186ba6dfd9d00de869f0bf402210e883518 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 7 Aug 2019 11:07:16 -0400 Subject: [PATCH 172/282] P4 to Git Change 1979460 by kjayapra@3_HIPWS_TXHDR_ROCM on 2019/08/07 11:05:27 SWDEV-145570 - Code changes after header update. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#13 edit --- api/hip/hip_texture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index e8fd421470..4857e4ba55 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -442,7 +442,7 @@ hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, HIP_RETURN(hipErrorUnknown); } -hipError_t ihipBindTextureToArrayImpl(int dim, enum hipTextureReadMode readMode, +hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, hipArray_const_t array, const struct hipChannelFormatDesc& desc, textureReference* tex) { From 042a4c2bb2188571784bac7dad8c7cb7145adc91 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 7 Aug 2019 11:30:59 -0400 Subject: [PATCH 173/282] P4 to Git Change 1979479 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/07 11:25:59 SWDEV-198424 - [HIP] Implemented missing fields for hipGetDeviceAttribute. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#21 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#17 edit --- api/hip/hip_device.cpp | 9 +++++++++ api/hip/hip_device_runtime.cpp | 24 ++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 622ad150a1..dc63666876 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -195,6 +195,15 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) deviceProps.cooperativeLaunch = info.cooperativeGroups_; deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_; + deviceProps.maxTexture1D = info.imageMaxBufferSize_; + deviceProps.maxTexture2D[0] = info.image2DMaxWidth_; + deviceProps.maxTexture2D[1] = info.image2DMaxHeight_; + deviceProps.maxTexture3D[0] = info.image3DMaxWidth_; + deviceProps.maxTexture3D[1] = info.image3DMaxHeight_; + deviceProps.maxTexture3D[2] = info.image3DMaxDepth_; + deviceProps.hdpMemFlushCntl = nullptr; + deviceProps.hdpRegFlushCntl = nullptr; + *props = deviceProps; HIP_RETURN(hipSuccess); } diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index eabf622f28..a039dced3e 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -241,6 +241,30 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) case hipDeviceAttributeCooperativeMultiDeviceLaunch: *pi = prop.cooperativeMultiDeviceLaunch; break; + case hipDeviceAttributeMaxTexture1DWidth: + *pi = prop.maxTexture1D; + break; + case hipDeviceAttributeMaxTexture2DWidth: + *pi = prop.maxTexture2D[0]; + break; + case hipDeviceAttributeMaxTexture2DHeight: + *pi = prop.maxTexture2D[1]; + break; + case hipDeviceAttributeMaxTexture3DWidth: + *pi = prop.maxTexture3D[0]; + break; + case hipDeviceAttributeMaxTexture3DHeight: + *pi = prop.maxTexture3D[1]; + break; + case hipDeviceAttributeMaxTexture3DDepth: + *pi = prop.maxTexture3D[2]; + break; + case hipDeviceAttributeHdpMemFlushCntl: + 
*reinterpret_cast(pi) = prop.hdpMemFlushCntl; + break; + case hipDeviceAttributeHdpRegFlushCntl: + *reinterpret_cast(pi) = prop.hdpRegFlushCntl; + break; default: HIP_RETURN(hipErrorInvalidValue); } From 7f0373f2e3ef8688053724be677232b527447b24 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 7 Aug 2019 11:44:31 -0400 Subject: [PATCH 174/282] P4 to Git Change 1979482 by zukhan@zukhan-ocl-win10 on 2019/08/07 11:30:33 SWDEV-196326 - [hipclang-vdi-rocm]: [FBA-80]: Runtime error when all GPUs are hidden by ROCR_VISIBLE_DEVICES - Adjusted hipGetDeviceCount to return "hipErrorNoDevice". - This was done to match the same behaviour as HIP-HCC, and API spec for when no devices are found. Rather then return "count = 0" as it did. ReviewBoardURL = http://ocltc.amd.com/reviews/r/17789/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#22 edit --- api/hip/hip_device.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index dc63666876..010e46a33b 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -99,6 +99,10 @@ hipError_t ihipDeviceGetCount(int* count) { // Get all available devices *count = g_devices.size(); + if (*count < 1) { + return hipErrorNoDevice; + } + return hipSuccess; } From 4092ab0f7a44f0a57e22bb53cdba229b67dadfb4 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 7 Aug 2019 12:45:50 -0400 Subject: [PATCH 175/282] P4 to Git Change 1979518 by kjayapra@3_HIPWS_TXHDR_ROCM on 2019/08/07 12:40:50 SWDEV-198194 - Adding lock in Unregistervar. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#36 edit --- api/hip/hip_platform.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 1b8a7b5c96..3e70f6e539 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -166,6 +166,7 @@ extern "C" std::vector< std::pair >* __hipRegisterFatBinary(c } void PlatformState::unregisterVar(hipModule_t hmod) { + amd::ScopedLock lock(lock_); auto it = vars_.begin(); while (it != vars_.end()) { DeviceVar& dvar = it->second; From 8fdb248e424a49af3ffb58aa6d908607d853b489 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 8 Aug 2019 19:03:46 -0400 Subject: [PATCH 176/282] P4 to Git Change 1980493 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/08 18:57:21 SWDEV-198556 - [HIP] Override queue if src/dst's device doesn't match the queue's device. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#66 edit --- api/hip/hip_memory.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 807874dc34..177199d061 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -91,17 +91,26 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin amd::Memory *srcMemory = getMemoryObject(src, sOffset); size_t dOffset = 0; amd::Memory *dstMemory = getMemoryObject(dst, dOffset); + amd::Device* queueDevice = &queue.device(); if (((srcMemory == nullptr) && (dstMemory == nullptr)) || (kind == hipMemcpyHostToHost)) { memcpy(dst, src, sizeBytes); return hipSuccess; } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { - command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER, waitList, + amd::HostQueue* pQueue = &queue; + if (queueDevice != dstMemory->getContext().devices()[0]) { + pQueue = hip::getNullStream(*hip::host_context); + } + command = new amd::WriteMemoryCommand(*pQueue, CL_COMMAND_WRITE_BUFFER, waitList, 
*dstMemory->asBuffer(), dOffset, sizeBytes, src); isAsync = false; } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { - command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList, + amd::HostQueue* pQueue = &queue; + if (queueDevice != srcMemory->getContext().devices()[0]) { + pQueue = hip::getNullStream(*hip::host_context); + } + command = new amd::ReadMemoryCommand(*pQueue, CL_COMMAND_READ_BUFFER, waitList, *srcMemory->asBuffer(), sOffset, sizeBytes, dst); isAsync = false; } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { @@ -110,7 +119,6 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin kind == hipMemcpyDefault) && ((srcMemory->getMemFlags() & hostMem) == 0) && ((dstMemory->getMemFlags() & hostMem) == 0)) { - amd::Device* queueDevice = &queue.device(); if (queueDevice != srcMemory->getContext().devices()[0]) { amd::Coord3D srcOffset(sOffset, 0, 0); amd::Coord3D dstOffset(dOffset, 0, 0); From 867d521a0835b56dcce3de9807996b18a58a2e2f Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 9 Aug 2019 16:34:38 -0400 Subject: [PATCH 177/282] P4 to Git Change 1981065 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/09 16:31:44 SWDEV-198556 - [HIP] Use src/dstMemory->getContext instead of host_context. Also relax the check for P2P copies in case of hipMemcpy(hostMalloced, hipMalloced(dev1), dev0) Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#67 edit --- api/hip/hip_memory.cpp | 56 +++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 177199d061..8ae5b36d3d 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -100,7 +100,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { amd::HostQueue* pQueue = &queue; if (queueDevice != dstMemory->getContext().devices()[0]) { - pQueue = hip::getNullStream(*hip::host_context); + pQueue = hip::getNullStream(dstMemory->getContext()); } command = new amd::WriteMemoryCommand(*pQueue, CL_COMMAND_WRITE_BUFFER, waitList, *dstMemory->asBuffer(), dOffset, sizeBytes, src); @@ -108,43 +108,37 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { amd::HostQueue* pQueue = &queue; if (queueDevice != srcMemory->getContext().devices()[0]) { - pQueue = hip::getNullStream(*hip::host_context); + pQueue = hip::getNullStream(srcMemory->getContext()); } command = new amd::ReadMemoryCommand(*pQueue, CL_COMMAND_READ_BUFFER, waitList, *srcMemory->asBuffer(), sOffset, sizeBytes, dst); isAsync = false; } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { - static const uint hostMem = CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR; - if ((kind == hipMemcpyDeviceToDevice || - kind == hipMemcpyDefault) && - ((srcMemory->getMemFlags() & hostMem) == 0) && - ((dstMemory->getMemFlags() & hostMem) == 0)) { - if (queueDevice != srcMemory->getContext().devices()[0]) { - amd::Coord3D srcOffset(sOffset, 0, 0); - amd::Coord3D dstOffset(dOffset, 0, 0); - amd::Coord3D copySize(sizeBytes, 1, 1); - command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, - *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, 
dstOffset, copySize); - command->enqueue(); - if (!isAsync) { - command->awaitCompletion(); - } - command->release(); - return hipSuccess; + if (queueDevice != srcMemory->getContext().devices()[0]) { + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); } - if (queueDevice != dstMemory->getContext().devices()[0]) { - amd::Coord3D srcOffset(sOffset, 0, 0); - amd::Coord3D dstOffset(dOffset, 0, 0); - amd::Coord3D copySize(sizeBytes, 1, 1); - command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, - *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); - command->enqueue(); - if (!isAsync) { - command->awaitCompletion(); - } - command->release(); - return hipSuccess; + command->release(); + return hipSuccess; + } + if (queueDevice != dstMemory->getContext().devices()[0]) { + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); } + command->release(); + return hipSuccess; } command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, *srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, dOffset, sizeBytes); From 92840c805e52c76c57a305df38ad376db603b4a2 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 9 Aug 2019 20:41:13 -0400 Subject: [PATCH 178/282] P4 to Git Change 1981152 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/09 20:33:32 SWDEV-193430 - [HIP] Delay creating HostQueue till commands are enqueued. 
Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#14 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#34 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#68 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#31 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#21 edit --- api/hip/hip_event.cpp | 6 +-- api/hip/hip_event.hpp | 2 +- api/hip/hip_internal.hpp | 16 ++++++++ api/hip/hip_memory.cpp | 14 +++---- api/hip/hip_module.cpp | 4 +- api/hip/hip_stream.cpp | 87 ++++++++++++++++++++++++++-------------- 6 files changed, 85 insertions(+), 44 deletions(-) diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 746d93a8b2..8262a770d0 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -98,9 +98,7 @@ hipError_t Event::elapsedTime(Event& eStop, float& ms) { return hipSuccess; } -hipError_t Event::streamWait(hipStream_t stream, uint flags) { - amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); - +hipError_t Event::streamWait(amd::HostQueue* hostQueue, uint flags) { if (stream_ == hostQueue) return hipSuccess; amd::ScopedLock lock(lock_); @@ -237,7 +235,7 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { if (stream == nullptr) { queue = hip::getNullStream(); } else { - queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + queue = reinterpret_cast(stream)->asHostQueue(); } amd::Command* command = queue->getLastQueuedCommand(true); diff --git a/api/hip/hip_event.hpp b/api/hip/hip_event.hpp index 4c6fb132e8..410c19c7b4 100644 --- a/api/hip/hip_event.hpp +++ b/api/hip/hip_event.hpp @@ -50,7 +50,7 @@ public: hipError_t query(); hipError_t synchronize(); hipError_t elapsedTime(Event& stop, float& ms); - hipError_t streamWait(hipStream_t stream, uint flags); + hipError_t streamWait(amd::HostQueue* queue, uint flags); void addMarker(amd::HostQueue* 
queue, amd::Command* command); diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 86221b197e..6e7614f315 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -86,6 +86,22 @@ namespace hip { static Function* asFunction(hipFunction_t f) { return reinterpret_cast(f); } }; + + struct Stream { + amd::HostQueue* queue; + + amd::Device* device; + amd::Context* context; + amd::CommandQueue::Priority priority; + unsigned int flags; + + Stream(amd::Device* dev, amd::Context* ctx, amd::CommandQueue::Priority p, unsigned int f); + void create(); + amd::HostQueue* asHostQueue(); + void destroy(); + void finish(); + }; + }; struct ihipExec_t { diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 8ae5b36d3d..d00400c88b 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -177,7 +177,7 @@ hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, queue = hip::getNullStream(); } else { hip::getNullStream()->finish(); - queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + queue = reinterpret_cast(stream)->asHostQueue(); } if (memory != nullptr) { @@ -771,7 +771,7 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, queue = hip::getNullStream(); } else { hip::getNullStream()->finish(); - queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + queue = reinterpret_cast(stream)->asHostQueue(); } HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue, true)); @@ -789,7 +789,7 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, queue = hip::getNullStream(); } else { hip::getNullStream()->finish(); - queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + queue = reinterpret_cast(stream)->asHostQueue(); } HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, @@ -807,7 +807,7 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz queue = hip::getNullStream(); } else { 
hip::getNullStream()->finish(); - queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + queue = reinterpret_cast(stream)->asHostQueue(); } HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, @@ -825,7 +825,7 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, queue = hip::getNullStream(); } else { hip::getNullStream()->finish(); - queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + queue = reinterpret_cast(stream)->asHostQueue(); } HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, @@ -928,7 +928,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp queue = hip::getNullStream(); } else { hip::getNullStream()->finish(); - queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + queue = reinterpret_cast(stream)->asHostQueue(); } HIP_RETURN(ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, *queue, true)); @@ -1355,7 +1355,7 @@ hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, queue = hip::getNullStream(); } else { hip::getNullStream()->finish(); - queue = as_amd(reinterpret_cast(stream))->asHostQueue(); + queue = reinterpret_cast(stream)->asHostQueue(); } HIP_RETURN(ihipMemset2D(dst, pitch, value, width, height, *queue, true)); diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index f9c5a85137..315caffa31 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -231,7 +231,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, queue = hip::getNullStream(); } else { hip::getNullStream()->finish(); - queue = as_amd(reinterpret_cast(hStream))->asHostQueue(); + queue = reinterpret_cast(hStream)->asHostQueue(); } if ((params & amd::NDRangeKernelCommand::CooperativeGroups) && !device->info().cooperativeGroups_) { @@ -387,7 +387,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL for (int i = 0; i < numDevices; ++i) { 
hipSetDevice(i); const hipLaunchParams& launch = launchParamsList[i]; - amd::HostQueue* queue = as_amd(reinterpret_cast(launch.stream))->asHostQueue(); + amd::HostQueue* queue = reinterpret_cast(launch.stream)->asHostQueue(); hipFunction_t func = PlatformState::instance().getFunc(launch.func, i); if (func == nullptr) { HIP_RETURN(result); diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index cbba339efb..a87e0e0d79 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -26,7 +26,7 @@ THE SOFTWARE. #include "thread/monitor.hpp" static amd::Monitor streamSetLock("Guards global stream set"); -static std::unordered_set streamSet; +static std::unordered_set streamSet; // Internal structure for stream callback handler class StreamCallback { @@ -52,6 +52,37 @@ void syncStreams() { } } +Stream::Stream(amd::Device* dev, amd::Context* ctx, amd::CommandQueue::Priority p, unsigned int f) : + queue(nullptr), device(dev), context(ctx), priority(p), flags(f) {} + +void Stream::create() { + cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; + queue = new amd::HostQueue(*context, *device, properties, + amd::CommandQueue::RealTimeDisabled, priority); + assert(queue != nullptr); + queue->create(); +} + +amd::HostQueue* Stream::asHostQueue() { + if (queue == nullptr) { + create(); + } + return queue; +} + +void Stream::destroy() { + if (queue != nullptr) { + queue->release(); + queue = nullptr; + } +} + +void Stream::finish() { + if (queue != nullptr) { + queue->finish(); + } +} + }; void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { @@ -66,12 +97,9 @@ void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd::CommandQueue::Priority priority) { amd::Device* device = hip::getCurrentContext()->devices()[0]; - cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; - amd::HostQueue* 
queue = new amd::HostQueue(*hip::getCurrentContext(), *device, properties, - amd::CommandQueue::RealTimeDisabled, - priority); + hip::Stream* hStream = new hip::Stream(device, hip::getCurrentContext(), priority, flags); - if (queue == nullptr || !queue->create()) { + if (hStream == nullptr) { return hipErrorOutOfMemory; } @@ -80,11 +108,11 @@ static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd: { amd::ScopedLock lock(streamSetLock); - streamSet.insert(queue); + streamSet.insert(hStream); } } - *stream = reinterpret_cast(as_cl(queue)); + *stream = reinterpret_cast(hStream); return hipSuccess; } @@ -129,11 +157,10 @@ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPrio hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { HIP_INIT_API(stream, flags); - amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); - auto it = streamSet.find(hostQueue); + hip::Stream* hStream = reinterpret_cast(stream); - if(flags != nullptr) { - *flags = (it == streamSet.end()) ? 
hipStreamNonBlocking : hipStreamDefault; + if(flags != nullptr && hStream != nullptr) { + *flags = hStream->flags; } else { HIP_RETURN(hipErrorInvalidValue); } @@ -150,18 +177,15 @@ hipError_t hipStreamSynchronize(hipStream_t stream) { hip::syncStreams(); hostQueue = hip::getNullStream(); + + hostQueue->finish(); } else { hip::getNullStream()->finish(); - hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); + hip::Stream* hStream = reinterpret_cast(stream); + + hStream->finish(); } - - if (hostQueue == nullptr) { - HIP_RETURN(hipErrorUnknown); - } - - hostQueue->finish(); - HIP_RETURN(hipSuccess); } @@ -174,13 +198,12 @@ hipError_t hipStreamDestroy(hipStream_t stream) { amd::ScopedLock lock(streamSetLock); - amd::HostQueue* hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); + hip::Stream* hStream = reinterpret_cast(stream); - // Release last tracked command - hostQueue->setLastQueuedCommand(nullptr); + hStream->destroy(); + streamSet.erase(hStream); - hostQueue->release(); - streamSet.erase(hostQueue); + delete hStream; HIP_RETURN(hipSuccess); } @@ -188,8 +211,12 @@ hipError_t hipStreamDestroy(hipStream_t stream) { hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_API(stream, event, flags); + amd::HostQueue* queue; + if (stream == nullptr) { - stream = reinterpret_cast(as_cl(hip::getNullStream())); + queue = hip::getNullStream(); + } else { + queue = reinterpret_cast(stream)->asHostQueue(); } if (event == nullptr) { @@ -198,7 +225,7 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int hip::Event* e = reinterpret_cast(event); - return HIP_RETURN(e->streamWait(stream, flags)); + return HIP_RETURN(e->streamWait(queue, flags)); } hipError_t hipStreamQuery(hipStream_t stream) { @@ -208,7 +235,7 @@ hipError_t hipStreamQuery(hipStream_t stream) { if (stream == nullptr) { hostQueue = hip::getNullStream(); } else { - hostQueue = as_amd(reinterpret_cast(stream))->asHostQueue(); 
+ hostQueue = reinterpret_cast(stream)->asHostQueue(); } amd::Command* command = hostQueue->getLastQueuedCommand(false); @@ -228,8 +255,8 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback unsigned int flags) { HIP_INIT_API(stream, callback, userData, flags); - amd::HostQueue* hostQueue = as_amd(reinterpret_cast - (stream))->asHostQueue(); + amd::HostQueue* hostQueue = reinterpret_cast + (stream)->asHostQueue(); amd::Command* command = hostQueue->getLastQueuedCommand(true); amd::Event& event = command->event(); StreamCallback* cbo = new StreamCallback(stream, callback, userData, command); From 7e6c7a9b8ac4454c418777b1d6b48c63e4fe2cea Mon Sep 17 00:00:00 2001 From: foreman Date: Sun, 11 Aug 2019 18:53:11 -0400 Subject: [PATCH 179/282] P4 to Git Change 1981324 by kjayapra@3_HIPWS_TXT_ROCM on 2019/08/11 18:44:40 SWDEV-188177 - Texture API implementation and support for extern variables. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#18 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#20 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#35 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#37 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#340 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#57 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#31 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#608 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#172 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#250 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#79 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#152 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#41 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#96 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#39 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#133 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#39 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#105 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#48 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#102 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#46 edit --- api/hip/hip_hcc.def.in | 1 + api/hip/hip_hcc.map.in | 1 + api/hip/hip_internal.hpp | 3 +++ api/hip/hip_module.cpp | 26 ++++++++++++++++++++++++-- api/hip/hip_platform.cpp | 21 ++++++++++++++++++++- api/hip/hip_texture.cpp | 39 ++++++++++++++++++++++++++++++++++----- 6 files changed, 83 insertions(+), 8 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 3c1a05716f..9acf1b1314 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -110,6 +110,7 @@ hipMemsetD8 hipMemset3D hipModuleGetFunction hipModuleGetGlobal +hipModuleGetTexRef hipModuleLaunchKernel hipModuleLaunchKernelExt hipLaunchCooperativeKernel diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 624761944d..2af53915bc 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -111,6 +111,7 @@ global: hipMemset3D; hipModuleGetFunction; hipModuleGetGlobal; + hipModuleGetTexRef; hipModuleLaunchKernel; hipModuleLaunchKernelExt; hipLaunchCooperativeKernel; diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 6e7614f315..1f7e31c9aa 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -135,7 +135,9 @@ public: std::vector functions; }; struct DeviceVar { + void* shadowVptr; std::string hostVar; + size_t size; 
std::vector< std::pair< hipModule_t, bool > >* modules; std::vector rvars; }; @@ -161,6 +163,7 @@ public: bool getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr); bool getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, size_t* size_ptr); + bool getShadowVarInfo(std::string var_name, void** var_addr, size_t* var_size); void setupArgument(const void *arg, size_t size, size_t offset); void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 315caffa31..77a8a34659 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -114,7 +114,7 @@ bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) { device::Program* dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); - if (!dev_program->getGlobalSymbolsFromCodeObj(&var_names)) { + if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { return false; } @@ -124,7 +124,7 @@ bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) { modules->at(dev) = std::make_pair(*module, false); } - PlatformState::DeviceVar dvar{it->c_str(), modules, + PlatformState::DeviceVar dvar{nullptr, it->c_str(), 0, modules, std::vector{ g_devices.size()}}; PlatformState::instance().registerVar(it->c_str(), dvar); } @@ -417,3 +417,25 @@ hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags) { return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0); } + +hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) { + HIP_INIT_API(texRef, hmod, name); + + hipDeviceptr_t dptr = nullptr; + size_t bytes = 0; + + /* input args check */ + if ((texRef == nullptr) || (name == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Get address and size for the global symbol */ + if 
(!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), &dptr, + &bytes)) { + HIP_RETURN(hipErrorUnknown); + } + + *texRef = reinterpret_cast(dptr); + HIP_RETURN(hipSuccess); +} + diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 3e70f6e539..e05bae5c68 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -209,6 +209,23 @@ bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFunc return true; } +bool PlatformState::getShadowVarInfo(std::string var_name, void** var_addr, size_t* var_size) { + const auto it = vars_.find(var_name); + if (it != vars_.cend()) { + DeviceVar& dvar = it->second; + *var_addr = dvar.shadowVptr; + *var_size = dvar.size; + return true; + } else { + return false; + } +} + +bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr, + size_t* var_size) { + return PlatformState::instance().getShadowVarInfo(var_name, var_addr, var_size); +} + hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { amd::ScopedLock lock(lock_); const auto it = functions_.find(hostFunction); @@ -218,6 +235,7 @@ hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { hipModule_t module = (*devFunc.modules)[deviceId].first; if (!(*devFunc.modules)[deviceId].second) { amd::Program* program = as_amd(reinterpret_cast(module)); + program->setVarInfoCallBack(&getSvarInfo); if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { return nullptr; } @@ -280,6 +298,7 @@ bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, if (!(*dvar.modules)[deviceId].second) { amd::Program* program = as_amd(reinterpret_cast((*dvar.modules)[deviceId].first)); + program->setVarInfoCallBack(&getSvarInfo); if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { return false; } @@ -358,7 +377,7 @@ extern "C" void __hipRegisterVar( { HIP_INIT(); - 
PlatformState::DeviceVar dvar{ std::string{ hostVar }, modules, + PlatformState::DeviceVar dvar{var, std::string{ hostVar }, static_cast(size), modules, std::vector{ g_devices.size() } }; PlatformState::instance().registerVar(hostVar, dvar); diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 4857e4ba55..08754fd773 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -442,13 +442,38 @@ hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, HIP_RETURN(hipErrorUnknown); } +hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, + const void* devPtr, const struct hipChannelFormatDesc* desc, + size_t size, textureReference* tex) { + HIP_INIT_API(dim, readMode, offset, devPtr, size, tex); + + assert(1 == dim); + + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, 1, 1, size)); +} + hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, hipArray_const_t array, const struct hipChannelFormatDesc& desc, textureReference* tex) { - assert(0 && "Unimplemented"); + HIP_INIT_API(dim, readMode, &desc, array, tex); - return hipErrorUnknown; + cl_mem_object_type clType; + size_t offset = 0; + + switch (dim) { + case 1: + clType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + break; + case 2: + clType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, &desc, array->width, + array->height, array->depth)); } hipError_t hipBindTextureToMipmappedArray(textureReference* tex, @@ -464,7 +489,7 @@ hipError_t hipBindTextureToMipmappedArray(textureReference* tex, hipError_t hipUnbindTexture(const textureReference* tex) { HIP_INIT_API(tex); - as_amd(reinterpret_cast(tex->textureObject))->release(); + ihipDestroyTextureObject(reinterpret_cast(tex->textureObject)); HIP_RETURN(hipSuccess); } @@ -482,9 +507,13 @@ hipError_t 
hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex) { HIP_INIT_API(offset, tex); - assert(0 && "Unimplemented"); + if ((offset == nullptr) || (tex == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } - HIP_RETURN(hipErrorUnknown); + *offset = 0; + + HIP_RETURN(hipSuccess); } hipError_t hipGetTextureReference(const textureReference** tex, const void* symbol) { From 4e71cb437e35ae4ab5ce22f3ed4ae5993666d9fe Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 12 Aug 2019 19:16:30 -0400 Subject: [PATCH 180/282] P4 to Git Change 1982034 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/12 19:08:13 SWDEV-199606 - [HIP] hipStreamQuery issue Don't test against CL_COMMAND_MARKER but 0 instead (user visible=false) Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#22 edit --- api/hip/hip_event.cpp | 4 ++-- api/hip/hip_event.hpp | 2 +- api/hip/hip_stream.cpp | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 8262a770d0..143d2eded9 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -115,7 +115,7 @@ hipError_t Event::streamWait(amd::HostQueue* hostQueue, uint flags) { amd::Command::EventWaitList eventWaitList; eventWaitList.push_back(event_); - amd::Command* command = new amd::Marker(*hostQueue, true, eventWaitList); + amd::Command* command = new amd::Marker(*hostQueue, false, eventWaitList); if (command == NULL) { return hipErrorOutOfMemory; } @@ -241,7 +241,7 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { amd::Command* command = queue->getLastQueuedCommand(true); if (command == nullptr) { - command = new amd::Marker(*queue, true); + command = new amd::Marker(*queue, false); command->enqueue(); } diff --git 
a/api/hip/hip_event.hpp b/api/hip/hip_event.hpp index 410c19c7b4..ddf7a9c06b 100644 --- a/api/hip/hip_event.hpp +++ b/api/hip/hip_event.hpp @@ -30,7 +30,7 @@ namespace hip { class TimerMarker: public amd::Marker { public: - TimerMarker(amd::HostQueue& queue) : amd::Marker(queue, true) { + TimerMarker(amd::HostQueue& queue) : amd::Marker(queue, false) { profilingInfo_.enabled_ = true; profilingInfo_.callback_ = nullptr; profilingInfo_.start_ = profilingInfo_.end_ = 0; diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index a87e0e0d79..5dfe6f2db0 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -244,8 +244,7 @@ hipError_t hipStreamQuery(hipStream_t stream) { } amd::Event& event = command->event(); - - if (command->type() != CL_COMMAND_MARKER) { + if (command->type() != 0) { event.notifyCmdQueue(); } HIP_RETURN((command->status() == CL_COMPLETE) ? hipSuccess : hipErrorNotReady); From c2ffca9c1dae7ce6a7a03247c40ead1f1dc59b09 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 12 Aug 2019 21:22:47 -0400 Subject: [PATCH 181/282] P4 to Git Change 1982056 by kjayapra@3_HIPWS_TXT_ROCM on 2019/08/12 21:16:51 SWDEV-199635 - Setting VarInfoCallBack before all program builds. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#36 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#33 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#58 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#97 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#106 edit --- api/hip/hip_internal.hpp | 2 ++ api/hip/hip_module.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 1f7e31c9aa..0df562065c 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -175,6 +175,8 @@ extern hipError_t ihipDeviceGetCount(int* count); extern int ihipGetDevice(); extern hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags); extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); +extern bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr, + size_t* var_size); #define HIP_RETURN(ret) \ hip::g_lastError = ret; \ diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 77a8a34659..a66c2da9e9 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -143,6 +143,8 @@ hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) return hipErrorOutOfMemory; } + program->setVarInfoCallBack(&getSvarInfo); + if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image)) || CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) { return hipErrorUnknown; From 2296b63a866ee841784e23cb092edca70c9eb25b Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 16 Aug 2019 13:27:47 -0400 Subject: [PATCH 182/282] P4 to Git Change 1984568 by kjayapra@4_HIPWS_RCCM_ROCM on 2019/08/16 13:19:53 SWDEV-144570 - Code changes to support Texture header changes. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#15 edit --- api/hip/hip_texture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 08754fd773..28c8c32e04 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -442,7 +442,7 @@ hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, HIP_RETURN(hipErrorUnknown); } -hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, +hipError_t ihipBindTextureImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, size_t* offset, const void* devPtr, const struct hipChannelFormatDesc* desc, size_t size, textureReference* tex) { HIP_INIT_API(dim, readMode, offset, devPtr, size, tex); From 182ccd1ab02b313eee1859ac64e27bc1bf4ac4e7 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 16 Aug 2019 18:38:15 -0400 Subject: [PATCH 183/282] P4 to Git Change 1984760 by kjayapra@4_HIPWS_RCCM_ROCM on 2019/08/16 18:33:19 SWDEV-188177 - Fixing parameters passed to ihipBindTexture in case of 1D image. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#16 edit --- api/hip/hip_texture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 28c8c32e04..6e5cc9d668 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -449,7 +449,7 @@ hipError_t ihipBindTextureImpl(TlsData* tls, int dim, enum hipTextureReadMode re assert(1 == dim); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, 1, 1, size)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, size, 1, 0)); } hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, From 15685fcc88292443bcc199175e9733499bc12526 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 20 Aug 2019 17:11:51 -0400 Subject: [PATCH 184/282] P4 to Git Change 1986255 by kjayapra@4_HIPWS_RCCM_ROCM on 2019/08/20 17:06:44 SWDEV-198194 - Making some code common between static and dynamically created module handling. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#37 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#34 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#38 edit --- api/hip/hip_internal.hpp | 14 +++++- api/hip/hip_module.cpp | 104 +++++++++++++++++++++++++-------------- api/hip/hip_platform.cpp | 59 ++++++++++------------ 3 files changed, 105 insertions(+), 72 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 0df562065c..7a19813894 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -129,6 +129,10 @@ public: amd::Memory* amd_mem_obj_; }; + struct DeviceModules { + std::vector< std::pair< hipModule_t, bool > >* modules; + }; + struct DeviceFunction { std::string deviceName; std::vector< std::pair< hipModule_t, bool > >* modules; @@ -142,6 +146,7 @@ public: std::vector rvars; }; private: + std::unordered_mapmods_; std::unordered_map functions_; std::unordered_map vars_; @@ -154,7 +159,11 @@ public: return *platform_; } - void unregisterVar(hipModule_t hmod); + void registerMod(hipModule_t hmod, const DeviceModules& dmod); + void unregisterMod(hipModule_t hmod); + std::vector< std::pair< hipModule_t, bool > >* findModules(hipModule_t hmod); + + void unregisterVar(std::vector< std::pair >* modules); void registerVar(const void* hostvar, const DeviceVar& var); void registerFunction(const void* hostFunction, const DeviceFunction& func); @@ -170,6 +179,9 @@ public: void popExec(ihipExec_t& exec); }; +std::vector< std::pair >* ihipModuleLoadModule(const void* image); +bool ihipModuleUnload(std::vector< std::pair >* modules); + extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); extern int ihipGetDevice(); diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index a66c2da9e9..1a18e1b2dd 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -51,7 +51,7 @@ static uint64_t ElfSize(const void *emi) return total_size; } -hipError_t hipModuleLoad(hipModule_t *module, const char *fname) +hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { 
HIP_INIT_API(module, fname); @@ -70,27 +70,38 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname) HIP_RETURN(ihipModuleLoadData(module, tmp.data())); } -bool ihipModuleUnregisterGlobal(hipModule_t hmod) { - PlatformState::instance().unregisterVar(hmod); +bool ihipModuleUnload(std::vector< std::pair >* modules) { + + if (modules == nullptr) { + return false; + } + + PlatformState::instance().unregisterVar(modules); + + std::for_each(modules->begin(), modules->end(), [](std::pair module) { + if (module.first != nullptr) { + as_amd(reinterpret_cast(module.first))->release(); + } + }); + + PlatformState::instance().unregisterMod((*modules)[0].first); + delete modules; return true; } -hipError_t hipModuleUnload(hipModule_t hmod) -{ +hipError_t hipModuleUnload(hipModule_t hmod) { HIP_INIT_API(hmod); - if (hmod == nullptr) { + std::vector< std::pair >* modules + = PlatformState::instance().findModules(hmod); + if (modules == nullptr) { HIP_RETURN(hipErrorUnknown); } - amd::Program* program = as_amd(reinterpret_cast(hmod)); - - if(!ihipModuleUnregisterGlobal(hmod)) { + if (!ihipModuleUnload(modules)) { HIP_RETURN(hipErrorUnknown); } - program->release(); - HIP_RETURN(hipSuccess); } @@ -105,57 +116,74 @@ extern bool __hipExtractCodeObjectFromFatBinary(const void* data, const std::vector& devices, std::vector>& code_objs); -bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) { +bool ihipModuleRegisterGlobal(amd::Program* program, int device_id, + std::vector< std::pair >* modules) { - size_t var_size = 0; - hipDeviceptr_t device_ptr = nullptr; std::vector var_names; device::Program* dev_program - = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + = program->getDeviceProgram(*(g_devices[device_id]->devices()[0])); if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { return false; } for (auto it = var_names.begin(); it != var_names.end(); ++it) { - auto modules = new std::vector >{g_devices.size()}; - for (size_t 
dev = 0; dev < g_devices.size(); ++dev) { - modules->at(dev) = std::make_pair(*module, false); - } - PlatformState::DeviceVar dvar{nullptr, it->c_str(), 0, modules, - std::vector{ g_devices.size()}}; + std::vector{ g_devices.size()} }; PlatformState::instance().registerVar(it->c_str(), dvar); } return true; } -hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) -{ +std::vector< std::pair >* ihipModuleLoadModule(const void* image) { + std::vector devices; std::vector> code_objs; - if (__hipExtractCodeObjectFromFatBinary(image, {hip::getCurrentContext()->devices()[0]->info().name_}, code_objs)) - image = code_objs[0].first; - - amd::Program* program = new amd::Program(*hip::getCurrentContext()); - if (program == NULL) { - return hipErrorOutOfMemory; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]; + devices.push_back(ctx->devices()[0]->info().name_); } - program->setVarInfoCallBack(&getSvarInfo); - - if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image)) || - CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) { - return hipErrorUnknown; + if (!__hipExtractCodeObjectFromFatBinary(image, devices, code_objs)) { + return nullptr; } - *module = reinterpret_cast(as_cl(program)); - - if (!ihipModuleRegisterGlobal(program, module)) { - return hipErrorUnknown; + auto programs = new std::vector< std::pair >{g_devices.size()}; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]; + amd::Program* program = new amd::Program(*ctx); + if (program == nullptr) { + return nullptr; + } + if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) { + programs->at(dev) = std::make_pair(reinterpret_cast(as_cl(program)), false); + } } + PlatformState::DeviceModules dmod { programs }; + 
PlatformState::instance().registerMod((*dmod.modules)[0].first, dmod); + + return programs; +} + +hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) { + std::vector< std::pair >* modules = ihipModuleLoadModule(image); + + for (size_t deviceId = 0; deviceId < modules->size(); ++deviceId) { + amd::Program* program = as_amd(reinterpret_cast((*modules)[deviceId].first)); + program->setVarInfoCallBack(&getSvarInfo); + program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr); + (*modules)[deviceId].second = true; + + if (static_cast(ihipGetDevice()) == deviceId) { + *module = reinterpret_cast(as_cl(program)); + } + + if (!ihipModuleRegisterGlobal(program, deviceId, modules)) { + return hipErrorUnknown; + } + } return hipSuccess; } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index e05bae5c68..45f564f00d 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -139,39 +139,15 @@ extern "C" std::vector< std::pair >* __hipRegisterFatBinary(c return nullptr; } - std::vector devices; - std::vector> code_objs; - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - amd::Context* ctx = g_devices[dev]; - devices.push_back(ctx->devices()[0]->info().name_); - } - - if (!__hipExtractCodeObjectFromFatBinary((char*)fbwrapper->binary, devices, code_objs)) { - return nullptr; - } - - auto programs = new std::vector< std::pair >{g_devices.size()}; - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - amd::Context* ctx = g_devices[dev]; - amd::Program* program = new amd::Program(*ctx); - if (program == nullptr) { - return nullptr; - } - if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) { - programs->at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); - } - } - - return programs; + return ihipModuleLoadModule(fbwrapper->binary); } -void PlatformState::unregisterVar(hipModule_t hmod) { +void PlatformState::unregisterVar(std::vector< 
std::pair >* modules) { amd::ScopedLock lock(lock_); auto it = vars_.begin(); while (it != vars_.end()) { DeviceVar& dvar = it->second; - if ((*dvar.modules)[0].first == hmod) { - delete dvar.modules; + if (dvar.modules == modules) { vars_.erase(it++); } else { ++it; @@ -179,6 +155,28 @@ void PlatformState::unregisterVar(hipModule_t hmod) { } } +void PlatformState::registerMod(hipModule_t hmod, const DeviceModules& rmod) { + amd::ScopedLock lock(lock_); + mods_.insert(std::make_pair(hmod, rmod)); +} + +void PlatformState::unregisterMod(hipModule_t hmod) { + amd::ScopedLock lock(lock_); + auto it = mods_.find(hmod); + if (it != mods_.cend()) { + mods_.erase(it); + } +} + +std::vector< std::pair< hipModule_t, bool > >* PlatformState::findModules(hipModule_t hmod) { + amd::ScopedLock lock(lock_); + const auto it = mods_.find(hmod); + if (it == mods_.cend()) { + return nullptr; + } + return it->second.modules; +} + void PlatformState::registerVar(const void* hostvar, const DeviceVar& rvar) { amd::ScopedLock lock(lock_); @@ -387,12 +385,7 @@ extern "C" void __hipUnregisterFatBinary(std::vector< std::pairbegin(), modules->end(), [](std::pair module){ - if (module.first != nullptr) { - as_amd(reinterpret_cast(module.first))->release(); - } - }); - delete modules; + ihipModuleUnload(modules); } extern "C" hipError_t hipConfigureCall( From ea22166353290a3e481760a0fcfd820875260d0d Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 22 Aug 2019 12:41:42 -0400 Subject: [PATCH 185/282] P4 to Git Change 1987529 by kjayapra@4_HIPWS_RCCM_ROCM on 2019/08/22 12:32:42 SWDEV-200625 - Reverting back CL#1986255. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#38 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#35 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#39 edit --- api/hip/hip_internal.hpp | 14 +----- api/hip/hip_module.cpp | 102 ++++++++++++++------------------------- api/hip/hip_platform.cpp | 59 ++++++++++++---------- 3 files changed, 71 insertions(+), 104 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 7a19813894..0df562065c 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -129,10 +129,6 @@ public: amd::Memory* amd_mem_obj_; }; - struct DeviceModules { - std::vector< std::pair< hipModule_t, bool > >* modules; - }; - struct DeviceFunction { std::string deviceName; std::vector< std::pair< hipModule_t, bool > >* modules; @@ -146,7 +142,6 @@ public: std::vector rvars; }; private: - std::unordered_mapmods_; std::unordered_map functions_; std::unordered_map vars_; @@ -159,11 +154,7 @@ public: return *platform_; } - void registerMod(hipModule_t hmod, const DeviceModules& dmod); - void unregisterMod(hipModule_t hmod); - std::vector< std::pair< hipModule_t, bool > >* findModules(hipModule_t hmod); - - void unregisterVar(std::vector< std::pair >* modules); + void unregisterVar(hipModule_t hmod); void registerVar(const void* hostvar, const DeviceVar& var); void registerFunction(const void* hostFunction, const DeviceFunction& func); @@ -179,9 +170,6 @@ public: void popExec(ihipExec_t& exec); }; -std::vector< std::pair >* ihipModuleLoadModule(const void* image); -bool ihipModuleUnload(std::vector< std::pair >* modules); - extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); extern int ihipGetDevice(); diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 1a18e1b2dd..3d73891ca3 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -70,38 +70,27 @@ hipError_t hipModuleLoad(hipModule_t* module, const char* fname) HIP_RETURN(ihipModuleLoadData(module, tmp.data())); } -bool ihipModuleUnload(std::vector< std::pair >* modules) { - - if (modules == nullptr) { - 
return false; - } - - PlatformState::instance().unregisterVar(modules); - - std::for_each(modules->begin(), modules->end(), [](std::pair module) { - if (module.first != nullptr) { - as_amd(reinterpret_cast(module.first))->release(); - } - }); - - PlatformState::instance().unregisterMod((*modules)[0].first); - delete modules; +bool ihipModuleUnregisterGlobal(hipModule_t hmod) { + PlatformState::instance().unregisterVar(hmod); return true; } -hipError_t hipModuleUnload(hipModule_t hmod) { +hipError_t hipModuleUnload(hipModule_t hmod) +{ HIP_INIT_API(hmod); - std::vector< std::pair >* modules - = PlatformState::instance().findModules(hmod); - if (modules == nullptr) { + if (hmod == nullptr) { HIP_RETURN(hipErrorUnknown); } - if (!ihipModuleUnload(modules)) { + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + if(!ihipModuleUnregisterGlobal(hmod)) { HIP_RETURN(hipErrorUnknown); } + program->release(); + HIP_RETURN(hipSuccess); } @@ -116,19 +105,25 @@ extern bool __hipExtractCodeObjectFromFatBinary(const void* data, const std::vector& devices, std::vector>& code_objs); -bool ihipModuleRegisterGlobal(amd::Program* program, int device_id, - std::vector< std::pair >* modules) { +bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) { + size_t var_size = 0; + hipDeviceptr_t device_ptr = nullptr; std::vector var_names; device::Program* dev_program - = program->getDeviceProgram(*(g_devices[device_id]->devices()[0])); + = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { return false; } for (auto it = var_names.begin(); it != var_names.end(); ++it) { + auto modules = new std::vector >{g_devices.size()}; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, false); + } + PlatformState::DeviceVar dvar{nullptr, it->c_str(), 0, modules, std::vector{ g_devices.size()} }; PlatformState::instance().registerVar(it->c_str(), dvar); @@ 
-137,53 +132,30 @@ bool ihipModuleRegisterGlobal(amd::Program* program, int device_id, return true; } -std::vector< std::pair >* ihipModuleLoadModule(const void* image) { - std::vector devices; +hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) +{ std::vector> code_objs; - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - amd::Context* ctx = g_devices[dev]; - devices.push_back(ctx->devices()[0]->info().name_); + if (__hipExtractCodeObjectFromFatBinary(image, {hip::getCurrentContext()->devices()[0]->info().name_}, code_objs)) + image = code_objs[0].first; + + amd::Program* program = new amd::Program(*hip::getCurrentContext()); + if (program == NULL) { + return hipErrorOutOfMemory; } - if (!__hipExtractCodeObjectFromFatBinary(image, devices, code_objs)) { - return nullptr; - } + program->setVarInfoCallBack(&getSvarInfo); - auto programs = new std::vector< std::pair >{g_devices.size()}; - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - amd::Context* ctx = g_devices[dev]; - amd::Program* program = new amd::Program(*ctx); - if (program == nullptr) { - return nullptr; - } - if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) { - programs->at(dev) = std::make_pair(reinterpret_cast(as_cl(program)), false); - } - } - - PlatformState::DeviceModules dmod { programs }; - PlatformState::instance().registerMod((*dmod.modules)[0].first, dmod); - - return programs; -} - -hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) { - std::vector< std::pair >* modules = ihipModuleLoadModule(image); - - for (size_t deviceId = 0; deviceId < modules->size(); ++deviceId) { - amd::Program* program = as_amd(reinterpret_cast((*modules)[deviceId].first)); - program->setVarInfoCallBack(&getSvarInfo); - program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr); - (*modules)[deviceId].second = true; - - if (static_cast(ihipGetDevice()) == deviceId) { - *module = 
reinterpret_cast(as_cl(program)); - } - - if (!ihipModuleRegisterGlobal(program, deviceId, modules)) { + if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image)) || + CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) { return hipErrorUnknown; - } } + + *module = reinterpret_cast(as_cl(program)); + + if (!ihipModuleRegisterGlobal(program, module)) { + return hipErrorUnknown; + } + return hipSuccess; } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 45f564f00d..dbc1b7fba8 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -139,15 +139,39 @@ extern "C" std::vector< std::pair >* __hipRegisterFatBinary(c return nullptr; } - return ihipModuleLoadModule(fbwrapper->binary); + std::vector devices; + std::vector> code_objs; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]; + devices.push_back(ctx->devices()[0]->info().name_); } -void PlatformState::unregisterVar(std::vector< std::pair >* modules) { + if (!__hipExtractCodeObjectFromFatBinary((char*)fbwrapper->binary, devices, code_objs)) { + return nullptr; + } + + auto programs = new std::vector< std::pair >{g_devices.size()}; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]; + amd::Program* program = new amd::Program(*ctx); + if (program == nullptr) { + return nullptr; + } + if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) { + programs->at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); + } + } + + return programs; +} + +void PlatformState::unregisterVar(hipModule_t hmod) { amd::ScopedLock lock(lock_); auto it = vars_.begin(); while (it != vars_.end()) { DeviceVar& dvar = it->second; - if (dvar.modules == modules) { + if ((*dvar.modules)[0].first == hmod) { + delete dvar.modules; vars_.erase(it++); } else { ++it; @@ -155,28 
+179,6 @@ void PlatformState::unregisterVar(std::vector< std::pair >* m } } -void PlatformState::registerMod(hipModule_t hmod, const DeviceModules& rmod) { - amd::ScopedLock lock(lock_); - mods_.insert(std::make_pair(hmod, rmod)); -} - -void PlatformState::unregisterMod(hipModule_t hmod) { - amd::ScopedLock lock(lock_); - auto it = mods_.find(hmod); - if (it != mods_.cend()) { - mods_.erase(it); - } -} - -std::vector< std::pair< hipModule_t, bool > >* PlatformState::findModules(hipModule_t hmod) { - amd::ScopedLock lock(lock_); - const auto it = mods_.find(hmod); - if (it == mods_.cend()) { - return nullptr; - } - return it->second.modules; -} - void PlatformState::registerVar(const void* hostvar, const DeviceVar& rvar) { amd::ScopedLock lock(lock_); @@ -385,7 +387,12 @@ extern "C" void __hipUnregisterFatBinary(std::vector< std::pairbegin(), modules->end(), [](std::pair module){ + if (module.first != nullptr) { + as_amd(reinterpret_cast(module.first))->release(); + } + }); + delete modules; } extern "C" hipError_t hipConfigureCall( From 6577ecf5d3ceed24bdba4dccd6615fe408615187 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 22 Aug 2019 13:26:57 -0400 Subject: [PATCH 186/282] P4 to Git Change 1987582 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/22 13:20:28 SWDEV-185742 - [HIP] Add a svm path to avoid analyzing the mem objs and just memcpy the kernel args Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#36 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#82 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#25 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#96 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#94 edit --- api/hip/hip_module.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 3d73891ca3..f1659d496d 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -253,8 +253,6 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); amd::Command::EventWaitList waitList; - address kernargs = nullptr; - // 'extra' is a struct that contains the following info: { // HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs, // HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size, @@ -264,19 +262,17 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE || extra[4] != HIP_LAUNCH_PARAM_END) { return hipErrorNotInitialized; } - kernargs = reinterpret_cast
(extra[1]); + address kernargs = reinterpret_cast
(extra[1]); + ::memcpy(kernel->parameters().values(), kernargs, *(size_t*)(extra[3])); +// assert(kernel->signature().paramsSize()>=reinterpret_cast(extra[3])); } - const amd::KernelSignature& signature = kernel->signature(); - for (size_t i = 0; i < signature.numParameters(); ++i) { - const amd::KernelParameterDescriptor& desc = signature.at(i); - if (kernelParams == nullptr) { - assert(kernargs != nullptr); - kernel->parameters().set(i, desc.size_, kernargs + desc.offset_, - desc.type_ == T_POINTER/*svmBound*/); - } else { + if (kernelParams != nullptr) { + const amd::KernelSignature& signature = kernel->signature(); + for (size_t i = 0; i < signature.numParameters(); ++i) { + const amd::KernelParameterDescriptor& desc = signature.at(i); assert(extra == nullptr); - kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER/*svmBound*/); + kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER); } } @@ -287,7 +283,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, } amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand( - *queue, waitList, *kernel, ndrange, sharedMemBytes, params); + *queue, waitList, *kernel, ndrange, sharedMemBytes, params, true); if (!command) { return hipErrorOutOfMemory; } From 373d64478c3f4556ef1152165913648dd4724928 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 22 Aug 2019 13:48:47 -0400 Subject: [PATCH 187/282] P4 to Git Change 1987598 by gandryey@gera-win10 on 2019/08/22 13:29:18 SWDEV-197289 - VDI tracing API integration in rocTracer - Add profiling activity callbacks for HIP API. The logic will utilize the current profiling mechanism inside VDI Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_activity.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#19 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#246 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#341 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#426 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#145 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#148 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#64 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#83 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/activity.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/platform/activity.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#97 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#95 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/prof_protocol.h#1 add --- api/hip/hip_activity.cpp | 35 +++++++++++++++++++++++++++++++++++ api/hip/hip_hcc.def.in | 3 +++ api/hip/hip_hcc.map.in | 3 +++ 3 files changed, 41 insertions(+) create mode 100644 api/hip/hip_activity.cpp diff --git a/api/hip/hip_activity.cpp b/api/hip/hip_activity.cpp new file mode 100644 index 0000000000..00a208270a --- /dev/null +++ b/api/hip/hip_activity.cpp @@ -0,0 +1,35 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +#include "platform/activity.hpp" + +extern "C" void InitActivityCallback(void* id_callback, void* op_callback, void* arg) { + activity_prof::CallbacksTable::init(reinterpret_cast(id_callback), + reinterpret_cast(op_callback), + arg); +} + +extern "C" bool EnableActivityCallback(unsigned op, bool enable) { + return activity_prof::CallbacksTable::SetEnabled(op, enable); +} + +extern "C" const char* GetCmdName(unsigned op) { + return getOclCommandKindString(static_cast(op)); +} diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 9acf1b1314..46e17da04a 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -172,3 +172,6 @@ hipHccGetAccelerator hipHccGetAcceleratorView hipCreateSurfaceObject hipDestroySurfaceObject +InitActivityCallback; +EnableActivityCallback; +GetCmdName; diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 2af53915bc..6e21de7978 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -180,6 +180,9 @@ global: hipDestroySurfaceObject*; hipHccModuleLaunchKernel*; hipExtModuleLaunchKernel*; + InitActivityCallback; + EnableActivityCallback; + GetCmdName; }; local: *; From ffd74944fe98ae193c2aaf123342d368fffb9cea Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 23 Aug 2019 12:15:29 -0400 Subject: [PATCH 188/282] P4 to Git Change 1988234 by kjayapra@3_HIPWS_TXT_ROCM on 2019/08/23 12:12:45 SWDEV-201068 - Reverting back CL 1987582. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#37 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#84 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#98 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#96 edit --- api/hip/hip_module.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index f1659d496d..87106a73e6 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -253,6 +253,8 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); amd::Command::EventWaitList waitList; + address kernargs = nullptr; + // 'extra' is a struct that contains the following info: { // HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs, // HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size, @@ -262,17 +264,19 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE || extra[4] != HIP_LAUNCH_PARAM_END) { return hipErrorNotInitialized; } - address kernargs = reinterpret_cast
(extra[1]); - ::memcpy(kernel->parameters().values(), kernargs, *(size_t*)(extra[3])); -// assert(kernel->signature().paramsSize()>=reinterpret_cast(extra[3])); + kernargs = reinterpret_cast
(extra[1]); } - if (kernelParams != nullptr) { const amd::KernelSignature& signature = kernel->signature(); for (size_t i = 0; i < signature.numParameters(); ++i) { const amd::KernelParameterDescriptor& desc = signature.at(i); + if (kernelParams == nullptr) { + assert(kernargs != nullptr); + kernel->parameters().set(i, desc.size_, kernargs + desc.offset_, + desc.type_ == T_POINTER/*svmBound*/); + } else { assert(extra == nullptr); - kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER); + kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER/*svmBound*/); } } @@ -283,7 +287,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, } amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand( - *queue, waitList, *kernel, ndrange, sharedMemBytes, params, true); + *queue, waitList, *kernel, ndrange, sharedMemBytes, params); if (!command) { return hipErrorOutOfMemory; } From d3bbe46d39557a1dc14d576d33112eb26c70932f Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 23 Aug 2019 19:46:27 -0400 Subject: [PATCH 189/282] P4 to Git Change 1988496 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/23 19:43:16 SWDEV-199002 - [HIP] Add hipExtLaunchMultiKernelMultiDevice to in files. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#20 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#22 edit --- api/hip/hip_hcc.def.in | 1 + api/hip/hip_hcc.map.in | 1 + 2 files changed, 2 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 46e17da04a..04e335d970 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -48,6 +48,7 @@ hipEventQuery hipEventRecord hipEventSynchronize hipExtGetLinkTypeAndHopCount +hipExtLaunchMultiKernelMultiDevice hipExtMallocWithFlags hipExtModuleLaunchKernel hipFree diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 6e21de7978..25924c5362 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -49,6 +49,7 @@ global: hipEventRecord; hipEventSynchronize; hipExtGetLinkTypeAndHopCount; + hipExtLaunchMultiKernelMultiDevice; hipExtMallocWithFlags; hipExtModuleLaunchKernel; hipFree; From 3cb2dd541a922498daf60185ec94eddc52e74e1e Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 26 Aug 2019 14:32:48 -0400 Subject: [PATCH 190/282] P4 to Git Change 1989089 by kjayapra@0_HIPWS_P2P_ROCM on 2019/08/26 14:24:21 SWDEV-198194 - Delete vars at __hipUnregisterFatBinary. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#39 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#38 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#40 edit --- api/hip/hip_internal.hpp | 2 +- api/hip/hip_module.cpp | 6 +++++- api/hip/hip_platform.cpp | 7 +++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 0df562065c..49b3757dda 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -154,7 +154,7 @@ public: return *platform_; } - void unregisterVar(hipModule_t hmod); + std::vector< std::pair >* unregisterVar(hipModule_t hmod); void registerVar(const void* hostvar, const DeviceVar& var); void registerFunction(const void* hostFunction, const DeviceFunction& func); diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 87106a73e6..1f4a2defe9 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -71,7 +71,11 @@ hipError_t hipModuleLoad(hipModule_t* module, const char* fname) } bool ihipModuleUnregisterGlobal(hipModule_t hmod) { - PlatformState::instance().unregisterVar(hmod); + std::vector< std::pair >* modules = + PlatformState::instance().unregisterVar(hmod); + if (modules != nullptr) { + delete modules; + } return true; } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index dbc1b7fba8..ee05e7c52a 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -165,18 +165,20 @@ extern "C" std::vector< std::pair >* __hipRegisterFatBinary(c return programs; } -void PlatformState::unregisterVar(hipModule_t hmod) { +std::vector< std::pair >* PlatformState::unregisterVar(hipModule_t hmod) { amd::ScopedLock lock(lock_); + std::vector< std::pair >* rmodules = nullptr; auto it = vars_.begin(); while (it != vars_.end()) { DeviceVar& dvar = it->second; if ((*dvar.modules)[0].first == hmod) { - delete dvar.modules; + rmodules = dvar.modules; vars_.erase(it++); } else { ++it; } } + return rmodules; } void PlatformState::registerVar(const void* hostvar, @@ -392,6 +394,7 @@ extern "C" void 
__hipUnregisterFatBinary(std::vector< std::pair(module.first))->release(); } }); + PlatformState::instance().unregisterVar((*modules)[0].first); delete modules; } From b05d95cb8e9c6eb997a62740a1d70ae2e88caed2 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 26 Aug 2019 15:44:58 -0400 Subject: [PATCH 191/282] P4 to Git Change 1989149 by kjayapra@0_HIPWS_P2P_ROCM on 2019/08/26 15:30:45 SWDEV-188177 - Adding ihipTexture APIs to hip_hcc.def.in Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#21 edit --- api/hip/hip_hcc.def.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 04e335d970..8bb7f41083 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -166,6 +166,8 @@ hipTexRefSetAddressMode hipTexRefSetArray hipTexRefSetAddress hipTexRefSetAddress2D +ihipBindTextureImpl +ihipBindTextureToArrayImpl hipCreateChannelDesc hipProfilerStart hipProfilerStop From dd07e24c2b3379303f2b55ba98cf1d74d335ac36 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 27 Aug 2019 20:26:25 -0400 Subject: [PATCH 192/282] P4 to Git Change 1990032 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/27 20:20:36 SWDEV-145570 - [HIP] Add missing HIP_INIT_API macros and more debug printf. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#39 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#23 edit --- api/hip/hip_module.cpp | 4 ++++ api/hip/hip_stream.cpp | 2 ++ 2 files changed, 6 insertions(+) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 1f4a2defe9..c29fec995c 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -414,6 +414,8 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags) { + HIP_INIT_API(launchParamsList, numDevices, flags); + return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, (amd::NDRangeKernelCommand::CooperativeGroups | amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups)); @@ -421,6 +423,8 @@ hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsLi hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags) { + HIP_INIT_API(launchParamsList, numDevices, flags); + return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0); } diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 5dfe6f2db0..bc681d8a7e 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -114,6 +114,8 @@ static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd: *stream = reinterpret_cast(hStream); + LogPrintfInfo("ihipStreamCreate: %zx", hStream); + return hipSuccess; } From d484ac0098851be4386d4df6779f3b104097988b Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 29 Aug 2019 17:35:38 -0400 Subject: [PATCH 193/282] P4 to Git Change 1991227 by cpaquot@cpaquot-ocl-lc-lnx on 2019/08/29 17:26:37 SWDEV-193938 - [HIP] Use fine grained pool for hipHostRegister Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#69 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#40 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#43 edit --- api/hip/hip_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index d00400c88b..0d9b1bec06 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -570,7 +570,7 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hostPtr, sizeBytes, flags); if(hostPtr != nullptr) { - amd::Memory* mem = new (*hip::host_context) amd::Buffer(*hip::host_context, CL_MEM_USE_HOST_PTR, sizeBytes); + amd::Memory* mem = new (*hip::host_context) amd::Buffer(*hip::host_context, CL_MEM_USE_HOST_PTR | CL_MEM_SVM_ATOMICS, sizeBytes); constexpr bool sysMemAlloc = false; constexpr bool skipAlloc = false; From 6b675147245ee7e790e26a22bc115503ef0ce33a Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 4 Sep 2019 17:16:03 -0400 Subject: [PATCH 194/282] P4 to Git Change 1993529 by skudchad@skudchad_rocm on 2019/09/04 17:08:05 SWDEV-199293 - Initial API checkin and sym export defines Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#22 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#23 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_rtc.cpp#1 add --- api/hip/hip_hcc.def.in | 16 ++++-- api/hip/hip_hcc.map.in | 25 +++++++-- api/hip/hip_rtc.cpp | 112 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 6 deletions(-) create mode 100644 api/hip/hip_rtc.cpp diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 8bb7f41083..6913c777a6 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -175,6 +175,16 @@ hipHccGetAccelerator hipHccGetAcceleratorView hipCreateSurfaceObject hipDestroySurfaceObject -InitActivityCallback; -EnableActivityCallback; -GetCmdName; +InitActivityCallback +EnableActivityCallback +GetCmdName +hiprtcAddNameExpression +hiprtcCompileProgram +hiprtcCreateProgram +hiprtcDestroyProgram +hiprtcGetLoweredName +hiprtcGetProgramLog +hiprtcGetProgramLogSize +hiprtcGetCode +hiprtcGetCodeSize +hiprtcGetErrorString diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 25924c5362..0933b51528 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -150,6 +150,15 @@ global: hipLaunchByPtr; hipProfilerStart; hipProfilerStop; + hiprtcCompileProgram; + hiprtcCreateProgram; + hiprtcDestroyProgram; + hiprtcGetLoweredName; + hiprtcGetProgramLog; + hiprtcGetProgramLogSize; + hiprtcGetCode; + hiprtcGetCodeSize; + hiprtcGetErrorString; extern "C++" { hip_impl::hipLaunchKernelGGLImpl*; hipCreateTextureObject*; @@ -181,9 +190,19 @@ global: hipDestroySurfaceObject*; hipHccModuleLaunchKernel*; hipExtModuleLaunchKernel*; - InitActivityCallback; - EnableActivityCallback; - GetCmdName; + InitActivityCallback*; + EnableActivityCallback*; + GetCmdName*; + hiprtcAddNameExpression*; + hiprtcCompileProgram*; + hiprtcCreateProgram*; + hiprtcDestroyProgram*; + hiprtcGetLoweredName*; + hiprtcGetProgramLog*; + hiprtcGetProgramLogSize*; + hiprtcGetCode*; + hiprtcGetCodeSize*; + hiprtcGetErrorString*; }; local: *; diff --git a/api/hip/hip_rtc.cpp b/api/hip/hip_rtc.cpp new file mode 100644 index 
0000000000..a5dbd097ae --- /dev/null +++ b/api/hip/hip_rtc.cpp @@ -0,0 +1,112 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include + +const char* hiprtcGetErrorString(hiprtcResult x) { + switch (x) { + case HIPRTC_SUCCESS: + return "HIPRTC_SUCCESS"; + case HIPRTC_ERROR_OUT_OF_MEMORY: + return "HIPRTC_ERROR_OUT_OF_MEMORY"; + case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE: + return "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE"; + case HIPRTC_ERROR_INVALID_INPUT: + return "HIPRTC_ERROR_INVALID_INPUT"; + case HIPRTC_ERROR_INVALID_PROGRAM: + return "HIPRTC_ERROR_INVALID_PROGRAM"; + case HIPRTC_ERROR_INVALID_OPTION: + return "HIPRTC_ERROR_INVALID_OPTION"; + case HIPRTC_ERROR_COMPILATION: + return "HIPRTC_ERROR_COMPILATION"; + case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE: + return "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE"; + case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: + return "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION"; + case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: + return "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION"; + case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID: + return "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID"; + case HIPRTC_ERROR_INTERNAL_ERROR: + return "HIPRTC_ERROR_INTERNAL_ERROR"; + default: + throw std::logic_error{"Invalid HIPRTC result."}; + }; +} + +hiprtcResult hiprtcAddNameExpression(hiprtcProgram p, const char* n) { + return HIPRTC_SUCCESS; +} + +hiprtcResult hiprtcCompileProgram(hiprtcProgram p, int n, const char** o) +{ + return HIPRTC_SUCCESS; +} + +hiprtcResult hiprtcCreateProgram(hiprtcProgram* p, const char* src, + const char* name, int n, const char** hdrs, + const char** incs) { + if (p == nullptr) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } + if (n < 0) { + return HIPRTC_ERROR_INVALID_INPUT; + } + if (n && (hdrs == nullptr || incs == nullptr)) { + return HIPRTC_ERROR_INVALID_INPUT; + } + + return HIPRTC_SUCCESS; +} + +hiprtcResult hiprtcDestroyProgram(hiprtcProgram* p) +{ + return HIPRTC_SUCCESS; +} + +hiprtcResult hiprtcGetLoweredName(hiprtcProgram p, const char* n, + const char** ln) +{ + return 
HIPRTC_SUCCESS; +} + +hiprtcResult hiprtcGetProgramLog(hiprtcProgram p, char* l) +{ + return HIPRTC_SUCCESS; +} + +hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram p, std::size_t* sz) +{ + return HIPRTC_SUCCESS; +} + +hiprtcResult hiprtcGetCode(hiprtcProgram p, char* c) +{ + return HIPRTC_SUCCESS; +} + +hiprtcResult hiprtcGetCodeSize(hiprtcProgram p, std::size_t* sz) +{ + return HIPRTC_SUCCESS; +} \ No newline at end of file From 9b5b4c4761c197828562e5416e1ad24168a8ac49 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 5 Sep 2019 14:12:32 -0400 Subject: [PATCH 195/282] P4 to Git Change 1994105 by skudchad@skudchad_rocm on 2019/09/05 13:59:26 SWDEV-202136 - Export symbols for hiprtcVersion and hiprtcGetTypeName Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#23 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#24 edit --- api/hip/hip_hcc.def.in | 2 ++ api/hip/hip_hcc.map.in | 2 ++ 2 files changed, 4 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 6913c777a6..0361055e03 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -188,3 +188,5 @@ hiprtcGetProgramLogSize hiprtcGetCode hiprtcGetCodeSize hiprtcGetErrorString +hiprtcVersion; +hiprtcGetTypeName; diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 0933b51528..b946017e5c 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -203,6 +203,8 @@ global: hiprtcGetCode*; hiprtcGetCodeSize*; hiprtcGetErrorString*; + hiprtcVersion*; + hiprtcGetTypeName*; }; local: *; From 485caa06023280272b1f7c8c2ae807a5f5651288 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 5 Sep 2019 14:30:10 -0400 Subject: [PATCH 196/282] P4 to Git Change 1994119 by axie@axie-hip-pal on 2019/09/05 14:20:44 SWDEV-197057 - [hipclang-vdi-rocm][func] 11_texture_driver test in hip samples fails. Tests: The sample code can be built without linkage error. 
http://ocltc.amd.com:8111/viewModification.html?modId=125598&personal=true&tab=vcsModificationBuilds Review: http://ocltc.amd.com/reviews/r/17942/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#25 edit --- api/hip/hip_hcc.map.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index b946017e5c..50384ec46a 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -75,9 +75,11 @@ global: hipMalloc; hipMalloc3D; hipMalloc3DArray; + hipArrayCreate; hipMallocArray; hipMallocPitch; hipMemcpy; + hipMemcpyParam2D; hipMemcpy2D; hipMemcpy2DAsync; hipMemcpy2DToArray; From 3b5927d7fe397e6f8bfefdedee5873d106343fbb Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 5 Sep 2019 14:34:47 -0400 Subject: [PATCH 197/282] P4 to Git Change 1994120 by axie@axie-hip-pal on 2019/09/05 14:22:40 SWDEV-201667 - Linker errors when using hipArrayCreate and hipMemcpyParam2D Tests: In Windows 10, the linker errors has been fixed with this code change. http://ocltc.amd.com:8111/viewModification.html?modId=125600&personal=true&tab=vcsModificationBuilds Review: http://ocltc.amd.com/reviews/r/17943/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#24 edit --- api/hip/hip_hcc.def.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 0361055e03..a39646f675 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -74,9 +74,11 @@ hipIpcOpenMemHandle hipMalloc hipMalloc3D hipMalloc3DArray +hipArrayCreate hipMallocArray hipMallocPitch hipMemcpy +hipMemcpyParam2D hipMemcpy2D hipMemcpy2DAsync hipMemcpy2DToArray From ee1b139bd9751caf47c6498ffa80dcbe1d62aa06 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 5 Sep 2019 16:45:19 -0400 Subject: [PATCH 198/282] P4 to Git Change 1994199 by michliao@hliao-dev-00-hip.rocm-workspace on 2019/09/05 16:35:36 SWDEV-202136 - Fix typos in `hip_hcc.def.in`. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#25 edit --- api/hip/hip_hcc.def.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index a39646f675..0348b5d68f 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -190,5 +190,5 @@ hiprtcGetProgramLogSize hiprtcGetCode hiprtcGetCodeSize hiprtcGetErrorString -hiprtcVersion; -hiprtcGetTypeName; +hiprtcVersion +hiprtcGetTypeName From 5db35686db5b9853b69bc067cc9a105adaf2b882 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 6 Sep 2019 04:33:24 -0400 Subject: [PATCH 199/282] P4 to Git Change 1994498 by skudchad@skudchad_test2_win_opencl on 2019/09/06 04:25:22 SWDEV-202136 - Fix windows build Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#26 edit --- api/hip/hip_hcc.def.in | 2 -- 1 file changed, 2 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 0348b5d68f..41b71868ae 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -190,5 +190,3 @@ hiprtcGetProgramLogSize hiprtcGetCode hiprtcGetCodeSize hiprtcGetErrorString -hiprtcVersion -hiprtcGetTypeName From 8167dee7d5dcce97a0ee877e6aa4227bc06ec411 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 6 Sep 2019 19:11:43 -0400 Subject: [PATCH 200/282] P4 to Git Change 1994974 by skudchad@skudchad_test2_win_opencl on 2019/09/06 19:09:36 SWDEV-199293 - Implement some hipRTC apis. ReviewBoardURL = http://ocltc.amd.com/reviews/r/17953/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/build/Makefile.hip#18 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_rtc.cpp#2 edit --- api/hip/hip_rtc.cpp | 216 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 188 insertions(+), 28 deletions(-) diff --git a/api/hip/hip_rtc.cpp b/api/hip/hip_rtc.cpp index a5dbd097ae..c339f01401 100644 --- a/api/hip/hip_rtc.cpp +++ b/api/hip/hip_rtc.cpp @@ -21,8 +21,9 @@ THE SOFTWARE. 
*/ #include +#include "cl_common.hpp" #include -#include +#include const char* hiprtcGetErrorString(hiprtcResult x) { switch (x) { @@ -50,23 +51,115 @@ const char* hiprtcGetErrorString(hiprtcResult x) { return "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID"; case HIPRTC_ERROR_INTERNAL_ERROR: return "HIPRTC_ERROR_INTERNAL_ERROR"; - default: + default: throw std::logic_error{"Invalid HIPRTC result."}; - }; + }; } -hiprtcResult hiprtcAddNameExpression(hiprtcProgram p, const char* n) { - return HIPRTC_SUCCESS; +namespace hip_impl { +inline std::string demangle(const char* x) { + if (!x) { + return {}; + } + return boost::core::demangle(x); } +} // Namespace hip_impl + +struct _hiprtcProgram { + static amd::Monitor lock_; + static std::vector> programs_; + + std::vector> headers; + std::vector> names; + std::vector loweredNames; + std::vector elf; + std::string source; + std::string name; + std::string log; + bool compiled; + + static _hiprtcProgram* build(std::string s, std::string n, + std::vector> h) { + std::unique_ptr<_hiprtcProgram> tmp{ + new _hiprtcProgram{std::move(h), {}, {}, {}, std::move(s), std::move(n), {}, false}}; + + amd::ScopedLock lock(_hiprtcProgram::lock_); + + programs_.push_back(move(tmp)); + + return programs_.back().get(); + } + + static hiprtcResult destroy(_hiprtcProgram* p) { + amd::ScopedLock lock(_hiprtcProgram::lock_); + + const auto it { + std::find_if(programs_.cbegin(), programs_.cend(), + [=](const std::unique_ptr<_hiprtcProgram>& x) + { return x.get() == p; }) }; + + if (it == programs_.cend()) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } -hiprtcResult hiprtcCompileProgram(hiprtcProgram p, int n, const char** o) -{ return HIPRTC_SUCCESS; + } + + static std::string handleMangledName(std::string name) { + name = hip_impl::demangle(name.c_str()); + + if (name.empty()) { + return name; + } + + if (name.find("void ") == 0) { + name.erase(0, strlen("void ")); + } + + auto dx {name.find_first_of("(<")}; + + if (dx == std::string::npos) { + return 
name; + } + + if (name[dx] == '<') { + auto cnt{1u}; + do { + ++dx; + cnt += (name[dx] == '<') ? 1 : ((name[dx] == '>') ? -1 : 0); + } while (cnt); + + name.erase(++dx); + } else { + name.erase(dx); + } + + return name; + } + + static bool isValid(_hiprtcProgram* p) { + return std::find_if(programs_.cbegin(), programs_.cend(), + [=](const std::unique_ptr<_hiprtcProgram>& x) { + return x.get() == p; }) != programs_.cend(); + } +}; + +// Init +std::vector> _hiprtcProgram::programs_{}; +amd::Monitor _hiprtcProgram::lock_("hiprtcProgram lock"); + +inline bool isValidProgram(const hiprtcProgram p) { + if (p == nullptr) { + return false; + } + + amd::ScopedLock lock(_hiprtcProgram::lock_); + + return _hiprtcProgram::isValid(p); } -hiprtcResult hiprtcCreateProgram(hiprtcProgram* p, const char* src, - const char* name, int n, const char** hdrs, - const char** incs) { +hiprtcResult hiprtcCreateProgram(hiprtcProgram* p, const char* src, const char* name, int n, + const char** hdrs, const char** incs) { if (p == nullptr) { return HIPRTC_ERROR_INVALID_PROGRAM; } @@ -77,36 +170,103 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram* p, const char* src, return HIPRTC_ERROR_INVALID_INPUT; } + std::vector> h; + + for (auto i = 0; i != n; ++i) { + h.emplace_back(incs[i], hdrs[i]); + } + *p = _hiprtcProgram::build(src, name ? 
name : "default_name", std::move(h)); + return HIPRTC_SUCCESS; } -hiprtcResult hiprtcDestroyProgram(hiprtcProgram* p) -{ - return HIPRTC_SUCCESS; +hiprtcResult hiprtcAddNameExpression(hiprtcProgram p, const char* n) { + return HIPRTC_SUCCESS; } -hiprtcResult hiprtcGetLoweredName(hiprtcProgram p, const char* n, - const char** ln) -{ - return HIPRTC_SUCCESS; +hiprtcResult hiprtcCompileProgram(hiprtcProgram p, int n, const char** o) { + return HIPRTC_SUCCESS; } -hiprtcResult hiprtcGetProgramLog(hiprtcProgram p, char* l) -{ +hiprtcResult hiprtcDestroyProgram(hiprtcProgram* p) { + if (p == nullptr) { return HIPRTC_SUCCESS; + } + return _hiprtcProgram::destroy(*p); } -hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram p, std::size_t* sz) -{ - return HIPRTC_SUCCESS; +hiprtcResult hiprtcGetLoweredName(hiprtcProgram p, const char* n, const char** ln) { + return HIPRTC_SUCCESS; } -hiprtcResult hiprtcGetCode(hiprtcProgram p, char* c) -{ - return HIPRTC_SUCCESS; +hiprtcResult hiprtcGetProgramLog(hiprtcProgram p, char* log) { + if (log == nullptr) { + return HIPRTC_ERROR_INVALID_INPUT; + } + + if (!isValidProgram(p)) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } + + if (!p->compiled) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } + + log = std::copy_n(p->log.data(), p->log.size(), log); + *log = '\0'; + + return HIPRTC_SUCCESS; } -hiprtcResult hiprtcGetCodeSize(hiprtcProgram p, std::size_t* sz) -{ - return HIPRTC_SUCCESS; +hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram p, std::size_t* sz) { + if (sz == nullptr) { + return HIPRTC_ERROR_INVALID_INPUT; + } + + if (!isValidProgram(p)) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } + + if (!p->compiled) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } + + *sz = p->log.empty() ? 
0 : p->log.size() + 1; + return HIPRTC_SUCCESS; +} + +hiprtcResult hiprtcGetCode(hiprtcProgram p, char* c) { + if (c == nullptr) { + return HIPRTC_ERROR_INVALID_INPUT; + } + + if (!isValidProgram(p)) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } + + if (!p->compiled) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } + + std::copy_n(p->elf.data(), p->elf.size(), c); + + return HIPRTC_SUCCESS; +} + +hiprtcResult hiprtcGetCodeSize(hiprtcProgram p, std::size_t* sz) { + if (sz == nullptr) { + return HIPRTC_ERROR_INVALID_INPUT; + } + + if (!isValidProgram(p)) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } + + if (!p->compiled) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } + + *sz = p->elf.size(); + + return HIPRTC_SUCCESS; } \ No newline at end of file From f3e2d275c58a05a5c152abe8003219d07b760914 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 9 Sep 2019 14:42:47 -0400 Subject: [PATCH 201/282] P4 to Git Change 1995665 by skudchad@skudchad_test2_win_opencl on 2019/09/09 14:36:57 SWDEV-199293 - Implement hiprtcGetLoweredName ReviewBoardURL = http://ocltc.amd.com/reviews/r/17960/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_rtc.cpp#3 edit --- api/hip/hip_rtc.cpp | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_rtc.cpp b/api/hip/hip_rtc.cpp index c339f01401..9928dcdfd2 100644 --- a/api/hip/hip_rtc.cpp +++ b/api/hip/hip_rtc.cpp @@ -93,8 +93,7 @@ struct _hiprtcProgram { static hiprtcResult destroy(_hiprtcProgram* p) { amd::ScopedLock lock(_hiprtcProgram::lock_); - const auto it { - std::find_if(programs_.cbegin(), programs_.cend(), + const auto it{ std::find_if(programs_.cbegin(), programs_.cend(), [=](const std::unique_ptr<_hiprtcProgram>& x) { return x.get() == p; }) }; @@ -195,7 +194,29 @@ hiprtcResult hiprtcDestroyProgram(hiprtcProgram* p) { return _hiprtcProgram::destroy(*p); } -hiprtcResult hiprtcGetLoweredName(hiprtcProgram p, const char* n, const char** ln) { +hiprtcResult hiprtcGetLoweredName(hiprtcProgram p, const char* n, const char** loweredNames) { + if (n == nullptr || loweredNames == nullptr) { + return HIPRTC_ERROR_INVALID_INPUT; + } + + if (!isValidProgram(p)) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } + + if (!p->compiled) { + return HIPRTC_ERROR_INVALID_PROGRAM; + } + + const auto it{ std::find_if(p->names.cbegin(), p->names.cend(), + [=](const pair& x) + { return x.first == n; })}; + + if (it == p->names.cend()) { + return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID; + } + + *loweredNames = p->loweredNames[distance(p->names.cbegin(), it)].c_str(); + return HIPRTC_SUCCESS; } From 66a6e703f217e9ad8b0c128d720408dc134f1f47 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 9 Sep 2019 14:50:37 -0400 Subject: [PATCH 202/282] P4 to Git Change 1995672 by skudchad@skudchad_rocm on 2019/09/09 14:45:42 SWDEV-199293 - Fix build Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_rtc.cpp#4 edit --- api/hip/hip_rtc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_rtc.cpp b/api/hip/hip_rtc.cpp index 9928dcdfd2..8446b5112b 100644 --- a/api/hip/hip_rtc.cpp +++ b/api/hip/hip_rtc.cpp @@ -208,7 +208,7 @@ hiprtcResult hiprtcGetLoweredName(hiprtcProgram p, const char* n, const char** l } const auto it{ std::find_if(p->names.cbegin(), p->names.cend(), - [=](const pair& x) + [=](const std::pair& x) { return x.first == n; })}; if (it == p->names.cend()) { From a508c497ec1b91df7822014539e740c84eda372b Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 12 Sep 2019 14:35:36 -0400 Subject: [PATCH 203/282] P4 to Git Change 1997621 by cpaquot@cpaquot-ocl-lc-lnx on 2019/09/12 14:16:09 SWDEV-145570 - Fixed some logging issues regarding hip(Ext)ModuleLaunchXXX. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#40 edit --- api/hip/hip_module.cpp | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index c29fec995c..542cd89551 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -218,11 +218,6 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0, uint32_t params = 0) { - HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, - blockDimX, blockDimY, blockDimZ, - sharedMemBytes, hStream, - kernelParams, extra, startEvent, stopEvent, flags); - hip::Function* function = hip::Function::asFunction(f); amd::Kernel* kernel = function->function_; amd::Device* device = hip::getCurrentContext()->devices()[0]; @@ -320,6 +315,11 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra) { + HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra); + 
HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX * blockDimX, gridDimY * blockDimY, gridDimZ * blockDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr)); @@ -332,6 +332,11 @@ hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hipStream_t hStream, void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) { + HIP_INIT_API(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + localWorkSizeX, localWorkSizeY, localWorkSizeZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent, flags); + HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags)); } @@ -346,6 +351,11 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, hipEvent_t startEvent, hipEvent_t stopEvent) { + HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent); + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); } @@ -358,6 +368,11 @@ hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX, hipEvent_t startEvent, hipEvent_t stopEvent) { + HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent); + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); } @@ -366,6 +381,9 @@ hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim, void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream) { + HIP_INIT_API(f, gridDim, blockDim, + 
sharedMemBytes, hStream); + + int deviceId = ihipGetDevice(); hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); if (func == nullptr) { From 184ee3f5a05d7fc55776a9b8e95ed3e8672ac7d7 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 13 Sep 2019 11:28:33 -0400 Subject: [PATCH 204/282] P4 to Git Change 1997981 by cpaquot@cpaquot-ocl-lc-lnx on 2019/09/13 11:17:32 SWDEV-203438 - [HIP] AllGather RCCL test issue The test tries to launch a kernel on two devices at once and they need to communicate with each other. For that, it uses a custom stream for each device. The problem is that in getNullStream we used to call syncStreams every time, and it was syncing all the streams, even the ones on different devices. That made the second kernel launch (on the 2nd device) wait for the first kernel to finish, which would never occur since the first one was waiting for the second one. The fix is to not call syncStreams from getNullStream, because we generally already sync beforehand anyway. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#21 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#40 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#70 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#41 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#24 edit --- api/hip/hip_context.cpp | 14 +++++++- api/hip/hip_event.cpp | 7 +--- api/hip/hip_internal.hpp | 1 + api/hip/hip_memory.cpp | 70 +++++----------------------------------- api/hip/hip_module.cpp | 13 +++----- api/hip/hip_stream.cpp | 16 ++------- 6 files changed, 30 insertions(+), 91 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index d2c37c1b13..2ec1f9db50 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -75,6 +75,19 @@ void setCurrentContext(unsigned int index) { g_context = g_devices[index]; } +amd::HostQueue* getQueue(hipStream_t stream) { + if (stream == nullptr) { + syncStreams(); + return getNullStream(); + } else { + hip::Stream* s = reinterpret_cast(stream); + if ((s->flags & hipStreamNonBlocking) == 0) { + getNullStream()->finish(); + } + return s->asHostQueue(); + } +} + amd::HostQueue* getNullStream(amd::Context& context) { auto stream = g_nullStreams.find(&context); if (stream == g_nullStreams.end()) { @@ -86,7 +99,6 @@ amd::HostQueue* getNullStream(amd::Context& context) { g_nullStreams[&context] = queue; return queue; } - syncStreams(); return stream->second; } diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 143d2eded9..81ae93e279 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -231,12 +231,7 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { hip::Event* e = reinterpret_cast(event); - amd::HostQueue* queue; - if (stream == nullptr) { - queue = hip::getNullStream(); - } else { - queue = reinterpret_cast(stream)->asHostQueue(); - } + amd::HostQueue* queue = hip::getQueue(stream); amd::Command* command = queue->getLastQueuedCommand(true); diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 49b3757dda..7b0ff09ed9 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -72,6 +72,7 @@ namespace hip { extern amd::Context* getCurrentContext(); extern void 
setCurrentContext(unsigned int index); + extern amd::HostQueue* getQueue(hipStream_t s); extern amd::HostQueue* getNullStream(amd::Context&); extern amd::HostQueue* getNullStream(); extern void syncStreams(); diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 0d9b1bec06..b1f1f40aec 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -95,6 +95,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin if (((srcMemory == nullptr) && (dstMemory == nullptr)) || (kind == hipMemcpyHostToHost)) { + queue.finish(); memcpy(dst, src, sizeBytes); return hipSuccess; } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { @@ -169,17 +170,9 @@ hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, } size_t offset = 0; - amd::HostQueue* queue = nullptr; + amd::HostQueue* queue = hip::getQueue(stream); amd::Memory* memory = getMemoryObject(dst, offset); - if (stream == nullptr) { - hip::syncStreams(); - queue = hip::getNullStream(); - } else { - hip::getNullStream()->finish(); - queue = reinterpret_cast(stream)->asHostQueue(); - } - if (memory != nullptr) { // Device memory amd::Command::EventWaitList waitList; @@ -764,15 +757,7 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, kind, stream); - amd::HostQueue* queue = nullptr; - - if (stream == nullptr) { - hip::syncStreams(); - queue = hip::getNullStream(); - } else { - hip::getNullStream()->finish(); - queue = reinterpret_cast(stream)->asHostQueue(); - } + amd::HostQueue* queue = hip::getQueue(stream); HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue, true)); } @@ -782,15 +767,7 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); - amd::HostQueue* queue = nullptr; - - if (stream == nullptr) { - hip::syncStreams(); - queue = 
hip::getNullStream(); - } else { - hip::getNullStream()->finish(); - queue = reinterpret_cast(stream)->asHostQueue(); - } + amd::HostQueue* queue = hip::getQueue(stream); HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, *queue, true)); @@ -800,15 +777,7 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); - amd::HostQueue* queue = nullptr; - - if (stream == nullptr) { - hip::syncStreams(); - queue = hip::getNullStream(); - } else { - hip::getNullStream()->finish(); - queue = reinterpret_cast(stream)->asHostQueue(); - } + amd::HostQueue* queue = hip::getQueue(stream); HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, *queue, true)); @@ -818,15 +787,7 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); - amd::HostQueue* queue = nullptr; - - if (stream == nullptr) { - hip::syncStreams(); - queue = hip::getNullStream(); - } else { - hip::getNullStream()->finish(); - queue = reinterpret_cast(stream)->asHostQueue(); - } + amd::HostQueue* queue = hip::getQueue(stream); HIP_RETURN(ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, *queue, true)); @@ -921,15 +882,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp size_t height, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind, stream); - amd::HostQueue* queue; - - if (stream == nullptr) { - hip::syncStreams(); - queue = hip::getNullStream(); - } else { - hip::getNullStream()->finish(); - queue = reinterpret_cast(stream)->asHostQueue(); - } + amd::HostQueue* queue = hip::getQueue(stream); HIP_RETURN(ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, *queue, true)); } @@ -1349,14 +1302,7 @@ hipError_t 
hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream) { HIP_INIT_API(dst, pitch, value, width, height, stream); - amd::HostQueue* queue = nullptr; - if (stream == nullptr) { - hip::syncStreams(); - queue = hip::getNullStream(); - } else { - hip::getNullStream()->finish(); - queue = reinterpret_cast(stream)->asHostQueue(); - } + amd::HostQueue* queue = hip::getQueue(stream); HIP_RETURN(ihipMemset2D(dst, pitch, value, width, height, *queue, true)); } diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 542cd89551..0cd590886c 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -218,6 +218,9 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0, uint32_t params = 0) { + HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params); + hip::Function* function = hip::Function::asFunction(f); amd::Kernel* kernel = function->function_; amd::Device* device = hip::getCurrentContext()->devices()[0]; @@ -226,14 +229,8 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, hip::Event* eStart = reinterpret_cast(startEvent); hip::Event* eStop = reinterpret_cast(stopEvent); - amd::HostQueue* queue; - if (hStream == nullptr) { - hip::syncStreams(); - queue = hip::getNullStream(); - } else { - hip::getNullStream()->finish(); - queue = reinterpret_cast(hStream)->asHostQueue(); - } + amd::HostQueue* queue = hip::getQueue(hStream); + if ((params & amd::NDRangeKernelCommand::CooperativeGroups) && !device->info().cooperativeGroups_) { return hipErrorLaunchFailure; diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index bc681d8a7e..ab26b4fc2e 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -173,21 +173,9 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { hipError_t 
hipStreamSynchronize(hipStream_t stream) { HIP_INIT_API(stream); - amd::HostQueue* hostQueue; + amd::HostQueue* hostQueue = hip::getQueue(stream); + hostQueue->finish(); - if (stream == nullptr) { - hip::syncStreams(); - - hostQueue = hip::getNullStream(); - - hostQueue->finish(); - } else { - hip::getNullStream()->finish(); - - hip::Stream* hStream = reinterpret_cast(stream); - - hStream->finish(); - } HIP_RETURN(hipSuccess); } From d5181ce77a68b4f1dd2d1df6f7b5017de002b95d Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 16 Sep 2019 17:58:06 -0400 Subject: [PATCH 205/282] P4 to Git Change 1998982 by cpaquot@cpaquot-ocl-lc-lnx on 2019/09/16 17:49:10 SWDEV-145570 - [HIP] Fix regression from new headers. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#18 edit --- api/hip/hip_device.cpp | 5 +++++ api/hip/hip_device_runtime.cpp | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 010e46a33b..4dfb447404 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -208,6 +208,11 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) deviceProps.hdpMemFlushCntl = nullptr; deviceProps.hdpRegFlushCntl = nullptr; + deviceProps.memPitch = info.maxMemAllocSize_; + deviceProps.textureAlignment = std::max(info.imageBaseAddressAlignment_, info.imagePitchAlignment_); + deviceProps.kernelExecTimeoutEnabled = 0; + deviceProps.ECCEnabled = info.errorCorrectionSupport_? 
1:0; + *props = deviceProps; HIP_RETURN(hipSuccess); } diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index a039dced3e..6aa9eae29e 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -265,6 +265,21 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) case hipDeviceAttributeHdpRegFlushCntl: *reinterpret_cast(pi) = prop.hdpRegFlushCntl; break; + case hipDeviceAttributeMaxPitch: + *pi = prop.memPitch; + break; + case hipDeviceAttributeTextureAlignment: + *pi = prop.textureAlignment; + break; + case hipDeviceAttributeKernelExecTimeout: + *pi = prop.kernelExecTimeoutEnabled; + break; + case hipDeviceAttributeCanMapHostMemory: + *pi = prop.canMapHostMemory; + break; + case hipDeviceAttributeEccEnabled: + *pi = prop.ECCEnabled; + break; default: HIP_RETURN(hipErrorInvalidValue); } From 1955dc1f99b5e1c7eaeced22fe4ad45a477c4eb2 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 17 Sep 2019 19:47:11 -0400 Subject: [PATCH 206/282] P4 to Git Change 1999915 by cpaquot@cpaquot-ocl-lc-lnx on 2019/09/17 19:43:34 SWDEV-201128 - [HIP] test_snli_cuda failure Default to sync packet Make sure GPU_NUM_MEM_DEPENDENCY is 0 for HIP No sync packet is only used when there are mem dependency check Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#86 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#28 edit --- api/hip/hip_context.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 2ec1f9db50..759676e83d 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -41,6 +41,7 @@ std::map g_nullStreams; void init() { if (!amd::Runtime::initialized()) { amd::IS_HIP = true; + GPU_NUM_MEM_DEPENDENCY = 0; amd::Runtime::init(); } From 0254f99c97b2d22110f37d742f8d3cb1c0064893 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 18 Sep 2019 14:29:25 -0400 Subject: [PATCH 207/282] P4 to Git Change 2000486 by vsytchen@vsytchen-remote-ocl-win10 on 2019/09/18 14:25:45 SWDEV-201925 - hipArray3DCreate() not available in HIP/PAL on Windows 1. Implement hipArray3DCreate(). 2. Remove the array size calculation from hipArrayCreate() as it is not used. ReviewBoardURL = http://ocltc.amd.com/reviews/r/18005/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#71 edit --- api/hip/hip_memory.cpp | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index b1f1f40aec..00b75af5b1 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -446,11 +446,6 @@ hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocat &channelOrder, &channelType); const cl_image_format image_format = { channelOrder, channelType }; - size_t size = pAllocateArray->Width; - if (pAllocateArray->Height > 0) { - size = size * pAllocateArray->Height; - } - size_t pitch = 0; hipError_t status = ihipMallocPitch(ptr, &pitch, array[0]->width, array[0]->height, 1, CL_MEM_OBJECT_IMAGE2D, &image_format); @@ -501,6 +496,33 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, HIP_RETURN(status); } +hipError_t hipArray3DCreate(hipArray** array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray) { + 
HIP_INIT_API(array, pAllocateArray); + + *array = (hipArray*)malloc(sizeof(hipArray)); + array[0]->type = pAllocateArray->Flags; + array[0]->width = pAllocateArray->Width; + array[0]->height = pAllocateArray->Height; + array[0]->depth = pAllocateArray->Depth; + array[0]->Format = pAllocateArray->Format; + array[0]->NumChannels = pAllocateArray->NumChannels; + array[0]->isDrv = true; + array[0]->textureType = hipTextureType3D; + void** ptr = &array[0]->data; + + cl_channel_order channelOrder; + cl_channel_type channelType; + getDrvChannelOrderAndType(pAllocateArray->Format, pAllocateArray->NumChannels, + &channelOrder, &channelType); + + const cl_image_format image_format = { channelOrder, channelType }; + size_t pitch = 0; + hipError_t status = ihipMallocPitch(ptr, &pitch, array[0]->width, array[0]->height, array[0]->depth, CL_MEM_OBJECT_IMAGE3D, + &image_format); + + HIP_RETURN(status); +} + hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc* desc, struct hipExtent extent, unsigned int flags) { HIP_INIT_API(array, desc, &extent, flags); From ecf4974f79f3606b0f4456ebfe5223dabe318e38 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 24 Sep 2019 10:57:33 -0400 Subject: [PATCH 208/282] P4 to Git Change 2003196 by kjayapra@0_HIPWS_LNX1_ROCM on 2019/09/24 10:48:45 SWDEV-144570 - Implementing some Texture APIs Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#27 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#26 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#17 edit --- api/hip/hip_hcc.def.in | 4 +++ api/hip/hip_hcc.map.in | 4 +++ api/hip/hip_texture.cpp | 65 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 41b71868ae..7d359dbf28 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -168,6 +168,10 @@ hipTexRefSetAddressMode hipTexRefSetArray hipTexRefSetAddress hipTexRefSetAddress2D +hipTexRefGetAddress +hipTexRefGetAddressMode +hipTexRefGetArray +hipTexRefSetArray ihipBindTextureImpl ihipBindTextureToArrayImpl hipCreateChannelDesc diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 50384ec46a..afe47e8df1 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -183,6 +183,10 @@ global: hipTexRefSetArray*; hipTexRefSetAddress*; hipTexRefSetAddress2D*; + hipTexRefGetAddress*; + hipTexRefGetAddressMode*; + hipTexRefGetArray*; + hipTexRefSetArray*; hipCreateChannelDesc*; ihipBindTextureImpl*; ihipBindTextureToArrayImpl*; diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 6e5cc9d668..a8f642c400 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -561,6 +561,18 @@ hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm HIP_RETURN(hipSuccess); } +hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* am, textureReference tex, int dim) { + HIP_INIT_API(am, &tex, dim); + + if ((am == nullptr) || (dim >= 3)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *am = tex.addressMode[dim]; + + HIP_RETURN(hipSuccess); +} + hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am) { HIP_INIT_API(tex, dim, am); @@ -573,12 +585,61 @@ hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAdd HIP_RETURN(hipSuccess); } +hipError_t hipTexRefGetArray(hipArray* array, textureReference tex) { + HIP_INIT_API(array, &tex); + 
+ hip::TextureObject* texture = nullptr; + + if (array == nullptr) { + HIP_RETURN(hipErrorInvalidImage); + } + + texture = reinterpret_cast(tex.textureObject); + if(hipResourceTypeArray != texture->resDesc.resType){ + HIP_RETURN(hipErrorInvalidValue); + } + + if (texture->resDesc.res.array.array == nullptr) { + HIP_RETURN(hipErrorUnknown); + } + + *array = *(texture->resDesc.res.array.array); + + HIP_RETURN(hipSuccess); +} + hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags) { HIP_INIT_API(tex, array, flags); - assert(0 && "Unimplemented"); + size_t offset = 0; - HIP_RETURN(hipErrorUnknown); + if ((tex == nullptr) || (array == nullptr)) { + HIP_RETURN(hipErrorInvalidImage); + } + + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, &offset, tex, array->data, &array->desc, array->width, + array->height, array->depth)); +} + +hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex) { + HIP_INIT_API(dev_ptr, &tex); + + hip::TextureObject* texture = nullptr; + device::Memory* dev_mem = nullptr; + + texture = reinterpret_cast(tex.textureObject); + if ((texture == nullptr) || (texture->image == nullptr)) { + HIP_RETURN(hipErrorInvalidImage); + } + + dev_mem = texture->image->getDeviceMemory(*hip::getCurrentContext()->devices()[0]); + if (dev_mem == nullptr) { + HIP_RETURN(hipErrorInvalidImage); + } + + *dev_ptr = reinterpret_cast(dev_mem->virtualAddress()); + + HIP_RETURN(hipSuccess); } hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr, From dc0b2a19cf8b209c5d1bcd05844799e0d2951a91 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 24 Sep 2019 16:58:14 -0400 Subject: [PATCH 209/282] P4 to Git Change 2003445 by yaxunl@yaxunl-lc10 on 2019/09/24 16:56:01 SWDEV-145570 - Add new kernel launching API for hip-clang Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#28 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#27 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#41 edit --- api/hip/hip_hcc.def.in | 3 +++ api/hip/hip_hcc.map.in | 3 +++ api/hip/hip_platform.cpp | 51 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 7d359dbf28..e3ddb529a3 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -141,6 +141,8 @@ hipStreamGetFlags hipStreamQuery hipStreamSynchronize hipStreamWaitEvent +__hipPopCallConfiguration +__hipPushCallConfiguration __hipRegisterFatBinary __hipRegisterFunction __hipRegisterVar @@ -148,6 +150,7 @@ __hipUnregisterFatBinary hipConfigureCall hipSetupArgument hipLaunchByPtr +hipLaunchKernel hipCreateTextureObject hipDestroyTextureObject hipGetTextureObjectResourceDesc diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index afe47e8df1..d580b8b99b 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -141,6 +141,8 @@ global: hipStreamQuery; hipStreamSynchronize; hipStreamWaitEvent; + __hipPopCallConfiguration; + __hipPushCallConfiguration; __hipRegisterFatBinary; __hipRegisterFunction; __hipRegisterVar; @@ -150,6 +152,7 @@ global: hipConfigureCall; hipSetupArgument; hipLaunchByPtr; + hipLaunchKernel; hipProfilerStart; hipProfilerStop; hiprtcCompileProgram; diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index ee05e7c52a..8e3fd32b66 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -411,6 +411,35 @@ extern "C" hipError_t hipConfigureCall( HIP_RETURN(hipSuccess); } +extern "C" hipError_t __hipPushCallConfiguration( + dim3 gridDim, + dim3 blockDim, + size_t sharedMem, + hipStream_t stream) +{ + HIP_INIT_API(gridDim, blockDim, sharedMem, stream); + + PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim, + dim3 *blockDim, + size_t *sharedMem, + hipStream_t *stream) { + 
HIP_INIT_API(gridDim, blockDim, sharedMem, stream); + + ihipExec_t exec; + PlatformState::instance().popExec(exec); + *gridDim = exec.gridDim_; + *blockDim = exec.blockDim_; + *sharedMem = exec.sharedMem_; + *stream = exec.hStream_; + + HIP_RETURN(hipSuccess); +} + extern "C" hipError_t hipSetupArgument( const void *arg, size_t size, @@ -449,6 +478,28 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) exec.sharedMem_, exec.hStream_, nullptr, extra)); } +extern "C" hipError_t hipLaunchKernel(const void *hostFunction, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMemBytes, + hipStream_t stream) +{ + HIP_INIT_API(hostFunction, gridDim, blockDim, args, sharedMemBytes, + stream); + + int deviceId = ihipGetDevice(); + hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); + if (func == nullptr) { + HIP_RETURN(hipErrorUnknown); + } + + HIP_RETURN(hipModuleLaunchKernel(func, gridDim.x, gridDim.y, gridDim.z, + blockDim.x, blockDim.y, blockDim.z, + sharedMemBytes, stream, args, nullptr)); +} + + hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { size_t size = 0; if(!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), devPtr, &size)) { From e37b6b6740c6e769f80db634d1601f18ebca32b1 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 25 Sep 2019 16:53:50 -0400 Subject: [PATCH 210/282] P4 to Git Change 2004245 by axie@axie_win_opencl_nvme on 2019/09/25 16:46:31 SWDEV-203855 - Segfault when using hipArrayCreate and hipMemcpyParam2D 1. hipArrayCreate API implementation uses a wrong parameter to check width. That parameter can be null pointer because it is used to pass the pointer back to the caller. 2. Implement hipMemcpyParam2D similar to HIP-HCC implementation. Reference: https://github.com/ROCm-Developer-Tools/HIP/blob/master/src/hip_memory.cpp Tests: 1. 
PRE CHECK-IN build and test(no regression): http://ocltc:8111/viewModification.html?modId=126608&personal=true&init=1&tab=vcsModificationBuilds 2. GPU is VEGA10, OS is Windows 10, CPU is threadripper 1900x, run the test. There is no segfault or exit during the hipArrayCreate and hipMemcpyParam2D function calls. ReviewBoard: http://ocltc.amd.com/reviews/r/18037/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#72 edit --- api/hip/hip_memory.cpp | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 00b75af5b1..19262ecc54 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -429,7 +429,7 @@ hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { HIP_INIT_API(array, pAllocateArray); - if (array[0]->width == 0) { + if (pAllocateArray->Width == 0) { HIP_RETURN(hipErrorInvalidValue); } @@ -815,14 +815,6 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, *queue, true)); } -hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { - HIP_INIT_API(pCopy); - - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); -} - hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync = false) { @@ -889,6 +881,20 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch return hipSuccess; } +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { + HIP_INIT_API(pCopy); + hipError_t e = hipSuccess; + if (pCopy == nullptr) { + e = hipErrorInvalidValue; + } else { + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + e = ihipMemcpy2D(pCopy->dstArray->data, pCopy->WidthInBytes, pCopy->srcHost, pCopy->srcPitch, + pCopy->WidthInBytes, pCopy->Height, 
hipMemcpyDefault, *queue); + } + HIP_RETURN(e); +} + hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind); From 224e9586b08895bb3fca5c9651220b4f57b4197e Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 26 Sep 2019 17:47:32 -0400 Subject: [PATCH 211/282] P4 to Git Change 2005049 by skudchad@skudchad_rocm on 2019/09/26 17:24:59 SWDEV-199293 - HIP_RTC inprocess implementation. ReviewBoardURL = http://ocltc.amd.com/reviews/r/18014/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#41 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_rtc.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hiprtc_internal.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#49 edit --- api/hip/hip_internal.hpp | 10 +- api/hip/hip_rtc.cpp | 363 +++++++++++++++++------------------- api/hip/hiprtc_internal.hpp | 44 +++++ 3 files changed, 219 insertions(+), 198 deletions(-) create mode 100644 api/hip/hiprtc_internal.hpp diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 7b0ff09ed9..1b422afd3d 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -56,6 +56,11 @@ typedef struct ihipIpcMemHandle_st { } \ HIP_INIT(); +#define HIP_RETURN(ret) \ + hip::g_lastError = ret; \ + LogPrintfInfo("[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, hipGetErrorName(hip::g_lastError)); \ + return hip::g_lastError; + namespace hc { class accelerator; class accelerator_view; @@ -179,11 +184,6 @@ extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); extern bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr, size_t* var_size); -#define HIP_RETURN(ret) \ - hip::g_lastError = ret; \ - LogPrintfInfo("[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, hipGetErrorName(hip::g_lastError)); \ - return 
hip::g_lastError; \ - inline std::ostream& operator<<(std::ostream& os, const dim3& s) { os << '{'; os << s.x; diff --git a/api/hip/hip_rtc.cpp b/api/hip/hip_rtc.cpp index 8446b5112b..9b378cd637 100644 --- a/api/hip/hip_rtc.cpp +++ b/api/hip/hip_rtc.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,9 +21,73 @@ THE SOFTWARE. */ #include -#include "cl_common.hpp" +#include "hiprtc_internal.hpp" #include -#include +#include "platform/program.hpp" + +namespace hiprtc { +thread_local hiprtcResult g_lastRtcError = HIPRTC_SUCCESS; +} + +class ProgramState { + amd::Monitor lock_; +private: + static ProgramState* programState_; + + ProgramState() : lock_("Guards program state") {} + ~ProgramState() {} + + std::unordered_map, std::vector>> progHeaders_; + std::vector nameExpresssion_; +public: + static ProgramState& instance(); + void createProgramHeaders(amd::Program* program, int numHeaders, + const char** headers, const char** headerNames); + void getProgramHeaders(amd::Program* program, int* numHeaders, char** headers, char ** headerNames); + uint32_t addNameExpression(const char* name_expression); +}; + +ProgramState* ProgramState::programState_ = nullptr; + +ProgramState& ProgramState::instance() { + if (programState_ == nullptr) { + programState_ = new ProgramState; + } + return *programState_; +} + +void ProgramState::createProgramHeaders(amd::Program* program, int numHeaders, + const char** headers, const char** headerNames) { + amd::ScopedLock lock(lock_); + std::vector vHeaderNames; + std::vector vHeaders; + for (auto i = 0; i != numHeaders; ++i) { + vHeaders.emplace_back(headers[i]); + vHeaderNames.emplace_back(headerNames[i]); + progHeaders_[program] = 
std::make_pair(std::move(vHeaders), std::move(vHeaderNames)); + } +} + +void ProgramState::getProgramHeaders(amd::Program* program, int* numHeaders, + char** headers, char ** headerNames) { + amd::ScopedLock lock(lock_); + + const auto it = progHeaders_.find(program); + if (it != progHeaders_.cend()) { + *numHeaders = it->second.first.size(); + *headers = reinterpret_cast(it->second.first.data()); + *headerNames = reinterpret_cast(it->second.second.data()); + } +} + + +uint32_t ProgramState::addNameExpression(const char* name_expression) { + amd::ScopedLock lock(lock_); + nameExpresssion_.emplace_back(name_expression); + return nameExpresssion_.size(); +} + const char* hiprtcGetErrorString(hiprtcResult x) { switch (x) { @@ -56,238 +120,151 @@ const char* hiprtcGetErrorString(hiprtcResult x) { }; } -namespace hip_impl { -inline std::string demangle(const char* x) { - if (!x) { - return {}; +hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, + int numHeaders, const char** headers, const char** headerNames) { + HIPRTC_INIT_API(prog, src, name, numHeaders, headers, headerNames); + + if (prog == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_PROGRAM); } - return boost::core::demangle(x); -} -} // Namespace hip_impl - -struct _hiprtcProgram { - static amd::Monitor lock_; - static std::vector> programs_; - - std::vector> headers; - std::vector> names; - std::vector loweredNames; - std::vector elf; - std::string source; - std::string name; - std::string log; - bool compiled; - - static _hiprtcProgram* build(std::string s, std::string n, - std::vector> h) { - std::unique_ptr<_hiprtcProgram> tmp{ - new _hiprtcProgram{std::move(h), {}, {}, {}, std::move(s), std::move(n), {}, false}}; - - amd::ScopedLock lock(_hiprtcProgram::lock_); - - programs_.push_back(move(tmp)); - - return programs_.back().get(); + if (numHeaders < 0) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + if (numHeaders && (headers == nullptr || headerNames == 
nullptr)) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } - static hiprtcResult destroy(_hiprtcProgram* p) { - amd::ScopedLock lock(_hiprtcProgram::lock_); - - const auto it{ std::find_if(programs_.cbegin(), programs_.cend(), - [=](const std::unique_ptr<_hiprtcProgram>& x) - { return x.get() == p; }) }; - - if (it == programs_.cend()) { - return HIPRTC_ERROR_INVALID_PROGRAM; - } - - return HIPRTC_SUCCESS; + amd::Program* program = new amd::Program(*hip::getCurrentContext(), src, amd::Program::HIP); + if (program == NULL) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } - static std::string handleMangledName(std::string name) { - name = hip_impl::demangle(name.c_str()); - - if (name.empty()) { - return name; - } - - if (name.find("void ") == 0) { - name.erase(0, strlen("void ")); - } - - auto dx {name.find_first_of("(<")}; - - if (dx == std::string::npos) { - return name; - } - - if (name[dx] == '<') { - auto cnt{1u}; - do { - ++dx; - cnt += (name[dx] == '<') ? 1 : ((name[dx] == '>') ? -1 : 0); - } while (cnt); - - name.erase(++dx); - } else { - name.erase(dx); - } - - return name; + if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0])) { + program->release(); + HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE); } - static bool isValid(_hiprtcProgram* p) { - return std::find_if(programs_.cbegin(), programs_.cend(), - [=](const std::unique_ptr<_hiprtcProgram>& x) { - return x.get() == p; }) != programs_.cend(); - } -}; + ProgramState::instance().createProgramHeaders(program, numHeaders, headers, headerNames); -// Init -std::vector> _hiprtcProgram::programs_{}; -amd::Monitor _hiprtcProgram::lock_("hiprtcProgram lock"); + *prog = reinterpret_cast(as_cl(program)); -inline bool isValidProgram(const hiprtcProgram p) { - if (p == nullptr) { - return false; - } - - amd::ScopedLock lock(_hiprtcProgram::lock_); - - return _hiprtcProgram::isValid(p); + HIPRTC_RETURN(HIPRTC_SUCCESS); } -hiprtcResult hiprtcCreateProgram(hiprtcProgram* p, const 
char* src, const char* name, int n, - const char** hdrs, const char** incs) { - if (p == nullptr) { - return HIPRTC_ERROR_INVALID_PROGRAM; - } - if (n < 0) { - return HIPRTC_ERROR_INVALID_INPUT; - } - if (n && (hdrs == nullptr || incs == nullptr)) { - return HIPRTC_ERROR_INVALID_INPUT; + +hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) { + + // FIXME[skudchad] Add headers to amd::Program::build and device::Program::build, + // pass the saved from ProgramState to amd::Program::build + HIPRTC_INIT_API(prog, numOptions, options); + + amd::Program* program = as_amd(reinterpret_cast(prog)); + + std::ostringstream ostrstr; + std::vector oarr(&options[0], &options[numOptions]); + std::copy(oarr.begin(), oarr.end(), std::ostream_iterator(ostrstr, " ")); + + std::vector devices{hip::getCurrentContext()->devices()[0]}; + if (CL_SUCCESS != program->build(devices, ostrstr.str().c_str(), nullptr, nullptr)) { + HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION); } - std::vector> h; - - for (auto i = 0; i != n; ++i) { - h.emplace_back(incs[i], hdrs[i]); - } - *p = _hiprtcProgram::build(src, name ? 
name : "default_name", std::move(h)); - - return HIPRTC_SUCCESS; + HIPRTC_RETURN(HIPRTC_SUCCESS); } -hiprtcResult hiprtcAddNameExpression(hiprtcProgram p, const char* n) { - return HIPRTC_SUCCESS; +hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression) { + HIPRTC_INIT_API(prog, name_expression); + + if (name_expression == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + amd::Program* program = as_amd(reinterpret_cast(prog)); + + uint32_t id = ProgramState::instance().addNameExpression(name_expression); + + const auto var{"__hiprtc_" + std::to_string(id)}; + const auto code{"\nextern \"C\" constexpr auto " + var + " = " + name_expression + ';'}; + + program->appendToSource(code.c_str()); + + HIPRTC_RETURN(HIPRTC_SUCCESS); } -hiprtcResult hiprtcCompileProgram(hiprtcProgram p, int n, const char** o) { - return HIPRTC_SUCCESS; +hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, + const char** loweredNames) { + HIPRTC_INIT_API(prog, name_expression, loweredNames); + + if (name_expression == nullptr || loweredNames == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + HIPRTC_RETURN(HIPRTC_SUCCESS); } -hiprtcResult hiprtcDestroyProgram(hiprtcProgram* p) { - if (p == nullptr) { - return HIPRTC_SUCCESS; +hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) { + HIPRTC_INIT_API(prog); + + if (prog == NULL) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } - return _hiprtcProgram::destroy(*p); + amd::Program* program = as_amd(reinterpret_cast(prog)); + + program->release(); + + HIPRTC_RETURN(HIPRTC_SUCCESS); } -hiprtcResult hiprtcGetLoweredName(hiprtcProgram p, const char* n, const char** loweredNames) { - if (n == nullptr || loweredNames == nullptr) { - return HIPRTC_ERROR_INVALID_INPUT; - } +hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* binaryMem) { + HIPRTC_INIT_API(prog, binaryMem); - if (!isValidProgram(p)) { - return HIPRTC_ERROR_INVALID_PROGRAM; - } - if (!p->compiled) { - 
return HIPRTC_ERROR_INVALID_PROGRAM; - } + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program::binary_t& binary = + program->getDeviceProgram(*hip::getCurrentContext()->devices()[0])->binary(); - const auto it{ std::find_if(p->names.cbegin(), p->names.cend(), - [=](const std::pair& x) - { return x.first == n; })}; + ::memcpy(binaryMem, binary.first, binary.second); - if (it == p->names.cend()) { - return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID; - } - - *loweredNames = p->loweredNames[distance(p->names.cbegin(), it)].c_str(); - - return HIPRTC_SUCCESS; + HIPRTC_RETURN(HIPRTC_SUCCESS); } -hiprtcResult hiprtcGetProgramLog(hiprtcProgram p, char* log) { - if (log == nullptr) { - return HIPRTC_ERROR_INVALID_INPUT; - } +hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* binarySizeRet) { - if (!isValidProgram(p)) { - return HIPRTC_ERROR_INVALID_PROGRAM; - } + HIPRTC_INIT_API(prog, binarySizeRet); - if (!p->compiled) { - return HIPRTC_ERROR_INVALID_PROGRAM; - } + amd::Program* program = as_amd(reinterpret_cast(prog)); - log = std::copy_n(p->log.data(), p->log.size(), log); - *log = '\0'; + *binarySizeRet = + program->getDeviceProgram(*hip::getCurrentContext()->devices()[0])->binary().second; - return HIPRTC_SUCCESS; + HIPRTC_RETURN(HIPRTC_SUCCESS); } -hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram p, std::size_t* sz) { - if (sz == nullptr) { - return HIPRTC_ERROR_INVALID_INPUT; - } +hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* dst) { - if (!isValidProgram(p)) { - return HIPRTC_ERROR_INVALID_PROGRAM; - } + HIPRTC_INIT_API(prog, dst); + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program* devProgram = + program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); - if (!p->compiled) { - return HIPRTC_ERROR_INVALID_PROGRAM; - } + auto log = program->programLog() + devProgram->buildLog().c_str(); - *sz = p->log.empty() ? 
0 : p->log.size() + 1; - return HIPRTC_SUCCESS; + log.copy(dst, log.size()); + dst[log.size()] = '\0'; + + HIPRTC_RETURN(HIPRTC_SUCCESS); } -hiprtcResult hiprtcGetCode(hiprtcProgram p, char* c) { - if (c == nullptr) { - return HIPRTC_ERROR_INVALID_INPUT; - } +hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) { - if (!isValidProgram(p)) { - return HIPRTC_ERROR_INVALID_PROGRAM; - } + HIPRTC_INIT_API(prog, logSizeRet); - if (!p->compiled) { - return HIPRTC_ERROR_INVALID_PROGRAM; - } + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program* devProgram = + program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); - std::copy_n(p->elf.data(), p->elf.size(), c); + auto log = program->programLog() + devProgram->buildLog().c_str(); - return HIPRTC_SUCCESS; -} + *logSizeRet = log.size() + 1; -hiprtcResult hiprtcGetCodeSize(hiprtcProgram p, std::size_t* sz) { - if (sz == nullptr) { - return HIPRTC_ERROR_INVALID_INPUT; - } - - if (!isValidProgram(p)) { - return HIPRTC_ERROR_INVALID_PROGRAM; - } - - if (!p->compiled) { - return HIPRTC_ERROR_INVALID_PROGRAM; - } - - *sz = p->elf.size(); - - return HIPRTC_SUCCESS; + HIPRTC_RETURN(HIPRTC_SUCCESS); } \ No newline at end of file diff --git a/api/hip/hiprtc_internal.hpp b/api/hip/hiprtc_internal.hpp new file mode 100644 index 0000000000..dc3371615c --- /dev/null +++ b/api/hip/hiprtc_internal.hpp @@ -0,0 +1,44 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIPRTC_SRC_HIP_INTERNAL_H +#define HIPRTC_SRC_HIP_INTERNAL_H + +#include "hip_internal.hpp" + +// This macro should be called at the beginning of every HIP RTC API. +#define HIPRTC_INIT_API(...) 
\ + LogPrintfInfo("[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ + amd::Thread* thread = amd::Thread::current(); \ + if (!CL_CHECK_THREAD(thread)) { \ + HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \ + } \ + HIP_INIT(); + +#define HIPRTC_RETURN(ret) \ + hiprtc::g_lastRtcError = ret; \ + LogPrintfInfo("[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, \ + hiprtcGetErrorString(hiprtc::g_lastRtcError)); \ + return hiprtc::g_lastRtcError; + + +#endif // HIPRTC_SRC_HIP_INTERNAL_H From 7759d32813bcf7c13db716f37ea0f285cf58e926 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 27 Sep 2019 08:55:43 -0400 Subject: [PATCH 212/282] P4 to Git Change 2005454 by michliao@hliao-dev-00-hip.rocm-workspace on 2019/09/27 08:45:19 SWDEV-199293 - Fix build - source code should be self-sufficient, i.e., including headers directly referenced. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_rtc.cpp#6 edit --- api/hip/hip_rtc.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_rtc.cpp b/api/hip/hip_rtc.cpp index 9b378cd637..042dea0ab2 100644 --- a/api/hip/hip_rtc.cpp +++ b/api/hip/hip_rtc.cpp @@ -24,6 +24,14 @@ THE SOFTWARE. #include "hiprtc_internal.hpp" #include #include "platform/program.hpp" +#include +#include +#include +#include +#include +#include +#include +#include namespace hiprtc { thread_local hiprtcResult g_lastRtcError = HIPRTC_SUCCESS; @@ -267,4 +275,4 @@ hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) { *logSizeRet = log.size() + 1; HIPRTC_RETURN(HIPRTC_SUCCESS); -} \ No newline at end of file +} From 4e640a6756a2ef438e62dd04f2dab9635a86dc21 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 27 Sep 2019 13:31:58 -0400 Subject: [PATCH 213/282] P4 to Git Change 2005583 by skudchad@skudchad_test2_win_opencl on 2019/09/27 13:25:25 SWDEV-199293 - Fix build on windows. Include header in hip_internal.hpp Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#42 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_rtc.cpp#7 edit --- api/hip/hip_internal.hpp | 3 +++ api/hip/hip_rtc.cpp | 7 ------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 1b422afd3d..fcd40c0ace 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -30,6 +30,9 @@ THE SOFTWARE. #include #include #include +#include + + /*! IHIP IPC MEMORY Structure */ #define IHIP_IPC_MEM_HANDLE_SIZE 32 diff --git a/api/hip/hip_rtc.cpp b/api/hip/hip_rtc.cpp index 042dea0ab2..9cf3060418 100644 --- a/api/hip/hip_rtc.cpp +++ b/api/hip/hip_rtc.cpp @@ -25,13 +25,6 @@ THE SOFTWARE. #include #include "platform/program.hpp" #include -#include -#include -#include -#include -#include -#include -#include namespace hiprtc { thread_local hiprtcResult g_lastRtcError = HIPRTC_SUCCESS; From 55944b69f10638bb47b984634aa8a4c379995728 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 27 Sep 2019 14:07:19 -0400 Subject: [PATCH 214/282] P4 to Git Change 2005601 by gandryey@gera-hip-lnx on 2019/09/27 13:58:55 SWDEV-184709 - support hipLaunchCooperativeKernel() - Switch to the device library functions for GWS sync and init Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/make/hip.git/tests/src/runtimeApi/module/hipCooperativeGroup.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palschedcl.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#152 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocschedcl.cpp#4 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#87 edit --- api/hip/hip_platform.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 8e3fd32b66..6826f20b08 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -542,7 +542,7 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor namespace hip_impl { hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, - const void* f, + hipFunction_t f, int blockSize, size_t dynamicSMemSize) { @@ -585,7 +585,7 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSize; if (total_used_lds != 0) { - // Calculate LDS occupacy per CU. lds_per_cu / (static_lsd + dynamic_lds) + // Calculate LDS occupancy per CU. lds_per_cu / (static_lsd + dynamic_lds) int lds_occupancy = static_cast(device->info().localMemSize_ / total_used_lds); *numBlocks = std::min(*numBlocks, lds_occupancy); } @@ -595,7 +595,7 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, } hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, - const void* f, + hipFunction_t f, int blockSize, size_t dynamicSMemSize) { @@ -603,7 +603,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, } hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, - const void* f, + hipFunction_t f, int blockSize, size_t dynamicSMemSize, unsigned int flags) From f208cb871a8018f8bee698691b06139ecc9de165 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 27 Sep 2019 15:39:51 -0400 Subject: [PATCH 215/282] P4 to Git Change 2005676 by kjayapra@0_HIPWS_LNX1_ROCM on 2019/09/27 15:33:19 SWDEV-145570 - Accomadating Texture Header changes. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#18 edit --- api/hip/hip_texture.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index a8f642c400..c5fd87e9f6 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -585,12 +585,12 @@ hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAdd HIP_RETURN(hipSuccess); } -hipError_t hipTexRefGetArray(hipArray* array, textureReference tex) { +hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex) { HIP_INIT_API(array, &tex); hip::TextureObject* texture = nullptr; - if (array == nullptr) { + if ((array == nullptr) || (*array == nullptr)) { HIP_RETURN(hipErrorInvalidImage); } @@ -603,7 +603,7 @@ hipError_t hipTexRefGetArray(hipArray* array, textureReference tex) { HIP_RETURN(hipErrorUnknown); } - *array = *(texture->resDesc.res.array.array); + **array = *(texture->resDesc.res.array.array); HIP_RETURN(hipSuccess); } From c56d73ddd2ef84996b51834c514cbffb5a298bdd Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 27 Sep 2019 16:44:50 -0400 Subject: [PATCH 216/282] P4 to Git Change 2005731 by vsytchen@vsytchen-hip-win10 on 2019/09/27 16:34:38 SWDEV-192384 - [HIP CQE][HIPonPAL][19.40] hipBindTexRef1DFetch, hipTextureRef2D are failed on all ASICs for both Win/Lnx 1. Correctly set the image type for textures created from arrays. 2. Allow creating any kind of image from a buffer. ReviewBoardURL = http://ocltc.amd.com/reviews/r/18051/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#19 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#166 edit --- api/hip/hip_texture.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index c5fd87e9f6..70b11109d7 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -463,10 +463,10 @@ hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureRead switch (dim) { case 1: - clType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + clType = CL_MEM_OBJECT_IMAGE1D; break; case 2: - clType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + clType = CL_MEM_OBJECT_IMAGE2D; break; default: HIP_RETURN(hipErrorInvalidValue); From 1bd3a99a8dfb9fbd72978149a2714ac7c937b7ba Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 27 Sep 2019 18:00:49 -0400 Subject: [PATCH 217/282] P4 to Git Change 2005774 by kjayapra@3_HIPWS_P2P_ROCM2 on 2019/09/27 17:54:03 SWDEV-144570 - Adding extern var support for dynamically loaded modules for Texture reference. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#43 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#42 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#43 edit --- api/hip/hip_internal.hpp | 3 +++ api/hip/hip_module.cpp | 50 ++++++++++++++++++++++++++++++++-------- api/hip/hip_platform.cpp | 23 +++++++++++++++++- 3 files changed, 65 insertions(+), 11 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index fcd40c0ace..4d34bcbaf5 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -149,6 +149,7 @@ public: size_t size; std::vector< std::pair< hipModule_t, bool > >* modules; std::vector rvars; + bool dyn_undef; }; private: std::unordered_map functions_; @@ -172,6 +173,8 @@ public: bool getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr); bool getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, size_t* size_ptr); + bool getTexRef(const char* hostVar, textureReference** texRef); + bool getShadowVarInfo(std::string var_name, void** var_addr, size_t* var_size); void setupArgument(const void *arg, size_t size, size_t offset); void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 0cd590886c..5f0fe540a9 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -109,6 +109,34 @@ extern bool __hipExtractCodeObjectFromFatBinary(const void* data, const std::vector& devices, std::vector>& code_objs); +bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* module) { + + std::vector undef_vars; + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + if (!dev_program->getUndefinedVarFromCodeObj(&undef_vars)) { + return false; + } + + for (auto it = undef_vars.begin(); it != undef_vars.end(); ++it) { + auto modules = new std::vector >{g_devices.size()}; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, true); + } + + texture* tex_hptr + = new texture(); + 
memset(tex_hptr, 0x00, sizeof(texture)); + + PlatformState::DeviceVar dvar{ reinterpret_cast(tex_hptr), it->c_str(), sizeof(*tex_hptr), modules, + std::vector{ g_devices.size()}, true }; + PlatformState::instance().registerVar(it->c_str(), dvar); + } + + return true; +} + bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) { size_t var_size = 0; @@ -125,11 +153,11 @@ bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) { for (auto it = var_names.begin(); it != var_names.end(); ++it) { auto modules = new std::vector >{g_devices.size()}; for (size_t dev = 0; dev < g_devices.size(); ++dev) { - modules->at(dev) = std::make_pair(*module, false); + modules->at(dev) = std::make_pair(*module, true); } PlatformState::DeviceVar dvar{nullptr, it->c_str(), 0, modules, - std::vector{ g_devices.size()} }; + std::vector{ g_devices.size()}, false }; PlatformState::instance().registerVar(it->c_str(), dvar); } @@ -149,8 +177,7 @@ hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) program->setVarInfoCallBack(&getSvarInfo); - if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image)) || - CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) { + if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image))) { return hipErrorUnknown; } @@ -160,6 +187,14 @@ hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) return hipErrorUnknown; } + if (!ihipModuleRegisterUndefined(program, module)) { + return hipErrorUnknown; + } + + if(CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) { + return hipErrorUnknown; + } + return hipSuccess; } @@ -446,21 +481,16 @@ hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) { 
HIP_INIT_API(texRef, hmod, name); - hipDeviceptr_t dptr = nullptr; - size_t bytes = 0; - /* input args check */ if ((texRef == nullptr) || (name == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), &dptr, - &bytes)) { + if (!PlatformState::instance().getTexRef(name, texRef)) { HIP_RETURN(hipErrorUnknown); } - *texRef = reinterpret_cast(dptr); HIP_RETURN(hipSuccess); } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 6826f20b08..940bed4d5d 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -173,6 +173,11 @@ std::vector< std::pair >* PlatformState::unregisterVar(hipMod DeviceVar& dvar = it->second; if ((*dvar.modules)[0].first == hmod) { rmodules = dvar.modules; + if (dvar.dyn_undef) { + texture* tex_hptr + = reinterpret_cast *>(dvar.shadowVptr); + delete tex_hptr; + } vars_.erase(it++); } else { ++it; @@ -287,6 +292,22 @@ bool PlatformState::getFuncAttr(const void* hostFunction, return true; } +bool PlatformState::getTexRef(const char* hostVar, textureReference** texRef) { + amd::ScopedLock lock(lock_); + const auto it = vars_.find(std::string(reinterpret_cast(hostVar))); + if (it == vars_.cend()) { + return false; + } + + DeviceVar& dvar = it->second; + if (!dvar.dyn_undef) { + return false; + } + + *texRef = reinterpret_cast(dvar.shadowVptr); + return true; +} + bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, size_t* size_ptr) { amd::ScopedLock lock(lock_); @@ -380,7 +401,7 @@ extern "C" void __hipRegisterVar( HIP_INIT(); PlatformState::DeviceVar dvar{var, std::string{ hostVar }, static_cast(size), modules, - std::vector{ g_devices.size() } }; + std::vector{ g_devices.size() }, false }; PlatformState::instance().registerVar(hostVar, dvar); } From 7d54c3bd4f10663aaae3f00a6147983ef4221f7a Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 2 Oct 2019 13:00:29 -0400 
Subject: [PATCH 218/282] P4 to Git Change 2007527 by michliao@hliao-dev-00-hip.rocm-workspace on 2019/10/02 12:55:07 SWDEV-145570 - Add the missing header of . Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/trace_helper.h#2 edit --- api/hip/trace_helper.h | 1 + 1 file changed, 1 insertion(+) diff --git a/api/hip/trace_helper.h b/api/hip/trace_helper.h index 4bb5202558..7888237160 100644 --- a/api/hip/trace_helper.h +++ b/api/hip/trace_helper.h @@ -24,6 +24,7 @@ THE SOFTWARE. #include #include +#include #include //--- // Helper functions to convert HIP function arguments into strings. From 4c3b676041a736bab4e3b06dc89e8edaf6fdc7cf Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 2 Oct 2019 16:24:33 -0400 Subject: [PATCH 219/282] P4 to Git Change 2007650 by jujiang@JJ-OCL-hip on 2019/10/02 16:04:31 SWDEV-205724 - Issue with hipTexRefSetAddress in HIP/PAL on Windows Handle nullptr channel format desc http://ocltc.amd.com/reviews/r/18065/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#20 edit --- api/hip/hip_texture.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 70b11109d7..5809a75200 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -386,13 +386,17 @@ hipError_t ihipBindTexture(cl_mem_object_type type, case CL_MEM_OBJECT_IMAGE1D: resDesc.resType = hipResourceTypeLinear; resDesc.res.linear.devPtr = const_cast(devPtr); - resDesc.res.linear.desc = *desc; + if (nullptr != desc) { + resDesc.res.linear.desc = *desc; + } resDesc.res.linear.sizeInBytes = image->getSize(); break; case CL_MEM_OBJECT_IMAGE2D: resDesc.resType = hipResourceTypePitch2D; resDesc.res.pitch2D.devPtr = const_cast(devPtr); - resDesc.res.pitch2D.desc = *desc; + if (nullptr != desc) { + resDesc.res.pitch2D.desc = *desc; + } resDesc.res.pitch2D.width = width; resDesc.res.pitch2D.height = height; resDesc.res.pitch2D.pitchInBytes = pitch; From 
4557360eac679e6ea2d2fe3342c845c3b654ab82 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 3 Oct 2019 13:26:42 -0500 Subject: [PATCH 220/282] P4 to Git Change 2008164 by vsytchen@vsytchen-remote-ocl-win10 on 2019/10/03 14:14:21 SWDEV-201925 - hipArray3DCreate() not available in HIP/PAL on Windows 1. Update HIP's module definition files ReviewBoardURL = http://ocltc.amd.com/reviews/r/18068/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#29 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#28 edit --- api/hip/hip_hcc.def.in | 1 + api/hip/hip_hcc.map.in | 1 + 2 files changed, 2 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index e3ddb529a3..70eaa66870 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -75,6 +75,7 @@ hipMalloc hipMalloc3D hipMalloc3DArray hipArrayCreate +hipArray3DCreate hipMallocArray hipMallocPitch hipMemcpy diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index d580b8b99b..217164cafb 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -76,6 +76,7 @@ global: hipMalloc3D; hipMalloc3DArray; hipArrayCreate; + hipArray3DCreate; hipMallocArray; hipMallocPitch; hipMemcpy; From 5db4c83423f973bc8f40e457460ac72127521d02 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 4 Oct 2019 19:02:35 -0400 Subject: [PATCH 221/282] P4 to Git Change 2008906 by axie@axie-hip-vdi-pal2 on 2019/10/04 18:55:34 SWDEV-189650 - [HIP-CLANG][HIP/VDI/PAL] Hangs on test hip_threadfence_system 1. In HIP + VDI + ROCm, allow SVM atomic in VEGA10 and later ASIC. GFX8 (Tonga) was enabled before. 2. In HIP + VDI + PAL Linux driver, allow SVM atomic in VEGA10 and later ASIC. Tests: 1. In HIP + VDI + ROCm, hip_threadfence_system test passed. 2. In HIP + VDI + PAL + Linux , hip_threadfence_system test passed. 3. OpenCL + PAL, clinfo and ocltest runtime test pass. 4. OpenCL + ROCM, clinfo and ocltest runtime test pass. 5. 
Windows 10, VEGA 10, clinfo and and ocltest runtime test pass. hip_threadfence_system test passed by skipping the test. Teamcity presubmission test: http://ocltc.amd.com:8111/viewModification.html?modId=127083&personal=true&tab=vcsModificationBuilds http://ocltc.amd.com:8111/viewModification.html?modId=127076&personal=true&tab=vcsModificationBuilds ReviewBoard: http://ocltc.amd.com/reviews/r/18077/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#73 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#171 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#80 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#31 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#134 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#44 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#320 edit --- api/hip/hip_memory.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 19262ecc54..fad9d2aefe 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -231,7 +231,13 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN(ihipMalloc(ptr, sizeBytes, CL_MEM_SVM_FINE_GRAIN_BUFFER | (flags << 16))); + unsigned int ihipFlags = CL_MEM_SVM_FINE_GRAIN_BUFFER | (flags << 16); + if (flags & hipHostMallocCoherent || + (!(flags & hipHostMallocNonCoherent) && HIP_HOST_COHERENT)) { + ihipFlags |= CL_MEM_SVM_ATOMICS; + } + + HIP_RETURN(ihipMalloc(ptr, sizeBytes, ihipFlags)); } hipError_t hipFree(void* ptr) { From 6e7f1dea66bf87325b7c7b94a332e85b99527cee Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 7 Oct 2019 11:55:30 -0400 Subject: [PATCH 222/282] P4 to Git Change 2009236 by eshcherb@evgeny-hip on 2019/10/07 11:49:55 SWDEV-197287 - HIP tracing layer instrumentation 
hip_prof_str.h - generated header; as a next step a generator will be integrated in HIP makefile (similar to HIP-HCC) prof_protocol.h - temporarily included in HIP sources. Provided by roctracer-proto package. HIP-HCC CMake has a parameter to set a path to the header. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/build/Makefile.hip#23 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#19 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_error.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_intercept.cpp#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#44 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#74 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#43 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_peer.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#44 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_prof_api.h#1 add ... //depot/stg/opencl/drivers/opencl/api/hip/hip_profile.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#25 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_surface.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#21 edit ... //depot/stg/opencl/drivers/opencl/api/hip/prof_protocol.h#1 add ... //depot/stg/opencl/drivers/opencl/hip_prof_gen.py#1 add ... 
//depot/stg/opencl/drivers/opencl/make/hip.git/include/hip/hcc_detail/hip_prof_str.h#2 delete --- api/hip/hip_context.cpp | 44 +++--- api/hip/hip_device.cpp | 18 +-- api/hip/hip_device_runtime.cpp | 38 ++--- api/hip/hip_error.cpp | 4 +- api/hip/hip_event.cpp | 16 +-- api/hip/hip_intercept.cpp | 58 ++++++++ api/hip/hip_internal.hpp | 6 +- api/hip/hip_memory.cpp | 106 +++++++------- api/hip/hip_module.cpp | 35 +++-- api/hip/hip_peer.cpp | 20 +-- api/hip/hip_platform.cpp | 14 +- api/hip/hip_prof_api.h | 255 +++++++++++++++++++++++++++++++++ api/hip/hip_profile.cpp | 4 +- api/hip/hip_stream.cpp | 20 +-- api/hip/hip_surface.cpp | 4 +- api/hip/hip_texture.cpp | 50 +++---- api/hip/prof_protocol.h | 90 ++++++++++++ 17 files changed, 596 insertions(+), 186 deletions(-) create mode 100644 api/hip/hip_intercept.cpp create mode 100644 api/hip/hip_prof_api.h create mode 100644 api/hip/prof_protocol.h diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 759676e83d..ed741c4883 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -112,13 +112,13 @@ amd::HostQueue* getNullStream() { using namespace hip; hipError_t hipInit(unsigned int flags) { - HIP_INIT_API(flags); + HIP_INIT_API(hipInit, flags); HIP_RETURN(hipSuccess); } hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) { - HIP_INIT_API(ctx, flags, device); + HIP_INIT_API(hipCtxCreate, ctx, flags, device); if (static_cast(device) >= g_devices.size()) { HIP_RETURN(hipErrorInvalidValue); @@ -134,7 +134,7 @@ hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) } hipError_t hipCtxSetCurrent(hipCtx_t ctx) { - HIP_INIT_API(ctx); + HIP_INIT_API(hipCtxSetCurrent, ctx); if (ctx == nullptr) { if(!g_ctxtStack.empty()) { @@ -152,7 +152,7 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) { } hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { - HIP_INIT_API(ctx); + HIP_INIT_API(hipCtxGetCurrent, ctx); *ctx = reinterpret_cast(hip::getCurrentContext()); @@ -160,7 +160,7 
@@ hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { } hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { - HIP_INIT_API(pConfig); + HIP_INIT_API(hipCtxGetSharedMemConfig, pConfig); *pConfig = hipSharedMemBankSizeFourByte; @@ -168,7 +168,7 @@ hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { } hipError_t hipRuntimeGetVersion(int *runtimeVersion) { - HIP_INIT_API(runtimeVersion); + HIP_INIT_API(hipRuntimeGetVersion, runtimeVersion); if (!runtimeVersion) { HIP_RETURN(hipErrorInvalidValue); @@ -180,7 +180,7 @@ hipError_t hipRuntimeGetVersion(int *runtimeVersion) { } hipError_t hipCtxDestroy(hipCtx_t ctx) { - HIP_INIT_API(ctx); + HIP_INIT_API(hipCtxDestroy, ctx); amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); if (amdContext == nullptr) { @@ -207,7 +207,7 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) { } hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { - HIP_INIT_API(ctx); + HIP_INIT_API(hipCtxPopCurrent, ctx); amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); if (amdContext == nullptr) { @@ -225,7 +225,7 @@ hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { } hipError_t hipCtxPushCurrent(hipCtx_t ctx) { - HIP_INIT_API(ctx); + HIP_INIT_API(hipCtxPushCurrent, ctx); amd::Context* amdContext = reinterpret_cast(as_amd(ctx)); if (amdContext == nullptr) { @@ -239,7 +239,7 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) { } hipError_t hipDriverGetVersion(int* driverVersion) { - HIP_INIT_API(driverVersion); + HIP_INIT_API(hipDriverGetVersion, driverVersion); auto* deviceHandle = g_devices[0]->devices()[0]; const auto& info = deviceHandle->info(); @@ -255,7 +255,7 @@ hipError_t hipDriverGetVersion(int* driverVersion) { } hipError_t hipCtxGetDevice(hipDevice_t* device) { - HIP_INIT_API(device); + HIP_INIT_API(hipCtxGetDevice, device); if (device != nullptr) { for (unsigned int i = 0; i < g_devices.size(); i++) { @@ -272,7 +272,7 @@ hipError_t hipCtxGetDevice(hipDevice_t* device) { } hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { - 
HIP_INIT_API(apiVersion); + HIP_INIT_API(hipCtxGetApiVersion, apiVersion); assert(0 && "Unimplemented"); @@ -280,7 +280,7 @@ hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { } hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) { - HIP_INIT_API(cacheConfig); + HIP_INIT_API(hipCtxGetCacheConfig, cacheConfig); assert(0 && "Unimplemented"); @@ -288,7 +288,7 @@ hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) { } hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) { - HIP_INIT_API(cacheConfig); + HIP_INIT_API(hipCtxSetCacheConfig, cacheConfig); assert(0 && "Unimplemented"); @@ -296,7 +296,7 @@ hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) { } hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { - HIP_INIT_API(config); + HIP_INIT_API(hipCtxSetSharedMemConfig, config); assert(0 && "Unimplemented"); @@ -304,7 +304,7 @@ hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { } hipError_t hipCtxSynchronize(void) { - HIP_INIT_API(1); + HIP_INIT_API(hipCtxSynchronize, 1); assert(0 && "Unimplemented"); @@ -312,7 +312,7 @@ hipError_t hipCtxSynchronize(void) { } hipError_t hipCtxGetFlags(unsigned int* flags) { - HIP_INIT_API(flags); + HIP_INIT_API(hipCtxGetFlags, flags); assert(0 && "Unimplemented"); @@ -320,7 +320,7 @@ hipError_t hipCtxGetFlags(unsigned int* flags) { } hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) { - HIP_INIT_API(dev, flags, active); + HIP_INIT_API(hipDevicePrimaryCtxGetState, dev, flags, active); if (static_cast(dev) >= g_devices.size()) { HIP_RETURN(hipErrorInvalidDevice); @@ -338,7 +338,7 @@ hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int } hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { - HIP_INIT_API(dev); + HIP_INIT_API(hipDevicePrimaryCtxRelease, dev); if (static_cast(dev) >= g_devices.size()) { HIP_RETURN(hipErrorInvalidDevice); @@ -348,7 +348,7 @@ hipError_t 
hipDevicePrimaryCtxRelease(hipDevice_t dev) { } hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { - HIP_INIT_API(pctx, dev); + HIP_INIT_API(hipDevicePrimaryCtxRetain, pctx, dev); if (static_cast(dev) >= g_devices.size()) { HIP_RETURN(hipErrorInvalidDevice); @@ -363,13 +363,13 @@ hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { } hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { - HIP_INIT_API(dev); + HIP_INIT_API(hipDevicePrimaryCtxReset, dev); HIP_RETURN(hipSuccess); } hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { - HIP_INIT_API(dev, flags); + HIP_INIT_API(hipDevicePrimaryCtxSetFlags, dev, flags); if (static_cast(dev) >= g_devices.size()) { HIP_RETURN(hipErrorInvalidDevice); diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 4dfb447404..39c02aa0c7 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -25,7 +25,7 @@ THE SOFTWARE. #include "hip_internal.hpp" hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { - HIP_INIT_API(device, deviceId); + HIP_INIT_API(hipDeviceGet, device, deviceId); if (device != nullptr) { *device = deviceId; @@ -38,7 +38,7 @@ hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) { - HIP_INIT_API(cacheConfig); + HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig); // No way to set cache config yet. 
@@ -47,7 +47,7 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { - HIP_INIT_API(bytes, device); + HIP_INIT_API(hipDeviceTotalMem, bytes, device); if (device < 0 || static_cast(device) >= g_devices.size()) { HIP_RETURN(hipErrorInvalidDevice); @@ -67,7 +67,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { - HIP_INIT_API(major, minor, device); + HIP_INIT_API(hipDeviceComputeCapability, major, minor, device); if (device < 0 || static_cast(device) >= g_devices.size()) { HIP_RETURN(hipErrorInvalidDevice); @@ -86,7 +86,7 @@ hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device } hipError_t hipDeviceGetCount(int* count) { - HIP_INIT_API(count); + HIP_INIT_API(NONE, count); HIP_RETURN(ihipDeviceGetCount(count)); } @@ -108,7 +108,7 @@ hipError_t ihipDeviceGetCount(int* count) { hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { - HIP_INIT_API((void*)name, len, device); + HIP_INIT_API(hipDeviceGetName, (void*)name, len, device); if (device < 0 || static_cast(device) >= g_devices.size()) { HIP_RETURN(hipErrorInvalidDevice); @@ -134,7 +134,7 @@ hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { } hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) { - HIP_INIT_API(props, device); + HIP_INIT_API(hipGetDeviceProperties, props, device); if (props == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -218,7 +218,7 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) } hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) { - HIP_INIT_API(deviceId, acc); + HIP_INIT_API(NONE, deviceId, acc); assert(0 && "Unimplemented"); @@ -226,7 +226,7 @@ hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) { } hipError_t 
hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) { - HIP_INIT_API(stream, av); + HIP_INIT_API(NONE, stream, av); assert(0 && "Unimplemented"); diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index 6aa9eae29e..b16be0980d 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -26,7 +26,7 @@ THE SOFTWARE. hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { - HIP_INIT_API(device, properties); + HIP_INIT_API(hipChooseDevice, device, properties); if (device == nullptr || properties == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -140,7 +140,7 @@ hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { - HIP_INIT_API(pi, attr, device); + HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device); if (pi == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -289,7 +289,7 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { - HIP_INIT_API(device, pciBusIdstr); + HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusIdstr); if (device == nullptr || pciBusIdstr == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -319,7 +319,7 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { } hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) { - HIP_INIT_API(cacheConfig); + HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig); if(cacheConfig == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -332,7 +332,7 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) { hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { - HIP_INIT_API(pValue, limit); + HIP_INIT_API(hipDeviceGetLimit, pValue, limit); if(pValue == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -357,7 +357,7 @@ hipError_t hipDeviceGetP2PAttribute ( int* value, 
hipDeviceP2PAttr attr, int sr hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { - HIP_INIT_API((void*)pciBusId, len, device); + HIP_INIT_API(hipDeviceGetPCIBusId, (void*)pciBusId, len, device); int count; ihipDeviceGetCount(&count); @@ -381,7 +381,7 @@ hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { } hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { - HIP_INIT_API(pConfig); + HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig); *pConfig = hipSharedMemBankSizeFourByte; @@ -389,7 +389,7 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { } hipError_t hipDeviceReset ( void ) { - HIP_INIT_API(); + HIP_INIT_API(hipDeviceReset); /* FIXME */ @@ -397,7 +397,7 @@ hipError_t hipDeviceReset ( void ) { } hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { - HIP_INIT_API(cacheConfig); + HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig); // No way to set cache config yet. @@ -409,7 +409,7 @@ hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) { } hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { - HIP_INIT_API(config); + HIP_INIT_API(hipDeviceSetSharedMemConfig, config); // No way to set cache config yet. 
@@ -417,7 +417,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { } hipError_t hipDeviceSynchronize ( void ) { - HIP_INIT_API(); + HIP_INIT_API(hipDeviceSynchronize); hip::syncStreams(); @@ -442,7 +442,7 @@ int ihipGetDevice() { } hipError_t hipGetDevice ( int* deviceId ) { - HIP_INIT_API(deviceId); + HIP_INIT_API(hipGetDevice, deviceId); if (deviceId != nullptr) { int dev = ihipGetDevice(); @@ -455,7 +455,7 @@ hipError_t hipGetDevice ( int* deviceId ) { } hipError_t hipGetDeviceCount ( int* count ) { - HIP_INIT_API(count); + HIP_INIT_API(hipGetDeviceCount, count); HIP_RETURN(ihipDeviceGetCount(count)); } @@ -465,7 +465,7 @@ hipError_t hipGetDeviceFlags ( unsigned int* flags ) { } hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) { - HIP_INIT_API(handle, event); + HIP_INIT_API(NONE, handle, event); assert(0 && "Unimplemented"); @@ -473,7 +473,7 @@ hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event } hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) { - HIP_INIT_API(event, handle); + HIP_INIT_API(NONE, event, handle); assert(0 && "Unimplemented"); @@ -481,7 +481,7 @@ hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle } hipError_t hipSetDevice ( int device ) { - HIP_INIT_API(device); + HIP_INIT_API(hipSetDevice, device); if (static_cast(device) < g_devices.size()) { hip::setCurrentContext(device); @@ -492,7 +492,7 @@ hipError_t hipSetDevice ( int device ) { } hipError_t hipSetDeviceFlags ( unsigned int flags ) { - HIP_INIT_API(flags); + HIP_INIT_API(hipSetDeviceFlags, flags); /* FIXME */ /* Not all of Ctx may be implemented */ @@ -508,7 +508,7 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) { } hipError_t hipSetValidDevices ( int* device_arr, int len ) { - HIP_INIT_API(device_arr, len); + HIP_INIT_API(NONE, device_arr, len); assert(0 && "Unimplemented"); @@ -516,7 +516,7 @@ hipError_t hipSetValidDevices ( int* 
device_arr, int len ) { } hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount) { - HIP_INIT_API(device1, device2, linktype, hopcount); + HIP_INIT_API(hipExtGetLinkTypeAndHopCount, device1, device2, linktype, hopcount); const int numDevices = static_cast(g_devices.size()); diff --git a/api/hip/hip_error.cpp b/api/hip/hip_error.cpp index 5f76e560c0..77b1b8148a 100644 --- a/api/hip/hip_error.cpp +++ b/api/hip/hip_error.cpp @@ -26,7 +26,7 @@ THE SOFTWARE. hipError_t hipGetLastError() { - HIP_INIT_API(); + HIP_INIT_API(hipGetLastError); hipError_t err = hip::g_lastError; hip::g_lastError = hipSuccess; return err; @@ -34,7 +34,7 @@ hipError_t hipGetLastError() hipError_t hipPeekAtLastError() { - HIP_INIT_API(); + HIP_INIT_API(hipPeekAtLastError); hipError_t err = hip::g_lastError; HIP_RETURN(err); } diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index 81ae93e279..be2c698f46 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -182,19 +182,19 @@ hipError_t ihipEventQuery(hipEvent_t event) { } hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { - HIP_INIT_API(event, flags); + HIP_INIT_API(hipEventCreateWithFlags, event, flags); HIP_RETURN(ihipEventCreateWithFlags(event, flags)); -} +} hipError_t hipEventCreate(hipEvent_t* event) { - HIP_INIT_API(event); + HIP_INIT_API(hipEventCreate, event); HIP_RETURN(ihipEventCreateWithFlags(event, 0)); } hipError_t hipEventDestroy(hipEvent_t event) { - HIP_INIT_API(event); + HIP_INIT_API(hipEventDestroy, event); if (event == nullptr) { HIP_RETURN(hipErrorInvalidResourceHandle); @@ -206,7 +206,7 @@ hipError_t hipEventDestroy(hipEvent_t event) { } hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { - HIP_INIT_API(ms, start, stop); + HIP_INIT_API(hipEventElapsedTime, ms, start, stop); if (start == nullptr || stop == nullptr) { HIP_RETURN(hipErrorInvalidResourceHandle); @@ -223,7 +223,7 @@ hipError_t 
hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { } hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { - HIP_INIT_API(event, stream); + HIP_INIT_API(hipEventRecord, event, stream); if (event == nullptr) { HIP_RETURN(hipErrorInvalidResourceHandle); @@ -246,7 +246,7 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { } hipError_t hipEventSynchronize(hipEvent_t event) { - HIP_INIT_API(event); + HIP_INIT_API(hipEventSynchronize, event); if (event == nullptr) { HIP_RETURN(hipErrorInvalidResourceHandle); @@ -258,7 +258,7 @@ hipError_t hipEventSynchronize(hipEvent_t event) { } hipError_t hipEventQuery(hipEvent_t event) { - HIP_INIT_API(event); + HIP_INIT_API(hipEventQuery, event); HIP_RETURN(ihipEventQuery(event)); } diff --git a/api/hip/hip_intercept.cpp b/api/hip/hip_intercept.cpp new file mode 100644 index 0000000000..b6aed14701 --- /dev/null +++ b/api/hip/hip_intercept.cpp @@ -0,0 +1,58 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "hip/hip_runtime.h" +#include "hip_prof_api.h" + +// HIP API callback/activity + +api_callbacks_table_t callbacks_table; + +extern std::string& FunctionName(const hipFunction_t f); +const char* hipKernelNameRef(const hipFunction_t f) { return FunctionName(f).c_str(); } + +hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) { + return callbacks_table.set_callback(id, reinterpret_cast(fun), arg) ? + hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRemoveApiCallback(uint32_t id) { + return callbacks_table.set_callback(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) { + return callbacks_table.set_activity(id, reinterpret_cast(fun), arg) ? + hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRemoveActivityCallback(uint32_t id) { + return callbacks_table.set_activity(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipEnableTracing(bool enabled) { + callbacks_table.set_enabled(enabled); + return hipSuccess; +} + +const char* hipApiName(uint32_t id) { + return hip_api_name(id); +} diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 4d34bcbaf5..ba4a300739 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -24,6 +24,7 @@ THE SOFTWARE. #define HIP_SRC_HIP_INTERNAL_H #include "cl_common.hpp" +#include "hip_prof_api.h" #include "trace_helper.h" #include "utils/debug.hpp" #include @@ -51,13 +52,14 @@ typedef struct ihipIpcMemHandle_st { } // This macro should be called at the beginning of every HIP API. -#define HIP_INIT_API(...) \ +#define HIP_INIT_API(cid, ...) 
\ LogPrintfInfo("[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ amd::Thread* thread = amd::Thread::current(); \ if (!CL_CHECK_THREAD(thread)) { \ HIP_RETURN(hipErrorOutOfMemory); \ } \ - HIP_INIT(); + HIP_INIT(); \ + HIP_CB_SPAWNER_OBJECT(cid); #define HIP_RETURN(ret) \ hip::g_lastError = ret; \ diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index fad9d2aefe..5abfb7e365 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -200,7 +200,7 @@ hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, } hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags) { - HIP_INIT_API(ptr, sizeBytes, flags); + HIP_INIT_API(hipExtMallocWithFlags, ptr, sizeBytes, flags); if (flags != hipDeviceMallocDefault && flags != hipDeviceMallocFinegrained) { @@ -211,13 +211,13 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag } hipError_t hipMalloc(void** ptr, size_t sizeBytes) { - HIP_INIT_API(ptr, sizeBytes); + HIP_INIT_API(hipMalloc, ptr, sizeBytes); HIP_RETURN(ihipMalloc(ptr, sizeBytes, 0)); } hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { - HIP_INIT_API(ptr, sizeBytes, flags); + HIP_INIT_API(hipHostMalloc, ptr, sizeBytes, flags); if (ptr == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -241,7 +241,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { } hipError_t hipFree(void* ptr) { - HIP_INIT_API(ptr); + HIP_INIT_API(hipFree, ptr); if (ptr == nullptr) { HIP_RETURN(hipSuccess); @@ -256,7 +256,7 @@ hipError_t hipFree(void* ptr) { } hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { - HIP_INIT_API(dst, src, sizeBytes, kind); + HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind); hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -264,31 +264,31 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, 
hipMemcpyKind } hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { - HIP_INIT_API(dst, value, sizeBytes, stream); + HIP_INIT_API(hipMemsetAsync, dst, value, sizeBytes, stream); HIP_RETURN(ihipMemset(dst, value, sizeof(char), sizeBytes, stream, true)); } hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream) { - HIP_INIT_API(dst, value, count, stream); + HIP_INIT_API(hipMemsetD32Async, dst, value, count, stream); HIP_RETURN(ihipMemset(dst, value, sizeof(int), count * sizeof(int), stream, true)); } hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { - HIP_INIT_API(dst, value, sizeBytes); + HIP_INIT_API(hipMemset, dst, value, sizeBytes); HIP_RETURN(ihipMemset(dst, value, sizeof(char), sizeBytes, nullptr)); } hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { - HIP_INIT_API(dst, value, count); + HIP_INIT_API(hipMemsetD32, dst, value, count); HIP_RETURN(ihipMemset(dst, value, sizeof(int), count * sizeof(int), nullptr)); } hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { - HIP_INIT_API(ptr, size); + HIP_INIT_API(hipMemPtrGetInfo, ptr, size); size_t offset = 0; amd::Memory* svmMem = getMemoryObject(ptr, offset); @@ -303,7 +303,7 @@ hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { } hipError_t hipHostFree(void* ptr) { - HIP_INIT_API(ptr); + HIP_INIT_API(hipHostFree, ptr); if (amd::SvmBuffer::malloced(ptr)) { amd::SvmBuffer::free(*hip::getCurrentContext(), ptr); @@ -313,7 +313,7 @@ hipError_t hipHostFree(void* ptr) { } hipError_t hipFreeArray(hipArray* array) { - HIP_INIT_API(array); + HIP_INIT_API(hipFreeArray, array); if (amd::SvmBuffer::malloced(array->data)) { amd::SvmBuffer::free(*hip::getCurrentContext(), array->data); @@ -323,7 +323,7 @@ hipError_t hipFreeArray(hipArray* array) { } hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) { - HIP_INIT_API(pbase, psize, dptr); + HIP_INIT_API(hipMemGetAddressRange, 
pbase, psize, dptr); // Since we are using SVM buffer DevicePtr and HostPtr is the same void* ptr = dptr; @@ -341,7 +341,7 @@ hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDevice } hipError_t hipMemGetInfo(size_t* free, size_t* total) { - HIP_INIT_API(free, total); + HIP_INIT_API(hipMemGetInfo, free, total); size_t freeMemory[2]; amd::Device* device = hip::getCurrentContext()->devices()[0]; @@ -394,14 +394,14 @@ hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t heigh hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { - HIP_INIT_API(ptr, pitch, width, height); + HIP_INIT_API(hipMallocPitch, ptr, pitch, width, height); const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 }; HIP_RETURN(ihipMallocPitch(ptr, pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D, &image_format)); } hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { - HIP_INIT_API(pitchedDevPtr, &extent); + HIP_INIT_API(hipMalloc3D, pitchedDevPtr, &extent); size_t pitch = 0; @@ -424,7 +424,7 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { } hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { - HIP_INIT_API(pitchedDevPtr, value, &extent); + HIP_INIT_API(hipMemset3D, pitchedDevPtr, value, &extent); void *dst = pitchedDevPtr.ptr; size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; @@ -433,7 +433,7 @@ hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) } hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { - HIP_INIT_API(array, pAllocateArray); + HIP_INIT_API(hipArrayCreate, array, pAllocateArray); if (pAllocateArray->Width == 0) { HIP_RETURN(hipErrorInvalidValue); @@ -461,7 +461,7 @@ hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocat hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width, 
size_t height, unsigned int flags) { - HIP_INIT_API(array, desc, width, height, flags); + HIP_INIT_API(hipMallocArray, array, desc, width, height, flags); if (width == 0) { HIP_RETURN(hipErrorInvalidValue); @@ -503,7 +503,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, } hipError_t hipArray3DCreate(hipArray** array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray) { - HIP_INIT_API(array, pAllocateArray); + HIP_INIT_API(hipArray3DCreate, array, pAllocateArray); *array = (hipArray*)malloc(sizeof(hipArray)); array[0]->type = pAllocateArray->Flags; @@ -531,7 +531,7 @@ hipError_t hipArray3DCreate(hipArray** array, const HIP_ARRAY3D_DESCRIPTOR* pAll hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc* desc, struct hipExtent extent, unsigned int flags) { - HIP_INIT_API(array, desc, &extent, flags); + HIP_INIT_API(hipMalloc3DArray, array, desc, &extent, flags); *array = (hipArray*)malloc(sizeof(hipArray)); array[0]->type = flags; @@ -569,7 +569,7 @@ hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc } hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { - HIP_INIT_API(flagsPtr, hostPtr); + HIP_INIT_API(hipHostGetFlags, flagsPtr, hostPtr); if (flagsPtr == nullptr || hostPtr == nullptr) { @@ -589,7 +589,7 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { } hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { - HIP_INIT_API(hostPtr, sizeBytes, flags); + HIP_INIT_API(hipHostRegister, hostPtr, sizeBytes, flags); if(hostPtr != nullptr) { amd::Memory* mem = new (*hip::host_context) amd::Buffer(*hip::host_context, CL_MEM_USE_HOST_PTR | CL_MEM_SVM_ATOMICS, sizeBytes); @@ -616,7 +616,7 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) } hipError_t hipHostUnregister(void* hostPtr) { - HIP_INIT_API(hostPtr); + HIP_INIT_API(hipHostUnregister, hostPtr); if (amd::SvmBuffer::malloced(hostPtr)) { 
hip::syncStreams(); @@ -651,7 +651,7 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t count, size_t offset, hipMemcpyKind kind) { - HIP_INIT_API(symbolName, src, count, offset, kind); + HIP_INIT_API(hipMemcpyToSymbol, symbolName, src, count, offset, kind); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; @@ -675,7 +675,7 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, size_t offset, hipMemcpyKind kind) { - HIP_INIT_API(symbolName, dst, count, offset, kind); + HIP_INIT_API(hipMemcpyFromSymbol, symbolName, dst, count, offset, kind); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; @@ -699,7 +699,7 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t count, size_t offset, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_API(symbolName, src, count, offset, kind, stream); + HIP_INIT_API(hipMemcpyToSymbolAsync, symbolName, src, count, offset, kind, stream); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; @@ -723,7 +723,7 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t count, size_t offset, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_API(symbolName, dst, count, offset, kind, stream); + HIP_INIT_API(hipMemcpyFromSymbolAsync, symbolName, dst, count, offset, kind, stream); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; @@ -746,7 +746,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co } hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { - HIP_INIT_API(dst, src, sizeBytes); + HIP_INIT_API(hipMemcpyHtoD, dst, src, sizeBytes); 
hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -755,7 +755,7 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { } hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { - HIP_INIT_API(dst, src, sizeBytes); + HIP_INIT_API(hipMemcpyDtoH, dst, src, sizeBytes); hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -764,7 +764,7 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { } hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) { - HIP_INIT_API(dst, src, sizeBytes); + HIP_INIT_API(hipMemcpyDtoD, dst, src, sizeBytes); hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -773,7 +773,7 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte } hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { - HIP_INIT_API(dst, src, sizeBytes); + HIP_INIT_API(NONE, dst, src, sizeBytes); hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -783,7 +783,7 @@ hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_API(dst, src, sizeBytes, kind, stream); + HIP_INIT_API(hipMemcpyAsync, dst, src, sizeBytes, kind, stream); amd::HostQueue* queue = hip::getQueue(stream); @@ -793,7 +793,7 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream) { - HIP_INIT_API(dst, src, sizeBytes, stream); + HIP_INIT_API(hipMemcpyHtoDAsync, dst, src, sizeBytes, stream); amd::HostQueue* queue = hip::getQueue(stream); @@ -803,7 +803,7 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream) { - HIP_INIT_API(dst, src, 
sizeBytes, stream); + HIP_INIT_API(hipMemcpyDtoDAsync, dst, src, sizeBytes, stream); amd::HostQueue* queue = hip::getQueue(stream); @@ -813,7 +813,7 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream) { - HIP_INIT_API(dst, src, sizeBytes, stream); + HIP_INIT_API(hipMemcpyDtoHAsync, dst, src, sizeBytes, stream); amd::HostQueue* queue = hip::getQueue(stream); @@ -888,7 +888,7 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch } hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { - HIP_INIT_API(pCopy); + HIP_INIT_API(hipMemcpyParam2D, pCopy); hipError_t e = hipSuccess; if (pCopy == nullptr) { e = hipErrorInvalidValue; @@ -903,7 +903,7 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { - HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind); + HIP_INIT_API(hipMemcpy2D, dst, dpitch, src, spitch, width, height, kind); hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -914,7 +914,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind, stream); + HIP_INIT_API(hipMemcpy2DAsync, dst, dpitch, src, spitch, width, height, kind, stream); amd::HostQueue* queue = hip::getQueue(stream); @@ -923,7 +923,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { - HIP_INIT_API(dst, wOffset, hOffset, src, spitch, width, height, 
kind); + HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind); if (dst->data == nullptr) { HIP_RETURN(hipErrorUnknown); @@ -1018,7 +1018,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind) { - HIP_INIT_API(dstArray, wOffset, hOffset, src, count, kind); + HIP_INIT_API(hipMemcpyToArray, dstArray, wOffset, hOffset, src, count, kind); hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -1058,7 +1058,7 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset, hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind) { - HIP_INIT_API(dst, srcArray, wOffset, hOffset, count, kind); + HIP_INIT_API(hipMemcpyFromArray, dst, srcArray, wOffset, hOffset, count, kind); hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -1097,7 +1097,7 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs } hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) { - HIP_INIT_API(dstArray, dstOffset, srcHost, count); + HIP_INIT_API(hipMemcpyHtoA, dstArray, dstOffset, srcHost, count); hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -1134,7 +1134,7 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo } hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count) { - HIP_INIT_API(dst, srcArray, srcOffset, count); + HIP_INIT_API(hipMemcpyAtoH, dst, srcArray, srcOffset, count); hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -1171,7 +1171,7 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t } hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { - HIP_INIT_API(p); + 
HIP_INIT_API(hipMemcpy3D, p); hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -1325,7 +1325,7 @@ hipError_t ihipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t } hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { - HIP_INIT_API(dst, pitch, value, width, height); + HIP_INIT_API(hipMemset2D, dst, pitch, value, width, height); hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); @@ -1334,7 +1334,7 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream) { - HIP_INIT_API(dst, pitch, value, width, height, stream); + HIP_INIT_API(hipMemset2DAsync, dst, pitch, value, width, height, stream); amd::HostQueue* queue = hip::getQueue(stream); @@ -1342,7 +1342,7 @@ hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, } hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) { - HIP_INIT_API(dst, value, sizeBytes); + HIP_INIT_API(hipMemsetD8, dst, value, sizeBytes); if (dst == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -1377,7 +1377,7 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes } hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* dev_ptr) { - HIP_INIT_API(handle, dev_ptr); + HIP_INIT_API(hipIpcGetMemHandle, handle, dev_ptr); size_t offset = 0; amd::Memory* amd_mem_obj = nullptr; @@ -1408,7 +1408,7 @@ hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* dev_ptr) { } hipError_t hipIpcOpenMemHandle(void** dev_ptr, hipIpcMemHandle_t handle, unsigned int flags) { - HIP_INIT_API(dev_ptr, &handle, flags); + HIP_INIT_API(hipIpcOpenMemHandle, dev_ptr, &handle, flags); amd::Memory* amd_mem_obj = nullptr; amd::Device* device = nullptr; @@ -1434,7 +1434,7 @@ hipError_t hipIpcOpenMemHandle(void** dev_ptr, hipIpcMemHandle_t handle, unsigne } hipError_t 
hipIpcCloseMemHandle(void* dev_ptr) { - HIP_INIT_API(dev_ptr); + HIP_INIT_API(hipIpcCloseMemHandle, dev_ptr); size_t offset = 0; amd::Device* device = nullptr; @@ -1479,7 +1479,7 @@ hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannel } hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsigned flags) { - HIP_INIT_API(devicePointer, hostPointer, flags); + HIP_INIT_API(hipHostGetDevicePointer, devicePointer, hostPointer, flags); size_t offset = 0; @@ -1493,7 +1493,7 @@ hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsi } hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { - HIP_INIT_API(attributes, ptr); + HIP_INIT_API(hipPointerGetAttributes, attributes, ptr); size_t offset = 0; amd::Memory* memObj = getMemoryObject(ptr, offset); diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 5f0fe540a9..ad637d3967 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -30,6 +30,11 @@ THE SOFTWARE. 
hipError_t ihipModuleLoadData(hipModule_t *module, const void *image); +const std::string& FunctionName(const hipFunction_t f) +{ + return hip::Function::asFunction(f)->function_->name(); +} + static uint64_t ElfSize(const void *emi) { const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi; @@ -53,7 +58,7 @@ static uint64_t ElfSize(const void *emi) hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { - HIP_INIT_API(module, fname); + HIP_INIT_API(hipModuleLoad, module, fname); if (!fname) { HIP_RETURN(hipErrorInvalidValue); @@ -81,7 +86,7 @@ bool ihipModuleUnregisterGlobal(hipModule_t hmod) { hipError_t hipModuleUnload(hipModule_t hmod) { - HIP_INIT_API(hmod); + HIP_INIT_API(hipModuleUnload, hmod); if (hmod == nullptr) { HIP_RETURN(hipErrorUnknown); @@ -100,7 +105,7 @@ hipError_t hipModuleUnload(hipModule_t hmod) hipError_t hipModuleLoadData(hipModule_t *module, const void *image) { - HIP_INIT_API(module, image); + HIP_INIT_API(hipModuleLoadData, module, image); HIP_RETURN(ihipModuleLoadData(module, image)); } @@ -200,7 +205,7 @@ hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name) { - HIP_INIT_API(hfunc, hmod, name); + HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name); amd::Program* program = as_amd(reinterpret_cast(hmod)); @@ -222,7 +227,7 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const ch hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name) { - HIP_INIT_API(dptr, bytes, hmod, name); + HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name); /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), dptr, @@ -235,7 +240,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { - HIP_INIT_API(attr, func); + 
HIP_INIT_API(hipFuncGetAttributes, attr, func); if (!PlatformState::instance().getFuncAttr(func, attr)) { HIP_RETURN(hipErrorUnknown); @@ -253,7 +258,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0, uint32_t params = 0) { - HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params); hip::Function* function = hip::Function::asFunction(f); @@ -347,7 +352,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra) { - HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, + HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra); @@ -364,7 +369,7 @@ hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hipStream_t hStream, void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) { - HIP_INIT_API(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + HIP_INIT_API(NONE, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags); @@ -383,7 +388,7 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, hipEvent_t startEvent, hipEvent_t stopEvent) { - HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, + HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent); @@ -400,7 +405,7 @@ hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX, hipEvent_t startEvent, hipEvent_t stopEvent) { - HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, + HIP_INIT_API(NONE, f, 
gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent); @@ -413,7 +418,7 @@ hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim, void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream) { - HIP_INIT_API(f, gridDim, blockDim, + HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, sharedMemBytes, hStream); int deviceId = ihipGetDevice(); @@ -464,7 +469,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags) { - HIP_INIT_API(launchParamsList, numDevices, flags); + HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, numDevices, flags); return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, (amd::NDRangeKernelCommand::CooperativeGroups | @@ -473,13 +478,13 @@ hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsLi hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags) { - HIP_INIT_API(launchParamsList, numDevices, flags); + HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags); return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0); } hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) { - HIP_INIT_API(texRef, hmod, name); + HIP_INIT_API(hipModuleGetTexRef, texRef, hmod, name); /* input args check */ if ((texRef == nullptr) || (name == nullptr)) { diff --git a/api/hip/hip_peer.cpp b/api/hip/hip_peer.cpp index cdfec74f0e..95cf206e9c 100644 --- a/api/hip/hip_peer.cpp +++ b/api/hip/hip_peer.cpp @@ -25,7 +25,7 @@ THE SOFTWARE. 
#include "hip_internal.hpp" hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) { - HIP_INIT_API(canAccessPeer, thisCtx, peerCtx); + HIP_INIT_API(NONE, canAccessPeer, thisCtx, peerCtx); assert(0 && "Unimplemented"); @@ -34,7 +34,7 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx, size_t sizeBytes) { - HIP_INIT_API(dst, dstCtx, src, srcCtx, sizeBytes); + HIP_INIT_API(NONE, dst, dstCtx, src, srcCtx, sizeBytes); assert(0 && "Unimplemented"); @@ -43,7 +43,7 @@ hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t s hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice, size_t sizeBytes, hipStream_t stream) { - HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream); + HIP_INIT_API(NONE, dst, dstDevice, src, srcDevice, sizeBytes, stream); assert(0 && "Unimplemented"); @@ -51,7 +51,7 @@ hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hi } hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId) { - HIP_INIT_API(canAccessPeer, deviceId, peerDeviceId); + HIP_INIT_API(hipDeviceCanAccessPeer, canAccessPeer, deviceId, peerDeviceId); amd::Device* device = nullptr; amd::Device* peer_device = nullptr; @@ -83,39 +83,39 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDevi } hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) { - HIP_INIT_API(peerDeviceId); + HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId); HIP_RETURN(hipSuccess); } hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) { - HIP_INIT_API(peerDeviceId, flags); + HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags); HIP_RETURN(hipSuccess); } hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes) { - 
HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes); + HIP_INIT_API(hipMemcpyPeer, dst, dstDevice, src, srcDevice, sizeBytes); HIP_RETURN(hipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice)); } hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream) { - HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream); + HIP_INIT_API(hipMemcpyPeerAsync, dst, dstDevice, src, srcDevice, sizeBytes, stream); HIP_RETURN(hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream)); } hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { - HIP_INIT_API(peerCtx, flags); + HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags); HIP_RETURN(hipSuccess); } hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { - HIP_INIT_API(peerCtx); + HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx); HIP_RETURN(hipSuccess); } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 940bed4d5d..b6ea5070e2 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -425,7 +425,7 @@ extern "C" hipError_t hipConfigureCall( size_t sharedMem, hipStream_t stream) { - HIP_INIT_API(gridDim, blockDim, sharedMem, stream); + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); @@ -438,7 +438,7 @@ extern "C" hipError_t __hipPushCallConfiguration( size_t sharedMem, hipStream_t stream) { - HIP_INIT_API(gridDim, blockDim, sharedMem, stream); + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); @@ -449,7 +449,7 @@ extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim, dim3 *blockDim, size_t *sharedMem, hipStream_t *stream) { - HIP_INIT_API(gridDim, blockDim, sharedMem, stream); + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); ihipExec_t exec; PlatformState::instance().popExec(exec); @@ -466,7 
+466,7 @@ extern "C" hipError_t hipSetupArgument( size_t size, size_t offset) { - HIP_INIT_API(arg, size, offset); + HIP_INIT_API(NONE, arg, size, offset); PlatformState::instance().setupArgument(arg, size, offset); @@ -475,7 +475,7 @@ extern "C" hipError_t hipSetupArgument( extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) { - HIP_INIT_API(hostFunction); + HIP_INIT_API(NONE, hostFunction); int deviceId = ihipGetDevice(); hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); @@ -506,7 +506,7 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, size_t sharedMemBytes, hipStream_t stream) { - HIP_INIT_API(hostFunction, gridDim, blockDim, args, sharedMemBytes, + HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); int deviceId = ihipGetDevice(); @@ -567,7 +567,7 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, int blockSize, size_t dynamicSMemSize) { - HIP_INIT_API(f, blockSize, dynamicSMemSize); + HIP_INIT_API(NONE, f, blockSize, dynamicSMemSize); int deviceId = ihipGetDevice(); hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); if (func == nullptr) { diff --git a/api/hip/hip_prof_api.h b/api/hip/hip_prof_api.h new file mode 100644 index 0000000000..32679f403a --- /dev/null +++ b/api/hip/hip_prof_api.h @@ -0,0 +1,255 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_SRC_HIP_PROF_API_H +#define HIP_SRC_HIP_PROF_API_H + +#include +#include +#include + +#include "hip/hcc_detail/hip_prof_str.h" + +template +class api_callbacks_table_templ { + public: + typedef std::recursive_mutex mutex_t; + + typedef Record record_t; + typedef Fun fun_t; + typedef Act act_t; + + // HIP API callbacks table + struct hip_cb_table_entry_t { + volatile std::atomic sync; + volatile std::atomic sem; + act_t act; + void* a_arg; + fun_t fun; + void* arg; + }; + + struct hip_cb_table_t { + hip_cb_table_entry_t arr[HIP_API_ID_NUMBER]; + }; + + api_callbacks_table_templ() { + memset(&callbacks_table_, 0, sizeof(callbacks_table_)); + } + + bool set_activity(uint32_t id, act_t fun, void* arg) { + std::lock_guard lock(mutex_); + bool ret = true; + if (id == HIP_API_ID_ANY) { + for (unsigned i = 0; i < HIP_API_ID_NUMBER; ++i) set_activity(i, fun, arg); + } else if (id < HIP_API_ID_NUMBER) { + cb_sync(id); + callbacks_table_.arr[id].act = fun; + callbacks_table_.arr[id].a_arg = arg; + enabled_ = true; + cb_release(id); + } else { + ret = false; + } + + return ret; + } + + bool set_callback(uint32_t id, fun_t fun, void* arg) { + std::lock_guard lock(mutex_); + bool ret = true; + if (id == HIP_API_ID_ANY) { + for (unsigned i = 0; i < HIP_API_ID_NUMBER; ++i) set_callback(i, fun, arg); + } else if (id < HIP_API_ID_NUMBER) { + cb_sync(id); + callbacks_table_.arr[id].fun = fun; + callbacks_table_.arr[id].arg = arg; + enabled_ = true; + cb_release(id); + } else { + ret = false; + } + + return ret; + } + + void set_enabled(const bool& enabled) { + enabled_ = enabled; + } + + inline hip_cb_table_entry_t& entry(const uint32_t& id) { + return callbacks_table_.arr[id]; + } + + inline void sem_sync(const uint32_t& id) { + sem_increment(id); + if (entry(id).sync.load() == true) sync_wait(id); + } + + inline void sem_release(const uint32_t& id) { + sem_decrement(id); + } + + inline bool is_enabled() const { + return enabled_; + } + + private: + inline void 
cb_sync(const uint32_t& id) { + entry(id).sync.store(true); + while (entry(id).sem.load() != 0) {} + } + + inline void cb_release(const uint32_t& id) { + entry(id).sync.store(false); + } + + inline void sem_increment(const uint32_t& id) { + const uint32_t prev = entry(id).sem.fetch_add(1); + if (prev == UINT32_MAX) { + std::cerr << "sem overflow id = " << id << std::endl << std::flush; + abort(); + } + } + + inline void sem_decrement(const uint32_t& id) { + const uint32_t prev = entry(id).sem.fetch_sub(1); + if (prev == 0) { + std::cerr << "sem corrupted id = " << id << std::endl << std::flush; + abort(); + } + } + + void sync_wait(const uint32_t& id) { + sem_decrement(id); + while (entry(id).sync.load() == true) {} + sem_increment(id); + } + + mutex_t mutex_; + hip_cb_table_t callbacks_table_; + bool enabled_; +}; + + +#if USE_PROF_API +#include "prof_protocol.h" + +static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API; +typedef activity_record_t hip_api_record_t; +typedef activity_rtapi_callback_t hip_api_callback_t; +typedef activity_sync_callback_t hip_act_callback_t; + +// HIP API callbacks spawner object macro +#define HIP_CB_SPAWNER_OBJECT(CB_ID) \ + api_callbacks_spawner_t __api_tracer(HIP_API_ID_##CB_ID); \ + { \ + hip_api_data_t* api_data = __api_tracer.get_api_data_ptr(); \ + if (api_data != NULL) { \ + hip_api_data_t& api_data_ref = *api_data; \ + INIT_CB_ARGS_DATA(CB_ID, api_data_ref); \ + __api_tracer.call(); \ + } \ + } + +typedef api_callbacks_table_templ api_callbacks_table_t; +extern api_callbacks_table_t callbacks_table; + +template +class api_callbacks_spawner_t { + public: + api_callbacks_spawner_t(const hip_api_id_t& cid) : + api_data_(NULL) + { + if (!is_enabled()) return; + + if (cid_ >= HIP_API_ID_NUMBER) { + fprintf(stderr, "HIP %s bad id %d\n", __FUNCTION__, cid_); + abort(); + } + callbacks_table.sem_sync(cid_); + + hip_act_callback_t act = entry(cid_).act; + if (act != NULL) api_data_ = (hip_api_data_t*) act(cid_, NULL, NULL, 
NULL); + } + + void call() { + hip_api_callback_t fun = entry(cid_).fun; + void* arg = entry(cid_).arg; + if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + } + + ~api_callbacks_spawner_t() { + if (api_data_ == NULL) return; + + hip_act_callback_t act = entry(cid_).act; + void* a_arg = entry(cid_).a_arg; + hip_api_callback_t fun = entry(cid_).fun; + void* arg = entry(cid_).arg; + if (act != NULL) act(cid_, NULL, api_data_, a_arg); + if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + + callbacks_table.sem_release(cid_); + } + + hip_api_data_t* get_api_data_ptr() { + return api_data_; + } + + bool is_enabled() const { + return callbacks_table.is_enabled(); + } + + private: + inline api_callbacks_table_t::hip_cb_table_entry_t& entry(const uint32_t& id) { + return callbacks_table.entry(id); + } + + hip_api_data_t* api_data_; +}; + +template <> +class api_callbacks_spawner_t { + public: + api_callbacks_spawner_t(const hip_api_id_t& cid) {} + void call() {} + hip_api_data_t* get_api_data_ptr() { return NULL; } + bool is_enabled() const { return false; } +}; + +#else + +#define HIP_CB_SPAWNER_OBJECT(x) do {} while(0) + +class api_callbacks_table_t { + public: + typedef void* act_t; + typedef void* fun_t; + bool set_activity(uint32_t id, act_t fun, void* arg) { return false; } + bool set_callback(uint32_t id, fun_t fun, void* arg) { return false; } +}; + +#endif + +#endif // HIP_SRC_HIP_PROF_API_H diff --git a/api/hip/hip_profile.cpp b/api/hip/hip_profile.cpp index 8b3d42cbbc..b8c8f28e94 100644 --- a/api/hip/hip_profile.cpp +++ b/api/hip/hip_profile.cpp @@ -25,7 +25,7 @@ THE SOFTWARE. 
#include "hip_internal.hpp" hipError_t hipProfilerStart() { - HIP_INIT_API(); + HIP_INIT_API(hipProfilerStart); assert(0 && "Unimplemented"); @@ -34,7 +34,7 @@ hipError_t hipProfilerStart() { hipError_t hipProfilerStop() { - HIP_INIT_API(); + HIP_INIT_API(hipProfilerStop); assert(0 && "Unimplemented"); diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index ab26b4fc2e..14439ebc40 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -120,19 +120,19 @@ static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd: } hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { - HIP_INIT_API(stream, flags); + HIP_INIT_API(hipStreamCreateWithFlags, stream, flags); HIP_RETURN(ihipStreamCreate(stream, flags, amd::CommandQueue::Priority::Normal)); } hipError_t hipStreamCreate(hipStream_t *stream) { - HIP_INIT_API(stream); + HIP_INIT_API(hipStreamCreate, stream); HIP_RETURN(ihipStreamCreate(stream, hipStreamDefault, amd::CommandQueue::Priority::Normal)); } hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { - HIP_INIT_API(stream, flags, priority); + HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority); if (priority > static_cast(amd::CommandQueue::Priority::High)) { priority = static_cast(amd::CommandQueue::Priority::High); @@ -144,7 +144,7 @@ hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, } hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { - HIP_INIT_API(leastPriority, greatestPriority); + HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority); if (leastPriority != nullptr) { *leastPriority = static_cast(amd::CommandQueue::Priority::Normal); @@ -157,7 +157,7 @@ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPrio } hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { - HIP_INIT_API(stream, flags); + 
HIP_INIT_API(hipStreamGetFlags, stream, flags); hip::Stream* hStream = reinterpret_cast(stream); @@ -171,7 +171,7 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { } hipError_t hipStreamSynchronize(hipStream_t stream) { - HIP_INIT_API(stream); + HIP_INIT_API(hipStreamSynchronize, stream); amd::HostQueue* hostQueue = hip::getQueue(stream); hostQueue->finish(); @@ -180,7 +180,7 @@ hipError_t hipStreamSynchronize(hipStream_t stream) { } hipError_t hipStreamDestroy(hipStream_t stream) { - HIP_INIT_API(stream); + HIP_INIT_API(hipStreamDestroy, stream); if (stream == nullptr) { HIP_RETURN(hipErrorInvalidResourceHandle); @@ -199,7 +199,7 @@ hipError_t hipStreamDestroy(hipStream_t stream) { } hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { - HIP_INIT_API(stream, event, flags); + HIP_INIT_API(hipStreamWaitEvent, stream, event, flags); amd::HostQueue* queue; @@ -219,7 +219,7 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int } hipError_t hipStreamQuery(hipStream_t stream) { - HIP_INIT_API(stream); + HIP_INIT_API(hipStreamQuery, stream); amd::HostQueue* hostQueue; if (stream == nullptr) { @@ -242,7 +242,7 @@ hipError_t hipStreamQuery(hipStream_t stream) { hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, unsigned int flags) { - HIP_INIT_API(stream, callback, userData, flags); + HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); amd::HostQueue* hostQueue = reinterpret_cast (stream)->asHostQueue(); diff --git a/api/hip/hip_surface.cpp b/api/hip/hip_surface.cpp index 2323209cae..42741add7a 100644 --- a/api/hip/hip_surface.cpp +++ b/api/hip/hip_surface.cpp @@ -47,7 +47,7 @@ using namespace hip; hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc) { - HIP_INIT_API(pSurfObject, pResDesc); + HIP_INIT_API(NONE, pSurfObject, pResDesc); hipSurface* pSurface = new 
hipSurface(pResDesc); assert(pSurface != nullptr); @@ -82,7 +82,7 @@ hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { - HIP_INIT_API(surfaceObject); + HIP_INIT_API(NONE, surfaceObject); amd::ScopedLock lock(surfaceLock); hipSurface* pSurface = surfaceHash[surfaceObject]; diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 5809a75200..28328932c3 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -215,7 +215,7 @@ hip::TextureObject* ihipCreateTextureObject(const hipResourceDesc& resDesc, amd: hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, const hipTextureDesc* pTexDesc, const hipResourceViewDesc* pResViewDesc) { - HIP_INIT_API(pTexObject, pResDesc, pTexDesc, pResViewDesc); + HIP_INIT_API(NONE, pTexObject, pResDesc, pTexDesc, pResViewDesc); amd::Device* device = hip::getCurrentContext()->devices()[0]; @@ -307,7 +307,7 @@ void ihipDestroyTextureObject(hip::TextureObject* texture) { } hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { - HIP_INIT_API(textureObject); + HIP_INIT_API(NONE, textureObject); hip::TextureObject* texture = reinterpret_cast(textureObject); @@ -318,7 +318,7 @@ hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, hipTextureObject_t textureObject) { - HIP_INIT_API(pResDesc, textureObject); + HIP_INIT_API(NONE, pResDesc, textureObject); hip::TextureObject* texture = reinterpret_cast(textureObject); @@ -331,7 +331,7 @@ hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, hipTextureObject_t textureObject) { - HIP_INIT_API(pResViewDesc, textureObject); + HIP_INIT_API(NONE, pResViewDesc, textureObject); assert(0 && "Unimplemented"); @@ -340,7 +340,7 @@ hipError_t 
hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipTextureObject_t textureObject) { - HIP_INIT_API(pTexDesc, textureObject); + HIP_INIT_API(NONE, pTexDesc, textureObject); assert(0 && "Unimplemented"); @@ -416,7 +416,7 @@ hipError_t ihipBindTexture(cl_mem_object_type type, hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, const hipChannelFormatDesc* desc, size_t size) { - HIP_INIT_API(offset, tex, devPtr, desc, size); + HIP_INIT_API(NONE, offset, tex, devPtr, desc, size); if (desc == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -432,14 +432,14 @@ hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* dev hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) { - HIP_INIT_API(offset, tex, devPtr, desc, width, height, pitch); + HIP_INIT_API(NONE, offset, tex, devPtr, desc, width, height, pitch); HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, desc, width, height, pitch)); } hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, const hipChannelFormatDesc* desc) { - HIP_INIT_API(tex, array, desc); + HIP_INIT_API(NONE, tex, array, desc); assert(0 && "Unimplemented"); @@ -449,7 +449,7 @@ hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, hipError_t ihipBindTextureImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, size_t* offset, const void* devPtr, const struct hipChannelFormatDesc* desc, size_t size, textureReference* tex) { - HIP_INIT_API(dim, readMode, offset, devPtr, size, tex); + HIP_INIT_API(NONE, dim, readMode, offset, devPtr, size, tex); assert(1 == dim); @@ -460,7 +460,7 @@ hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureRead hipArray_const_t array, const struct hipChannelFormatDesc& desc, 
textureReference* tex) { - HIP_INIT_API(dim, readMode, &desc, array, tex); + HIP_INIT_API(NONE, dim, readMode, &desc, array, tex); cl_mem_object_type clType; size_t offset = 0; @@ -483,7 +483,7 @@ hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureRead hipError_t hipBindTextureToMipmappedArray(textureReference* tex, hipMipmappedArray_const_t mipmappedArray, const hipChannelFormatDesc* desc) { - HIP_INIT_API(tex, mipmappedArray, desc); + HIP_INIT_API(NONE, tex, mipmappedArray, desc); assert(0 && "Unimplemented"); @@ -491,7 +491,7 @@ hipError_t hipBindTextureToMipmappedArray(textureReference* tex, } hipError_t hipUnbindTexture(const textureReference* tex) { - HIP_INIT_API(tex); + HIP_INIT_API(NONE, tex); ihipDestroyTextureObject(reinterpret_cast(tex->textureObject)); @@ -499,7 +499,7 @@ hipError_t hipUnbindTexture(const textureReference* tex) { } hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) { - HIP_INIT_API(desc, array); + HIP_INIT_API(NONE, desc, array); if (desc != nullptr) { *desc = array->desc; @@ -509,7 +509,7 @@ hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) } hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex) { - HIP_INIT_API(offset, tex); + HIP_INIT_API(NONE, offset, tex); if ((offset == nullptr) || (tex == nullptr)) { HIP_RETURN(hipErrorInvalidValue); @@ -521,7 +521,7 @@ hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* } hipError_t hipGetTextureReference(const textureReference** tex, const void* symbol) { - HIP_INIT_API(tex, symbol); + HIP_INIT_API(NONE, tex, symbol); assert(0 && "Unimplemented"); @@ -529,7 +529,7 @@ hipError_t hipGetTextureReference(const textureReference** tex, const void* symb } hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents) { - HIP_INIT_API(tex, fmt, NumPackedComponents); + HIP_INIT_API(NONE, tex, fmt, NumPackedComponents); if 
(tex == nullptr) { HIP_RETURN(hipErrorInvalidImage); @@ -542,7 +542,7 @@ hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int Nu } hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags) { - HIP_INIT_API(tex, flags); + HIP_INIT_API(NONE, tex, flags); if (tex == nullptr) { HIP_RETURN(hipErrorInvalidImage); @@ -554,7 +554,7 @@ hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags) { } hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm) { - HIP_INIT_API(tex, fm); + HIP_INIT_API(NONE, tex, fm); if (tex == nullptr) { HIP_RETURN(hipErrorInvalidImage); @@ -566,7 +566,7 @@ hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm } hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* am, textureReference tex, int dim) { - HIP_INIT_API(am, &tex, dim); + HIP_INIT_API(NONE, am, &tex, dim); if ((am == nullptr) || (dim >= 3)) { HIP_RETURN(hipErrorInvalidValue); @@ -578,7 +578,7 @@ hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* am, textureReference t } hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am) { - HIP_INIT_API(tex, dim, am); + HIP_INIT_API(NONE, tex, dim, am); if (tex == nullptr) { HIP_RETURN(hipErrorInvalidImage); @@ -590,7 +590,7 @@ hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAdd } hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex) { - HIP_INIT_API(array, &tex); + HIP_INIT_API(NONE, array, &tex); hip::TextureObject* texture = nullptr; @@ -613,7 +613,7 @@ hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex) { } hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags) { - HIP_INIT_API(tex, array, flags); + HIP_INIT_API(NONE, tex, array, flags); size_t offset = 0; @@ -626,7 +626,7 @@ hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsi } hipError_t 
hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex) { - HIP_INIT_API(dev_ptr, &tex); + HIP_INIT_API(NONE, dev_ptr, &tex); hip::TextureObject* texture = nullptr; device::Memory* dev_mem = nullptr; @@ -648,7 +648,7 @@ hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex) { hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr, size_t size) { - HIP_INIT_API(offset, tex, devPtr, size); + HIP_INIT_API(NONE, offset, tex, devPtr, size); if (tex == nullptr) { HIP_RETURN(hipErrorInvalidImage); @@ -664,7 +664,7 @@ hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDevicep hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, hipDeviceptr_t devPtr, size_t pitch) { - HIP_INIT_API(tex, desc, devPtr, pitch); + HIP_INIT_API(NONE, tex, desc, devPtr, pitch); if (desc == nullptr) { HIP_RETURN(hipErrorInvalidValue); diff --git a/api/hip/prof_protocol.h b/api/hip/prof_protocol.h new file mode 100644 index 0000000000..f058968ca3 --- /dev/null +++ b/api/hip/prof_protocol.h @@ -0,0 +1,90 @@ +/* +Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef INC_EXT_PROF_PROTOCOL_H_ +#define INC_EXT_PROF_PROTOCOL_H_ + +#include + +// Traced API domains +typedef enum { + ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain + ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain + ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain + ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain + ACTIVITY_DOMAIN_EXT_API = 4, // External ID domain + ACTIVITY_DOMAIN_ROCTX = 5, // ROCTX domain + ACTIVITY_DOMAIN_NUMBER +} activity_domain_t; + +// Extension API opcodes +typedef enum { + ACTIVITY_EXT_OP_MARK = 0, + ACTIVITY_EXT_OP_EXTERN_ID = 1 +} activity_ext_op_t; + +// API callback type +typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid, const void* data, void* arg); +typedef uint32_t activity_kind_t; +typedef uint32_t activity_op_t; + +// API callback phase +typedef enum { + ACTIVITY_API_PHASE_ENTER = 0, + ACTIVITY_API_PHASE_EXIT = 1 +} activity_api_phase_t; + +// Trace record types +// Correlation id +typedef uint64_t activity_correlation_id_t; + +// Activity record type +struct activity_record_t { + uint32_t domain; // activity domain id + activity_kind_t kind; // activity kind + activity_op_t op; // activity op + activity_correlation_id_t correlation_id; // activity ID + uint64_t begin_ns; // host begin timestamp + uint64_t end_ns; // host end timestamp + union { + struct { + int device_id; // device id + uint64_t queue_id; // queue id + }; + struct { + uint32_t process_id; // process id + uint32_t thread_id; // thread id + }; + struct { + activity_correlation_id_t external_id; // external correlation id + }; + }; + size_t bytes; // data size bytes +}; + +// Activity sync callback type +typedef void* (*activity_sync_callback_t)(uint32_t
cid, activity_record_t* record, const void* data, void* arg); +// Activity async callback type +typedef void (*activity_id_callback_t)(activity_correlation_id_t id); +typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg); + +#endif // INC_EXT_PROF_PROTOCOL_H_ From 3990498a1ea844f55c7a2b6f3758ddf15513d7cc Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 8 Oct 2019 12:46:05 -0400 Subject: [PATCH 223/282] P4 to Git Change 2009953 by eshcherb@evgeny-hip on 2019/10/08 12:36:36 SWDEV-197287 - HIP tracing layer instrumentation - Windows link issues. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_intercept.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_prof_api.h#2 edit --- api/hip/hip_intercept.cpp | 2 +- api/hip/hip_prof_api.h | 58 ++++++++++++++++++--------------------- 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/api/hip/hip_intercept.cpp b/api/hip/hip_intercept.cpp index b6aed14701..f4936c065d 100644 --- a/api/hip/hip_intercept.cpp +++ b/api/hip/hip_intercept.cpp @@ -27,7 +27,7 @@ THE SOFTWARE. api_callbacks_table_t callbacks_table; -extern std::string& FunctionName(const hipFunction_t f); +extern const std::string& FunctionName(const hipFunction_t f); const char* hipKernelNameRef(const hipFunction_t f) { return FunctionName(f).c_str(); } hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) { diff --git a/api/hip/hip_prof_api.h b/api/hip/hip_prof_api.h index 32679f403a..f0cc5881c4 100644 --- a/api/hip/hip_prof_api.h +++ b/api/hip/hip_prof_api.h @@ -27,16 +27,34 @@ THE SOFTWARE.
#include #include +#if USE_PROF_API #include "hip/hcc_detail/hip_prof_str.h" +#include "prof_protocol.h" -template -class api_callbacks_table_templ { +// HIP API callbacks spawner object macro +#define HIP_CB_SPAWNER_OBJECT(CB_ID) \ + api_callbacks_spawner_t __api_tracer; \ + { \ + hip_api_data_t* api_data = __api_tracer.get_api_data_ptr(); \ + if (api_data != NULL) { \ + hip_api_data_t& api_data_ref = *api_data; \ + INIT_CB_ARGS_DATA(CB_ID, api_data_ref); \ + __api_tracer.call(); \ + } \ + } + +static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API; +typedef activity_record_t hip_api_record_t; +typedef activity_rtapi_callback_t hip_api_callback_t; +typedef activity_sync_callback_t hip_act_callback_t; + +class api_callbacks_table_t { public: typedef std::recursive_mutex mutex_t; - typedef Record record_t; - typedef Fun fun_t; - typedef Act act_t; + typedef hip_api_record_t record_t; + typedef hip_api_callback_t fun_t; + typedef hip_act_callback_t act_t; // HIP API callbacks table struct hip_cb_table_entry_t { @@ -52,7 +70,7 @@ class api_callbacks_table_templ { hip_cb_table_entry_t arr[HIP_API_ID_NUMBER]; }; - api_callbacks_table_templ() { + api_callbacks_table_t() { memset(&callbacks_table_, 0, sizeof(callbacks_table_)); } @@ -150,36 +168,12 @@ class api_callbacks_table_templ { bool enabled_; }; - -#if USE_PROF_API -#include "prof_protocol.h" - -static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API; -typedef activity_record_t hip_api_record_t; -typedef activity_rtapi_callback_t hip_api_callback_t; -typedef activity_sync_callback_t hip_act_callback_t; - -// HIP API callbacks spawner object macro -#define HIP_CB_SPAWNER_OBJECT(CB_ID) \ - api_callbacks_spawner_t __api_tracer(HIP_API_ID_##CB_ID); \ - { \ - hip_api_data_t* api_data = __api_tracer.get_api_data_ptr(); \ - if (api_data != NULL) { \ - hip_api_data_t& api_data_ref = *api_data; \ - INIT_CB_ARGS_DATA(CB_ID, api_data_ref); \ - __api_tracer.call(); \ - } \ - } - -typedef api_callbacks_table_templ 
api_callbacks_table_t; extern api_callbacks_table_t callbacks_table; template class api_callbacks_spawner_t { public: - api_callbacks_spawner_t(const hip_api_id_t& cid) : + api_callbacks_spawner_t() : api_data_(NULL) { if (!is_enabled()) return; @@ -232,7 +226,7 @@ class api_callbacks_spawner_t { template <> class api_callbacks_spawner_t { public: - api_callbacks_spawner_t(const hip_api_id_t& cid) {} + api_callbacks_spawner_t() {} void call() {} hip_api_data_t* get_api_data_ptr() { return NULL; } bool is_enabled() const { return false; } From e455ddb5b373fe0aa0ce277a2618f6eae59f88d6 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 8 Oct 2019 14:55:27 -0400 Subject: [PATCH 224/282] P4 to Git Change 2010058 by kjayapra@0_HIPWS_LNX1_ROCM on 2019/10/08 14:44:53 SWDEV-144570 - Implementation of APIs hipMemsetD8Async, hipMemAllocPitch, hipMemAllocHost and template for hipMemsetD16, hipMemsetD16Async. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#30 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#29 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#75 edit --- api/hip/hip_hcc.def.in | 5 ++++ api/hip/hip_hcc.map.in | 5 ++++ api/hip/hip_memory.cpp | 52 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 70eaa66870..45aa1503cc 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -60,6 +60,7 @@ hipGetDeviceProperties hipGetErrorName hipGetErrorString hipGetLastError +hipMemAllocHost hipHostAlloc hipHostFree hipHostGetDevicePointer @@ -77,6 +78,7 @@ hipMalloc3DArray hipArrayCreate hipArray3DCreate hipMallocArray +hipMemAllocPitch hipMallocPitch hipMemcpy hipMemcpyParam2D @@ -105,9 +107,12 @@ hipGetSymbolSize hipMemGetInfo hipMemPtrGetInfo hipMemset +hipMemsetD16 hipMemsetD32 hipMemset2D hipMemsetAsync +hipMemsetD8Async +hipMemsetD16Async hipMemsetD32Async hipMemset2DAsync hipMemsetD8 diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 217164cafb..8535ab72f1 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -61,6 +61,7 @@ global: hipGetErrorName; hipGetErrorString; hipGetLastError; + hipMemAllocHost; hipHostAlloc; hipHostFree; hipHostGetDevicePointer; @@ -79,6 +80,7 @@ global: hipArray3DCreate; hipMallocArray; hipMallocPitch; + hipMemAllocPitch; hipMemcpy; hipMemcpyParam2D; hipMemcpy2D; @@ -106,9 +108,12 @@ global: hipMemGetInfo; hipMemPtrGetInfo; hipMemset; + hipMemsetD16; hipMemsetD32; hipMemset2D; hipMemsetAsync; + hipMemsetD8Async; + hipMemsetD16Async; hipMemsetD32Async; hipMemset2DAsync; hipMemsetD8; diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 5abfb7e365..174bd14e5b 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -269,6 +269,14 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st HIP_RETURN(ihipMemset(dst, value, sizeof(char), sizeBytes, stream, true)); } +hipError_t hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, 
size_t count, hipStream_t stream){ + HIP_INIT_API(hipMemsetD16Async, dst, value, count, stream); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorUnknown); +} + hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream) { HIP_INIT_API(hipMemsetD32Async, dst, value, count, stream); @@ -281,6 +289,14 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { HIP_RETURN(ihipMemset(dst, value, sizeof(char), sizeBytes, nullptr)); } +hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t count){ + HIP_INIT_API(hipMemsetD16, dst, value, count); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorUnknown); +} + hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { HIP_INIT_API(hipMemsetD32, dst, value, count); @@ -1341,16 +1357,14 @@ hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, HIP_RETURN(ihipMemset2D(dst, pitch, value, width, height, *queue, true)); } -hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) { - HIP_INIT_API(hipMemsetD8, dst, value, sizeBytes); +hipError_t ihipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes, hipStream_t stream) { if (dst == nullptr) { - HIP_RETURN(hipErrorInvalidValue); + return hipErrorInvalidValue; } - hip::syncStreams(); - amd::HostQueue* queue = hip::getNullStream(); size_t offset = 0; + amd::HostQueue* queue = hip::getQueue(stream); amd::Command::EventWaitList waitList; amd::Memory* memory = getMemoryObject(dst, offset); if (memory != nullptr) { @@ -1373,7 +1387,33 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes memset(dst, value, sizeBytes); } - HIP_RETURN(hipSuccess); + return hipSuccess; +} + +hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) { + HIP_INIT_API(hipMemsetD8, dst, value, sizeBytes); + + HIP_RETURN(ihipMemsetD8(dst, value, sizeBytes, nullptr)); +} + +hipError_t hipMemsetD8Async(hipDeviceptr_t dst, 
unsigned char value, size_t sizeBytes, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD8Async, dst, value, sizeBytes, stream); + + HIP_RETURN(ihipMemsetD8(dst, value, sizeBytes, stream)); +} + +hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, + size_t height, unsigned int elementSizeBytes) { + HIP_INIT_API(hipMemAllocPitch, dptr, pitch, widthInBytes, height, elementSizeBytes); + + HIP_RETURN(hipMallocPitch(dptr, pitch, widthInBytes, height)); +} + +hipError_t hipMemAllocHost(void** ptr, size_t size) { + HIP_INIT_API(hipMemAllocHost, ptr, size); + + HIP_RETURN(hipHostMalloc(ptr, size, 0)); } hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* dev_ptr) { From 980623d5e14dae466bbd0b3b4a5c0b55d9a42cad Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 10 Oct 2019 20:23:33 -0400 Subject: [PATCH 225/282] P4 to Git Change 2011990 by eshcherb@evgeny-hip on 2019/10/10 20:09:32 SWDEV-197287 - HIP tracing layer: any-api-id removing Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_prof_api.h#3 edit ... 
//depot/stg/opencl/drivers/opencl/hip_prof_gen.py#3 edit --- api/hip/hip_prof_api.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/api/hip/hip_prof_api.h b/api/hip/hip_prof_api.h index f0cc5881c4..bfea1a6859 100644 --- a/api/hip/hip_prof_api.h +++ b/api/hip/hip_prof_api.h @@ -77,9 +77,8 @@ class api_callbacks_table_t { bool set_activity(uint32_t id, act_t fun, void* arg) { std::lock_guard lock(mutex_); bool ret = true; - if (id == HIP_API_ID_ANY) { - for (unsigned i = 0; i < HIP_API_ID_NUMBER; ++i) set_activity(i, fun, arg); - } else if (id < HIP_API_ID_NUMBER) { + + if (id < HIP_API_ID_NUMBER) { cb_sync(id); callbacks_table_.arr[id].act = fun; callbacks_table_.arr[id].a_arg = arg; @@ -95,9 +94,8 @@ class api_callbacks_table_t { bool set_callback(uint32_t id, fun_t fun, void* arg) { std::lock_guard lock(mutex_); bool ret = true; - if (id == HIP_API_ID_ANY) { - for (unsigned i = 0; i < HIP_API_ID_NUMBER; ++i) set_callback(i, fun, arg); - } else if (id < HIP_API_ID_NUMBER) { + + if (id < HIP_API_ID_NUMBER) { cb_sync(id); callbacks_table_.arr[id].fun = fun; callbacks_table_.arr[id].arg = arg; From 001f07ae83c3d165199cadfd16e36bbc4a0b93c6 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 11 Oct 2019 14:43:24 -0400 Subject: [PATCH 226/282] P4 to Git Change 2012424 by axie@axie_win_opencl_ssd on 2019/10/11 14:32:12 SWDEV-206239 - [hipclang-vdi-rocm][perf]RCCL: finegrain VRAM does not work xGMI may not work. Otherwise, several GPUs and CPU can access the same host memory atomically by API hipExtMallocWithFlags Tests: http://ocltc:8111/viewModification.html?modId=127344&personal=true&init=1&tab=vcsModificationBuilds ReviewBoard: http://ocltc.amd.com/reviews/r/18109/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#76 edit --- api/hip/hip_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 174bd14e5b..750deef6e5 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -207,7 +207,7 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN(ihipMalloc(ptr, sizeBytes, (flags & hipDeviceMallocFinegrained)? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0)); + HIP_RETURN(ihipMalloc(ptr, sizeBytes, (flags & hipDeviceMallocFinegrained)? CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS: 0)); } hipError_t hipMalloc(void** ptr, size_t sizeBytes) { From 87dbe184aaed2555dd6386f84a65126599dd9cd7 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 15 Oct 2019 16:43:10 -0500 Subject: [PATCH 227/282] P4 to Git Change 2013937 by kjayapra@1_HIPWS_LNX1_PAL on 2019/10/15 17:40:44 SWDEV-203394 - Setting Array Channel Desc in hipArrayCreate. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#77 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#22 edit --- api/hip/hip_memory.cpp | 4 ++++ api/hip/hip_texture.cpp | 49 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 750deef6e5..f72531c70f 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -36,6 +36,8 @@ extern void getDrvChannelOrderAndType(const enum hipArray_Format Format, cl_channel_order* channelOrder, cl_channel_type* channelType); +extern void setDescFromChannelType(cl_channel_type channelType, hipChannelFormatDesc* desc); + amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { amd::Memory *memObj = amd::MemObjMap::FindMemObj(ptr); if (memObj != nullptr) { @@ -468,6 +470,8 @@ hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocat &channelOrder, &channelType); const cl_image_format image_format = { channelOrder, channelType }; + setDescFromChannelType(channelType, &(array[0]->desc)); + size_t pitch = 0; hipError_t status = ihipMallocPitch(ptr, &pitch, array[0]->width, array[0]->height, 1, CL_MEM_OBJECT_IMAGE2D, &image_format); diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 28328932c3..b7e6293b95 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -76,6 +76,51 @@ void getDrvChannelOrderAndType(const enum hipArray_Format Format, unsigned int N } } +void setDescFromChannelType(cl_channel_type channelType, hipChannelFormatDesc* desc) { + + memset(desc, 0x00, sizeof(hipChannelFormatDesc)); + + switch (channelType) { + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + desc->f = hipChannelFormatKindSigned; + break; + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + desc->f = hipChannelFormatKindUnsigned; + break; + case CL_HALF_FLOAT: + case CL_FLOAT: + desc->f = hipChannelFormatKindFloat; + break; + default: + desc->f = hipChannelFormatKindNone; + break; 
+ } + + switch (channelType) { + case CL_SIGNED_INT8: + case CL_UNSIGNED_INT8: + desc->x = 8; + break; + case CL_SIGNED_INT16: + case CL_UNSIGNED_INT16: + case CL_HALF_FLOAT: + desc->x = 16; + break; + case CL_SIGNED_INT32: + case CL_UNSIGNED_INT32: + case CL_FLOAT: + desc->x = 32; + break; + default: + desc->x = 0; + break; + } +} + void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureReadMode readMode, cl_channel_order* channelOrder, cl_channel_type* channelType) { if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w != 0) { @@ -621,8 +666,8 @@ hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsi HIP_RETURN(hipErrorInvalidImage); } - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, &offset, tex, array->data, &array->desc, array->width, - array->height, array->depth)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, array->data, &array->desc, array->width, + array->height, 0)); } hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex) { From 70a52b9cd713c0bf26771d25c702b28f35b4ef9f Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 16 Oct 2019 11:24:09 -0400 Subject: [PATCH 228/282] P4 to Git Change 2014404 by gandryey@gera-win10 on 2019/10/16 11:13:37 SWDEV-184710 - Support hipLaunchCooperativeKernelMultiDevice() - Add support for multi grid launch in hip - Detect the new hidden argument and pass the required information for the kernel launch - Memory for synchronization is allocated as a single object and then the offset for each GPU is found Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#44 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#343 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#25 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#82 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#136 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#90 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#30 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#99 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#97 edit --- api/hip/hip_module.cpp | 48 ++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index ad637d3967..20a5fbdef1 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -256,27 +256,27 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0, - uint32_t params = 0) -{ + uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0, + uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0) { HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params); hip::Function* function = hip::Function::asFunction(f); amd::Kernel* kernel = function->function_; - amd::Device* device = hip::getCurrentContext()->devices()[0]; amd::ScopedLock lock(function->lock_); hip::Event* eStart = reinterpret_cast(startEvent); hip::Event* eStop = reinterpret_cast(stopEvent); amd::HostQueue* queue = hip::getQueue(hStream); + const amd::Device& device = queue->vdev()->device(); if ((params & amd::NDRangeKernelCommand::CooperativeGroups) && - !device->info().cooperativeGroups_) { + !device.info().cooperativeGroups_) { return hipErrorLaunchFailure; } if ((params & amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups) && - 
!device->info().cooperativeMultiDeviceGroups_) { + !device.info().cooperativeMultiDeviceGroups_) { return hipErrorLaunchFailure; } if (!queue) { @@ -323,7 +323,8 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, } amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand( - *queue, waitList, *kernel, ndrange, sharedMemBytes, params); + *queue, waitList, *kernel, ndrange, sharedMemBytes, + params, gridId, numGrids, prevGridSum, allGridSum, firstDevice); if (!command) { return hipErrorOutOfMemory; } @@ -436,33 +437,54 @@ hipError_t hipLaunchCooperativeKernel(const void* f, hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags, uint32_t extFlags) { - int currentDevice = ihipGetDevice(); int numActiveGPUs = 0; ihipDeviceGetCount(&numActiveGPUs); if ((numDevices > numActiveGPUs) || (launchParamsList == nullptr)) { return hipErrorInvalidValue; } - + hipError_t result = hipErrorUnknown; + uint64_t allGridSize = 0; + for (int i = 0; i < numDevices; ++i) { + const hipLaunchParams& launch = launchParamsList[i]; + allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z; + } + uint64_t prevGridSize = 0; + uint32_t firstDevice = 0; for (int i = 0; i < numDevices; ++i) { - hipSetDevice(i); const hipLaunchParams& launch = launchParamsList[i]; amd::HostQueue* queue = reinterpret_cast(launch.stream)->asHostQueue(); - hipFunction_t func = PlatformState::instance().getFunc(launch.func, i); + hipFunction_t func = nullptr; + // The order of devices in the launch may not match the order in the global array + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + // Find the matching device and request the kernel function + if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) { + func = PlatformState::instance().getFunc(launch.func, dev); + // Save VDI index of the first device in the launch + if (i == 0) { + firstDevice = queue->vdev()->device().index(); + } + break; + } + } if (func 
== nullptr) { HIP_RETURN(result); } + result = ihipModuleLaunchKernel(func, launch.gridDim.x * launch.blockDim.x, launch.gridDim.y * launch.blockDim.y, launch.gridDim.z * launch.blockDim.z, launch.blockDim.x, launch.blockDim.y, launch.blockDim.z, - launch.sharedMem, launch.stream, - launch.args, nullptr, nullptr, nullptr, flags, extFlags); + launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr, + flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice); + if (result != hipSuccess) { + break; + } + prevGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z; } - hipSetDevice(currentDevice); return result; } From ef14b8b361a76e33a249c695d8c8136576452eed Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 16 Oct 2019 12:03:49 -0400 Subject: [PATCH 229/282] P4 to Git Change 2014458 by jujiang@JJ-HIP on 2019/10/16 11:52:39 SWDEV-205925 - Update HIP texture APIs for issue in hipTexRefSetAddress in HIP/PAL on Windows - Remove the nullptr possibility http://ocltc.amd.com/reviews/r/18121/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#23 edit --- api/hip/hip_texture.cpp | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index b7e6293b95..5d4e964686 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -394,21 +394,15 @@ hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipError_t ihipBindTexture(cl_mem_object_type type, size_t* offset, textureReference* tex, const void* devPtr, - const hipChannelFormatDesc* desc, size_t width, size_t height, + const hipChannelFormatDesc& desc, size_t width, size_t height, size_t pitch) { if (tex == nullptr) { return hipErrorInvalidImage; } if (hip::getCurrentContext()) { cl_image_format image_format; - - if (nullptr == desc) { - getDrvChannelOrderAndType(tex->format, tex->numChannels, + getChannelOrderAndType(desc, hipReadModeElementType, &image_format.image_channel_order, &image_format.image_channel_data_type); - } else { - getChannelOrderAndType(*desc, hipReadModeElementType, - &image_format.image_channel_order, &image_format.image_channel_data_type); - } const amd::Image::Format imageFormat(image_format); amd::Memory* memory = getMemoryObject(devPtr, *offset); amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), @@ -431,17 +425,13 @@ hipError_t ihipBindTexture(cl_mem_object_type type, case CL_MEM_OBJECT_IMAGE1D: resDesc.resType = hipResourceTypeLinear; resDesc.res.linear.devPtr = const_cast(devPtr); - if (nullptr != desc) { - resDesc.res.linear.desc = *desc; - } + resDesc.res.linear.desc = desc; resDesc.res.linear.sizeInBytes = image->getSize(); break; case CL_MEM_OBJECT_IMAGE2D: resDesc.resType = hipResourceTypePitch2D; resDesc.res.pitch2D.devPtr = const_cast(devPtr); - if (nullptr != desc) { - resDesc.res.pitch2D.desc = *desc; - } + resDesc.res.pitch2D.desc = desc; resDesc.res.pitch2D.width = width; resDesc.res.pitch2D.height = height; 
resDesc.res.pitch2D.pitchInBytes = pitch; @@ -471,7 +461,7 @@ hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* dev &image_format.image_channel_order, &image_format.image_channel_data_type); const amd::Image::Format imageFormat(image_format); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, size / imageFormat.getElementSize(), 1, size)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size / imageFormat.getElementSize(), 1, size)); } hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, @@ -479,7 +469,7 @@ hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* d size_t pitch) { HIP_INIT_API(NONE, offset, tex, devPtr, desc, width, height, pitch); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, desc, width, height, pitch)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, *desc, width, height, pitch)); } hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, @@ -498,7 +488,7 @@ hipError_t ihipBindTextureImpl(TlsData* tls, int dim, enum hipTextureReadMode re assert(1 == dim); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, size, 1, 0)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size, 1, 0)); } hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, @@ -521,7 +511,7 @@ hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureRead HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, &desc, array->width, + HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, desc, array->width, array->height, array->depth)); } @@ -666,7 +656,7 @@ hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsi HIP_RETURN(hipErrorInvalidImage); } - 
HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, array->data, &array->desc, array->width, + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, array->data, array->desc, array->width, array->height, 0)); } @@ -703,8 +693,8 @@ hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDevicep getDrvChannelOrderAndType(tex->format, tex->numChannels, &image_format.image_channel_order, &image_format.image_channel_data_type); const amd::Image::Format imageFormat(image_format); - - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, nullptr, size / imageFormat.getElementSize(), 1, size)); + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, channelDesc, size / imageFormat.getElementSize(), 1, size)); } hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, @@ -716,5 +706,6 @@ hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPT } size_t offset; - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, nullptr, desc->Width, desc->Height, pitch)); + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, channelDesc, desc->Width, desc->Height, pitch)); } From d43f0eedcc9249506ef7e4dc7c1febd1bca61346 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 16 Oct 2019 17:47:55 -0400 Subject: [PATCH 230/282] P4 to Git Change 2014744 by vsytchen@vsytchen-ocl-win10-2 on 2019/10/16 17:37:46 SWDEV-207449 - [HIP CQE][HIPonPAL][LNX][QR] 6 hiptests failed on all ASICs hipTestHalf fails to build on Windows due to linker error "unresolved external symbol __gnu_h2f_ieee" 1. Expose __gnu_h2f_ieee() and __gnu_f2h_ieee() for Windows builds. ReviewBoardURL = http://ocltc.amd.com/reviews/r/18127/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#31 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#45 edit --- api/hip/hip_hcc.def.in | 2 ++ api/hip/hip_platform.cpp | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 45aa1503cc..9df5287092 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -153,6 +153,8 @@ __hipRegisterFatBinary __hipRegisterFunction __hipRegisterVar __hipUnregisterFatBinary +__gnu_h2f_ieee +__gnu_f2h_ieee hipConfigureCall hipSetupArgument hipLaunchByPtr diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index b6ea5070e2..9773251bb0 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -869,6 +869,8 @@ void hipLaunchCooperativeKernelGGLImpl( } +#endif // defined(ATI_OS_LINUX) + // conversion routines between float and half precision static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; } static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; } @@ -908,5 +910,3 @@ extern "C" float __gnu_h2f_ieee(unsigned short h){ extern "C" unsigned short __gnu_f2h_ieee(float f){ return (unsigned short)__convert_float_to_half(f); } - -#endif // defined(ATI_OS_LINUX) From 238a71c4ca6d77fcaba37c1b13b17c1b844d2cfe Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 17 Oct 2019 14:09:46 -0400 Subject: [PATCH 231/282] P4 to Git Change 2015320 by vsytchen@vsytchen-hip-win10 on 2019/10/17 14:00:39 SWDEV-207100 - [HIP CQE][HIPonPAL][WIN][QR] 5 hiptests failed in 19H1 Windows on all ASICs 1. Reshuffle locations of the hipMemset functions to make them all next to each other. 2. Update the declarations of hipMemsetD8, hipMemsetD8Async, hipMemsetD16, hipMemsetD16Async. These functions are type aware and take in as their third argument the number of elements in the buffer, not the buffer size. Change the name of this argument from sizeBytes to count to align with the above description. 
Changes for the header are tracked here https://github.com/ROCm-Developer-Tools/HIP/pull/1544 3. Add the actual implementation of hipMemsetD8, hipMemsetD8Async, hipMemsetD16, hipMemsetD16Async. 4. Remove ihipMemset2D() as it is essentially a copy of ihipMemset(). Change hipMemset2D()/hipMemset2DAsync() to use ihipMemset(). 5. Implement hipMemset3DAsync(). 6. Update the test script to pick up the updated command line options for hipMemset and hipMemset3D. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#32 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#30 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#78 edit ... //depot/stg/opencl/drivers/opencl/make/hip.git/tests/scripts/hip_runtimeapi_tests.txt#13 edit --- api/hip/hip_hcc.def.in | 15 +-- api/hip/hip_hcc.map.in | 15 +-- api/hip/hip_memory.cpp | 216 ++++++++++++++--------------------------- 3 files changed, 89 insertions(+), 157 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 9df5287092..4d95549d3d 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -107,16 +107,17 @@ hipGetSymbolSize hipMemGetInfo hipMemPtrGetInfo hipMemset -hipMemsetD16 -hipMemsetD32 -hipMemset2D hipMemsetAsync -hipMemsetD8Async -hipMemsetD16Async -hipMemsetD32Async -hipMemset2DAsync hipMemsetD8 +hipMemsetD8Async +hipMemsetD16 +hipMemsetD16Async +hipMemsetD32 +hipMemsetD32Async +hipMemset2D +hipMemset2DAsync hipMemset3D +hipMemset3DAsync hipModuleGetFunction hipModuleGetGlobal hipModuleGetTexRef diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 8535ab72f1..4a28d40b2a 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -108,16 +108,17 @@ global: hipMemGetInfo; hipMemPtrGetInfo; hipMemset; - hipMemsetD16; - hipMemsetD32; - hipMemset2D; hipMemsetAsync; - hipMemsetD8Async; - hipMemsetD16Async; - hipMemsetD32Async; - hipMemset2DAsync; hipMemsetD8; + hipMemsetD8Async; + hipMemsetD16; + hipMemsetD16Async; 
+ hipMemsetD32; + hipMemsetD32Async; + hipMemset2D; + hipMemset2DAsync; hipMemset3D; + hipMemset3DAsync; hipModuleGetFunction; hipModuleGetGlobal; hipModuleGetTexRef; diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index f72531c70f..eeeacd0391 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -160,47 +160,6 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin return hipSuccess; } -hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, - hipStream_t stream, bool isAsync = false) { - if (sizeBytes == 0) { - // Skip if nothing needs filling. - return hipSuccess; - } - - if (dst == nullptr) { - return hipErrorInvalidValue; - } - - size_t offset = 0; - amd::HostQueue* queue = hip::getQueue(stream); - amd::Memory* memory = getMemoryObject(dst, offset); - - if (memory != nullptr) { - // Device memory - amd::Command::EventWaitList waitList; - amd::Coord3D fillOffset(offset, 0, 0); - amd::Coord3D fillSize(sizeBytes, 1, 1); - amd::FillMemoryCommand* command = - new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, valueSize, fillOffset, fillSize); - - if (command == nullptr) { - return hipErrorOutOfMemory; - } - - command->enqueue(); - if (!isAsync) { - command->awaitCompletion(); - } - command->release(); - } else { - // Host alloced memory - memset(dst, value, sizeBytes); - } - - return hipSuccess; -} - hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hipExtMallocWithFlags, ptr, sizeBytes, flags); @@ -265,46 +224,6 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue)); } -hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { - HIP_INIT_API(hipMemsetAsync, dst, value, sizeBytes, stream); - - HIP_RETURN(ihipMemset(dst, value, sizeof(char), sizeBytes, stream, true)); -} - 
-hipError_t hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, size_t count, hipStream_t stream){ - HIP_INIT_API(hipMemsetD16Async, dst, value, count, stream); - - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); -} - -hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream) { - HIP_INIT_API(hipMemsetD32Async, dst, value, count, stream); - - HIP_RETURN(ihipMemset(dst, value, sizeof(int), count * sizeof(int), stream, true)); -} - -hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { - HIP_INIT_API(hipMemset, dst, value, sizeBytes); - - HIP_RETURN(ihipMemset(dst, value, sizeof(char), sizeBytes, nullptr)); -} - -hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t count){ - HIP_INIT_API(hipMemsetD16, dst, value, count); - - assert(0 && "Unimplemented"); - - HIP_RETURN(hipErrorUnknown); -} - -hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { - HIP_INIT_API(hipMemsetD32, dst, value, count); - - HIP_RETURN(ihipMemset(dst, value, sizeof(int), count * sizeof(int), nullptr)); -} - hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { HIP_INIT_API(hipMemPtrGetInfo, ptr, size); @@ -441,15 +360,6 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { HIP_RETURN(status); } -hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { - HIP_INIT_API(hipMemset3D, pitchedDevPtr, value, &extent); - - void *dst = pitchedDevPtr.ptr; - size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; - - HIP_RETURN(ihipMemset(dst, value, sizeof(char), sizeBytes, nullptr)); -} - hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { HIP_INIT_API(hipArrayCreate, array, pAllocateArray); @@ -1304,16 +1214,19 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { HIP_RETURN(hipSuccess); } -hipError_t ihipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height, - 
amd::HostQueue& queue, bool isAsync = false) { +hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, + hipStream_t stream, bool isAsync = false) { + if (sizeBytes == 0) { + // Skip if nothing needs filling. + return hipSuccess; + } if (dst == nullptr) { return hipErrorInvalidValue; } - size_t sizeBytes = pitch * height; size_t offset = 0; - + amd::HostQueue* queue = hip::getQueue(stream); amd::Memory* memory = getMemoryObject(dst, offset); if (memory != nullptr) { @@ -1321,18 +1234,16 @@ hipError_t ihipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t amd::Command::EventWaitList waitList; amd::Coord3D fillOffset(offset, 0, 0); amd::Coord3D fillSize(sizeBytes, 1, 1); - - // TODO: Byte copies are inefficient. Combine multiple writes inside runtime amd::FillMemoryCommand* command = - new amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, sizeof(char), fillOffset, fillSize); + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, valueSize, fillOffset, fillSize); if (command == nullptr) { return hipErrorOutOfMemory; } command->enqueue(); - if(!isAsync) { + if (!isAsync) { command->awaitCompletion(); } command->release(); @@ -1344,67 +1255,86 @@ hipError_t ihipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t return hipSuccess; } +hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { + HIP_INIT_API(hipMemset, dst, value, sizeBytes); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, nullptr)); +} + +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(hipMemsetAsync, dst, value, sizeBytes, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, stream, true)); +} + +hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t count) { + HIP_INIT_API(hipMemsetD8, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, 
sizeof(int8_t), count * sizeof(int8_t), nullptr)); +} + +hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD8Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), count * sizeof(int8_t), stream, true)); +} + +hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t count) { + HIP_INIT_API(hipMemsetD16, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int16_t), count * sizeof(int16_t), nullptr)); +} + +hipError_t hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD16Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int16_t), count * sizeof(int16_t), stream, true)); +} + +hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { + HIP_INIT_API(hipMemsetD32, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int32_t), count * sizeof(int32_t), nullptr)); +} + +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD32Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int32_t), count * sizeof(int32_t), stream, true)); +} + hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { HIP_INIT_API(hipMemset2D, dst, pitch, value, width, height); - hip::syncStreams(); - amd::HostQueue* queue = hip::getNullStream(); - HIP_RETURN(ihipMemset2D(dst, pitch, value, width, height, *queue)); + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), pitch * height, nullptr)); } hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream) { HIP_INIT_API(hipMemset2DAsync, dst, pitch, value, width, height, stream); - amd::HostQueue* queue = hip::getQueue(stream); - - HIP_RETURN(ihipMemset2D(dst, pitch, value, width, height, *queue, true)); + 
HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), pitch * height, stream, true)); } -hipError_t ihipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes, hipStream_t stream) { +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { + HIP_INIT_API(hipMemset3D, pitchedDevPtr, value, &extent); - if (dst == nullptr) { - return hipErrorInvalidValue; - } + void *dst = pitchedDevPtr.ptr; + size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; - size_t offset = 0; - amd::HostQueue* queue = hip::getQueue(stream); - amd::Command::EventWaitList waitList; - amd::Memory* memory = getMemoryObject(dst, offset); - if (memory != nullptr) { - // Device memory - amd::Coord3D fillOffset(offset, 0, 0); - amd::Coord3D fillSize(sizeBytes, 1, 1); - amd::FillMemoryCommand* command = - new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, sizeof(char), fillOffset, fillSize); - - if (command == nullptr) { - HIP_RETURN(hipErrorOutOfMemory); - } - - command->enqueue(); - command->awaitCompletion(); - command->release(); - } else { - // Host alloced memory - memset(dst, value, sizeBytes); - } - - return hipSuccess; + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, nullptr)); } -hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) { - HIP_INIT_API(hipMemsetD8, dst, value, sizeBytes); +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream) { + HIP_INIT_API(hipMemset3DAsync, pitchedDevPtr, value, &extent, stream); - HIP_RETURN(ihipMemsetD8(dst, value, sizeBytes, nullptr)); -} + void *dst = pitchedDevPtr.ptr; + size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; -hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes, - hipStream_t stream) { - HIP_INIT_API(hipMemsetD8Async, dst, value, sizeBytes, stream); - - HIP_RETURN(ihipMemsetD8(dst, value, sizeBytes, 
stream)); + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, stream, true)); } hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, From 4ec9d181e053a282ac7a07d62c55280edcd0aa63 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 18 Oct 2019 14:10:57 -0400 Subject: [PATCH 232/282] P4 to Git Change 2016115 by cpaquot@cpaquot-ocl-lc-lnx on 2019/10/18 13:58:30 SWDEV-207366 - [HIP] 'hipErrorInvalidValue' (1011) with hipMemcpy3D We need to divide by sizeByte and not multiply the WidthInBytes to get pixel width Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#79 edit --- api/hip/hip_memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index eeeacd0391..a2ae036d29 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1134,8 +1134,8 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { } region[2] = p->Depth; region[1] = p->Height; - region[0] = p->WidthInBytes * byteSize; - srcOrigin[0] = p->srcXInBytes/byteSize; + region[0] = p->WidthInBytes / byteSize; + srcOrigin[0] = p->srcXInBytes / byteSize; srcOrigin[1] = p->srcY; srcOrigin[2] = p->srcZ; dstPitchInbytes = p->dstArray->width * byteSize; From 06c37696fc78e47b370ab3f8f0c0207e2ea2e995 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 22 Oct 2019 12:38:12 -0400 Subject: [PATCH 233/282] P4 to Git Change 2017443 by eshcherb@evgeny-hip on 2019/10/22 12:00:47 SWDEV-197287 - prof_protocol fix Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_prof_api.h#4 edit ... //depot/stg/opencl/drivers/opencl/api/hip/prof_protocol.h#2 delete ... 
//depot/stg/opencl/drivers/opencl/runtime/platform/prof_protocol.h#2 edit --- api/hip/hip_prof_api.h | 2 +- api/hip/prof_protocol.h | 90 ----------------------------------------- 2 files changed, 1 insertion(+), 91 deletions(-) delete mode 100644 api/hip/prof_protocol.h diff --git a/api/hip/hip_prof_api.h b/api/hip/hip_prof_api.h index bfea1a6859..1d405dc249 100644 --- a/api/hip/hip_prof_api.h +++ b/api/hip/hip_prof_api.h @@ -29,7 +29,7 @@ THE SOFTWARE. #if USE_PROF_API #include "hip/hcc_detail/hip_prof_str.h" -#include "prof_protocol.h" +#include "platform/prof_protocol.h" // HIP API callbacks spawner object macro #define HIP_CB_SPAWNER_OBJECT(CB_ID) \ diff --git a/api/hip/prof_protocol.h b/api/hip/prof_protocol.h deleted file mode 100644 index f058968ca3..0000000000 --- a/api/hip/prof_protocol.h +++ /dev/null @@ -1,90 +0,0 @@ -/* -Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef INC_EXT_PROF_PROTOCOL_H_ -#define INC_EXT_PROF_PROTOCOL_H_ - -#include - -// Traced API domains -typedef enum { - ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain - ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain - ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain - ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain - ACTIVITY_DOMAIN_EXT_API = 4, // External ID domain - ACTIVITY_DOMAIN_ROCTX = 5, // ROCTX domain - ACTIVITY_DOMAIN_NUMBER -} activity_domain_t; - -// Extension API opcodes -typedef enum { - ACTIVITY_EXT_OP_MARK = 0, - ACTIVITY_EXT_OP_EXTERN_ID = 1 -} activity_ext_op_t; - -// API calback type -typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid, const void* data, void* arg); -typedef uint32_t activity_kind_t; -typedef uint32_t activity_op_t; - -// API callback phase -typedef enum { - ACTIVITY_API_PHASE_ENTER = 0, - ACTIVITY_API_PHASE_EXIT = 1 -} activity_api_phase_t; - -// Trace record types -// Correlation id -typedef uint64_t activity_correlation_id_t; - -// Activity record type -struct activity_record_t { - uint32_t domain; // activity domain id - activity_kind_t kind; // activity kind - activity_op_t op; // activity op - activity_correlation_id_t correlation_id; // activity ID - uint64_t begin_ns; // host begin timestamp - uint64_t end_ns; // host end timestamp - union { - struct { - int device_id; // device id - uint64_t queue_id; // queue id - }; - struct { - uint32_t process_id; // device id - uint32_t thread_id; // thread id - }; - struct { - activity_correlation_id_t external_id; // external correlatino id - }; - }; - size_t bytes; // data size bytes -}; - -// Activity sync calback type -typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); -// Activity async calback type -typedef void (*activity_id_callback_t)(activity_correlation_id_t id); -typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg); - -#endif // 
INC_EXT_PROF_PROTOCOL_H_ From 04b0f820b0bed5f321cb0694aa45deed1353c70b Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 22 Oct 2019 17:06:07 -0400 Subject: [PATCH 234/282] P4 to Git Change 2017678 by jujiang@JJ-OCL-hip on 2019/10/22 17:01:21 SWDEV-208594 - Output mismatch with texture functions Revert CL#2014458 for now Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#24 edit --- api/hip/hip_texture.cpp | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 5d4e964686..b7e6293b95 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -394,15 +394,21 @@ hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipError_t ihipBindTexture(cl_mem_object_type type, size_t* offset, textureReference* tex, const void* devPtr, - const hipChannelFormatDesc& desc, size_t width, size_t height, + const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) { if (tex == nullptr) { return hipErrorInvalidImage; } if (hip::getCurrentContext()) { cl_image_format image_format; - getChannelOrderAndType(desc, hipReadModeElementType, + + if (nullptr == desc) { + getDrvChannelOrderAndType(tex->format, tex->numChannels, &image_format.image_channel_order, &image_format.image_channel_data_type); + } else { + getChannelOrderAndType(*desc, hipReadModeElementType, + &image_format.image_channel_order, &image_format.image_channel_data_type); + } const amd::Image::Format imageFormat(image_format); amd::Memory* memory = getMemoryObject(devPtr, *offset); amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), @@ -425,13 +431,17 @@ hipError_t ihipBindTexture(cl_mem_object_type type, case CL_MEM_OBJECT_IMAGE1D: resDesc.resType = hipResourceTypeLinear; resDesc.res.linear.devPtr = const_cast(devPtr); - resDesc.res.linear.desc = desc; + if (nullptr != desc) { + resDesc.res.linear.desc = *desc; + } 
resDesc.res.linear.sizeInBytes = image->getSize(); break; case CL_MEM_OBJECT_IMAGE2D: resDesc.resType = hipResourceTypePitch2D; resDesc.res.pitch2D.devPtr = const_cast(devPtr); - resDesc.res.pitch2D.desc = desc; + if (nullptr != desc) { + resDesc.res.pitch2D.desc = *desc; + } resDesc.res.pitch2D.width = width; resDesc.res.pitch2D.height = height; resDesc.res.pitch2D.pitchInBytes = pitch; @@ -461,7 +471,7 @@ hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* dev &image_format.image_channel_order, &image_format.image_channel_data_type); const amd::Image::Format imageFormat(image_format); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size / imageFormat.getElementSize(), 1, size)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, size / imageFormat.getElementSize(), 1, size)); } hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, @@ -469,7 +479,7 @@ hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* d size_t pitch) { HIP_INIT_API(NONE, offset, tex, devPtr, desc, width, height, pitch); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, *desc, width, height, pitch)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, desc, width, height, pitch)); } hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, @@ -488,7 +498,7 @@ hipError_t ihipBindTextureImpl(TlsData* tls, int dim, enum hipTextureReadMode re assert(1 == dim); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size, 1, 0)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, size, 1, 0)); } hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, @@ -511,7 +521,7 @@ hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureRead HIP_RETURN(hipErrorInvalidValue); } - 
HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, desc, array->width, + HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, &desc, array->width, array->height, array->depth)); } @@ -656,7 +666,7 @@ hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsi HIP_RETURN(hipErrorInvalidImage); } - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, array->data, array->desc, array->width, + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, array->data, &array->desc, array->width, array->height, 0)); } @@ -693,8 +703,8 @@ hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDevicep getDrvChannelOrderAndType(tex->format, tex->numChannels, &image_format.image_channel_order, &image_format.image_channel_data_type); const amd::Image::Format imageFormat(image_format); - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, channelDesc, size / imageFormat.getElementSize(), 1, size)); + + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, nullptr, size / imageFormat.getElementSize(), 1, size)); } hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, @@ -706,6 +716,5 @@ hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPT } size_t offset; - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, channelDesc, desc->Width, desc->Height, pitch)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, nullptr, desc->Width, desc->Height, pitch)); } From ff8a4e3b3ba242150c437fc8d54ba713b74a2716 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 23 Oct 2019 11:45:28 -0400 Subject: [PATCH 235/282] P4 to Git Change 2018213 by eshcherb@evgeny-hip on 2019/10/23 11:36:28 SWDEV-197287 - exporting interception functions Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#33 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#31 edit --- api/hip/hip_hcc.def.in | 6 ++++++ api/hip/hip_hcc.map.in | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 4d95549d3d..abe083ac23 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -160,6 +160,12 @@ hipConfigureCall hipSetupArgument hipLaunchByPtr hipLaunchKernel +hipRegisterApiCallback +hipRemoveApiCallback +hipRegisterActivityCallback +hipRemoveActivityCallback +hipApiName +hipKernelNameRef hipCreateTextureObject hipDestroyTextureObject hipGetTextureObjectResourceDesc diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 4a28d40b2a..1dd522e4e2 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -160,6 +160,12 @@ global: hipSetupArgument; hipLaunchByPtr; hipLaunchKernel; + hipRegisterApiCallback; + hipRemoveApiCallback; + hipRegisterActivityCallback; + hipRemoveActivityCallback; + hipApiName; + hipKernelNameRef; hipProfilerStart; hipProfilerStop; hiprtcCompileProgram; From 63a26884aa1ea38869f1aba630bef5cc24c7aa3e Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 24 Oct 2019 14:40:22 -0400 Subject: [PATCH 236/282] P4 to Git Change 2018917 by gandryey@gera-win10 on 2019/10/24 14:33:36 SWDEV-197289 - VDI tracing API integration in rocTracer - Change the names of the functions according to the new interface Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_activity.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#34 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#32 edit --- api/hip/hip_activity.cpp | 7 +++---- api/hip/hip_hcc.def.in | 6 +++--- api/hip/hip_hcc.map.in | 6 +++--- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/api/hip/hip_activity.cpp b/api/hip/hip_activity.cpp index 00a208270a..045abc7380 100644 --- a/api/hip/hip_activity.cpp +++ b/api/hip/hip_activity.cpp @@ -17,19 +17,18 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #include "platform/activity.hpp" -extern "C" void InitActivityCallback(void* id_callback, void* op_callback, void* arg) { +extern "C" void hipInitActivityCallback(void* id_callback, void* op_callback, void* arg) { activity_prof::CallbacksTable::init(reinterpret_cast(id_callback), reinterpret_cast(op_callback), arg); } -extern "C" bool EnableActivityCallback(unsigned op, bool enable) { +extern "C" bool hipEnableActivityCallback(unsigned op, bool enable) { return activity_prof::CallbacksTable::SetEnabled(op, enable); } -extern "C" const char* GetCmdName(unsigned op) { +extern "C" const char* hipGetCmdName(unsigned op) { return getOclCommandKindString(static_cast(op)); } diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index abe083ac23..a2eade8ae7 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -199,9 +199,9 @@ hipHccGetAccelerator hipHccGetAcceleratorView hipCreateSurfaceObject hipDestroySurfaceObject -InitActivityCallback -EnableActivityCallback -GetCmdName +hipInitActivityCallback +hipEnableActivityCallback +hipGetCmdName hiprtcAddNameExpression hiprtcCompileProgram hiprtcCreateProgram diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 1dd522e4e2..378ec5bf80 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -212,9 +212,9 @@ global: hipDestroySurfaceObject*; hipHccModuleLaunchKernel*; hipExtModuleLaunchKernel*; - InitActivityCallback*; - EnableActivityCallback*; - GetCmdName*; + hipInitActivityCallback*; + 
hipEnableActivityCallback*; + hipGetCmdName*; hiprtcAddNameExpression*; hiprtcCompileProgram*; hiprtcCreateProgram*; From a0f8995e3a6d43e9218b41b201c2a6af36853ebc Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 30 Oct 2019 01:19:24 -0400 Subject: [PATCH 237/282] P4 to Git Change 2021608 by michliao@hliao-dev-00-hip.rocm-workspace on 2019/10/30 01:13:58 SWDEV-145570 - [HIP] Fix occupancy API prototype. - They need to be C API, i.e. extern "C". - Follow the current API and use `uint32_t` instead of `int`. + TODO: We need to revert them back once that APIs are changed to be compatible with CUDA. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#46 edit --- api/hip/hip_platform.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 9773251bb0..777a710b11 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -615,21 +615,31 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, } } -hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, +extern "C" { +// FIXME: Need to replace `uint32_t` with `int` finally. +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(uint32_t* numBlocks, hipFunction_t f, - int blockSize, + uint32_t blockSize, size_t dynamicSMemSize) { - HIP_RETURN(hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f, blockSize, dynamicSMemSize)); + int NB; + hipError_t Ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(&NB, f, blockSize, dynamicSMemSize); + *numBlocks = NB; + HIP_RETURN(Ret); } -hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, +// FIXME: Need to replace `uint32_t` with `int` finally. 
+hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(uint32_t* numBlocks, hipFunction_t f, - int blockSize, + uint32_t blockSize, size_t dynamicSMemSize, unsigned int flags) { - HIP_RETURN(hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f, blockSize, dynamicSMemSize)); + int NB; + hipError_t Ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(&NB, f, blockSize, dynamicSMemSize); + *numBlocks = NB; + HIP_RETURN(Ret); +} } From e833ad571f2590b52ee73926dd0768f69e5a8b2e Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 30 Oct 2019 13:37:03 -0400 Subject: [PATCH 238/282] P4 to Git Change 2021977 by kjayapra@1_HIPWS_LNX1_PAL on 2019/10/30 13:27:19 SWDEV-209747 - Improve HIP Error codes and debug messages. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#25 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#20 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#18 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#80 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#45 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_peer.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#47 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_profile.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_surface.cpp#6 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#25 edit --- api/hip/hip_context.cpp | 14 +++++++------- api/hip/hip_device.cpp | 4 ++-- api/hip/hip_device_runtime.cpp | 10 +++++----- api/hip/hip_event.cpp | 2 +- api/hip/hip_memory.cpp | 20 ++++++++++---------- api/hip/hip_module.cpp | 23 ++++++++++++----------- api/hip/hip_peer.cpp | 6 +++--- api/hip/hip_platform.cpp | 14 +++++++------- api/hip/hip_profile.cpp | 4 ++-- api/hip/hip_surface.cpp | 2 +- api/hip/hip_texture.cpp | 20 ++++++++++---------- 11 files changed, 60 insertions(+), 59 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index ed741c4883..6ff36dc987 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -268,7 +268,7 @@ hipError_t hipCtxGetDevice(hipDevice_t* device) { HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidContext); } hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { @@ -276,7 +276,7 @@ hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) { @@ -284,7 +284,7 @@ hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) { assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) { @@ -292,7 +292,7 @@ hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) { assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { @@ -300,7 +300,7 @@ hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipCtxSynchronize(void) { @@ -308,7 +308,7 @@ hipError_t hipCtxSynchronize(void) { assert(0 && 
"Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipCtxGetFlags(unsigned int* flags) { @@ -316,7 +316,7 @@ hipError_t hipCtxGetFlags(unsigned int* flags) { assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) { diff --git a/api/hip/hip_device.cpp b/api/hip/hip_device.cpp index 39c02aa0c7..16d14b2b12 100644 --- a/api/hip/hip_device.cpp +++ b/api/hip/hip_device.cpp @@ -222,7 +222,7 @@ hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) { assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) { @@ -230,5 +230,5 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** a assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index b16be0980d..636ec612c1 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -405,7 +405,7 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { } hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { @@ -461,7 +461,7 @@ hipError_t hipGetDeviceCount ( int* count ) { } hipError_t hipGetDeviceFlags ( unsigned int* flags ) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) { @@ -469,7 +469,7 @@ hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t 
hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) { @@ -477,7 +477,7 @@ hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipSetDevice ( int device ) { @@ -512,7 +512,7 @@ hipError_t hipSetValidDevices ( int* device_arr, int len ) { assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount) { diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index be2c698f46..f283aa6c5d 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -110,7 +110,7 @@ hipError_t Event::streamWait(amd::HostQueue* hostQueue, uint flags) { } if (!event_->notifyCmdQueue()) { - return hipErrorUnknown; + return hipErrorLaunchOutOfResources; } amd::Command::EventWaitList eventWaitList; eventWaitList.push_back(event_); diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index a2ae036d29..6e07ddd30b 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -589,12 +589,12 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, &sym_size)) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ if ((offset + count) != sym_size) { - return HIP_RETURN(hipErrorUnknown); + return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; @@ -613,12 +613,12 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, &sym_size)) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ if ((offset + count) != sym_size) { - return HIP_RETURN(hipErrorUnknown); + return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; @@ -637,12 +637,12 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_ /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, &sym_size)) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ if ((offset + count) != sym_size) { - return HIP_RETURN(hipErrorUnknown); + return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; @@ -661,12 +661,12 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, &sym_size)) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ if ((offset + count) != sym_size) { - return HIP_RETURN(hipErrorUnknown); + return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; @@ -856,7 +856,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind); if (dst->data == nullptr) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidValue); } hip::syncStreams(); @@ -883,7 +883,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con } if ((wOffset + width > (dpitch)) || width > spitch) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidDevicePointer); } // Create buffer rectangle info structure diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 20a5fbdef1..77a2359614 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -89,13 +89,13 @@ hipError_t hipModuleUnload(hipModule_t hmod) HIP_INIT_API(hipModuleUnload, hmod); if (hmod == nullptr) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidValue); } amd::Program* program = as_amd(reinterpret_cast(hmod)); if(!ihipModuleUnregisterGlobal(hmod)) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidSymbol); } program->release(); @@ -183,21 +183,21 @@ hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) program->setVarInfoCallBack(&getSvarInfo); if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image))) { - return hipErrorUnknown; + return hipErrorInvalidKernelFile; } - *module = reinterpret_cast(as_cl(program)); + *module = reinterpret_cast(as_cl(program)); if (!ihipModuleRegisterGlobal(program, module)) { - return hipErrorUnknown; + return hipErrorSharedObjectSymbolNotFound; } if (!ihipModuleRegisterUndefined(program, module)) { - return hipErrorUnknown; + return hipErrorSharedObjectSymbolNotFound; } if(CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) { - return hipErrorUnknown; + return hipErrorSharedObjectInitFailed; } return hipSuccess; @@ -232,7 +232,7 
@@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), dptr, bytes)) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotFound); } HIP_RETURN(hipSuccess); @@ -243,7 +243,7 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) HIP_INIT_API(hipFuncGetAttributes, attr, func); if (!PlatformState::instance().getFuncAttr(func, attr)) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidDeviceFunction); } HIP_RETURN(hipSuccess); @@ -425,7 +425,7 @@ hipError_t hipLaunchCooperativeKernel(const void* f, int deviceId = ihipGetDevice(); hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); if (func == nullptr) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidDeviceFunction); } HIP_RETURN(ihipModuleLaunchKernel(func, gridDim.x * blockDim.x, gridDim.y * blockDim.y, gridDim.z * blockDim.z, @@ -469,6 +469,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL } } if (func == nullptr) { + result = hipErrorInvalidDeviceFunction; HIP_RETURN(result); } @@ -515,7 +516,7 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const /* Get address and size for the global symbol */ if (!PlatformState::instance().getTexRef(name, texRef)) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotFound); } HIP_RETURN(hipSuccess); diff --git a/api/hip/hip_peer.cpp b/api/hip/hip_peer.cpp index 95cf206e9c..65d92de363 100644 --- a/api/hip/hip_peer.cpp +++ b/api/hip/hip_peer.cpp @@ -29,7 +29,7 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx, @@ -38,7 +38,7 @@ hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* 
src, hipCtx_t s assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice, @@ -47,7 +47,7 @@ hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hi assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId) { diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 777a710b11..bd6cd55317 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -480,7 +480,7 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) int deviceId = ihipGetDevice(); hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); if (func == nullptr) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidDeviceFunction); } ihipExec_t exec; @@ -512,7 +512,7 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, int deviceId = ihipGetDevice(); hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); if (func == nullptr) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidDeviceFunction); } HIP_RETURN(hipModuleLaunchKernel(func, gridDim.x, gridDim.y, gridDim.z, @@ -524,7 +524,7 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { size_t size = 0; if(!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), devPtr, &size)) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidSymbol); } HIP_RETURN(hipSuccess); } @@ -532,7 +532,7 @@ hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbolName) { hipDeviceptr_t devPtr = nullptr; if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &devPtr, sizePtr)) { - 
HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidSymbol); } HIP_RETURN(hipSuccess); } @@ -549,11 +549,11 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); if (dev_program == nullptr) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidDeviceFunction); } /* Find the global Symbols */ if(!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidSymbol); } HIP_RETURN(hipSuccess); @@ -571,7 +571,7 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, int deviceId = ihipGetDevice(); hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); if (func == nullptr) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidDeviceFunction); } hip::Function* function = hip::Function::asFunction(func); diff --git a/api/hip/hip_profile.cpp b/api/hip/hip_profile.cpp index b8c8f28e94..7ff93445c4 100644 --- a/api/hip/hip_profile.cpp +++ b/api/hip/hip_profile.cpp @@ -29,7 +29,7 @@ hipError_t hipProfilerStart() { assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } @@ -38,5 +38,5 @@ hipError_t hipProfilerStop() { assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } \ No newline at end of file diff --git a/api/hip/hip_surface.cpp b/api/hip/hip_surface.cpp index 42741add7a..2a7b58d191 100644 --- a/api/hip/hip_surface.cpp +++ b/api/hip/hip_surface.cpp @@ -92,5 +92,5 @@ hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { HIP_RETURN(hipFree(reinterpret_cast(surfaceObject))); } - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidValue); } diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index b7e6293b95..2634d799c1 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -334,7 +334,7 @@ hipError_t 
hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou if (!image->create()) { delete image; - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorMemoryAllocation); } amd::Sampler* sampler = fillSamplerDescriptor(pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords); @@ -371,7 +371,7 @@ hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, memcpy(pResDesc, &(texture->resDesc), sizeof(hipResourceDesc)); } - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidValue); } hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, @@ -380,7 +380,7 @@ hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, @@ -389,7 +389,7 @@ hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t ihipBindTexture(cl_mem_object_type type, @@ -416,7 +416,7 @@ hipError_t ihipBindTexture(cl_mem_object_type type, if (!image->create()) { delete image; - return hipErrorUnknown; + return hipErrorMemoryAllocation; } *offset = 0; @@ -456,7 +456,7 @@ hipError_t ihipBindTexture(cl_mem_object_type type, return hipSuccess; } - return hipErrorUnknown; + return hipErrorInvalidValue; } hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, @@ -488,7 +488,7 @@ hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t ihipBindTextureImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, size_t* offset, @@ -532,7 +532,7 @@ hipError_t hipBindTextureToMipmappedArray(textureReference* tex, assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + 
HIP_RETURN(hipErrorNotSupported); } hipError_t hipUnbindTexture(const textureReference* tex) { @@ -570,7 +570,7 @@ hipError_t hipGetTextureReference(const textureReference** tex, const void* symb assert(0 && "Unimplemented"); - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorNotSupported); } hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents) { @@ -649,7 +649,7 @@ hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex) { } if (texture->resDesc.res.array.array == nullptr) { - HIP_RETURN(hipErrorUnknown); + HIP_RETURN(hipErrorInvalidValue); } **array = *(texture->resDesc.res.array.array); From 819b29f92dad67ef256a39bac2eea2d685b48120 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 30 Oct 2019 18:11:27 -0400 Subject: [PATCH 239/282] P4 to Git Change 2022168 by jujiang@JJ-OCL-hip on 2019/10/30 18:06:41 SWDEV-208594 - [HIP][Windows] Output mismatch with texture ref test functions with float4 type -Fix TexRef1D float/float4, and TexRef2D tests. http://ocltc.amd.com/reviews/r/18203/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#26 edit --- api/hip/hip_texture.cpp | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 2634d799c1..23c1566c09 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -402,13 +402,8 @@ hipError_t ihipBindTexture(cl_mem_object_type type, if (hip::getCurrentContext()) { cl_image_format image_format; - if (nullptr == desc) { - getDrvChannelOrderAndType(tex->format, tex->numChannels, - &image_format.image_channel_order, &image_format.image_channel_data_type); - } else { - getChannelOrderAndType(*desc, hipReadModeElementType, - &image_format.image_channel_order, &image_format.image_channel_data_type); - } + getChannelOrderAndType(*desc, hipReadModeElementType, + &image_format.image_channel_order, &image_format.image_channel_data_type); const amd::Image::Format imageFormat(image_format); amd::Memory* memory = getMemoryObject(devPtr, *offset); amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), @@ -431,17 +426,13 @@ hipError_t ihipBindTexture(cl_mem_object_type type, case CL_MEM_OBJECT_IMAGE1D: resDesc.resType = hipResourceTypeLinear; resDesc.res.linear.devPtr = const_cast(devPtr); - if (nullptr != desc) { - resDesc.res.linear.desc = *desc; - } + resDesc.res.linear.desc = *desc; resDesc.res.linear.sizeInBytes = image->getSize(); break; case CL_MEM_OBJECT_IMAGE2D: resDesc.resType = hipResourceTypePitch2D; resDesc.res.pitch2D.devPtr = const_cast(devPtr); - if (nullptr != desc) { - resDesc.res.pitch2D.desc = *desc; - } + resDesc.res.pitch2D.desc = *desc; resDesc.res.pitch2D.width = width; resDesc.res.pitch2D.height = height; resDesc.res.pitch2D.pitchInBytes = pitch; @@ -704,7 +695,7 @@ hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDevicep &image_format.image_channel_order, &image_format.image_channel_data_type); const amd::Image::Format 
imageFormat(image_format); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, nullptr, size / imageFormat.getElementSize(), 1, size)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, &tex->channelDesc, size / imageFormat.getElementSize(), 1, size)); } hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, @@ -716,5 +707,5 @@ hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPT } size_t offset; - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, nullptr, desc->Width, desc->Height, pitch)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, &tex->channelDesc, desc->Width, desc->Height, pitch)); } From 33038437b3eda6998d04ac7a4cf29ead945e3ac8 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 4 Nov 2019 10:13:20 -0500 Subject: [PATCH 240/282] P4 to Git Change 2024251 by kjayapra@0_HIPWS_LNX1_ROCM on 2019/11/04 10:07:42 SWDEV-206759 - Adding support for duplicate global vars Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#45 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#81 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#46 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#48 edit ... 
//depot/stg/opencl/drivers/opencl/make/hip.git/tests/build/Makefile.hip_tests#30 edit --- api/hip/hip_internal.hpp | 11 +++-- api/hip/hip_memory.cpp | 16 +++---- api/hip/hip_module.cpp | 16 ++++++- api/hip/hip_platform.cpp | 96 ++++++++++++++++++++++++++++------------ 4 files changed, 96 insertions(+), 43 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index ba4a300739..1ce259f708 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -155,7 +155,7 @@ public: }; private: std::unordered_map functions_; - std::unordered_map vars_; + std::unordered_multimap vars_; static PlatformState* platform_; @@ -168,16 +168,19 @@ public: std::vector< std::pair >* unregisterVar(hipModule_t hmod); + + PlatformState::DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod); void registerVar(const void* hostvar, const DeviceVar& var); void registerFunction(const void* hostFunction, const DeviceFunction& func); hipFunction_t getFunc(const void* hostFunction, int deviceId); bool getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr); - bool getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, - size_t* size_ptr); + bool getGlobalVar(const void* hostVar, int deviceId, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr); bool getTexRef(const char* hostVar, textureReference** texRef); - bool getShadowVarInfo(std::string var_name, void** var_addr, size_t* var_size); + bool getShadowVarInfo(std::string var_name, hipModule_t hmod, + void** var_addr, size_t* var_size); void setupArgument(const void *arg, size_t size, size_t offset); void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 6e07ddd30b..8d35a5aef7 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -587,8 +587,8 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou hipDeviceptr_t 
device_ptr = nullptr; /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, - &sym_size)) { + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { HIP_RETURN(hipErrorInvalidSymbol); } @@ -611,8 +611,8 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, hipDeviceptr_t device_ptr = nullptr; /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, - &sym_size)) { + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { HIP_RETURN(hipErrorInvalidSymbol); } @@ -635,8 +635,8 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_ hipDeviceptr_t device_ptr = nullptr; /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, - &sym_size)) { + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { HIP_RETURN(hipErrorInvalidSymbol); } @@ -659,8 +659,8 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co hipDeviceptr_t device_ptr = nullptr; /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, - &sym_size)) { + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + &device_ptr, &sym_size)) { HIP_RETURN(hipErrorInvalidSymbol); } diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 77a2359614..280e804570 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -183,13 +183,25 @@ hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) program->setVarInfoCallBack(&getSvarInfo); if (CL_SUCCESS != 
program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image))) { +>>>> ORIGINAL //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#44 + return hipErrorUnknown; +==== THEIRS //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#45 return hipErrorInvalidKernelFile; +==== YOURS //0_HIPWS_LNX1_ROCM/main/drivers/opencl/api/hip/hip_module.cpp + return hipErrorUnknown; +<<<< } *module = reinterpret_cast(as_cl(program)); if (!ihipModuleRegisterGlobal(program, module)) { +>>>> ORIGINAL //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#44 + return hipErrorUnknown; +==== THEIRS //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#45 return hipErrorSharedObjectSymbolNotFound; +==== YOURS //0_HIPWS_LNX1_ROCM/main/drivers/opencl/api/hip/hip_module.cpp + return hipErrorUnknown; +<<<< } if (!ihipModuleRegisterUndefined(program, module)) { @@ -230,8 +242,8 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name); /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), dptr, - bytes)) { + if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), hmod, + dptr, bytes)) { HIP_RETURN(hipErrorNotFound); } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index bd6cd55317..0902ce1a08 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -186,6 +186,42 @@ std::vector< std::pair >* PlatformState::unregisterVar(hipMod return rmodules; } +PlatformState::DeviceVar* PlatformState::findVar(std::string hostVar, int deviceId, hipModule_t hmod) { + DeviceVar* dvar = nullptr; + + if (hmod != nullptr) { + // If module is provided, then get the var only from that module + auto var_range = vars_.equal_range(hostVar); + for (auto it = var_range.first; it != var_range.second; ++it) { + if ((*it->second.modules)[deviceId].first == hmod) { + dvar = &(it->second); + break; + 
} + } + } else { + // If var count is < 2, return the var + if (vars_.count(hostVar) < 2) { + auto it = vars_.find(hostVar); + dvar = ((it == vars_.end()) ? nullptr : &(it->second)); + } else { + // If var count is > 2, return the original var, + // if original var count != 1, return vars_.end()/Invalid + size_t orig_global_count = 0; + auto var_range = vars_.equal_range(hostVar); + for (auto it = var_range.first; it != var_range.second; ++it) { + // when dyn_undef is set, it is a shadow var + if (it->second.dyn_undef == false) { + ++orig_global_count; + dvar = &(it->second); + } + } + dvar = ((orig_global_count == 1) ? dvar : nullptr); + } + } + + return dvar; +} + void PlatformState::registerVar(const void* hostvar, const DeviceVar& rvar) { amd::ScopedLock lock(lock_); @@ -216,12 +252,12 @@ bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFunc return true; } -bool PlatformState::getShadowVarInfo(std::string var_name, void** var_addr, size_t* var_size) { - const auto it = vars_.find(var_name); - if (it != vars_.cend()) { - DeviceVar& dvar = it->second; - *var_addr = dvar.shadowVptr; - *var_size = dvar.size; +bool PlatformState::getShadowVarInfo(std::string var_name, hipModule_t hmod, + void** var_addr, size_t* var_size) { + DeviceVar* dvar = findVar(var_name, ihipGetDevice(), hmod); + if (dvar != nullptr) { + *var_addr = dvar->shadowVptr; + *var_size = dvar->size; return true; } else { return false; @@ -230,7 +266,8 @@ bool PlatformState::getShadowVarInfo(std::string var_name, void** var_addr, size bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr, size_t* var_size) { - return PlatformState::instance().getShadowVarInfo(var_name, var_addr, var_size); + return PlatformState::instance().getShadowVarInfo(var_name, reinterpret_cast(program), + var_addr, var_size); } hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { @@ -294,52 +331,50 @@ bool PlatformState::getFuncAttr(const 
void* hostFunction, bool PlatformState::getTexRef(const char* hostVar, textureReference** texRef) { amd::ScopedLock lock(lock_); - const auto it = vars_.find(std::string(reinterpret_cast(hostVar))); - if (it == vars_.cend()) { + DeviceVar* dvar = findVar(std::string(hostVar), ihipGetDevice(), nullptr); + if (dvar == nullptr) { return false; } - DeviceVar& dvar = it->second; - if (!dvar.dyn_undef) { + if (!dvar->dyn_undef) { return false; } - *texRef = reinterpret_cast(dvar.shadowVptr); + *texRef = reinterpret_cast(dvar->shadowVptr); return true; } -bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, +bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, hipModule_t hmod, hipDeviceptr_t* dev_ptr, size_t* size_ptr) { amd::ScopedLock lock(lock_); - const auto it = vars_.find(std::string(reinterpret_cast(hostVar))); - if (it != vars_.cend()) { - DeviceVar& dvar = it->second; - if (dvar.rvars[deviceId].getdeviceptr() == nullptr) { + DeviceVar* dvar = findVar(std::string(reinterpret_cast(hostVar)), deviceId, hmod); + if (dvar != nullptr) { + if (dvar->rvars[deviceId].getdeviceptr() == nullptr) { size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; amd::Memory* amd_mem_obj = nullptr; - if (!(*dvar.modules)[deviceId].second) { - amd::Program* program = as_amd(reinterpret_cast((*dvar.modules)[deviceId].first)); + if (!(*dvar->modules)[deviceId].second) { + amd::Program* program = as_amd(reinterpret_cast((*dvar->modules)[deviceId].first)); program->setVarInfoCallBack(&getSvarInfo); if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { return false; } - (*dvar.modules)[deviceId].second = true; + (*dvar->modules)[deviceId].second = true; } - if((hipSuccess == ihipCreateGlobalVarObj(dvar.hostVar.c_str(), (*dvar.modules)[deviceId].first, + if((hipSuccess == ihipCreateGlobalVarObj(dvar->hostVar.c_str(), (*dvar->modules)[deviceId].first, &amd_mem_obj, &device_ptr, &sym_size)) && (device_ptr != nullptr)) { 
- dvar.rvars[deviceId].size_ = sym_size; - dvar.rvars[deviceId].devicePtr_ = device_ptr; - dvar.rvars[deviceId].amd_mem_obj_ = amd_mem_obj; + dvar->rvars[deviceId].size_ = sym_size; + dvar->rvars[deviceId].devicePtr_ = device_ptr; + dvar->rvars[deviceId].amd_mem_obj_ = amd_mem_obj; amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj); } else { LogError("[HIP] __hipRegisterVar cannot find kernel for device \n"); } } - *size_ptr = dvar.rvars[deviceId].getvarsize(); - *dev_ptr = dvar.rvars[deviceId].getdeviceptr(); + *size_ptr = dvar->rvars[deviceId].getvarsize(); + *dev_ptr = dvar->rvars[deviceId].getdeviceptr(); return true; } else { return false; @@ -523,7 +558,8 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { size_t size = 0; - if(!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), devPtr, &size)) { + if(!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + devPtr, &size)) { HIP_RETURN(hipErrorInvalidSymbol); } HIP_RETURN(hipSuccess); @@ -531,13 +567,15 @@ hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbolName) { hipDeviceptr_t devPtr = nullptr; - if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &devPtr, sizePtr)) { + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr, + &devPtr, sizePtr)) { HIP_RETURN(hipErrorInvalidSymbol); } HIP_RETURN(hipSuccess); } -hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, hipDeviceptr_t* dptr, size_t* bytes) +hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, + hipDeviceptr_t* dptr, size_t* bytes) { HIP_INIT(); From 60e81906f59536e807eb52891ecc40ddb56f5935 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 4 Nov 2019 11:53:27 -0500 Subject: [PATCH 241/282] P4 to Git Change 
2024314 by kjayapra@0_HIPWS_LNX1_ROCM on 2019/11/04 11:44:45 SWDEV-206759 - Fixing Compilation error. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#47 edit --- api/hip/hip_module.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 280e804570..5fa4c1f457 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -183,25 +183,13 @@ hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) program->setVarInfoCallBack(&getSvarInfo); if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image))) { ->>>> ORIGINAL //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#44 - return hipErrorUnknown; -==== THEIRS //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#45 return hipErrorInvalidKernelFile; -==== YOURS //0_HIPWS_LNX1_ROCM/main/drivers/opencl/api/hip/hip_module.cpp - return hipErrorUnknown; -<<<< } *module = reinterpret_cast(as_cl(program)); if (!ihipModuleRegisterGlobal(program, module)) { ->>>> ORIGINAL //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#44 - return hipErrorUnknown; -==== THEIRS //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#45 return hipErrorSharedObjectSymbolNotFound; -==== YOURS //0_HIPWS_LNX1_ROCM/main/drivers/opencl/api/hip/hip_module.cpp - return hipErrorUnknown; -<<<< } if (!ihipModuleRegisterUndefined(program, module)) { From 007687bf53a746d6b6eee9c8e166821965a26779 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 4 Nov 2019 14:44:59 -0500 Subject: [PATCH 242/282] P4 to Git Change 2024454 by axie@axie-hip-rocm on 2019/11/04 14:38:31 SWDEV-198863 - Options for hip-clang-vdi path to provide the chicken bits, or functional equivalents to HCC_DB (phase 1) 1. The log macros is turned off for release build. So log functions has zero impact to release build. 2. The log macros have level, mask, condition control. 
So we can have more control to avoid log flooding. I also adjusted some existing logs to use the new log functions. 1. To exercise and test the new log functions. 2. To improve performance slightly. 3. The change is mainly for HIP-ROCM, we can move more in next phases for PAL or ORCA. 4. I made these log features unavailable for release build. We can revert to old log functions for release build on a case-by-case basis. Tests: 1. http://ocltc.amd.com:8111/viewModification.html?modId=128289&personal=true&tab=vcsModificationBuilds http://ocltc.amd.com:8111/viewModification.html?modId=128358&personal=true&tab=vcsModificationBuilds 2. release build, run hip program, there is no log 3. fastdebug build, run hip program, export LOG_LEVEL=3 export GPU_LOG_MASK=4294967295 There were a lot of logs. 4. fastdebug build, run hip program, export LOG_LEVEL=2 export GPU_LOG_MASK=4294967295 There were no logs. 5. fastdebug build, run hip program, export LOG_LEVEL=3 export GPU_LOG_MASK=4294967294 There were far fewer logs. 6. fastdebug build, run hip program, export LOG_LEVEL=3 export GPU_LOG_MASK=47102 There were even fewer logs. The logs were as expected according to the mask. 7. Tested steps 2 to 6 similarly in Windows and Linux ReviewBoard: http://ocltc.amd.com/reviews/r/18215 Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#46 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#82 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#26 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hiprtc_internal.hpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/comgrctx.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#68 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#137 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#91 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#100 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.cpp#40 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#323 edit --- api/hip/hip_internal.hpp | 4 ++-- api/hip/hip_memory.cpp | 2 +- api/hip/hip_stream.cpp | 2 +- api/hip/hiprtc_internal.hpp | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 1ce259f708..cfe5cca7d1 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -53,7 +53,7 @@ typedef struct ihipIpcMemHandle_st { // This macro should be called at the beginning of every HIP API. #define HIP_INIT_API(cid, ...) \ - LogPrintfInfo("[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ amd::Thread* thread = amd::Thread::current(); \ if (!CL_CHECK_THREAD(thread)) { \ HIP_RETURN(hipErrorOutOfMemory); \ @@ -63,7 +63,7 @@ typedef struct ihipIpcMemHandle_st { #define HIP_RETURN(ret) \ hip::g_lastError = ret; \ - LogPrintfInfo("[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, hipGetErrorName(hip::g_lastError)); \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, hipGetErrorName(hip::g_lastError)); \ return hip::g_lastError; namespace hc { diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 8d35a5aef7..69275bbb67 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -75,7 +75,7 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) if (*ptr == nullptr) { return 
hipErrorOutOfMemory; } - LogPrintfInfo("ihipMalloc ptr=0x%zx", *ptr); + ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipMalloc ptr=0x%zx", *ptr); return hipSuccess; } diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index 14439ebc40..c500c663f0 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -114,7 +114,7 @@ static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd: *stream = reinterpret_cast(hStream); - LogPrintfInfo("ihipStreamCreate: %zx", hStream); + ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipStreamCreate: %zx", hStream); return hipSuccess; } diff --git a/api/hip/hiprtc_internal.hpp b/api/hip/hiprtc_internal.hpp index dc3371615c..e97ac9eb09 100644 --- a/api/hip/hiprtc_internal.hpp +++ b/api/hip/hiprtc_internal.hpp @@ -27,7 +27,7 @@ THE SOFTWARE. // This macro should be called at the beginning of every HIP RTC API. #define HIPRTC_INIT_API(...) \ - LogPrintfInfo("[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ amd::Thread* thread = amd::Thread::current(); \ if (!CL_CHECK_THREAD(thread)) { \ HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \ @@ -36,8 +36,8 @@ THE SOFTWARE. 
#define HIPRTC_RETURN(ret) \ hiprtc::g_lastRtcError = ret; \ - LogPrintfInfo("[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, \ - hiprtcGetErrorString(hiprtc::g_lastRtcError)); \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, \ + hiprtcGetErrorString(hiprtc::g_lastRtcError)); \ return hiprtc::g_lastRtcError; From 571b8d625df4ffc4380d4598faab1622aeda8f2d Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 4 Nov 2019 16:39:35 -0500 Subject: [PATCH 243/282] P4 to Git Change 2024543 by cpaquot@cpaquot-ocl-lc-lnx on 2019/11/04 16:32:23 SWDEV-206239 - [HIP] Return hipErrorMemoryAllocation for fine grained VRAM for now Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#83 edit --- api/hip/hip_memory.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 69275bbb67..6eed6e2c23 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -168,6 +168,11 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag HIP_RETURN(hipErrorInvalidValue); } + // XXX Till we have fine grained VRAM support... + if ((flags & hipDeviceMallocFinegrained) != 0) { + HIP_RETURN(hipErrorMemoryAllocation); + } + HIP_RETURN(ihipMalloc(ptr, sizeBytes, (flags & hipDeviceMallocFinegrained)? CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS: 0)); } From c536429a6b2e08d86c39f55317548762f713ad9e Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 4 Nov 2019 16:42:31 -0500 Subject: [PATCH 244/282] P4 to Git Change 2024544 by cpaquot@cpaquot-ocl-lc-lnx on 2019/11/04 16:35:14 SWDEV-145570 - [HIP] Fetched properties from current device and not default 0 one. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#21 edit --- api/hip/hip_device_runtime.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index 636ec612c1..e0bed1742c 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -339,7 +339,7 @@ hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { } if(limit == hipLimitMallocHeapSize) { hipDeviceProp_t prop; - hipGetDeviceProperties(&prop, 0); + hipGetDeviceProperties(&prop, ihipGetDevice()); *pValue = prop.totalGlobalMem; HIP_RETURN(hipSuccess); From 649cb369770636f12262924741ab36b46132aaeb Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 6 Nov 2019 18:00:44 -0500 Subject: [PATCH 245/282] P4 to Git Change 2026152 by cpaquot@cpaquot-ocl-lc-lnx on 2019/11/06 17:50:08 SWDEV-206239 - [HIP] RCCL: finegrain VRAM does not work Implemented fine grained VRAM allocation via ATOMICS. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#84 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#138 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#43 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#45 edit --- api/hip/hip_memory.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 6eed6e2c23..a9dcd5b55c 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -168,12 +168,7 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag HIP_RETURN(hipErrorInvalidValue); } - // XXX Till we have fine grained VRAM support... - if ((flags & hipDeviceMallocFinegrained) != 0) { - HIP_RETURN(hipErrorMemoryAllocation); - } - - HIP_RETURN(ihipMalloc(ptr, sizeBytes, (flags & hipDeviceMallocFinegrained)? 
CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS: 0)); + HIP_RETURN(ihipMalloc(ptr, sizeBytes, (flags & hipDeviceMallocFinegrained)? CL_MEM_SVM_ATOMICS: 0)); } hipError_t hipMalloc(void** ptr, size_t sizeBytes) { From 692477f9648c648af6cf6e5b235907c64ff6a172 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 6 Nov 2019 19:47:10 -0500 Subject: [PATCH 246/282] P4 to Git Change 2026174 by eshcherb@evgeny-hip on 2019/11/06 19:37:26 SWDEV-197289 - tracing callbacks call fix Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_prof_api.h#5 edit --- api/hip/hip_prof_api.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/api/hip/hip_prof_api.h b/api/hip/hip_prof_api.h index 1d405dc249..9d6db95ba5 100644 --- a/api/hip/hip_prof_api.h +++ b/api/hip/hip_prof_api.h @@ -189,18 +189,23 @@ class api_callbacks_spawner_t { void call() { hip_api_callback_t fun = entry(cid_).fun; void* arg = entry(cid_).arg; - if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + if (fun != NULL) { + fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + api_data_->phase = ACTIVITY_API_PHASE_EXIT; + } } ~api_callbacks_spawner_t() { - if (api_data_ == NULL) return; + if (!is_enabled()) return; - hip_act_callback_t act = entry(cid_).act; - void* a_arg = entry(cid_).a_arg; - hip_api_callback_t fun = entry(cid_).fun; - void* arg = entry(cid_).arg; - if (act != NULL) act(cid_, NULL, api_data_, a_arg); - if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + if (api_data_ != NULL) { + hip_api_callback_t fun = entry(cid_).fun; + void* arg = entry(cid_).arg; + hip_act_callback_t act = entry(cid_).act; + void* a_arg = entry(cid_).a_arg; + if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + if (act != NULL) act(cid_, NULL, NULL, a_arg); + } callbacks_table.sem_release(cid_); } From ceebda15efbdebce32c4ed2781e2e258aa3cfb6e Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 12 Nov 2019 12:55:17 -0500 Subject: [PATCH 247/282] P4 to Git Change 
2029039 by kjayapra@0_HIPWS_LNX1_ROCM on 2019/11/12 12:52:46 SWDEV-210844 - Implementing hipExtGetLinkTypeAndHopCount Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#344 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#47 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#141 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#44 edit --- api/hip/hip_device_runtime.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index e0bed1742c..d3d0b10b87 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -518,17 +518,23 @@ hipError_t hipSetValidDevices ( int* device_arr, int len ) { hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount) { HIP_INIT_API(hipExtGetLinkTypeAndHopCount, device1, device2, linktype, hopcount); + amd::Device* amd_dev_obj1 = nullptr; + amd::Device* amd_dev_obj2 = nullptr; const int numDevices = static_cast(g_devices.size()); if ((device1 < 0) || (device1 >= numDevices) || (device2 < 0) || (device2 >= numDevices)) { HIP_RETURN(hipErrorInvalidDevice); } - if (linktype != nullptr) { - *linktype = 0; + if ((linktype == nullptr) || (hopcount == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); } - if (hopcount != nullptr) { - *hopcount = 1; + + amd_dev_obj1 = g_devices[device1]->devices()[0]; + amd_dev_obj2 = g_devices[device2]->devices()[0]; + + if (!amd_dev_obj1->findLinkTypeAndHopCount(amd_dev_obj2, linktype, hopcount)) { + HIP_RETURN(hipErrorInvalidResourceHandle); } HIP_RETURN(hipSuccess); From c4e4e370a9941d5139996f62dc708d2ae01cbcc4 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 13 Nov 2019 12:34:13 -0500 Subject: [PATCH 248/282] P4 to Git Change 2029899 by kjayapra@0_HIPWS_LNX1_ROCM 
on 2019/11/13 12:29:19 SWDEV-144570 - Implementing ihipUnbindTextureImpl API. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#35 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#33 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#27 edit --- api/hip/hip_hcc.def.in | 1 + api/hip/hip_hcc.map.in | 1 + api/hip/hip_texture.cpp | 7 +++++++ 3 files changed, 9 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index a2eade8ae7..51f0bcefe1 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -176,6 +176,7 @@ hipBindTexture2D hipBindTextureToArray hipBindTextureToMipmappedArray hipUnbindTexture +ihipUnbindTextureImpl hipGetChannelDesc hipGetTextureAlignmentOffset hipGetTextureReference diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 378ec5bf80..e9cb1498e9 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -189,6 +189,7 @@ global: hipBindTextureToArray*; hipBindTextureToMipmappedArray*; hipUnbindTexture*; + ihipUnbindTextureImpl*; hipGetChannelDesc*; hipGetTextureAlignmentOffset*; hipGetTextureReference*; diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 23c1566c09..d0dac5d910 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -526,6 +526,13 @@ hipError_t hipBindTextureToMipmappedArray(textureReference* tex, HIP_RETURN(hipErrorNotSupported); } +hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) { + + ihipDestroyTextureObject(reinterpret_cast(textureObject)); + + return hipSuccess; +} + hipError_t hipUnbindTexture(const textureReference* tex) { HIP_INIT_API(NONE, tex); From 9529795fab8778b8e9bfbbe8704a991dea039c44 Mon Sep 17 00:00:00 2001 From: foreman Date: Sat, 16 Nov 2019 17:25:59 -0500 Subject: [PATCH 249/282] P4 to Git Change 2031768 by cpaquot@cpaquot-ocl-lc-lnx on 2019/11/16 17:21:58 SWDEV-212440 - [HIP] Memory access fault observed on Pytorch while running performance 
tests with Microbenchmarking script We need to loop through all the default stream to sync them in case the app call hipFree on a different current stream and another current stream is using the memory. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#85 edit --- api/hip/hip_memory.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index a9dcd5b55c..a249397605 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -209,7 +209,9 @@ hipError_t hipFree(void* ptr) { } if (amd::SvmBuffer::malloced(ptr)) { hip::syncStreams(); - hip::getNullStream()->finish(); + for (size_t i=0; ifinish(); + } amd::SvmBuffer::free(*hip::getCurrentContext(), ptr); HIP_RETURN(hipSuccess); } From 02f8c57eb5c803e585c208581468a7ff35591d5a Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 19 Nov 2019 13:34:00 -0500 Subject: [PATCH 250/282] P4 to Git Change 2033170 by cpaquot@cpaquot-ocl-lc-lnx on 2019/11/19 13:26:16 SWDEV-212215 - HIP-VDI runtime should not lazy load device code objects Temporarly disable lazy load of code objects for debugger. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#49 edit --- api/hip/hip_platform.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 0902ce1a08..1915576aa4 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -416,6 +416,9 @@ extern "C" void __hipRegisterFunction( HIP_INIT(); PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{ g_devices.size() }}; PlatformState::instance().registerFunction(hostFunction, func); + for (size_t i = 0; i < g_devices.size(); ++i) { + PlatformState::instance().getFunc(hostFunction, i); + } } // Registers a device-side global variable. 
From 3705a045f03797358f0a0af1e6a4e75a303f0a02 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 20 Nov 2019 07:24:20 -0500 Subject: [PATCH 251/282] P4 to Git Change 2033779 by kjayapra@0_HIPWS_LNX1_ROCM on 2019/11/20 07:17:56 SWDEV-144570 - Update function global during hipModuleLoad. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#47 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#48 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#50 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#69 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#35 edit --- api/hip/hip_internal.hpp | 5 +++-- api/hip/hip_module.cpp | 30 ++++++++++++++++++++++++++++-- api/hip/hip_platform.cpp | 10 +++++----- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index cfe5cca7d1..57847e4cbe 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -154,12 +154,13 @@ public: bool dyn_undef; }; private: - std::unordered_map functions_; + std::unordered_map functions_; std::unordered_multimap vars_; static PlatformState* platform_; - PlatformState() : lock_("Guards global function map") {} + // the lock_ is recursive + PlatformState() : lock_("Guards global function map", true) {} ~PlatformState() {} public: static PlatformState& instance() { diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 5fa4c1f457..e7a34c896b 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -114,6 +114,28 @@ extern bool __hipExtractCodeObjectFromFatBinary(const void* data, const std::vector& devices, std::vector>& code_objs); +bool ihipModuleRegisterFunc(amd::Program* program, hipModule_t* module) { + std::vector func_names; + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + if (!dev_program->getFuncsFromCodeObj(&func_names)) { + return false; + 
} + + for (auto it = func_names.begin(); it != func_names.end(); ++it) { + auto modules = new std::vector>(g_devices.size()); + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, true); + } + + PlatformState::DeviceFunction dfunc{std::string{it->c_str()}, modules, + std::vector(g_devices.size())}; + PlatformState::instance().registerFunction(it->c_str(), dfunc); + } + + return true; +} bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* module) { std::vector undef_vars; @@ -125,7 +147,7 @@ bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* module) { } for (auto it = undef_vars.begin(); it != undef_vars.end(); ++it) { - auto modules = new std::vector >{g_devices.size()}; + auto modules = new std::vector >(g_devices.size()); for (size_t dev = 0; dev < g_devices.size(); ++dev) { modules->at(dev) = std::make_pair(*module, true); } @@ -156,7 +178,7 @@ bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) { } for (auto it = var_names.begin(); it != var_names.end(); ++it) { - auto modules = new std::vector >{g_devices.size()}; + auto modules = new std::vector >(g_devices.size()); for (size_t dev = 0; dev < g_devices.size(); ++dev) { modules->at(dev) = std::make_pair(*module, true); } @@ -196,6 +218,10 @@ hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) return hipErrorSharedObjectSymbolNotFound; } + if (!ihipModuleRegisterFunc(program, module)) { + return hipErrorSharedObjectSymbolNotFound; + } + if(CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) { return hipErrorSharedObjectInitFailed; } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 1915576aa4..e4b4ed8a49 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -150,7 +150,7 @@ extern "C" std::vector< std::pair >* __hipRegisterFatBinary(c return nullptr; } - auto programs = new std::vector< std::pair 
>{g_devices.size()}; + auto programs = new std::vector< std::pair >(g_devices.size()); for (size_t dev = 0; dev < g_devices.size(); ++dev) { amd::Context* ctx = g_devices[dev]; amd::Program* program = new amd::Program(*ctx); @@ -231,7 +231,7 @@ void PlatformState::registerVar(const void* hostvar, void PlatformState::registerFunction(const void* hostFunction, const DeviceFunction& func) { amd::ScopedLock lock(lock_); - functions_.insert(std::make_pair(hostFunction, func)); + functions_.insert(std::make_pair(std::string(reinterpret_cast(hostFunction)), func)); } bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFuncAttributes* func_attr) { @@ -272,7 +272,7 @@ bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** va hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { amd::ScopedLock lock(lock_); - const auto it = functions_.find(hostFunction); + const auto it = functions_.find(std::string(reinterpret_cast(hostFunction))); if (it != functions_.cend()) { PlatformState::DeviceFunction& devFunc = it->second; if (devFunc.functions[deviceId] == 0) { @@ -302,12 +302,12 @@ hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { bool PlatformState::getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr) { - + amd::ScopedLock lock(lock_); if (func_attr == nullptr) { return false; } - const auto it = functions_.find(hostFunction); + const auto it = functions_.find(std::string(reinterpret_cast(hostFunction))); if (it == functions_.cend()) { return false; } From de09e11c0a560e03ed67f34cbaf418cb80867c18 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 21 Nov 2019 19:33:05 -0500 Subject: [PATCH 252/282] P4 to Git Change 2034910 by kjayapra@0_HIPWS_LNX1_ROCM on 2019/11/21 19:28:10 SWDEV-145570 - Revering CL#2033779 for now. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#48 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#49 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#51 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#70 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#36 edit --- api/hip/hip_internal.hpp | 5 ++--- api/hip/hip_module.cpp | 26 -------------------------- api/hip/hip_platform.cpp | 8 ++++---- 3 files changed, 6 insertions(+), 33 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 57847e4cbe..cfe5cca7d1 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -154,13 +154,12 @@ public: bool dyn_undef; }; private: - std::unordered_map functions_; + std::unordered_map functions_; std::unordered_multimap vars_; static PlatformState* platform_; - // the lock_ is recursive - PlatformState() : lock_("Guards global function map", true) {} + PlatformState() : lock_("Guards global function map") {} ~PlatformState() {} public: static PlatformState& instance() { diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index e7a34c896b..bdd792e8a0 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -114,28 +114,6 @@ extern bool __hipExtractCodeObjectFromFatBinary(const void* data, const std::vector& devices, std::vector>& code_objs); -bool ihipModuleRegisterFunc(amd::Program* program, hipModule_t* module) { - std::vector func_names; - device::Program* dev_program - = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); - - if (!dev_program->getFuncsFromCodeObj(&func_names)) { - return false; - } - - for (auto it = func_names.begin(); it != func_names.end(); ++it) { - auto modules = new std::vector>(g_devices.size()); - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - modules->at(dev) = std::make_pair(*module, true); - } - - PlatformState::DeviceFunction dfunc{std::string{it->c_str()}, modules, - std::vector(g_devices.size())}; - 
PlatformState::instance().registerFunction(it->c_str(), dfunc); - } - - return true; -} bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* module) { std::vector undef_vars; @@ -218,10 +196,6 @@ hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) return hipErrorSharedObjectSymbolNotFound; } - if (!ihipModuleRegisterFunc(program, module)) { - return hipErrorSharedObjectSymbolNotFound; - } - if(CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) { return hipErrorSharedObjectInitFailed; } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index e4b4ed8a49..37d022faa9 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -231,7 +231,7 @@ void PlatformState::registerVar(const void* hostvar, void PlatformState::registerFunction(const void* hostFunction, const DeviceFunction& func) { amd::ScopedLock lock(lock_); - functions_.insert(std::make_pair(std::string(reinterpret_cast(hostFunction)), func)); + functions_.insert(std::make_pair(hostFunction, func)); } bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFuncAttributes* func_attr) { @@ -272,7 +272,7 @@ bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** va hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { amd::ScopedLock lock(lock_); - const auto it = functions_.find(std::string(reinterpret_cast(hostFunction))); + const auto it = functions_.find(hostFunction); if (it != functions_.cend()) { PlatformState::DeviceFunction& devFunc = it->second; if (devFunc.functions[deviceId] == 0) { @@ -302,12 +302,12 @@ hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { bool PlatformState::getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr) { - amd::ScopedLock lock(lock_); + if (func_attr == nullptr) { return false; } - const auto it = functions_.find(std::string(reinterpret_cast(hostFunction))); + const 
auto it = functions_.find(hostFunction); if (it == functions_.cend()) { return false; } From 90546b742e176940492e05d7dee5600b64156b1a Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 22 Nov 2019 11:29:49 -0500 Subject: [PATCH 253/282] P4 to Git Change 2035286 by kjayapra@0_HIPWS_LNX1_ROCM on 2019/11/22 11:19:57 SWDEV-145570 - Reverting CL 2033170 Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#52 edit --- api/hip/hip_platform.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 37d022faa9..df2107974e 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -416,9 +416,6 @@ extern "C" void __hipRegisterFunction( HIP_INIT(); PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{ g_devices.size() }}; PlatformState::instance().registerFunction(hostFunction, func); - for (size_t i = 0; i < g_devices.size(); ++i) { - PlatformState::instance().getFunc(hostFunction, i); - } } // Registers a device-side global variable. From 5c5588bf20a13f7025f196d93d020495e1e2aaa6 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 22 Nov 2019 17:56:07 -0500 Subject: [PATCH 254/282] P4 to Git Change 2035631 by cpaquot@cpaquot-ocl-lc-lnx on 2019/11/22 17:52:20 SWDEV-213526 - pytorch tests fail with hipErrorOutofMemory There's a bug in ROCr when loading a lot of kernels and not syncing So for now, if an allocation fails, sync devices and retry before returning hipErrorOutOfMemory error. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#86 edit --- api/hip/hip_memory.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index a249397605..c1cc189bb7 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -73,7 +73,14 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) *ptr = amd::SvmBuffer::malloc(*amdContext, flags, sizeBytes, amdContext->devices()[0]->info().memBaseAddrAlign_); if (*ptr == nullptr) { - return hipErrorOutOfMemory; + + hip::syncStreams(); + hip::getNullStream()->finish(); + + *ptr = amd::SvmBuffer::malloc(*amdContext, flags, sizeBytes, amdContext->devices()[0]->info().memBaseAddrAlign_); + if (*ptr == nullptr) { + return hipErrorOutOfMemory; + } } ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipMalloc ptr=0x%zx", *ptr); return hipSuccess; From 601b88ad18fc162e41ce89f1cd9e6e79e1d06c38 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 26 Nov 2019 17:03:41 -0500 Subject: [PATCH 255/282] P4 to Git Change 2037147 by kjayapra@1_HIPWS_LNX1_PAL on 2019/11/26 16:58:07 SWDEV-144570 - Adding hipMemcpyWithStream API Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#36 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#34 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#87 edit --- api/hip/hip_hcc.def.in | 1 + api/hip/hip_hcc.map.in | 1 + api/hip/hip_memory.cpp | 9 +++++++++ 3 files changed, 11 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 51f0bcefe1..145e8ef2cc 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -81,6 +81,7 @@ hipMallocArray hipMemAllocPitch hipMallocPitch hipMemcpy +hipMemcpyWithStream hipMemcpyParam2D hipMemcpy2D hipMemcpy2DAsync diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index e9cb1498e9..55defef49a 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -82,6 +82,7 @@ global: hipMallocPitch; hipMemAllocPitch; hipMemcpy; + hipMemcpyWithStream; hipMemcpyParam2D; hipMemcpy2D; hipMemcpy2DAsync; diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index c1cc189bb7..0a6cc7f966 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -233,6 +233,15 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue)); } +hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyWithStream, dst, src, sizeBytes, kind, stream); + + amd::HostQueue* queue = hip::getQueue(stream); + + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue, true)); +} + hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { HIP_INIT_API(hipMemPtrGetInfo, ptr, size); From 5349bd80365aad5f21a3884f08f3500ae0ee5b48 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 29 Nov 2019 10:17:28 -0500 Subject: [PATCH 256/282] P4 to Git Change 2038688 by jujiang@JJ-HIP on 2019/11/29 10:14:49 SWDEV-214490 - Update HIP RT for texture3D in HIP/PAL on Windows - Update function hipMemcpy3D for Texture Array - Add hipArrayCubemap support in hipMalloc3DArray http://ocltc.amd.com/reviews/r/18328/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#88 edit --- api/hip/hip_memory.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 0a6cc7f966..825745dca9 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -500,11 +500,11 @@ hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc // Dummy flags check switch (flags) { case hipArrayLayered: - case hipArrayCubemap: case hipArraySurfaceLoadStore: case hipArrayTextureGather: assert(0 && "Unspported"); break; + case hipArrayCubemap: case hipArrayDefault: default: break; @@ -1150,19 +1150,19 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { byteSize = 1; break; } - region[2] = p->Depth; - region[1] = p->Height; - region[0] = p->WidthInBytes / byteSize; - srcOrigin[0] = p->srcXInBytes / byteSize; - srcOrigin[1] = p->srcY; - srcOrigin[2] = p->srcZ; + region[2] = p->extent.depth; + region[1] = p->extent.height; + region[0] = p->extent.width; + srcOrigin[0] = p->srcPos.x; + srcOrigin[1] = p->srcPos.y; + srcOrigin[2] = p->srcPos.z; dstPitchInbytes = p->dstArray->width * byteSize; - srcPitchInBytes = p->srcPitch; - srcPtr = (void*)p->srcHost; + srcPitchInBytes = p->srcPtr.pitch; + srcPtr = (void*)p->srcPtr.ptr; dstPtr = p->dstArray->data; - dstOrigin[0] = p->dstXInBytes/byteSize; - dstOrigin[1] = p->dstY; - dstOrigin[2] = p->dstZ; + dstOrigin[0] = p->dstPos.x; + dstOrigin[1] = p->dstPos.y; + dstOrigin[2] = p->dstPos.z; } else { region[2] = p->extent.depth; region[1] = p->extent.height; @@ -1188,8 +1188,8 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { amd::Memory* dstMemory = getMemoryObject(dstPtr, offset); assert(offset == 0); - size_t src_slice_pitch = srcPitchInBytes * p->srcHeight; - size_t dst_slice_pitch = dstPitchInbytes * p->dstHeight; + size_t src_slice_pitch = srcPitchInBytes * p->extent.height; + size_t dst_slice_pitch = dstPitchInbytes * 
p->extent.height; if (!srcRect.create(srcOrigin, region, srcPitchInBytes, src_slice_pitch) || !dstRect.create(dstOrigin, region, dstPitchInbytes, dst_slice_pitch)) { From 69dcd2b862fa1d7fccdfbf147896d7962aa0f119 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 2 Dec 2019 13:37:35 -0600 Subject: [PATCH 257/282] P4 to Git Change 2039536 by jujiang@JJ-HIP on 2019/12/02 14:31:13 SWDEV-214490 - Update HIP RT for texture3D in HIP/PAL on Windows -Update ihipBindTexture http://ocltc.amd.com/reviews/r/18333/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#89 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#28 edit --- api/hip/hip_memory.cpp | 39 +++---------------------- api/hip/hip_texture.cpp | 65 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 66 insertions(+), 38 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 825745dca9..bd82e2634d 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -38,6 +38,8 @@ extern void getDrvChannelOrderAndType(const enum hipArray_Format Format, extern void setDescFromChannelType(cl_channel_type channelType, hipChannelFormatDesc* desc); +extern void getByteSizeFromChannelFormatKind(enum hipChannelFormatKind channelFormatKind, size_t* byteSize); + amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { amd::Memory *memObj = amd::MemObjMap::FindMemObj(ptr); if (memObj != nullptr) { @@ -881,24 +883,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con amd::HostQueue* queue = hip::getNullStream(); size_t dpitch = dst->width; - - switch (dst[0].desc.f) { - case hipChannelFormatKindSigned: - dpitch *= sizeof(int); - break; - case hipChannelFormatKindUnsigned: - dpitch *= sizeof(unsigned int); - break; - case hipChannelFormatKindFloat: - dpitch *= sizeof(float); - break; - case hipChannelFormatKindNone: - dpitch *= sizeof(size_t); - break; - default: - dpitch *= 1; - break; - } + 
getByteSizeFromChannelFormatKind(dst[0].desc.f, &dpitch); if ((wOffset + width > (dpitch)) || width > spitch) { HIP_RETURN(hipErrorInvalidDevicePointer); @@ -1133,23 +1118,7 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { size_t dstOrigin[3]; size_t region[3]; if (p->dstArray != nullptr) { - switch (p->dstArray->desc.f) { - case hipChannelFormatKindSigned: - byteSize = sizeof(int); - break; - case hipChannelFormatKindUnsigned: - byteSize = sizeof(unsigned int); - break; - case hipChannelFormatKindFloat: - byteSize = sizeof(float); - break; - case hipChannelFormatKindNone: - byteSize = sizeof(size_t); - break; - default: - byteSize = 1; - break; - } + getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &byteSize); region[2] = p->extent.depth; region[1] = p->extent.height; region[0] = p->extent.width; diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index d0dac5d910..1edd0b8aca 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -193,6 +193,27 @@ void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureRea } } +void getByteSizeFromChannelFormatKind(enum hipChannelFormatKind channelFormatKind, size_t* byteSize) { + switch (channelFormatKind) + { + case hipChannelFormatKindSigned: + *byteSize = sizeof(int); + break; + case hipChannelFormatKindUnsigned: + *byteSize = sizeof(unsigned int); + break; + case hipChannelFormatKindFloat: + *byteSize = sizeof(float); + break; + case hipChannelFormatKindNone: + *byteSize = sizeof(size_t); + break; + default: + *byteSize = 1; + break; + } +} + amd::Sampler* fillSamplerDescriptor(enum hipTextureAddressMode addressMode, enum hipTextureFilterMode filterMode, int normalizedCoords) { #ifndef CL_FILTER_NONE @@ -401,14 +422,33 @@ hipError_t ihipBindTexture(cl_mem_object_type type, } if (hip::getCurrentContext()) { cl_image_format image_format; + size_t byteSize; + size_t rowPitch = 0; + size_t depth = 0; + size_t slicePitch = 0; getChannelOrderAndType(*desc, 
hipReadModeElementType, &image_format.image_channel_order, &image_format.image_channel_data_type); + getByteSizeFromChannelFormatKind(desc->f, &byteSize); const amd::Image::Format imageFormat(image_format); amd::Memory* memory = getMemoryObject(devPtr, *offset); - amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), - type, memory->getMemFlags(), imageFormat, width, height, 1, pitch, 0); + switch (type) { + case CL_MEM_OBJECT_IMAGE3D: + rowPitch = width * byteSize; + depth = pitch; + slicePitch = rowPitch * height; + break; + case CL_MEM_OBJECT_IMAGE2D: + default: + rowPitch = pitch; + depth = 1; + slicePitch = 0; + break; + } + + amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + type, memory->getMemFlags(), imageFormat, width, height, depth, rowPitch, slicePitch); if (!image->create()) { delete image; return hipErrorMemoryAllocation; @@ -437,6 +477,19 @@ hipError_t ihipBindTexture(cl_mem_object_type type, resDesc.res.pitch2D.height = height; resDesc.res.pitch2D.pitchInBytes = pitch; break; + case CL_MEM_OBJECT_IMAGE3D: + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = (hipArray*)malloc(sizeof(hipArray)); + resDesc.res.array.array->desc = *desc; + resDesc.res.array.array->width = width; + resDesc.res.array.array->height = height; + resDesc.res.array.array->depth = depth; + resDesc.res.array.array->Format = tex->format; + resDesc.res.array.array->NumChannels = tex->numChannels; + resDesc.res.array.array->isDrv = false; + resDesc.res.array.array->textureType = hipTextureType3D; + resDesc.res.array.array->data = const_cast(devPtr); + break; default: resDesc.resType = hipResourceTypeArray; resDesc.res.array.array = nullptr; @@ -444,7 +497,10 @@ hipError_t ihipBindTexture(cl_mem_object_type type, } tex->textureObject = reinterpret_cast(ihipCreateTextureObject(resDesc, *image, *sampler)); - + if(type == CL_MEM_OBJECT_IMAGE3D) { + free(resDesc.res.array.array); + } + memset(&resDesc, 0, 
sizeof(hipResourceDesc)); return hipSuccess; } return hipErrorInvalidValue; @@ -508,6 +564,9 @@ hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureRead case 2: clType = CL_MEM_OBJECT_IMAGE2D; break; + case 3: + clType = CL_MEM_OBJECT_IMAGE3D; + break; default: HIP_RETURN(hipErrorInvalidValue); } From a077140ae266e6dabd130f4a8515710845a737f9 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 4 Dec 2019 13:34:03 -0500 Subject: [PATCH 258/282] P4 to Git Change 2040867 by kjayapra@0_HIPWS_LNX1_ROCM on 2019/12/04 13:32:27 SWDEV-145570 - Adding back the lazy kernel changes because the OOM issue is because of KFD/RocR. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#53 edit --- api/hip/hip_platform.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index df2107974e..37d022faa9 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -416,6 +416,9 @@ extern "C" void __hipRegisterFunction( HIP_INIT(); PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{ g_devices.size() }}; PlatformState::instance().registerFunction(hostFunction, func); + for (size_t i = 0; i < g_devices.size(); ++i) { + PlatformState::instance().getFunc(hostFunction, i); + } } // Registers a device-side global variable. From 188a3575273748d148caf844fd0efc28da2b91f6 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 4 Dec 2019 14:06:53 -0500 Subject: [PATCH 259/282] P4 to Git Change 2040884 by jujiang@JJ-HIP on 2019/12/04 13:59:15 SWDEV-214490 - Update HIP RT for texture3D in HIP/PAL on Windows -Update hipMemcpy3D function http://ocltc.amd.com/reviews/r/18346/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#90 edit --- api/hip/hip_memory.cpp | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index bd82e2634d..89b06aca95 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1117,35 +1117,28 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { size_t srcOrigin[3]; size_t dstOrigin[3]; size_t region[3]; + + region[2] = p->extent.depth; + region[1] = p->extent.height; + region[0] = p->extent.width; + srcOrigin[0] = p->srcPos.x; + srcOrigin[1] = p->srcPos.y; + srcOrigin[2] = p->srcPos.z; + dstOrigin[0] = p->dstPos.x; + dstOrigin[1] = p->dstPos.y; + dstOrigin[2] = p->dstPos.z; + if (p->dstArray != nullptr) { getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &byteSize); - region[2] = p->extent.depth; - region[1] = p->extent.height; - region[0] = p->extent.width; - srcOrigin[0] = p->srcPos.x; - srcOrigin[1] = p->srcPos.y; - srcOrigin[2] = p->srcPos.z; dstPitchInbytes = p->dstArray->width * byteSize; srcPitchInBytes = p->srcPtr.pitch; srcPtr = (void*)p->srcPtr.ptr; dstPtr = p->dstArray->data; - dstOrigin[0] = p->dstPos.x; - dstOrigin[1] = p->dstPos.y; - dstOrigin[2] = p->dstPos.z; } else { - region[2] = p->extent.depth; - region[1] = p->extent.height; - region[0] = p->extent.width; - srcOrigin[0] = p->srcXInBytes; - srcOrigin[1] = p->srcY; - srcOrigin[2] = p->srcZ; srcPitchInBytes = p->srcPtr.pitch; dstPitchInbytes = p->dstPtr.pitch; srcPtr = p->srcPtr.ptr; dstPtr = p->dstPtr.ptr; - dstOrigin[0] = p->dstXInBytes; - dstOrigin[1] = p->dstY; - dstOrigin[2] = p->dstZ; } // Create buffer rectangle info structure From 65fd4723646fdca442423528dfcf6b71cec1c022 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 4 Dec 2019 14:22:03 -0500 Subject: [PATCH 260/282] P4 to Git Change 2040890 by skudchad@skudchad_test2_win_opencl on 2019/12/04 14:16:51 SWDEV-203814 - HIPRTC Inprocess runtime changes(part2) 
ReviewBoardURL = http://ocltc.amd.com/reviews/r/18342/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#37 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#35 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#50 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_rtc.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#73 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#38 edit --- api/hip/hip_hcc.def.in | 1 + api/hip/hip_hcc.map.in | 2 + api/hip/hip_module.cpp | 10 ++++ api/hip/hip_rtc.cpp | 122 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 126 insertions(+), 9 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 145e8ef2cc..6aba1f5a25 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -129,6 +129,7 @@ hipLaunchCooperativeKernelMultiDevice hipHccModuleLaunchKernel hipModuleLoad hipModuleLoadData +hipModuleLoadDataEx hipModuleUnload hipOccupancyMaxActiveBlocksPerMultiprocessor hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index 55defef49a..f80d7db23a 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -129,6 +129,7 @@ global: hipLaunchCooperativeKernelMultiDevice; hipModuleLoad; hipModuleLoadData; + hipModuleLoadDataEx; hipModuleUnload; hipOccupancyMaxActiveBlocksPerMultiprocessor; hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; @@ -180,6 +181,7 @@ global: hiprtcGetErrorString; extern "C++" { hip_impl::hipLaunchKernelGGLImpl*; + hip_impl::demangle*; hipCreateTextureObject*; hipDestroyTextureObject*; hipGetTextureObjectResourceDesc*; diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index bdd792e8a0..c13a077a0e 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -110,6 +110,16 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image) 
HIP_RETURN(ihipModuleLoadData(module, image)); } +hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, + unsigned int numOptions, hipJitOption* options, + void** optionsValues) +{ + /* TODO: Pass options to Program */ + HIP_INIT_API(hipModuleLoadData, module, image); + + HIP_RETURN(ihipModuleLoadData(module, image)); +} + extern bool __hipExtractCodeObjectFromFatBinary(const void* data, const std::vector& devices, std::vector>& code_objs); diff --git a/api/hip/hip_rtc.cpp b/api/hip/hip_rtc.cpp index 9cf3060418..4e2c99e6df 100644 --- a/api/hip/hip_rtc.cpp +++ b/api/hip/hip_rtc.cpp @@ -24,7 +24,10 @@ THE SOFTWARE. #include "hiprtc_internal.hpp" #include #include "platform/program.hpp" -#include + +extern "C" char * __cxa_demangle(const char *mangled_name, char *output_buffer, + size_t *length, int *status); + namespace hiprtc { thread_local hiprtcResult g_lastRtcError = HIPRTC_SUCCESS; @@ -37,16 +40,18 @@ private: ProgramState() : lock_("Guards program state") {} ~ProgramState() {} - +public: std::unordered_map, std::vector>> progHeaders_; - std::vector nameExpresssion_; -public: + + std::map> nameExpresssion_; + static ProgramState& instance(); void createProgramHeaders(amd::Program* program, int numHeaders, const char** headers, const char** headerNames); void getProgramHeaders(amd::Program* program, int* numHeaders, char** headers, char ** headerNames); uint32_t addNameExpression(const char* name_expression); + char* getLoweredName(const char* name_expression); }; ProgramState* ProgramState::programState_ = nullptr; @@ -82,13 +87,86 @@ void ProgramState::getProgramHeaders(amd::Program* program, int* numHeaders, } } - uint32_t ProgramState::addNameExpression(const char* name_expression) { amd::ScopedLock lock(lock_); - nameExpresssion_.emplace_back(name_expression); + + // Strip clean of any '(' or ')' or '&' + std::string strippedName(name_expression); + if (strippedName.back() == ')') { + strippedName.pop_back(); + strippedName.erase(0, 
strippedName.find('(')); + } + if (strippedName.front() == '&') { + strippedName.erase(0, 1); + } + auto it = nameExpresssion_.find(name_expression); + if (it == nameExpresssion_.end()) { + nameExpresssion_.insert(std::pair> + (name_expression, std::make_pair(strippedName,""))); + } return nameExpresssion_.size(); } +namespace hip_impl { + +inline std::string demangle(const char* x) { +#ifdef ATI_OS_LINUX + if (!x) { + return {}; + } + + int s = 0; + std::unique_ptr tmp{ + __cxa_demangle(x, nullptr, nullptr, &s), + std::free}; + if (s != 0) { + return {}; + } + + return tmp.get(); +#else + return {}; +#endif +} +} // hip_impl + +std::string handleMangledName(std::string name) { + std::string demangled; + demangled = hip_impl::demangle(name.c_str()); + + if (demangled.empty()) { + return name; + } + + if (demangled.find(".kd") != std::string::npos) { + return {}; + } + + if (demangled.find("void ") == 0) { + demangled.erase(0, strlen("void ")); + } + + auto dx{demangled.find_first_of("(<")}; + + if (dx == std::string::npos) { + return demangled; + } + + if (demangled[dx] == '<') { + auto cnt{1u}; + do { + ++dx; + cnt += (demangled[dx] == '<') ? 1 : ((demangled[dx] == '>') ? 
-1 : 0); + } while (cnt); + + demangled.erase(++dx); + } else { + demangled.erase(dx); + } + + return demangled; +} + const char* hiprtcGetErrorString(hiprtcResult x) { switch (x) { @@ -192,13 +270,39 @@ hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expres } hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, - const char** loweredNames) { - HIPRTC_INIT_API(prog, name_expression, loweredNames); + const char** loweredName) { + HIPRTC_INIT_API(prog, name_expression, loweredName); - if (name_expression == nullptr || loweredNames == nullptr) { + if (name_expression == nullptr || loweredName == nullptr) { HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } + amd::Program* program = as_amd(reinterpret_cast(prog)); + + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + auto it = ProgramState::instance().nameExpresssion_.find(name_expression); + if (it == ProgramState::instance().nameExpresssion_.end()) { + return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID; + } + + std::string strippedName = it->second.first; + std::vector mangledNames; + + if (!dev_program->getLoweredNames(&mangledNames)) { + HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION); + } + + for (auto &name : mangledNames) { + std::string demangledName = handleMangledName(name); + if (demangledName == strippedName) { + it->second.second.assign(name); + } + } + + *loweredName = it->second.second.c_str(); + HIPRTC_RETURN(HIPRTC_SUCCESS); } From c1d63fbdbba7f760eaa471eec80a13af68c1b877 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 6 Dec 2019 00:33:55 -0500 Subject: [PATCH 261/282] P4 to Git Change 2041756 by cpaquot@cpaquot-ocl-lc-lnx on 2019/12/06 00:27:07 SWDEV-213526 - [hip] OOM issue Restore lazy kernel uploads for 2.10 release. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#54 edit --- api/hip/hip_platform.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 37d022faa9..0250ebfb09 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -416,9 +416,9 @@ extern "C" void __hipRegisterFunction( HIP_INIT(); PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{ g_devices.size() }}; PlatformState::instance().registerFunction(hostFunction, func); - for (size_t i = 0; i < g_devices.size(); ++i) { - PlatformState::instance().getFunc(hostFunction, i); - } +// for (size_t i = 0; i < g_devices.size(); ++i) { +// PlatformState::instance().getFunc(hostFunction, i); +// } } // Registers a device-side global variable. From f4df28905e27f5adff4deee874d4d1fb08f2d766 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 6 Dec 2019 14:53:41 -0500 Subject: [PATCH 262/282] P4 to Git Change 2042093 by jujiang@JJ-HIP on 2019/12/06 14:44:16 SWDEV-214490 - Update HIP RT for texture3D in HIP/PAL on Windows -Update hipTexRefSetArray http://ocltc.amd.com/reviews/r/18356/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#29 edit --- api/hip/hip_texture.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 1edd0b8aca..d1c228707b 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -718,13 +718,27 @@ hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsi HIP_INIT_API(NONE, tex, array, flags); size_t offset = 0; + cl_mem_object_type clType; if ((tex == nullptr) || (array == nullptr)) { HIP_RETURN(hipErrorInvalidImage); } - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, array->data, &array->desc, array->width, - array->height, 0)); + switch(array->textureType) { + case hipTextureType3D: + clType = CL_MEM_OBJECT_IMAGE3D; + break; + case hipTextureType2D: + clType = CL_MEM_OBJECT_IMAGE2D; + break; + case hipTextureType1D: + clType = CL_MEM_OBJECT_IMAGE1D; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, &array->desc, array->width, + array->height, array->depth)); } hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex) { From 0705efa384c0d624fc5d0405525b56ef66cf1362 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 9 Dec 2019 11:22:57 -0500 Subject: [PATCH 263/282] P4 to Git Change 2042612 by eshcherb@evgeny-hip on 2019/12/09 11:11:38 SWDEV-197289 - HIP-VDI tracing activity suppressing of type is zero; fixing callbacks table recursive mutex; Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_prof_api.h#6 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#101 edit --- api/hip/hip_prof_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_prof_api.h b/api/hip/hip_prof_api.h index 9d6db95ba5..b05e50608d 100644 --- a/api/hip/hip_prof_api.h +++ b/api/hip/hip_prof_api.h @@ -50,7 +50,7 @@ typedef activity_sync_callback_t hip_act_callback_t; class api_callbacks_table_t { public: - typedef std::recursive_mutex mutex_t; + typedef std::mutex mutex_t; typedef hip_api_record_t record_t; typedef hip_api_callback_t fun_t; From 921fa13d81b3e0863342bd84e4833406403337d5 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 11 Dec 2019 03:11:19 -0500 Subject: [PATCH 264/282] P4 to Git Change 2043802 by cpaquot@cpaquot-ocl-lc-lnx on 2019/12/11 03:06:02 SWDEV-213526 - [hip] OOM issue Delay any access to device layers till HIP API is called by app. This allows the app to fork the process first and then call HIP which is legal. Doing hip calls then fork isn't legal nor supported by ROCm. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#25 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#49 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#55 edit --- api/hip/hip_context.cpp | 2 ++ api/hip/hip_internal.hpp | 26 +++++++++++++++-- api/hip/hip_platform.cpp | 60 +++++++++++++++++++++++----------------- 3 files changed, 61 insertions(+), 27 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index 6ff36dc987..cc590678c5 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -65,6 +65,8 @@ void init() { if (host_context && CL_SUCCESS != host_context->create(nullptr)) { host_context->release(); } + + PlatformState::instance().init(); } amd::Context* getCurrentContext() { diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index cfe5cca7d1..a44b5a0b5a 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -58,7 +58,7 @@ typedef struct ihipIpcMemHandle_st { if (!CL_CHECK_THREAD(thread)) { \ HIP_RETURN(hipErrorOutOfMemory); \ } \ - HIP_INIT(); \ + HIP_INIT() \ HIP_CB_SPAWNER_OBJECT(cid); #define HIP_RETURN(ret) \ @@ -126,7 +126,29 @@ struct ihipExec_t { class PlatformState { amd::Monitor lock_; + std::unordered_map>> modules_; + bool initialized_; + + void digestFatBinary(const void* data, std::vector>& programs); public: + void init(); + std::vector>* addFatBinary(const void*data) + { + if (initialized_) { + digestFatBinary(data, modules_[data]); + } + return &modules_[data]; + } + void removeFatBinary(std::vector>* module) + { + for (auto& mod : modules_) { + if (&mod.second == module) { + modules_.erase(&mod); + return; + } + } + } + struct RegisteredVar { public: RegisteredVar(): size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {} @@ -159,7 +181,7 @@ private: static PlatformState* platform_; - PlatformState() : lock_("Guards global function map") {} + PlatformState() : lock_("Guards global function map"), initialized_(false) {} ~PlatformState() {} public: static PlatformState& instance() { diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 
0250ebfb09..6ce138e7a3 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -85,8 +85,6 @@ bool __hipExtractCodeObjectFromFatBinary(const void* data, const std::vector& devices, std::vector>& code_objs) { - HIP_INIT(); - std::string magic((const char*)data, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { return false; @@ -127,42 +125,59 @@ bool __hipExtractCodeObjectFromFatBinary(const void* data, return false; } -extern "C" std::vector< std::pair >* __hipRegisterFatBinary(const void* data) +extern "C" std::vector>* __hipRegisterFatBinary(const void* data) { - HIP_INIT(); - - if(g_devices.empty()) { - return nullptr; - } const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { return nullptr; } - std::vector devices; + return PlatformState::instance().addFatBinary(fbwrapper->binary); +} + +void PlatformState::digestFatBinary(const void* data, std::vector>& programs) +{ std::vector> code_objs; + std::vector devices; for (size_t dev = 0; dev < g_devices.size(); ++dev) { amd::Context* ctx = g_devices[dev]; devices.push_back(ctx->devices()[0]->info().name_); -} - - if (!__hipExtractCodeObjectFromFatBinary((char*)fbwrapper->binary, devices, code_objs)) { - return nullptr; } - auto programs = new std::vector< std::pair >(g_devices.size()); + if (!__hipExtractCodeObjectFromFatBinary((char*)data, devices, code_objs)) { + return; + } + + programs.resize(g_devices.size()); + for (size_t dev = 0; dev < g_devices.size(); ++dev) { amd::Context* ctx = g_devices[dev]; amd::Program* program = new amd::Program(*ctx); if (program == nullptr) { - return nullptr; + return; } if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) { - programs->at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); + programs.at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , 
false); } } +} - return programs; +void PlatformState::init() +{ + if(initialized_ || g_devices.empty()) { + return; + } + initialized_ = true; + + for (auto& it : modules_) { + digestFatBinary(it.first, it.second); + } + for (auto& it : functions_) { + it.second.functions.resize(g_devices.size()); + } + for (auto& it : vars_) { + it.second.rvars.resize(g_devices.size()); + } } std::vector< std::pair >* PlatformState::unregisterVar(hipModule_t hmod) { @@ -188,7 +203,6 @@ std::vector< std::pair >* PlatformState::unregisterVar(hipMod PlatformState::DeviceVar* PlatformState::findVar(std::string hostVar, int deviceId, hipModule_t hmod) { DeviceVar* dvar = nullptr; - if (hmod != nullptr) { // If module is provided, then get the var only from that module auto var_range = vars_.equal_range(hostVar); @@ -302,7 +316,6 @@ hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { bool PlatformState::getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr) { - if (func_attr == nullptr) { return false; } @@ -413,8 +426,7 @@ extern "C" void __hipRegisterFunction( dim3* gridDim, int* wSize) { - HIP_INIT(); - PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{ g_devices.size() }}; + PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{g_devices.size()}}; PlatformState::instance().registerFunction(hostFunction, func); // for (size_t i = 0; i < g_devices.size(); ++i) { // PlatformState::instance().getFunc(hostFunction, i); @@ -436,10 +448,8 @@ extern "C" void __hipRegisterVar( int constant, // Whether this variable is constant int global) // Unknown, always 0 { - HIP_INIT(); - PlatformState::DeviceVar dvar{var, std::string{ hostVar }, static_cast(size), modules, - std::vector{ g_devices.size() }, false }; + std::vector{g_devices.size()}, false }; PlatformState::instance().registerVar(hostVar, dvar); } @@ -454,7 +464,7 @@ extern "C" void __hipUnregisterFatBinary(std::vector< std::pair Date: 
Wed, 11 Dec 2019 16:30:48 -0500 Subject: [PATCH 265/282] P4 to Git Change 2044223 by jujiang@JJ-HIP on 2019/12/11 16:16:37 SWDEV-214490 - Update HIP RT for texture3D in HIP/PAL on Windows -Update hipCreateTextureObject http://ocltc.amd.com/reviews/r/18382/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#91 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#30 edit --- api/hip/hip_memory.cpp | 4 ++-- api/hip/hip_texture.cpp | 22 +++++++++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 89b06aca95..e6f3f2f622 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -500,13 +500,13 @@ hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc const cl_image_format image_format = { channelOrder, channelType }; // Dummy flags check - switch (flags) { + switch (flags) { case hipArrayLayered: + case hipArrayCubemap: case hipArraySurfaceLoadStore: case hipArrayTextureGather: assert(0 && "Unspported"); break; - case hipArrayCubemap: case hipArrayDefault: default: break; diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index d1c228707b..a4712de1cb 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -299,6 +299,8 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou amd::Memory* memory = nullptr; size_t offset = 0; + cl_mem_object_type clType; + switch (pResDesc->resType) { case hipResourceTypeArray: { @@ -316,9 +318,23 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou case hipArrayTextureGather: case hipArrayDefault: default: - image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), - CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat, - pResDesc->res.array.array->width, pResDesc->res.array.array->height, 1, 0, 0); + switch(pResDesc->res.array.array->textureType) { + case hipTextureType3D: + clType 
= CL_MEM_OBJECT_IMAGE3D; + image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + clType, memory->getMemFlags(), imageFormat, + pResDesc->res.array.array->width, pResDesc->res.array.array->height, + pResDesc->res.array.array->depth, 0, 0); + break; + case hipTextureType2D: + clType = CL_MEM_OBJECT_IMAGE2D; + image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(), + clType, memory->getMemFlags(), imageFormat, + pResDesc->res.array.array->width, pResDesc->res.array.array->height, 1, 0, 0); + break; + default: + break; + } break; } } From de46a0e2051b6145adae090b03a2497190bed5f5 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 11 Dec 2019 19:26:50 -0500 Subject: [PATCH 266/282] P4 to Git Change 2044309 by cpaquot@cpaquot-ocl-lc-lnx on 2019/12/11 19:19:15 SWDEV-145570 - [hip] special case const char* for logs in case it's a null pointer. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/trace_helper.h#3 edit --- api/hip/trace_helper.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/hip/trace_helper.h b/api/hip/trace_helper.h index 7888237160..3f87fbe1b3 100644 --- a/api/hip/trace_helper.h +++ b/api/hip/trace_helper.h @@ -125,6 +125,11 @@ inline std::string ToString(T v) { return ss.str(); }; +template <> +inline std::string ToString(const char* v) { + return ToHexString(v); +}; + template <> inline std::string ToString(hipFunction_t v) { std::ostringstream ss; From dac5ef4e22ba2904bbad947840e933529eda63f1 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 11 Dec 2019 19:31:20 -0500 Subject: [PATCH 267/282] P4 to Git Change 2044310 by cpaquot@cpaquot-ocl-lc-lnx on 2019/12/11 19:20:38 SWDEV-213526 - [hip] OOM issue Clean up a bit from Laurent's comments. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#50 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#56 edit --- api/hip/hip_internal.hpp | 6 +++--- api/hip/hip_platform.cpp | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index a44b5a0b5a..7e1b5e941c 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -124,10 +124,10 @@ struct ihipExec_t { }; class PlatformState { - amd::Monitor lock_; + amd::Monitor lock_{"Guards global function map"}; std::unordered_map>> modules_; - bool initialized_; + bool initialized_{false}; void digestFatBinary(const void* data, std::vector>& programs); public: @@ -181,7 +181,7 @@ private: static PlatformState* platform_; - PlatformState() : lock_("Guards global function map"), initialized_(false) {} + PlatformState() {} ~PlatformState() {} public: static PlatformState& instance() { diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 6ce138e7a3..abb7cdb740 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -137,6 +137,10 @@ extern "C" std::vector>* __hipRegisterFatBinary(con void PlatformState::digestFatBinary(const void* data, std::vector>& programs) { + if (programs.size() > 0) { + return; + } + std::vector> code_objs; std::vector devices; for (size_t dev = 0; dev < g_devices.size(); ++dev) { @@ -164,6 +168,8 @@ void PlatformState::digestFatBinary(const void* data, std::vector Date: Thu, 12 Dec 2019 11:47:12 -0500 Subject: [PATCH 268/282] P4 to Git Change 2044900 by skudchad@skudchad_test2_win_opencl on 2019/12/12 11:42:43 SWDEV-203814 - Add cases to handle char* and char** for tracehelper ReviewBoardURL = http://ocltc.amd.com/reviews/r/18384/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/trace_helper.h#4 edit --- api/hip/trace_helper.h | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/api/hip/trace_helper.h b/api/hip/trace_helper.h index 3f87fbe1b3..650ee89422 100644 --- a/api/hip/trace_helper.h +++ b/api/hip/trace_helper.h @@ -113,6 +113,27 @@ inline std::string ToHexString(T v) { return ss.str(); }; +template +inline std::string ToString(T* v) { + std::ostringstream ss; + if (v == NULL) { + ss << "char array:"; + } else { + ss << v; + } + return ss.str(); +}; + +template +inline std::string ToString(T** v) { + std::ostringstream ss; + if (v == NULL) { + ss << "char array:"; + } else { + ss << v; + } + return ss.str(); +}; //--- // Template overloads for ToString to handle specific types @@ -125,11 +146,6 @@ inline std::string ToString(T v) { return ss.str(); }; -template <> -inline std::string ToString(const char* v) { - return ToHexString(v); -}; - template <> inline std::string ToString(hipFunction_t v) { std::ostringstream ss; From 455c3a91efb6d7e2d8c718379afce1694f418de9 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 12 Dec 2019 16:01:14 -0500 Subject: [PATCH 269/282] P4 to Git Change 2045076 by skudchad@skudchad_test2_win_opencl on 2019/12/12 15:57:20 SWDEV-213031 - Check the functions_ map else interpret as a hip::Function for now. Function may not be a device function and may have been obtained via hipModuleGetFunction and thus not in the functions_ map ReviewBoardURL = http://ocltc.amd.com/reviews/r/18388/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#57 edit --- api/hip/hip_platform.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index abb7cdb740..05c8d6fce7 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -626,12 +626,17 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, { HIP_INIT_API(NONE, f, blockSize, dynamicSMemSize); int deviceId = ihipGetDevice(); + // FIXME: Function may not be a device function and may have been obtaiend via + // hipModuleGetFunction and thus not in the functions_ map. Check the map + // else interpret as a hip::Function for now. hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); if (func == nullptr) { + func = f; + } + hip::Function* function = hip::Function::asFunction(func); + if (function == nullptr) { HIP_RETURN(hipErrorInvalidDeviceFunction); } - - hip::Function* function = hip::Function::asFunction(func); amd::Kernel* kernel = function->function_; if (!kernel) { HIP_RETURN(hipErrorOutOfMemory); @@ -890,8 +895,9 @@ const std::unordered_map& functions() for (auto&& function : function_names()) { for (auto&& module : modules()) { hipFunction_t f; - if (hipSuccess == hipModuleGetFunction(&f, module, function.second.c_str())) + if (hipSuccess == hipModuleGetFunction(&f, module, function.second.c_str())) { r[function.first] = f; + } } } }); From f6d38a725c64bc1b82ac9cce097ae6988d481d9f Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 17 Dec 2019 20:18:36 -0500 Subject: [PATCH 270/282] P4 to Git Change 2047424 by lmoriche@lmoriche_opencl_dev2 on 2019/12/17 20:13:01 SWDEV-216705 - [hipclang-vdi-rocm][FBA-80]Test crash when all GPUs are hidden by ROCR_VISIBLE_DEVICES Return an error instead of dereferencing a null pointer. This should address the issue described in the ticket, but more places need fixing in the runtime to avoid crashes for corner cases. Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#26 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#92 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#58 edit --- api/hip/hip_context.cpp | 3 ++- api/hip/hip_device_runtime.cpp | 5 +++-- api/hip/hip_memory.cpp | 4 ++++ api/hip/hip_platform.cpp | 7 ++++++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/api/hip/hip_context.cpp b/api/hip/hip_context.cpp index cc590678c5..c4a68777ab 100644 --- a/api/hip/hip_context.cpp +++ b/api/hip/hip_context.cpp @@ -106,7 +106,8 @@ amd::HostQueue* getNullStream(amd::Context& context) { } amd::HostQueue* getNullStream() { - return getNullStream(*getCurrentContext()); + amd::Context* context = getCurrentContext(); + return context ? getNullStream(*context) : nullptr; } }; diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index d3d0b10b87..a36cad8c3b 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -437,7 +437,6 @@ int ihipGetDevice() { return i; } } - assert(0 && "Current device not found?!"); return -1; } @@ -446,7 +445,9 @@ hipError_t hipGetDevice ( int* deviceId ) { if (deviceId != nullptr) { int dev = ihipGetDevice(); - assert(dev != -1); + if (dev == -1) { + HIP_RETURN(hipErrorNoDevice); + } *deviceId = dev; HIP_RETURN(hipSuccess); } else { diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index e6f3f2f622..49895e67e9 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -69,6 +69,10 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) amd::Context* amdContext = ((flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0)? 
hip::host_context : hip::getCurrentContext(); + if (amdContext == nullptr) { + return hipErrorMemoryAllocation; + } + if (amdContext->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { return hipErrorMemoryAllocation; } diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 05c8d6fce7..1aa356cd36 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -469,7 +469,9 @@ extern "C" void __hipUnregisterFatBinary(std::vector< std::pair(module.first))->release(); } }); - PlatformState::instance().unregisterVar((*modules)[0].first); + if (modules->size() > 0) { + PlatformState::instance().unregisterVar((*modules)[0].first); + } PlatformState::instance().removeFatBinary(modules); } @@ -532,6 +534,9 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) HIP_INIT_API(NONE, hostFunction); int deviceId = ihipGetDevice(); + if (deviceId == -1) { + HIP_RETURN(hipErrorNoDevice); + } hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); if (func == nullptr) { HIP_RETURN(hipErrorInvalidDeviceFunction); From 3beb70b72a1d3c0e8e21f40b5709849430aca76f Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 18 Dec 2019 14:42:18 -0500 Subject: [PATCH 271/282] P4 to Git Change 2047959 by skudchad@skudchad_test2_win_opencl on 2019/12/18 14:40:28 SWDEV-213000 - Add notifyCmdQueue in ihipModuleLaunchKernel. This torques timing and flushes the queue immediately. Investigation pending. ReviewBoardURL = http://ocltc.amd.com/reviews/r/18408/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#51 edit --- api/hip/hip_module.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index c13a077a0e..91635fa12f 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -354,6 +354,11 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, command->release(); + // FIXME: SWDEV-213000 - Force notifyCmdQueue to indicate immediate dispatch to HW + // This offsets the commandqueue timing and solves 213000. Investigation pending + + command->notifyCmdQueue(); + return hipSuccess; } From f02b4ffd82512528e9949aaaff0eb3d98e35e85a Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 19 Dec 2019 15:57:37 -0500 Subject: [PATCH 272/282] P4 to Git Change 2048698 by jujiang@JJ-HIP on 2019/12/19 15:52:21 SWDEV-215536 - [HIP][Windows]Output mismatch with tex2DLayered() Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#93 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#31 edit --- api/hip/hip_memory.cpp | 2 +- api/hip/hip_texture.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 49895e67e9..7e4b41aa5a 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -505,12 +505,12 @@ hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc // Dummy flags check switch (flags) { - case hipArrayLayered: case hipArrayCubemap: case hipArraySurfaceLoadStore: case hipArrayTextureGather: assert(0 && "Unspported"); break; + case hipArrayLayered: case hipArrayDefault: default: break; diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index a4712de1cb..8d7f7945bb 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -581,6 +581,7 @@ hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureRead clType = CL_MEM_OBJECT_IMAGE2D; break; case 3: + case hipTextureType2DLayered: clType = 
CL_MEM_OBJECT_IMAGE3D; break; default: From 0c35aa5f1c13a375e774253164e35881abbef664 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 20 Dec 2019 13:02:52 -0600 Subject: [PATCH 273/282] P4 to Git Change 2049240 by lmoriche@lmoriche_opencl_dev2 on 2019/12/20 13:54:17 SWDEV-216705 - [hipclang-vdi-rocm][FBA-80]Test crash when all GPUs are hidden by ROCR_VISIBLE_DEVICES Return an error instead of dereferencing a null pointer. This should address the issue described in the ticket, but more places need fixing in the runtime to avoid crashes for corner cases. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#59 edit --- api/hip/hip_platform.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/hip/hip_platform.cpp b/api/hip/hip_platform.cpp index 1aa356cd36..c91199c077 100644 --- a/api/hip/hip_platform.cpp +++ b/api/hip/hip_platform.cpp @@ -569,6 +569,9 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, stream); int deviceId = ihipGetDevice(); + if (deviceId == -1) { + HIP_RETURN(hipErrorNoDevice); + } hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); if (func == nullptr) { HIP_RETURN(hipErrorInvalidDeviceFunction); From 2989140f784fcb95617f84d77eb9b3c4f516f6d9 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 23 Dec 2019 12:42:24 -0500 Subject: [PATCH 274/282] P4 to Git Change 2049729 by skudchad@skudchad_test2_win_opencl on 2019/12/23 12:36:57 SWDEV-203814 - Change the return type of the hiprtcGetTypeName to have compatibility across ABIs ReviewBoardURL = http://ocltc.amd.com/reviews/r/18392/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_rtc.cpp#9 edit --- api/hip/hip_rtc.cpp | 54 ++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/api/hip/hip_rtc.cpp b/api/hip/hip_rtc.cpp index 4e2c99e6df..e3cfd3f885 100644 --- a/api/hip/hip_rtc.cpp +++ b/api/hip/hip_rtc.cpp @@ -109,62 +109,62 @@ uint32_t ProgramState::addNameExpression(const char* name_expression) { namespace hip_impl { -inline std::string demangle(const char* x) { +char* demangle(const char* loweredName) { #ifdef ATI_OS_LINUX - if (!x) { - return {}; + if (!loweredName) { + return nullptr; } - int s = 0; - std::unique_ptr tmp{ - __cxa_demangle(x, nullptr, nullptr, &s), - std::free}; - if (s != 0) { - return {}; + int status = 0; + char* demangledName = __cxa_demangle(loweredName, nullptr, nullptr, &status); + if (status != 0) { + return nullptr; } - return tmp.get(); + return demangledName; #else - return {}; + return nullptr; #endif } } // hip_impl -std::string handleMangledName(std::string name) { - std::string demangled; - demangled = hip_impl::demangle(name.c_str()); +static std::string handleMangledName(std::string name) { + std::string loweredName; + char* demangled = hip_impl::demangle(name.c_str()); + loweredName.assign(demangled == nullptr ? std::string() : demangled); + free(demangled); - if (demangled.empty()) { + if (loweredName.empty()) { return name; } - if (demangled.find(".kd") != std::string::npos) { + if (loweredName.find(".kd") != std::string::npos) { return {}; } - if (demangled.find("void ") == 0) { - demangled.erase(0, strlen("void ")); + if (loweredName.find("void ") == 0) { + loweredName.erase(0, strlen("void ")); } - auto dx{demangled.find_first_of("(<")}; + auto dx{loweredName.find_first_of("(<")}; if (dx == std::string::npos) { - return demangled; + return loweredName; } - if (demangled[dx] == '<') { - auto cnt{1u}; + if (loweredName[dx] == '<') { + uint32_t count = 1; do { ++dx; - cnt += (demangled[dx] == '<') ? 
1 : ((demangled[dx] == '>') ? -1 : 0); - } while (cnt); + count += (loweredName[dx] == '<') ? 1 : ((loweredName[dx] == '>') ? -1 : 0); + } while (count); - demangled.erase(++dx); + loweredName.erase(++dx); } else { - demangled.erase(dx); + loweredName.erase(dx); } - return demangled; + return loweredName; } From 9386736b41dedcaa0080ef8236d35f21a66c16ec Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 30 Dec 2019 16:46:39 -0500 Subject: [PATCH 275/282] P4 to Git Change 2051060 by michliao@hliao-dev-00-hip.rocm-workspace on 2019/12/30 16:42:07 SWDEV-145570 - Clean up after `hipError_t` unification. - Replace deprecated `hipError_t` enums. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_error.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_event.cpp#19 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#94 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#52 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#32 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/trace_helper.h#5 edit --- api/hip/hip_device_runtime.cpp | 2 +- api/hip/hip_error.cpp | 6 ------ api/hip/hip_event.cpp | 22 +++++++++++----------- api/hip/hip_memory.cpp | 8 ++++---- api/hip/hip_module.cpp | 2 +- api/hip/hip_stream.cpp | 6 +++--- api/hip/hip_texture.cpp | 4 ++-- api/hip/trace_helper.h | 3 --- 8 files changed, 22 insertions(+), 31 deletions(-) diff --git a/api/hip/hip_device_runtime.cpp b/api/hip/hip_device_runtime.cpp index a36cad8c3b..1abefdf609 100644 --- a/api/hip/hip_device_runtime.cpp +++ b/api/hip/hip_device_runtime.cpp @@ -535,7 +535,7 @@ hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* link amd_dev_obj2 = g_devices[device2]->devices()[0]; if (!amd_dev_obj1->findLinkTypeAndHopCount(amd_dev_obj2, linktype, hopcount)) { - HIP_RETURN(hipErrorInvalidResourceHandle); + HIP_RETURN(hipErrorInvalidHandle); } HIP_RETURN(hipSuccess); diff --git a/api/hip/hip_error.cpp b/api/hip/hip_error.cpp index 77b1b8148a..6c0a01e215 100644 --- a/api/hip/hip_error.cpp +++ b/api/hip/hip_error.cpp @@ -114,10 +114,6 @@ const char *hipGetErrorName(hipError_t hip_error) return "hipErrorIllegalAddress"; case hipErrorMissingConfiguration: return "hipErrorMissingConfiguration"; - case hipErrorMemoryAllocation: - return "hipErrorMemoryAllocation"; - case hipErrorInitializationError: - return "hipErrorInitializationError"; case hipErrorLaunchFailure: return "hipErrorLaunchFailure"; case hipErrorPriorLaunchFailure: @@ -140,8 +136,6 @@ const char *hipGetErrorName(hipError_t hip_error) return "hipErrorInvalidMemcpyDirection"; case hipErrorUnknown: return "hipErrorUnknown"; - case hipErrorInvalidResourceHandle: - return "hipErrorInvalidResourceHandle"; case hipErrorNotReady: return "hipErrorNotReady"; case hipErrorNoDevice: diff --git a/api/hip/hip_event.cpp b/api/hip/hip_event.cpp index f283aa6c5d..1de32d8fff 100644 --- a/api/hip/hip_event.cpp +++ b/api/hip/hip_event.cpp @@ -36,7 +36,7 @@ hipError_t 
Event::query() { amd::ScopedLock lock(lock_); if (event_ == nullptr) { - return hipErrorInvalidResourceHandle; + return hipErrorInvalidHandle; } return ready() ? hipSuccess : hipErrorNotReady; @@ -46,7 +46,7 @@ hipError_t Event::synchronize() { amd::ScopedLock lock(lock_); if (event_ == nullptr) { - return hipErrorInvalidResourceHandle; + return hipErrorInvalidHandle; } event_->awaitCompletion(); @@ -59,11 +59,11 @@ hipError_t Event::elapsedTime(Event& eStop, float& ms) { if (this == &eStop) { if (event_ == nullptr) { - return hipErrorInvalidResourceHandle; + return hipErrorInvalidHandle; } if (flags & hipEventDisableTiming) { - return hipErrorInvalidResourceHandle; + return hipErrorInvalidHandle; } if (!ready()) { @@ -77,11 +77,11 @@ hipError_t Event::elapsedTime(Event& eStop, float& ms) { if (event_ == nullptr || eStop.event_ == nullptr) { - return hipErrorInvalidResourceHandle; + return hipErrorInvalidHandle; } if ((flags | eStop.flags) & hipEventDisableTiming) { - return hipErrorInvalidResourceHandle; + return hipErrorInvalidHandle; } if (!ready() || !eStop.ready()) { @@ -173,7 +173,7 @@ hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { hipError_t ihipEventQuery(hipEvent_t event) { if (event == nullptr) { - return hipErrorInvalidResourceHandle; + return hipErrorInvalidHandle; } hip::Event* e = reinterpret_cast(event); @@ -197,7 +197,7 @@ hipError_t hipEventDestroy(hipEvent_t event) { HIP_INIT_API(hipEventDestroy, event); if (event == nullptr) { - HIP_RETURN(hipErrorInvalidResourceHandle); + HIP_RETURN(hipErrorInvalidHandle); } delete reinterpret_cast(event); @@ -209,7 +209,7 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { HIP_INIT_API(hipEventElapsedTime, ms, start, stop); if (start == nullptr || stop == nullptr) { - HIP_RETURN(hipErrorInvalidResourceHandle); + HIP_RETURN(hipErrorInvalidHandle); } if (ms == nullptr) { @@ -226,7 +226,7 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { 
HIP_INIT_API(hipEventRecord, event, stream); if (event == nullptr) { - HIP_RETURN(hipErrorInvalidResourceHandle); + HIP_RETURN(hipErrorInvalidHandle); } hip::Event* e = reinterpret_cast(event); @@ -249,7 +249,7 @@ hipError_t hipEventSynchronize(hipEvent_t event) { HIP_INIT_API(hipEventSynchronize, event); if (event == nullptr) { - HIP_RETURN(hipErrorInvalidResourceHandle); + HIP_RETURN(hipErrorInvalidHandle); } hip::Event* e = reinterpret_cast(event); diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 7e4b41aa5a..11e08384fc 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -70,11 +70,11 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) hip::host_context : hip::getCurrentContext(); if (amdContext == nullptr) { - return hipErrorMemoryAllocation; + return hipErrorOutOfMemory; } if (amdContext->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { - return hipErrorMemoryAllocation; + return hipErrorOutOfMemory; } *ptr = amd::SvmBuffer::malloc(*amdContext, flags, sizeBytes, amdContext->devices()[0]->info().memBaseAddrAlign_); @@ -347,7 +347,7 @@ hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t heigh device->info().memBaseAddrAlign_); if (*ptr == nullptr) { - return hipErrorMemoryAllocation; + return hipErrorOutOfMemory; } return hipSuccess; @@ -552,7 +552,7 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) constexpr bool forceAlloc = true; if (!mem->create(hostPtr, sysMemAlloc, skipAlloc, forceAlloc)) { mem->release(); - HIP_RETURN(hipErrorMemoryAllocation); + HIP_RETURN(hipErrorOutOfMemory); } for (const auto& device: hip::getCurrentContext()->devices()) { diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 91635fa12f..5d8774aea6 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -342,7 +342,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, // Capture the kernel arguments if (CL_SUCCESS != 
command->captureAndValidate()) { delete command; - return hipErrorMemoryAllocation; + return hipErrorOutOfMemory; } command->enqueue(); diff --git a/api/hip/hip_stream.cpp b/api/hip/hip_stream.cpp index c500c663f0..d01a863980 100644 --- a/api/hip/hip_stream.cpp +++ b/api/hip/hip_stream.cpp @@ -183,7 +183,7 @@ hipError_t hipStreamDestroy(hipStream_t stream) { HIP_INIT_API(hipStreamDestroy, stream); if (stream == nullptr) { - HIP_RETURN(hipErrorInvalidResourceHandle); + HIP_RETURN(hipErrorInvalidHandle); } amd::ScopedLock lock(streamSetLock); @@ -210,7 +210,7 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int } if (event == nullptr) { - HIP_RETURN(hipErrorInvalidResourceHandle); + HIP_RETURN(hipErrorInvalidHandle); } hip::Event* e = reinterpret_cast(event); @@ -252,7 +252,7 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback if(!event.setCallback(CL_COMPLETE, ihipStreamCallback, reinterpret_cast(cbo))) { command->release(); - return hipErrorInvalidResourceHandle; + return hipErrorInvalidHandle; } event.notifyCmdQueue(); diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index 8d7f7945bb..d48f5f2fcf 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -371,7 +371,7 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou if (!image->create()) { delete image; - HIP_RETURN(hipErrorMemoryAllocation); + HIP_RETURN(hipErrorOutOfMemory); } amd::Sampler* sampler = fillSamplerDescriptor(pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords); @@ -467,7 +467,7 @@ hipError_t ihipBindTexture(cl_mem_object_type type, type, memory->getMemFlags(), imageFormat, width, height, depth, rowPitch, slicePitch); if (!image->create()) { delete image; - return hipErrorMemoryAllocation; + return hipErrorOutOfMemory; } *offset = 0; diff --git a/api/hip/trace_helper.h b/api/hip/trace_helper.h index 650ee89422..5beb3f34e4 100644 --- a/api/hip/trace_helper.h 
+++ b/api/hip/trace_helper.h @@ -77,8 +77,6 @@ inline const char* ihipErrorString(hipError_t hip_error) { CASE_STR(hipErrorNotFound); CASE_STR(hipErrorIllegalAddress); CASE_STR(hipErrorMissingConfiguration); - CASE_STR(hipErrorMemoryAllocation); - CASE_STR(hipErrorInitializationError); CASE_STR(hipErrorLaunchFailure); CASE_STR(hipErrorPriorLaunchFailure); CASE_STR(hipErrorLaunchTimeOut); @@ -90,7 +88,6 @@ inline const char* ihipErrorString(hipError_t hip_error) { CASE_STR(hipErrorInvalidDevicePointer); CASE_STR(hipErrorInvalidMemcpyDirection); CASE_STR(hipErrorUnknown); - CASE_STR(hipErrorInvalidResourceHandle); CASE_STR(hipErrorNotReady); CASE_STR(hipErrorNoDevice); CASE_STR(hipErrorPeerAccessAlreadyEnabled); From a42f6272f5a55fb5ddb1ba2e31c6c18851323ea2 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 3 Jan 2020 16:02:59 -0500 Subject: [PATCH 276/282] P4 to Git Change 2052191 by skudchad@skudchad_test2_win_opencl on 2020/01/03 15:54:23 SWDEV-213000 - Backout change to notifyCmdQueue as the real issue has been identified Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#53 edit --- api/hip/hip_module.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/api/hip/hip_module.cpp b/api/hip/hip_module.cpp index 5d8774aea6..2d9fd4468c 100644 --- a/api/hip/hip_module.cpp +++ b/api/hip/hip_module.cpp @@ -354,11 +354,6 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, command->release(); - // FIXME: SWDEV-213000 - Force notifyCmdQueue to indicate immediate dispatch to HW - // This offsets the commandqueue timing and solves 213000. Investigation pending - - command->notifyCmdQueue(); - return hipSuccess; } From 3cbd083c8dcb3582cbb4d9b0082130b87df9694d Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 7 Jan 2020 15:59:02 -0500 Subject: [PATCH 277/282] P4 to Git Change 2053320 by vsytchen@vsytchen-hip-win10 on 2020/01/07 15:54:34 SWDEV-215533 - [HIP][Windows]Output mismatch with tex3D() 1. Implement hipMemcpy3DAsync(). 2. 
Add logic in hipMemcpy3D() to determine based on src/dst parameters if the user intended on calling cudaMemcpy3D() or cuMemcpy3D(). ReviewBoardURL = http://ocltc.amd.com/reviews/r/18444/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#38 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#36 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#95 edit --- api/hip/hip_hcc.def.in | 1 + api/hip/hip_hcc.map.in | 1 + api/hip/hip_memory.cpp | 344 +++++++++++++++++++++++++++++++++-------- 3 files changed, 284 insertions(+), 62 deletions(-) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index 6aba1f5a25..fd51a5701a 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -87,6 +87,7 @@ hipMemcpy2D hipMemcpy2DAsync hipMemcpy2DToArray hipMemcpy3D +hipMemcpy3DAsync hipMemcpyAsync hipMemcpyDtoD hipMemcpyDtoDAsync diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index f80d7db23a..c0e2bf9458 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -88,6 +88,7 @@ global: hipMemcpy2DAsync; hipMemcpy2DToArray; hipMemcpy3D; + hipMemcpy3DAsync; hipMemcpyAsync; hipMemcpyDtoD; hipMemcpyDtoDAsync; diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 11e08384fc..6b23913386 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -1107,95 +1107,315 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t HIP_RETURN(hipSuccess); } -hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { - HIP_INIT_API(hipMemcpy3D, p); - - hip::syncStreams(); - amd::HostQueue* queue = hip::getNullStream(); - - size_t byteSize; - size_t srcPitchInBytes; - size_t dstPitchInbytes; - void* srcPtr; - void* dstPtr; - size_t srcOrigin[3]; - size_t dstOrigin[3]; - size_t region[3]; - - region[2] = p->extent.depth; - region[1] = p->extent.height; - region[0] = p->extent.width; - srcOrigin[0] = p->srcPos.x; - srcOrigin[1] = p->srcPos.y; - srcOrigin[2] = 
p->srcPos.z; - dstOrigin[0] = p->dstPos.x; - dstOrigin[1] = p->dstPos.y; - dstOrigin[2] = p->dstPos.z; - - if (p->dstArray != nullptr) { - getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &byteSize); - dstPitchInbytes = p->dstArray->width * byteSize; - srcPitchInBytes = p->srcPtr.pitch; - srcPtr = (void*)p->srcPtr.ptr; - dstPtr = p->dstArray->data; +hipError_t ihipMemcpy3D_V1(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) { + const void* srcPtr = nullptr; + size_t srcElementSizeInBytes = sizeof(unsigned char); + size_t srcRowPitchInBytes = 0; + size_t srcSlicePitchInBytes = 0; + if (p->srcMemoryType == hipMemoryTypeHost) { + srcPtr = p->srcHost; + srcRowPitchInBytes = p->srcPitch; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcHeight; + } else if ((p->srcMemoryType == hipMemoryTypeDevice) || + (p->srcMemoryType == hipMemoryTypeUnified)) { + srcPtr = p->srcDevice; + srcRowPitchInBytes = p->srcPitch; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcHeight; + } else if (p->srcMemoryType == hipMemoryTypeArray) { + srcPtr = p->srcArray->data; + getByteSizeFromChannelFormatKind(p->srcArray->desc.f, &srcElementSizeInBytes); + srcElementSizeInBytes *= p->srcArray->NumChannels; + srcRowPitchInBytes = srcElementSizeInBytes * p->srcArray->width; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcArray->height; } else { - srcPitchInBytes = p->srcPtr.pitch; - dstPitchInbytes = p->dstPtr.pitch; - srcPtr = p->srcPtr.ptr; - dstPtr = p->dstPtr.ptr; + ShouldNotReachHere(); } - // Create buffer rectangle info structure + void* dstPtr = nullptr; + size_t dstElementSizeInBytes = sizeof(unsigned char); + size_t dstRowPitchInBytes = 0; + size_t dstSlicePitchInBytes = 0; + if (p->dstMemoryType == hipMemoryTypeHost) { + dstPtr = p->dstHost; + dstRowPitchInBytes = p->dstPitch; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstHeight; + } else if ((p->dstMemoryType == hipMemoryTypeDevice) || + (p->dstMemoryType == hipMemoryTypeUnified)) { + dstPtr 
= p->dstDevice; + dstRowPitchInBytes = p->dstPitch; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstHeight; + } else if (p->dstMemoryType == hipMemoryTypeArray) { + dstPtr = p->dstArray->data; + getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &dstElementSizeInBytes); + dstElementSizeInBytes *= p->dstArray->NumChannels; + dstRowPitchInBytes = dstElementSizeInBytes * p->dstArray->width; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstArray->height; + } else { + ShouldNotReachHere(); + } + + // For HIP arrays, srcXInBytes must be evenly divisible by the array element size. + if ((p->srcMemoryType == hipMemoryTypeArray) && + ((p->srcXInBytes % srcElementSizeInBytes) != 0)) { + return hipErrorInvalidValue; + } + + // If specified, srcPitch must be greater than or equal to WidthInBytes + srcXInBytes + if ((p->srcMemoryType != hipMemoryTypeArray) && + (p->srcPitch < (p->WidthInBytes + p->srcXInBytes))) { + return hipErrorInvalidValue; + } + + // If specified, srcHeight must be greater than or equal to Height + srcY + if ((p->srcMemoryType != hipMemoryTypeArray) && + (p->srcHeight < (p->Height + p->srcY))) { + return hipErrorInvalidValue; + } + + // For HIP arrays, dstXInBytes must be evenly divisible by the array element size. + if ((p->dstMemoryType == hipMemoryTypeArray) && + ((p->dstXInBytes % dstElementSizeInBytes) != 0)) { + return hipErrorInvalidValue; + } + + // If specified, srcPitch must be greater than or equal to WidthInBytes + srcXInBytes + if ((p->dstMemoryType != hipMemoryTypeArray) && + (p->dstPitch < (p->WidthInBytes + p->dstXInBytes))) { + return hipErrorInvalidValue; + } + + // If specified, srcHeight must be greater than or equal to Height + srcY + if ((p->dstMemoryType != hipMemoryTypeArray) && + (p->dstHeight < (p->Height + p->dstY))) { + return hipErrorInvalidValue; + } + + // The srcLOD and dstLOD members of the CUDA_MEMCPY3D structure must be set to 0. 
+ if ((p->srcLOD != 0) || (p->dstLOD != 0)) { + return hipErrorInvalidValue; + } + + size_t region[3]; + region[0] = p->WidthInBytes; + region[1] = p->Height; + region[2] = p->Depth; + + size_t srcOrigin[3]; + srcOrigin[0] = p->srcXInBytes; + srcOrigin[1] = p->srcY; + srcOrigin[2] = p->srcZ; + + size_t dstOrigin[3]; + dstOrigin[0] = p->dstXInBytes; + dstOrigin[1] = p->dstY; + dstOrigin[2] = p->dstZ; + amd::BufferRect srcRect; - amd::BufferRect dstRect; - size_t offset = 0; - amd::Memory* srcMemory = getMemoryObject(srcPtr, offset); - assert(offset == 0); - amd::Memory* dstMemory = getMemoryObject(dstPtr, offset); - assert(offset == 0); - - size_t src_slice_pitch = srcPitchInBytes * p->extent.height; - size_t dst_slice_pitch = dstPitchInbytes * p->extent.height; - - if (!srcRect.create(srcOrigin, region, srcPitchInBytes, src_slice_pitch) || - !dstRect.create(dstOrigin, region, dstPitchInbytes, dst_slice_pitch)) { - HIP_RETURN(hipErrorInvalidValue); + if (!srcRect.create(srcOrigin, region, srcRowPitchInBytes, srcSlicePitchInBytes)) { + return hipErrorInvalidValue; } - hipMemcpyKind kind = p->kind; + size_t srcMemoryOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcPtr, srcMemoryOffset); + amd::Coord3D srcStart(srcRect.start_ + srcMemoryOffset, 0, 0); + + amd::BufferRect dstRect; + if (!dstRect.create(dstOrigin, region, dstRowPitchInBytes, dstSlicePitchInBytes)) { + return hipErrorInvalidValue; + } + + size_t dstMemoryOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstPtr, dstMemoryOffset); + amd::Coord3D dstStart(dstRect.start_ + dstMemoryOffset, 0, 0); amd::Command* command = nullptr; amd::Command::EventWaitList waitList; - - amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D size(region[0], region[1], region[2]); + amd::HostQueue* queue = hip::getQueue(stream); + amd::Coord3D regionSize(region[0], region[1], region[2]); if (((srcMemory == nullptr) && (dstMemory == nullptr)) || - (kind == 
hipMemcpyHostToHost)) { + (p->kind == hipMemcpyHostToHost)) { memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]); - HIP_RETURN(hipSuccess); + return hipSuccess; } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, - *dstMemory->asBuffer(), srcStart, size, srcPtr, srcRect, dstRect); + *dstMemory->asBuffer(), srcStart, regionSize, srcPtr, srcRect, dstRect); } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, - *srcMemory->asBuffer(), srcStart, size, dstPtr, srcRect, dstRect); + *srcMemory->asBuffer(), srcStart, regionSize, dstPtr, srcRect, dstRect); } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, - *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, size, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, regionSize, srcRect, dstRect); } if (command == nullptr) { - HIP_RETURN(hipErrorOutOfMemory); + return hipErrorOutOfMemory; } command->enqueue(); - command->awaitCompletion(); + if (!isAsync) { + command->awaitCompletion(); + } command->release(); - HIP_RETURN(hipSuccess); + return hipSuccess; +} + +hipError_t ihipMemcpy3D_V2(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) { + void* srcPtr = nullptr; + size_t srcElementSizeInBytes = sizeof(unsigned char); + size_t srcRowPitchInBytes = 0; + size_t srcSlicePitchInBytes = 0; + if ((p->srcArray != nullptr) && (p->srcPtr.ptr == nullptr)) { + srcPtr = p->srcArray->data; + getByteSizeFromChannelFormatKind(p->srcArray->desc.f, &srcElementSizeInBytes); + srcElementSizeInBytes *= p->srcArray->NumChannels; + srcRowPitchInBytes = srcElementSizeInBytes * p->srcArray->width; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcArray->height; + } else if ((p->srcArray == nullptr)
&& (p->srcPtr.ptr != nullptr)) { + srcPtr = p->srcPtr.ptr; + srcRowPitchInBytes = p->srcPtr.pitch; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcPtr.ysize; + } else { + ShouldNotReachHere(); + } + + void* dstPtr = nullptr; + size_t dstElementSizeInBytes = sizeof(unsigned char); + size_t dstRowPitchInBytes = 0; + size_t dstSlicePitchInBytes = 0; + if ((p->dstArray != nullptr) && (p->dstPtr.ptr == nullptr)) { + dstPtr = p->dstArray->data; + getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &dstElementSizeInBytes); + dstElementSizeInBytes *= p->dstArray->NumChannels; + dstRowPitchInBytes = dstElementSizeInBytes * p->dstArray->width; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstArray->height; + } else if ((p->dstArray == nullptr) && (p->dstPtr.ptr != nullptr)) { + dstPtr = p->dstPtr.ptr; + dstRowPitchInBytes = p->dstPtr.pitch; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstPtr.ysize; + } else { + ShouldNotReachHere(); + } + + // If the source and destination are both arrays, they must have the same element size. + if (((p->srcArray != nullptr) && (p->dstArray != nullptr)) && + (srcElementSizeInBytes != dstElementSizeInBytes)) { + return hipErrorInvalidValue; + } + + // If a HIP array is participating in the copy, the extent is defined in terms of that array's elements. + // If no HIP array is participating in the copy, the extent is defined in elements of unsigned char. + size_t region[3]; + if (p->srcArray != nullptr) { + region[0] = srcRowPitchInBytes; + } else if (p->dstArray != nullptr) { + region[0] = dstRowPitchInBytes; + } else { + region[0] = sizeof(unsigned char) * p->extent.width; + } + region[1] = p->extent.height; + region[2] = p->extent.depth; + + // The offset into the object is defined in units of the object's elements.
+ size_t srcOrigin[3]; + srcOrigin[0] = srcElementSizeInBytes * p->srcPos.x; + srcOrigin[1] = p->srcPos.y; + srcOrigin[2] = p->srcPos.z; + + amd::BufferRect srcRect; + if (!srcRect.create(srcOrigin, region, srcRowPitchInBytes, srcSlicePitchInBytes)) { + return hipErrorInvalidValue; + } + + size_t srcMemoryOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcPtr, srcMemoryOffset); + amd::Coord3D srcStart(srcRect.start_ + srcMemoryOffset, 0, 0); + + size_t dstOrigin[3]; + dstOrigin[0] = dstElementSizeInBytes * p->dstPos.x; + dstOrigin[1] = p->dstPos.y; + dstOrigin[2] = p->dstPos.z; + + amd::BufferRect dstRect; + if (!dstRect.create(dstOrigin, region, dstRowPitchInBytes, dstSlicePitchInBytes)) { + return hipErrorInvalidValue; + } + + size_t dstMemoryOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstPtr, dstMemoryOffset); + amd::Coord3D dstStart(dstRect.start_ + dstMemoryOffset, 0, 0); + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + amd::HostQueue* queue = hip::getQueue(stream); + amd::Coord3D regionSize(region[0], region[1], region[2]); + + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (p->kind == hipMemcpyHostToHost)) { + memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]); + return hipSuccess; + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstMemory->asBuffer(), srcStart, regionSize, srcPtr, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcMemory->asBuffer(), srcStart, regionSize, dstPtr, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, regionSize, + srcRect, dstRect); + } + + if
(command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) { + // Having src and dst be an array is ambiguous, since we can't tell if the user intended to call hipMemcpy3D_V1() or hipMemcpy3D_V2(). + // For now hope that we never encounter this case. + assert((p->srcArray == nullptr) || (p->dstArray == nullptr)); + + // When calling hipMemcpy3D_V1(), the user must specify + // one of srcHost, srcDevice or srcArray and + // one of dstHost, dstDevice or dstArray. + if (((p->srcHost != nullptr) || (p->srcDevice != nullptr) || (p->srcArray != nullptr)) && + ((p->dstHost != nullptr) || (p->dstDevice != nullptr) || (p->dstArray != nullptr))) { + return ihipMemcpy3D_V1(p, stream, isAsync); + } + + // When calling hipMemcpy3D_V2(), the user must specify + // one of srcArray or srcPtr and + // one of dstArray or dstPtr. + if (((p->srcArray != nullptr) || (p->srcPtr.ptr != nullptr)) && + ((p->dstArray != nullptr) || (p->dstPtr.ptr != nullptr))) { + return ihipMemcpy3D_V2(p, stream, isAsync); + } + + // If we got here, then the user specified an invalid combination of src/dst parameters.
+ return hipErrorInvalidValue; +} + +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { + HIP_INIT_API(hipMemcpy3D, p); + + HIP_RETURN(ihipMemcpy3D(p, nullptr)); +} + +hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream) { + HIP_INIT_API(hipMemcpy3DAsync, p, stream); + + HIP_RETURN(ihipMemcpy3D(p, stream, true)); } hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, From d8c1215113a147b5dc197ac57eddc1be8b0f29e9 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 10 Jan 2020 16:43:46 -0500 Subject: [PATCH 278/282] P4 to Git Change 2055228 by jujiang@JJ-HIP on 2020/01/10 16:36:01 SWDEV-205925 - Update HIP texture APIs for issue in hipTexRefSetAddress in HIP/PAL on Windows -Update HIP texture function parameter to use hipChannelFormatDesc& desc http://ocltc.amd.com/reviews/r/18456/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_texture.cpp#33 edit --- api/hip/hip_texture.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/api/hip/hip_texture.cpp b/api/hip/hip_texture.cpp index d48f5f2fcf..0d7683a2d2 100644 --- a/api/hip/hip_texture.cpp +++ b/api/hip/hip_texture.cpp @@ -431,7 +431,7 @@ hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipError_t ihipBindTexture(cl_mem_object_type type, size_t* offset, textureReference* tex, const void* devPtr, - const hipChannelFormatDesc* desc, size_t width, size_t height, + const hipChannelFormatDesc& desc, size_t width, size_t height, size_t pitch) { if (tex == nullptr) { return hipErrorInvalidImage; @@ -443,9 +443,9 @@ hipError_t ihipBindTexture(cl_mem_object_type type, size_t depth = 0; size_t slicePitch = 0; - getChannelOrderAndType(*desc, hipReadModeElementType, + getChannelOrderAndType(desc, hipReadModeElementType, &image_format.image_channel_order, &image_format.image_channel_data_type); - getByteSizeFromChannelFormatKind(desc->f, &byteSize); + 
getByteSizeFromChannelFormatKind(desc.f, &byteSize); const amd::Image::Format imageFormat(image_format); amd::Memory* memory = getMemoryObject(devPtr, *offset); @@ -482,13 +482,13 @@ hipError_t ihipBindTexture(cl_mem_object_type type, case CL_MEM_OBJECT_IMAGE1D: resDesc.resType = hipResourceTypeLinear; resDesc.res.linear.devPtr = const_cast(devPtr); - resDesc.res.linear.desc = *desc; + resDesc.res.linear.desc = desc; resDesc.res.linear.sizeInBytes = image->getSize(); break; case CL_MEM_OBJECT_IMAGE2D: resDesc.resType = hipResourceTypePitch2D; resDesc.res.pitch2D.devPtr = const_cast(devPtr); - resDesc.res.pitch2D.desc = *desc; + resDesc.res.pitch2D.desc = desc; resDesc.res.pitch2D.width = width; resDesc.res.pitch2D.height = height; resDesc.res.pitch2D.pitchInBytes = pitch; @@ -496,7 +496,7 @@ hipError_t ihipBindTexture(cl_mem_object_type type, case CL_MEM_OBJECT_IMAGE3D: resDesc.resType = hipResourceTypeArray; resDesc.res.array.array = (hipArray*)malloc(sizeof(hipArray)); - resDesc.res.array.array->desc = *desc; + resDesc.res.array.array->desc = desc; resDesc.res.array.array->width = width; resDesc.res.array.array->height = height; resDesc.res.array.array->depth = depth; @@ -534,7 +534,7 @@ hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* dev &image_format.image_channel_order, &image_format.image_channel_data_type); const amd::Image::Format imageFormat(image_format); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, size / imageFormat.getElementSize(), 1, size)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size / imageFormat.getElementSize(), 1, size)); } hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, @@ -542,7 +542,7 @@ hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* d size_t pitch) { HIP_INIT_API(NONE, offset, tex, devPtr, desc, width, height, pitch); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, 
offset, tex, devPtr, desc, width, height, pitch)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, *desc, width, height, pitch)); } hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, @@ -561,7 +561,7 @@ hipError_t ihipBindTextureImpl(TlsData* tls, int dim, enum hipTextureReadMode re assert(1 == dim); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, desc, size, 1, 0)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size, 1, 0)); } hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, @@ -588,7 +588,7 @@ hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureRead HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, &desc, array->width, + HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, desc, array->width, array->height, array->depth)); } @@ -754,7 +754,7 @@ hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsi default: HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, &array->desc, array->width, + HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, array->desc, array->width, array->height, array->depth)); } @@ -792,7 +792,7 @@ hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDevicep &image_format.image_channel_order, &image_format.image_channel_data_type); const amd::Image::Format imageFormat(image_format); - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, &tex->channelDesc, size / imageFormat.getElementSize(), 1, size)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, tex->channelDesc, size / imageFormat.getElementSize(), 1, size)); } hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, @@ -804,5 +804,5 @@ hipError_t 
hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPT } size_t offset; - HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, &tex->channelDesc, desc->Width, desc->Height, pitch)); + HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, tex->channelDesc, desc->Width, desc->Height, pitch)); } From 97a6164702e827a0b49f14ded10f871954f19709 Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 15 Jan 2020 18:32:44 -0500 Subject: [PATCH 279/282] P4 to Git Change 2057410 by skudchad@skudchad_rocm on 2020/01/15 18:24:33 SWDEV-219341 - Free memory correctly. hiprtcProgram is a pointer by itself. So release *prog Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_rtc.cpp#10 edit --- api/hip/hip_rtc.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/hip/hip_rtc.cpp b/api/hip/hip_rtc.cpp index e3cfd3f885..093c828975 100644 --- a/api/hip/hip_rtc.cpp +++ b/api/hip/hip_rtc.cpp @@ -312,7 +312,9 @@ hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) { if (prog == NULL) { HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } - amd::Program* program = as_amd(reinterpret_cast(prog)); + + // Release program. hiprtcProgram is a double pointer so free *prog + amd::Program* program = as_amd(reinterpret_cast(*prog)); program->release(); From d18baabc7078e82852af53d7071cd2717a5cecb6 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 20 Jan 2020 19:05:16 -0500 Subject: [PATCH 280/282] P4 to Git Change 2060446 by vsytchen@vsytchen-ocl-win10-2 on 2020/01/20 18:58:37 SWDEV-215533 - [HIP][Windows]Output mismatch with tex3D() When calculating the pitch for hipMallocPitch() align it to the device's image pitch alignment. If we don't, PAL backend will overalign the pitch when creating an image buffer. ReviewBoardURL = http://ocltc.amd.com/reviews/r/18496/diff/ Affected files ... ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#96 edit --- api/hip/hip_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 6b23913386..9cd5034eeb 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -340,7 +340,7 @@ hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t heigh const amd::Image::Format imageFormat(*image_format); - *pitch = width * imageFormat.getElementSize(); + *pitch = amd::alignUp(width * imageFormat.getElementSize(), device->info().imagePitchAlignment_); size_t sizeBytes = *pitch * height * depth; *ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), 0, sizeBytes, From ca165f2fbb98d365616a0df357262898668399b6 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 21 Jan 2020 16:52:40 -0500 Subject: [PATCH 281/282] P4 to Git Change 2061101 by skudchad@skudchad_test2_win_opencl on 2020/01/21 16:47:25 SWDEV-219917 - [VDI Cleanup] Remove some direct OpenCL references, introduce a common functionality. ReviewBoardURL = http://ocltc.amd.com/reviews/r/18488/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/build/Makefile.hip#30 edit ... //depot/stg/opencl/drivers/opencl/api/hip/fixme.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#51 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hiprtc_internal.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#190 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_common.hpp#25 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#61 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#75 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#31 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd.cpp#36 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.cpp#3 edit ... 
//depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#54 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#610 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#180 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#150 edit ... //depot/stg/opencl/drivers/opencl/runtime/include/vdi_agent_amd.h#1 add ... //depot/stg/opencl/drivers/opencl/runtime/include/vdi_common.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/os/os.hpp#32 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os_posix.cpp#49 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os_win32.cpp#50 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#54 edit ... //depot/stg/opencl/drivers/opencl/runtime/runtimedefs#54 edit --- api/hip/fixme.cpp | 2 +- api/hip/hip_internal.hpp | 5 ++--- api/hip/hiprtc_internal.hpp | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/api/hip/fixme.cpp b/api/hip/fixme.cpp index 5d7f8144f7..95880cd630 100644 --- a/api/hip/fixme.cpp +++ b/api/hip/fixme.cpp @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "cl_common.hpp" +#include "vdi_common.hpp" #include KHRicdVendorDispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0}; diff --git a/api/hip/hip_internal.hpp b/api/hip/hip_internal.hpp index 7e1b5e941c..dd6639488f 100644 --- a/api/hip/hip_internal.hpp +++ b/api/hip/hip_internal.hpp @@ -23,7 +23,7 @@ THE SOFTWARE. #ifndef HIP_SRC_HIP_INTERNAL_H #define HIP_SRC_HIP_INTERNAL_H -#include "cl_common.hpp" +#include "vdi_common.hpp" #include "hip_prof_api.h" #include "trace_helper.h" #include "utils/debug.hpp" @@ -34,7 +34,6 @@ THE SOFTWARE. #include - /*! 
IHIP IPC MEMORY Structure */ #define IHIP_IPC_MEM_HANDLE_SIZE 32 #define IHIP_IPC_MEM_RESERVED_SIZE LP64_SWITCH(28,24) @@ -55,7 +54,7 @@ typedef struct ihipIpcMemHandle_st { #define HIP_INIT_API(cid, ...) \ ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ amd::Thread* thread = amd::Thread::current(); \ - if (!CL_CHECK_THREAD(thread)) { \ + if (!VDI_CHECK_THREAD(thread)) { \ HIP_RETURN(hipErrorOutOfMemory); \ } \ HIP_INIT() \ diff --git a/api/hip/hiprtc_internal.hpp b/api/hip/hiprtc_internal.hpp index e97ac9eb09..5f1838ffac 100644 --- a/api/hip/hiprtc_internal.hpp +++ b/api/hip/hiprtc_internal.hpp @@ -29,8 +29,8 @@ THE SOFTWARE. #define HIPRTC_INIT_API(...) \ ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ amd::Thread* thread = amd::Thread::current(); \ - if (!CL_CHECK_THREAD(thread)) { \ - HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \ + if (!VDI_CHECK_THREAD(thread)) { \ + HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \ } \ HIP_INIT(); From 2fb1d6a60424686d4ed1b880eb728afcc2f9a158 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 23 Jan 2020 16:13:57 -0500 Subject: [PATCH 282/282] P4 to Git Change 2062258 by cpaquot@cpaquot-ocl-lc-lnx on 2020/01/23 16:04:59 SWDEV-145570 - [hip] hipMallocManaged Basic implementation of hipMallocManaged behaving like hipHostMalloc for hipMemAttachGlobal. Return hipErrorInvalidValue for the hipMemAttachHost for now. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#39 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#37 edit ... 
//depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#97 edit --- api/hip/hip_hcc.def.in | 1 + api/hip/hip_hcc.map.in | 1 + api/hip/hip_memory.cpp | 11 +++++++++++ 3 files changed, 13 insertions(+) diff --git a/api/hip/hip_hcc.def.in b/api/hip/hip_hcc.def.in index fd51a5701a..54492eb48e 100644 --- a/api/hip/hip_hcc.def.in +++ b/api/hip/hip_hcc.def.in @@ -75,6 +75,7 @@ hipIpcOpenMemHandle hipMalloc hipMalloc3D hipMalloc3DArray +hipMallocManaged hipArrayCreate hipArray3DCreate hipMallocArray diff --git a/api/hip/hip_hcc.map.in b/api/hip/hip_hcc.map.in index c0e2bf9458..554da1d41b 100644 --- a/api/hip/hip_hcc.map.in +++ b/api/hip/hip_hcc.map.in @@ -76,6 +76,7 @@ global: hipMalloc; hipMalloc3D; hipMalloc3DArray; + hipMallocManaged; hipArrayCreate; hipArray3DCreate; hipMallocArray; diff --git a/api/hip/hip_memory.cpp b/api/hip/hip_memory.cpp index 9cd5034eeb..a61b4d83e1 100644 --- a/api/hip/hip_memory.cpp +++ b/api/hip/hip_memory.cpp @@ -214,6 +214,17 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_RETURN(ihipMalloc(ptr, sizeBytes, ihipFlags)); } +hipError_t hipMallocManaged(void** devPtr, size_t size, + unsigned int flags) { + HIP_INIT_API(hipMallocManaged, devPtr, size, flags); + + if (flags != hipMemAttachGlobal) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipMalloc(devPtr, size, CL_MEM_SVM_FINE_GRAIN_BUFFER)); +} + hipError_t hipFree(void* ptr) { HIP_INIT_API(hipFree, ptr);