d09ca72f74
SWDEV-79445 - OCL generic changes and code clean-up 1. This change replaces the use of std::map with std::unordered_map to improve lookup/insert time. 2. Replace the use of std::make_pair and std::pair constructor with uniform initialization for cleaner code. 3. Replace the use of std::Container::iterator type with the auto keyword for cleaner code. 4. Use range based for loops where needed. ReviewBoardURL = http://ocltc.amd.com/reviews/r/14517/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#58 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10_amd.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11_amd.hpp#13 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#34 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9_amd.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#57 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#46 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#72 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#216 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#297 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#59 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#158 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#587 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#322 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#46 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#237 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#70 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#242 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#415 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#143 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#79 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#59 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#60 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#84 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#46 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/CMakeLists.txt#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccounters.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#10 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#81 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#81 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#89 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#49 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#129 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#102 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#91 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#43 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#17 edit
372 lines
10 KiB
C++
372 lines
10 KiB
C++
//
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#include "platform/context.hpp"
#include "amdocl/cl_gl_amd.hpp"
#include "amdocl/cl_common.hpp"
#include "platform/commandqueue.hpp"

#include <algorithm>
#include <functional>
#include <new>
|
|
|
|
#ifdef _WIN32
|
|
#include <d3d10_1.h>
|
|
#include <dxgi.h>
|
|
#include "CL/cl_d3d10.h"
|
|
#include "CL/cl_d3d11.h"
|
|
#include "CL/cl_dx9_media_sharing.h"
|
|
#endif //_WIN32
|
|
|
|
#ifdef WITH_LIQUID_FLASH
|
|
#include "lf.h"
|
|
#endif
|
|
|
|
namespace amd {
|
|
|
|
// Builds a context over `devices`, taking a reference on each one.
//
// While walking the device list the constructor also records:
//  - the first device that reports a custom host allocator, and
//  - every device that reports SVM support (in input order).
//
// If more than one SVM device was collected, the list is then reordered so
// that a CPU device is not first, and so that a GPU without fine-grained
// system support is moved to the front when the current front has it.
Context::Context(const std::vector<Device*>& devices, const Info& info)
    : devices_(devices),
      info_(info),
      properties_(NULL),
      glenv_(NULL),
      customHostAllocDevice_(NULL) {
  for (const auto& current : devices) {
    current->retain();

    // Only the first device with a custom host allocator is remembered.
    if (customHostAllocDevice_ == NULL && current->customHostAllocator()) {
      customHostAllocDevice_ = current;
    }

    if (current->svmSupport()) {
      svmAllocDevice_.push_back(current);
    }
  }

  // Ordering only matters when there are several SVM-capable devices.
  if (svmAllocDevice_.size() <= 1) {
    return;
  }

  // Refers to the first slot of the list, whatever device ends up in it.
  auto& head = svmAllocDevice_.front();

  // Make sure the CPU is the last device to do allocation.
  if (head->type() == CL_DEVICE_TYPE_CPU) {
    std::swap(head, svmAllocDevice_.back());
  }

  const uint headIsFineGrained = head->isFineGrainedSystem(true);
  if (!headIsFineGrained) {
    return;
  }

  // Allocation must happen on a fine-grained-system incapable GPU first:
  // promote the first such device to the front of the list.
  for (auto& candidate : svmAllocDevice_) {
    if (candidate->type() != CL_DEVICE_TYPE_GPU) {
      continue;
    }
    if (!candidate->isFineGrainedSystem(true)) {
      std::swap(head, candidate);
      break;
    }
  }
}
|
|
|
|
// Tears the context down: detaches any external (GL / D3D10 / D3D11) device
// binding, frees the copied property list and the GL function table, and drops
// the reference that the constructor took on every device.
Context::~Context() {
  static const bool VALIDATE_ONLY = false;

  // Dissociate OCL context with any external device.
  // NOTE(review): create() also binds for the D3D9 flags, but they are not part
  // of this mask - confirm whether D3D9 bindings need an explicit unbind.
  if (info_.flags_ & (GLDeviceKhr | D3D10DeviceKhr | D3D11DeviceKhr)) {
    for (const auto& device : devices_) {
      device->unbindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY);
    }
  }

  // Deleting a null pointer is a no-op, so no guards are needed here.
  delete[] properties_;
  delete glenv_;
  glenv_ = NULL;

  // Release the per-device references. A range-based loop is used instead of
  // std::for_each + std::mem_fun, which was removed from the standard library
  // in C++17.
  for (const auto& device : devices_) {
    device->release();
  }

#ifdef WITH_LIQUID_FLASH
  lfTerminate();
#endif
}
|
|
|
|
int Context::checkProperties(const cl_context_properties* properties, Context::Info* info) {
|
|
cl_platform_id pfmId = 0;
|
|
uint count = 0;
|
|
|
|
const struct Element {
|
|
intptr_t name;
|
|
void* ptr;
|
|
}* p = reinterpret_cast<const Element*>(properties);
|
|
|
|
// Clear the context infor structure
|
|
::memset(info, 0, sizeof(Context::Info));
|
|
|
|
if (properties == NULL) {
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
// Process all properties
|
|
while (p->name != 0) {
|
|
switch (p->name) {
|
|
case CL_CONTEXT_INTEROP_USER_SYNC:
|
|
if (p->ptr == reinterpret_cast<void*>(CL_TRUE)) {
|
|
info->flags_ |= InteropUserSync;
|
|
}
|
|
break;
|
|
#ifdef _WIN32
|
|
case CL_CONTEXT_D3D10_DEVICE_KHR:
|
|
if (p->ptr == NULL) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
info->hDev_[D3D10DeviceKhrIdx] = p->ptr;
|
|
info->flags_ |= D3D10DeviceKhr;
|
|
break;
|
|
case CL_CONTEXT_D3D11_DEVICE_KHR:
|
|
if (p->ptr == NULL) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
info->hDev_[D3D11DeviceKhrIdx] = p->ptr;
|
|
info->flags_ |= D3D11DeviceKhr;
|
|
break;
|
|
case CL_CONTEXT_ADAPTER_D3D9_KHR:
|
|
if (p->ptr == NULL) { // not supported for xp
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
info->hDev_[D3D9DeviceKhrIdx] = p->ptr;
|
|
info->flags_ |= D3D9DeviceKhr;
|
|
break;
|
|
case CL_CONTEXT_ADAPTER_D3D9EX_KHR:
|
|
if (p->ptr == NULL) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
info->hDev_[D3D9DeviceEXKhrIdx] = p->ptr;
|
|
info->flags_ |= D3D9DeviceEXKhr;
|
|
break;
|
|
case CL_CONTEXT_ADAPTER_DXVA_KHR:
|
|
if (p->ptr == NULL) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
info->hDev_[D3D9DeviceVAKhrIdx] = p->ptr;
|
|
info->flags_ |= D3D9DeviceVAKhr;
|
|
break;
|
|
#endif //_WIN32
|
|
|
|
case CL_EGL_DISPLAY_KHR:
|
|
info->flags_ |= EGLDeviceKhr;
|
|
|
|
#ifdef _WIN32
|
|
case CL_WGL_HDC_KHR:
|
|
#endif //_WIN32
|
|
|
|
#if defined(__linux__)
|
|
case CL_GLX_DISPLAY_KHR:
|
|
#endif // linux
|
|
info->hDev_[GLDeviceKhrIdx] = p->ptr;
|
|
|
|
#if defined(__APPLE__) || defined(__MACOSX)
|
|
case CL_CGL_SHAREGROUP_KHR:
|
|
Unimplemented();
|
|
break;
|
|
#endif //__APPLE__ || MACOS
|
|
|
|
case CL_GL_CONTEXT_KHR:
|
|
if (p->ptr == NULL) {
|
|
return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR;
|
|
}
|
|
if (p->name == CL_GL_CONTEXT_KHR) {
|
|
info->hCtx_ = p->ptr;
|
|
}
|
|
info->flags_ |= GLDeviceKhr;
|
|
break;
|
|
case CL_CONTEXT_PLATFORM:
|
|
pfmId = reinterpret_cast<cl_platform_id>(p->ptr);
|
|
if ((NULL != pfmId) && (AMD_PLATFORM != pfmId)) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
break;
|
|
case CL_CONTEXT_OFFLINE_DEVICES_AMD:
|
|
if (p->ptr != reinterpret_cast<void*>(1)) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
// Set the offline device flag
|
|
info->flags_ |= OfflineDevices;
|
|
break;
|
|
case CL_CONTEXT_COMMAND_INTERCEPT_CALLBACK_AMD:
|
|
// Set the command intercept flag
|
|
info->commandIntercept_ = (cl_int(CL_CALLBACK*)(cl_event, cl_int*))p->ptr;
|
|
info->flags_ |= CommandIntercept;
|
|
break;
|
|
default:
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
p++;
|
|
count++;
|
|
}
|
|
|
|
info->propertiesSize_ = count * sizeof(Element) + sizeof(intptr_t);
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
int Context::create(const intptr_t* properties) {
|
|
static const bool VALIDATE_ONLY = false;
|
|
int result = CL_SUCCESS;
|
|
|
|
if (properties != NULL) {
|
|
properties_ = new cl_context_properties[info().propertiesSize_ / sizeof(cl_context_properties)];
|
|
if (properties_ == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
::memcpy(properties_, properties, info().propertiesSize_);
|
|
}
|
|
|
|
// Check if OCL context can be associated with any external device
|
|
if (info_.flags_ & (D3D10DeviceKhr | D3D11DeviceKhr | GLDeviceKhr | D3D9DeviceKhr |
|
|
D3D9DeviceEXKhr | D3D9DeviceVAKhr)) {
|
|
// Loop through all devices
|
|
for (const auto& it : devices_) {
|
|
if (!it->bindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY)) {
|
|
result = CL_INVALID_VALUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check if the device binding wasn't successful
|
|
if (result != CL_SUCCESS) {
|
|
if (info_.flags_ & GLDeviceKhr) {
|
|
result = CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR;
|
|
} else if (info_.flags_ & D3D10DeviceKhr) {
|
|
// return CL_INVALID_VALUE; // FIXME_odintsov: CL_INVALID_D3D_INTEROP;
|
|
} else if (info_.flags_ & D3D11DeviceKhr) {
|
|
// return CL_INVALID_VALUE; // FIXME_odintsov: CL_INVALID_D3D_INTEROP;
|
|
} else if (info_.flags_ & (D3D9DeviceKhr | D3D9DeviceEXKhr | D3D9DeviceVAKhr)) {
|
|
// return CL_INVALID_DX9_MEDIA_ADAPTER_KHR;
|
|
}
|
|
} else {
|
|
if (info_.flags_ & GLDeviceKhr) {
|
|
// Init context for GL interop
|
|
if (glenv_ == NULL) {
|
|
HMODULE h = (HMODULE)Os::loadLibrary(
|
|
#ifdef _WIN32
|
|
"OpenGL32.dll"
|
|
#else //!_WIN32
|
|
"libGL.so.1"
|
|
#endif //!_WIN32
|
|
);
|
|
|
|
if (h && (glenv_ = new GLFunctions(h, (info_.flags_ & Flags::EGLDeviceKhr) != 0))) {
|
|
if (!glenv_->init(reinterpret_cast<intptr_t>(info_.hDev_[GLDeviceKhrIdx]),
|
|
reinterpret_cast<intptr_t>(info_.hCtx_))) {
|
|
delete glenv_;
|
|
glenv_ = NULL;
|
|
result = CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef WITH_LIQUID_FLASH
|
|
lfInit();
|
|
#endif
|
|
|
|
return result;
|
|
}
|
|
|
|
void* Context::hostAlloc(size_t size, size_t alignment, bool atomics) const {
|
|
if (customHostAllocDevice_ != NULL) {
|
|
return customHostAllocDevice_->hostAlloc(size, alignment, atomics);
|
|
}
|
|
return AlignedMemory::allocate(size, alignment);
|
|
}
|
|
|
|
void Context::hostFree(void* ptr) const {
|
|
if (customHostAllocDevice_ != NULL) {
|
|
customHostAllocDevice_->hostFree(ptr);
|
|
return;
|
|
}
|
|
AlignedMemory::deallocate(ptr);
|
|
}
|
|
|
|
void* Context::svmAlloc(size_t size, size_t alignment, cl_svm_mem_flags flags) {
|
|
unsigned int numSVMDev = svmAllocDevice_.size();
|
|
if (numSVMDev < 1) {
|
|
return NULL;
|
|
}
|
|
|
|
if (svmAllocDevice_.front()->type() == CL_DEVICE_TYPE_CPU) {
|
|
return AlignedMemory::allocate(size, alignment);
|
|
} else {
|
|
void* svmPtrAlloced = NULL;
|
|
void* tempPtr = NULL;
|
|
|
|
amd::ScopedLock lock(&ctxLock_);
|
|
for (const auto& dev : svmAllocDevice_) {
|
|
if (dev->type() == CL_DEVICE_TYPE_GPU) {
|
|
// check if the device support svm platform atomics,
|
|
// skipped allocation for platform atomics if not supported by this device
|
|
if ((flags & CL_MEM_SVM_ATOMICS) &&
|
|
!(dev->info().svmCapabilities_ & CL_DEVICE_SVM_ATOMICS)) {
|
|
continue;
|
|
}
|
|
svmPtrAlloced = dev->svmAlloc(*this, size, alignment, flags, svmPtrAlloced);
|
|
if (svmPtrAlloced == NULL) {
|
|
return NULL;
|
|
}
|
|
}
|
|
}
|
|
return svmPtrAlloced;
|
|
}
|
|
}
|
|
|
|
void Context::svmFree(void* ptr) const {
|
|
if (svmAllocDevice_.front()->type() == CL_DEVICE_TYPE_CPU) {
|
|
AlignedMemory::deallocate(ptr);
|
|
return;
|
|
}
|
|
|
|
amd::ScopedLock lock(&ctxLock_);
|
|
for (const auto& dev : svmAllocDevice_) {
|
|
if (dev->type() == CL_DEVICE_TYPE_GPU) {
|
|
dev->svmFree(ptr);
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
bool Context::containsDevice(const Device* device) const {
|
|
|
|
for (const auto& it : devices_) {
|
|
if (device == it || it->isAncestor(device)) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Looks up the default device queue recorded for `dev`.
// Returns NULL when the context holds no queue information for that device.
DeviceQueue* Context::defDeviceQueue(const Device& dev) const {
  const auto entry = deviceQueues_.find(&dev);
  if (entry == deviceQueues_.cend()) {
    return NULL;
  }
  return entry->second.defDeviceQueue_;
}
|
|
|
|
bool Context::isDevQueuePossible(const Device& dev) {
|
|
return (deviceQueues_[&dev].deviceQueueCnt_ < dev.info().maxOnDeviceQueues_) ? true : false;
|
|
}
|
|
|
|
void Context::addDeviceQueue(const Device& dev, DeviceQueue* queue, bool defDevQueue) {
|
|
DeviceQueueInfo& info = deviceQueues_[&dev];
|
|
info.deviceQueueCnt_++;
|
|
if (defDevQueue) {
|
|
info.defDeviceQueue_ = queue;
|
|
}
|
|
}
|
|
|
|
void Context::removeDeviceQueue(const Device& dev, DeviceQueue* queue) {
|
|
DeviceQueueInfo& info = deviceQueues_[&dev];
|
|
assert((info.deviceQueueCnt_ != 0) && "The device queue map is empty!");
|
|
info.deviceQueueCnt_--;
|
|
if (info.defDeviceQueue_ == queue) {
|
|
info.defDeviceQueue_ = NULL;
|
|
}
|
|
}
|
|
|
|
} // namespace amd
|