Files
rocm-systems/rocclr/runtime/platform/context.cpp
T
foreman d09ca72f74 P4 to Git Change 1536925 by vsytchen@vsytchen-ocl-win10 on 2018/04/04 17:20:38
SWDEV-79445 - OCL generic changes and code clean-up

	1. This change replaces the use of std::map with std::unordered_map to improve lookup/insert time.
	2. Replace the use of std::make_pair and std::pair constructor with uniform initialization for cleaner code.
	3. Replace the use of std::Container::iterator type with the auto keyword for cleaner code.
	4. Use range based for loops where needed.

	ReviewBoardURL = http://ocltc.amd.com/reviews/r/14517/diff/

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#58 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10_amd.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11_amd.hpp#13 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9_amd.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#57 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#46 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#72 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#216 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#297 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#59 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#158 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#587 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#322 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#46 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#237 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#70 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#242 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#415 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#143 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#79 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#59 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#60 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#84 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#46 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/CMakeLists.txt#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccounters.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#89 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#49 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#129 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#102 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#91 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#43 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#17 edit
2018-04-04 18:00:17 -04:00

372 lines
10 KiB
C++

//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#include "platform/context.hpp"
#include "amdocl/cl_gl_amd.hpp"
#include "amdocl/cl_common.hpp"
#include "platform/commandqueue.hpp"
#include <algorithm>
#include <functional>
#ifdef _WIN32
#include <d3d10_1.h>
#include <dxgi.h>
#include "CL/cl_d3d10.h"
#include "CL/cl_d3d11.h"
#include "CL/cl_dx9_media_sharing.h"
#endif //_WIN32
#ifdef WITH_LIQUID_FLASH
#include "lf.h"
#endif
namespace amd {
Context::Context(const std::vector<Device*>& devices, const Info& info)
: devices_(devices),
info_(info),
properties_(NULL),
glenv_(NULL),
customHostAllocDevice_(NULL) {
for (const auto& device : devices) {
device->retain();
if (customHostAllocDevice_ == NULL && device->customHostAllocator()) {
customHostAllocDevice_ = device;
}
if (device->svmSupport()) {
svmAllocDevice_.push_back(device);
}
}
if (svmAllocDevice_.size() > 1) {
// make sure the CPU is the last device to do allocation.
if ((svmAllocDevice_.front()->type() == CL_DEVICE_TYPE_CPU)) {
std::swap(svmAllocDevice_.front(), svmAllocDevice_.back());
}
uint isFirstDeviceFGSEnabled = svmAllocDevice_.front()->isFineGrainedSystem(true);
for (auto& dev : svmAllocDevice_) {
// allocation on fine - grained system incapable device first
if (isFirstDeviceFGSEnabled && (dev->type() == CL_DEVICE_TYPE_GPU) &&
(!(dev->isFineGrainedSystem(true)))) {
std::swap(svmAllocDevice_.front(), dev);
break;
}
}
}
}
Context::~Context() {
static const bool VALIDATE_ONLY = false;
// Dissociate OCL context with any external device
if (info_.flags_ & (GLDeviceKhr | D3D10DeviceKhr | D3D11DeviceKhr)) {
// Loop through all devices
for (const auto& it : devices_) {
it->unbindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY);
}
}
if (properties_ != NULL) {
delete[] properties_;
}
if (glenv_ != NULL) {
delete glenv_;
glenv_ = NULL;
}
std::for_each(devices_.begin(), devices_.end(), std::mem_fun(&Device::release));
#ifdef WITH_LIQUID_FLASH
lfTerminate();
#endif
}
int Context::checkProperties(const cl_context_properties* properties, Context::Info* info) {
cl_platform_id pfmId = 0;
uint count = 0;
const struct Element {
intptr_t name;
void* ptr;
}* p = reinterpret_cast<const Element*>(properties);
// Clear the context infor structure
::memset(info, 0, sizeof(Context::Info));
if (properties == NULL) {
return CL_SUCCESS;
}
// Process all properties
while (p->name != 0) {
switch (p->name) {
case CL_CONTEXT_INTEROP_USER_SYNC:
if (p->ptr == reinterpret_cast<void*>(CL_TRUE)) {
info->flags_ |= InteropUserSync;
}
break;
#ifdef _WIN32
case CL_CONTEXT_D3D10_DEVICE_KHR:
if (p->ptr == NULL) {
return CL_INVALID_VALUE;
}
info->hDev_[D3D10DeviceKhrIdx] = p->ptr;
info->flags_ |= D3D10DeviceKhr;
break;
case CL_CONTEXT_D3D11_DEVICE_KHR:
if (p->ptr == NULL) {
return CL_INVALID_VALUE;
}
info->hDev_[D3D11DeviceKhrIdx] = p->ptr;
info->flags_ |= D3D11DeviceKhr;
break;
case CL_CONTEXT_ADAPTER_D3D9_KHR:
if (p->ptr == NULL) { // not supported for xp
return CL_INVALID_VALUE;
}
info->hDev_[D3D9DeviceKhrIdx] = p->ptr;
info->flags_ |= D3D9DeviceKhr;
break;
case CL_CONTEXT_ADAPTER_D3D9EX_KHR:
if (p->ptr == NULL) {
return CL_INVALID_VALUE;
}
info->hDev_[D3D9DeviceEXKhrIdx] = p->ptr;
info->flags_ |= D3D9DeviceEXKhr;
break;
case CL_CONTEXT_ADAPTER_DXVA_KHR:
if (p->ptr == NULL) {
return CL_INVALID_VALUE;
}
info->hDev_[D3D9DeviceVAKhrIdx] = p->ptr;
info->flags_ |= D3D9DeviceVAKhr;
break;
#endif //_WIN32
case CL_EGL_DISPLAY_KHR:
info->flags_ |= EGLDeviceKhr;
#ifdef _WIN32
case CL_WGL_HDC_KHR:
#endif //_WIN32
#if defined(__linux__)
case CL_GLX_DISPLAY_KHR:
#endif // linux
info->hDev_[GLDeviceKhrIdx] = p->ptr;
#if defined(__APPLE__) || defined(__MACOSX)
case CL_CGL_SHAREGROUP_KHR:
Unimplemented();
break;
#endif //__APPLE__ || MACOS
case CL_GL_CONTEXT_KHR:
if (p->ptr == NULL) {
return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR;
}
if (p->name == CL_GL_CONTEXT_KHR) {
info->hCtx_ = p->ptr;
}
info->flags_ |= GLDeviceKhr;
break;
case CL_CONTEXT_PLATFORM:
pfmId = reinterpret_cast<cl_platform_id>(p->ptr);
if ((NULL != pfmId) && (AMD_PLATFORM != pfmId)) {
return CL_INVALID_VALUE;
}
break;
case CL_CONTEXT_OFFLINE_DEVICES_AMD:
if (p->ptr != reinterpret_cast<void*>(1)) {
return CL_INVALID_VALUE;
}
// Set the offline device flag
info->flags_ |= OfflineDevices;
break;
case CL_CONTEXT_COMMAND_INTERCEPT_CALLBACK_AMD:
// Set the command intercept flag
info->commandIntercept_ = (cl_int(CL_CALLBACK*)(cl_event, cl_int*))p->ptr;
info->flags_ |= CommandIntercept;
break;
default:
return CL_INVALID_VALUE;
}
p++;
count++;
}
info->propertiesSize_ = count * sizeof(Element) + sizeof(intptr_t);
return CL_SUCCESS;
}
int Context::create(const intptr_t* properties) {
static const bool VALIDATE_ONLY = false;
int result = CL_SUCCESS;
if (properties != NULL) {
properties_ = new cl_context_properties[info().propertiesSize_ / sizeof(cl_context_properties)];
if (properties_ == NULL) {
return CL_OUT_OF_HOST_MEMORY;
}
::memcpy(properties_, properties, info().propertiesSize_);
}
// Check if OCL context can be associated with any external device
if (info_.flags_ & (D3D10DeviceKhr | D3D11DeviceKhr | GLDeviceKhr | D3D9DeviceKhr |
D3D9DeviceEXKhr | D3D9DeviceVAKhr)) {
// Loop through all devices
for (const auto& it : devices_) {
if (!it->bindExternalDevice(info_.flags_, info_.hDev_, info_.hCtx_, VALIDATE_ONLY)) {
result = CL_INVALID_VALUE;
}
}
}
// Check if the device binding wasn't successful
if (result != CL_SUCCESS) {
if (info_.flags_ & GLDeviceKhr) {
result = CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR;
} else if (info_.flags_ & D3D10DeviceKhr) {
// return CL_INVALID_VALUE; // FIXME_odintsov: CL_INVALID_D3D_INTEROP;
} else if (info_.flags_ & D3D11DeviceKhr) {
// return CL_INVALID_VALUE; // FIXME_odintsov: CL_INVALID_D3D_INTEROP;
} else if (info_.flags_ & (D3D9DeviceKhr | D3D9DeviceEXKhr | D3D9DeviceVAKhr)) {
// return CL_INVALID_DX9_MEDIA_ADAPTER_KHR;
}
} else {
if (info_.flags_ & GLDeviceKhr) {
// Init context for GL interop
if (glenv_ == NULL) {
HMODULE h = (HMODULE)Os::loadLibrary(
#ifdef _WIN32
"OpenGL32.dll"
#else //!_WIN32
"libGL.so.1"
#endif //!_WIN32
);
if (h && (glenv_ = new GLFunctions(h, (info_.flags_ & Flags::EGLDeviceKhr) != 0))) {
if (!glenv_->init(reinterpret_cast<intptr_t>(info_.hDev_[GLDeviceKhrIdx]),
reinterpret_cast<intptr_t>(info_.hCtx_))) {
delete glenv_;
glenv_ = NULL;
result = CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR;
}
}
}
}
}
#ifdef WITH_LIQUID_FLASH
lfInit();
#endif
return result;
}
void* Context::hostAlloc(size_t size, size_t alignment, bool atomics) const {
if (customHostAllocDevice_ != NULL) {
return customHostAllocDevice_->hostAlloc(size, alignment, atomics);
}
return AlignedMemory::allocate(size, alignment);
}
void Context::hostFree(void* ptr) const {
if (customHostAllocDevice_ != NULL) {
customHostAllocDevice_->hostFree(ptr);
return;
}
AlignedMemory::deallocate(ptr);
}
void* Context::svmAlloc(size_t size, size_t alignment, cl_svm_mem_flags flags) {
unsigned int numSVMDev = svmAllocDevice_.size();
if (numSVMDev < 1) {
return NULL;
}
if (svmAllocDevice_.front()->type() == CL_DEVICE_TYPE_CPU) {
return AlignedMemory::allocate(size, alignment);
} else {
void* svmPtrAlloced = NULL;
void* tempPtr = NULL;
amd::ScopedLock lock(&ctxLock_);
for (const auto& dev : svmAllocDevice_) {
if (dev->type() == CL_DEVICE_TYPE_GPU) {
// check if the device support svm platform atomics,
// skipped allocation for platform atomics if not supported by this device
if ((flags & CL_MEM_SVM_ATOMICS) &&
!(dev->info().svmCapabilities_ & CL_DEVICE_SVM_ATOMICS)) {
continue;
}
svmPtrAlloced = dev->svmAlloc(*this, size, alignment, flags, svmPtrAlloced);
if (svmPtrAlloced == NULL) {
return NULL;
}
}
}
return svmPtrAlloced;
}
}
void Context::svmFree(void* ptr) const {
if (svmAllocDevice_.front()->type() == CL_DEVICE_TYPE_CPU) {
AlignedMemory::deallocate(ptr);
return;
}
amd::ScopedLock lock(&ctxLock_);
for (const auto& dev : svmAllocDevice_) {
if (dev->type() == CL_DEVICE_TYPE_GPU) {
dev->svmFree(ptr);
}
}
return;
}
bool Context::containsDevice(const Device* device) const {
for (const auto& it : devices_) {
if (device == it || it->isAncestor(device)) {
return true;
}
}
return false;
}
DeviceQueue* Context::defDeviceQueue(const Device& dev) const {
const auto it = deviceQueues_.find(&dev);
if (it != deviceQueues_.cend()) {
return it->second.defDeviceQueue_;
} else {
return NULL;
}
}
bool Context::isDevQueuePossible(const Device& dev) {
return (deviceQueues_[&dev].deviceQueueCnt_ < dev.info().maxOnDeviceQueues_) ? true : false;
}
void Context::addDeviceQueue(const Device& dev, DeviceQueue* queue, bool defDevQueue) {
DeviceQueueInfo& info = deviceQueues_[&dev];
info.deviceQueueCnt_++;
if (defDevQueue) {
info.defDeviceQueue_ = queue;
}
}
void Context::removeDeviceQueue(const Device& dev, DeviceQueue* queue) {
DeviceQueueInfo& info = deviceQueues_[&dev];
assert((info.deviceQueueCnt_ != 0) && "The device queue map is empty!");
info.deviceQueueCnt_--;
if (info.defDeviceQueue_ == queue) {
info.defDeviceQueue_ = NULL;
}
}
} // namespace amd