Files
rocm-systems/rocclr/runtime/platform/memory.cpp
T
foreman 465c1c0287 P4 to Git Change 1398097 by lmoriche@lmoriche_opencl_dev2 on 2017/04/13 13:01:56
SWDEV-102733 - [OCL-LC-ROCm] Cmake build Write CMakeLists.txt to enable building with and without the DK environment
	- Change the coding convention of the runtime files. Use Google's Style (https://google.github.io/styleguide/cppguide.html).

Affected files ...

... //depot/stg/opencl/drivers/opencl/.clang-format#1 add
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_agent_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_command.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#53 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_counter.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.h#7 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#61 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_event.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#53 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd_amd.h#18 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel.h#24 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.h#4 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.h#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_object.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#41 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sampler.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_semaphore_amd.h#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#20 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.h#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blit.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blit.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blitcl.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.cpp#66 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.hpp#40 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#280 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#96 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpufeat.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpukernel.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#70 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.hpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cputables.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.hpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#209 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#284 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#58 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.hpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#126 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.hpp#41 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#156 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudefs.hpp#147 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#567 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#163 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#318 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#126 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#131 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#50 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#44 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#232 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#69 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#238 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#87 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#19 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuschedcl.cpp#35 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuscsi.cpp#37 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#350 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#98 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputrap.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#410 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#140 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugger.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d10.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d11.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d9.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevicegl.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsched.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palschedcl.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltrap.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#48 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/mesa_glinterop.h#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdefs.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#48 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#64 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocregisters.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os.hpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os_posix.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os_win32.cpp#47 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#78 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#83 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/counter.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/interop.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#127 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#100 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/object.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/object.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#86 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#41 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.cpp#35 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/threadtrace.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/atomic.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.cpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/top.hpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/concurrent.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#271 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/macros.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/util.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/versions.hpp#2150 edit
2017-04-13 13:56:38 -04:00

1400 lines
41 KiB
C++

//
// Copyright 2010 Advanced Micro Devices, Inc. All rights reserved.
//
#include "amdocl/cl_common.hpp"
#include "os/alloc.hpp"
#include "platform/context.hpp"
#include "platform/object.hpp"
#include "platform/memory.hpp"
#include "device/device.hpp"
namespace amd {
//! Derives the pitches and linear offsets describing a rectangular region
//! inside a buffer and checks that the resulting layout is consistent.
//!
//! \param bufferOrigin      (x, y, z) origin of the region; x is added to the
//!                          start offset as is, y is scaled by the row pitch
//!                          and z by the slice pitch
//! \param region            (width, height, depth) of the region
//! \param bufferRowPitch    row pitch, or 0 to use region[0]
//! \param bufferSlicePitch  slice pitch, or 0 to use rowPitch * region[1]
//! \return true when the row pitch is non-zero and covers region[0], the
//!         slice pitch covers region[1] rows and is a multiple of the row pitch
//! \note rowPitch_, slicePitch_, start_ and end_ are written even when the
//!       function returns false.
bool BufferRect::create(const size_t* bufferOrigin, const size_t* region, size_t bufferRowPitch,
                        size_t bufferSlicePitch) {
  // Find the buffer's row pitch
  rowPitch_ = (bufferRowPitch != 0) ? bufferRowPitch : region[0];

  // Find the buffer's slice pitch
  slicePitch_ = (bufferSlicePitch != 0) ? bufferSlicePitch : rowPitch_ * region[1];

  // Find the region start offset
  start_ = bufferOrigin[2] * slicePitch_ + bufferOrigin[1] * rowPitch_ + bufferOrigin[0];

  // Find the region relative end offset
  end_ = (region[2] - 1) * slicePitch_ + (region[1] - 1) * rowPitch_ + region[0];

  // A zero row pitch (no explicit pitch and region[0] == 0) can never describe
  // a valid layout, and it must be rejected before the modulo below to avoid
  // a division by zero.
  const bool valid = (rowPitch_ != 0) && (rowPitch_ >= region[0]) &&
      (slicePitch_ >= (region[1] * rowPitch_)) && ((slicePitch_ % rowPitch_) == 0);

  return valid;
}
//! Allocates the system memory referenced by this object.
//!
//! \param size     requested size; it is rounded up to the memory alignment
//!                 before the allocation is made
//! \param context  context used for the allocation when guard pages are off
//! \return true if the allocation produced a non-NULL pointer
bool HostMemoryReference::allocateMemory(size_t size, const Context& context) {
  assert(!alloced_ && "Runtime should not reallocate system memory!");

  // Use 256 whenever the configured alignment is not a positive value
  size_t memoryAlignment = 256;
  if (CPU_MEMORY_ALIGNMENT_SIZE > 0) {
    memoryAlignment = CPU_MEMORY_ALIGNMENT_SIZE;
  }

  //! \note memory size must be aligned for CAL pinning
  size_ = amd::alignUp(size, memoryAlignment);

  if (CPU_MEMORY_GUARD_PAGES) {
    hostMem_ =
        GuardedMemory::allocate(size_, MEMOBJ_BASE_ADDR_ALIGN, CPU_MEMORY_GUARD_PAGE_SIZE * Ki);
  } else {
    hostMem_ = context.hostAlloc(size_, MEMOBJ_BASE_ADDR_ALIGN);
  }

  alloced_ = (hostMem_ != NULL);
  return alloced_;
}
//! Releases the system memory, but only if this object allocated it.
//! The allocation is returned through the same path that produced it
//! (GuardedMemory when guard pages are enabled, the context otherwise),
//! after which the reference is reset to its empty state.
void HostMemoryReference::deallocateMemory(const Context& context) {
  if (!alloced_) {
    // Nothing owned by this reference
    return;
  }

  if (CPU_MEMORY_GUARD_PAGES) {
    GuardedMemory::deallocate(hostMem_);
  } else {
    context.hostFree(hostMem_);
  }

  hostMem_ = NULL;
  alloced_ = false;
  size_ = 0;
}
//! Constructs a top-level (parentless) memory object.
//!
//! Only the bookkeeping fields are initialized here; the constructor body is
//! empty, so no host or device allocation happens at this point.
//!
//! \param context  context the object belongs to
//! \param type     memory object type stored in type_
//! \param flags    memory flags stored in flags_
//! \param size     object size stored in size_
//! \param svmPtr   stored in svmHostAddress_; the SVM pointer starts out as
//!                 not committed
Memory::Memory(Context& context, Type type, Flags flags, size_t size, void* svmPtr)
: numDevices_(0),
deviceMemories_(NULL),
destructorCallbacks_(NULL),
context_(context),
// No parent: this is not a view of another object
parent_(NULL),
type_(type),
hostMemRef_(NULL),
origin_(0),
size_(size),
flags_(flags),
version_(0),
lastWriter_(NULL),
interopObj_(NULL),
isParent_(false),
vDev_(NULL),
forceSysMemAlloc_(false),
mapCount_(0),
svmHostAddress_(svmPtr),
svmPtrCommited_(false),
canBeCached_(true),
// NOTE(review): the meaning of the second lock argument is not visible here - confirm
lockMemoryOps_("Memory Ops Lock", true) {}
//! Constructs a view (sub-object) of an existing memory object.
//!
//! The view shares the parent's context, version, last writer, interop object
//! and SVM state, retains the parent, and marks it as a parent.
//!
//! \param parent  object this view refers into
//! \param flags   flags for the view; any access, host-pointer or host-access
//!                group that is left empty is inherited from the parent
//! \param origin  offset of the view, applied to the parent's host pointer
//! \param size    size of the view
//! \param type    object type; 0 means "same type as the parent"
Memory::Memory(Memory& parent, Flags flags, size_t origin, size_t size, Type type)
    : numDevices_(0),
      deviceMemories_(NULL),
      destructorCallbacks_(NULL),
      context_(parent.getContext()),
      parent_(&parent),
      type_((type == 0) ? parent.type_ : type),
      hostMemRef_(NULL),
      origin_(origin),
      size_(size),
      flags_(flags),
      version_(parent.getVersion()),
      lastWriter_(parent.getLastWriter()),
      interopObj_(parent.getInteropObj()),
      isParent_(false),
      vDev_(NULL),
      forceSysMemAlloc_(false),
      mapCount_(0),
      svmHostAddress_(parent.getSvmPtr()),
      svmPtrCommited_(parent.isSvmPtrCommited()),
      canBeCached_(true),
      lockMemoryOps_("Memory Ops Lock", true) {
  // The view holds a reference on its parent and flags it as having views
  parent.retain();
  parent.isParent_ = true;

  // Point into the parent's host memory when the parent has any
  auto* const parentHostMem = parent.getHostMem();
  if (parentHostMem != nullptr) {
    setHostMem(reinterpret_cast<address>(parentHostMem) + origin);
  }

  const auto parentFlags = parent.getMemFlags();
  const auto accessBits = CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY;
  const auto hostPtrBits = CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR;
  const auto hostAccessBits =
      CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS;

  // Device access mode: inherited only if the caller specified none
  if ((flags_ & accessBits) == 0) {
    flags_ |= parentFlags & accessBits;
  }

  // Host pointer flags are always merged in from the parent
  flags_ |= parentFlags & hostPtrBits;

  // Host access mode: inherited only if the caller specified none
  if ((flags_ & hostAccessBits) == 0) {
    flags_ |= parentFlags & hostAccessBits;
  }
}
//! Points deviceMemories_ at the storage located right behind this object
//! (sizeof(Memory) bytes from its start) and zero-fills one DeviceMemory
//! entry per device of the context.
void Memory::initDeviceMemory() {
  char* const tableStart = reinterpret_cast<char*>(this) + sizeof(Memory);
  deviceMemories_ = reinterpret_cast<DeviceMemory*>(tableStart);

  const size_t tableBytes = context_().devices().size() * sizeof(DeviceMemory);
  memset(deviceMemories_, 0, tableBytes);
}
//! Allocates the object together with trailing room for one DeviceMemory
//! entry per device in \a context.
//!
//! \param size     size of the object being constructed
//! \param context  context whose device count sizes the trailing storage
//! \return the pointer returned by RuntimeObject::operator new
void* Memory::operator new(size_t size, const Context& context) {
  const size_t deviceTableBytes = context.devices().size() * sizeof(DeviceMemory);
  return RuntimeObject::operator new(size + deviceTableBytes);
}
//! Releases the object's storage by forwarding to RuntimeObject::operator delete.
void Memory::operator delete(void* p) {
  RuntimeObject::operator delete(p);
}
//! Delete counterpart of operator new(size_t, const Context&).
//! The context is not needed to free the storage, so this simply forwards to
//! the single-argument operator delete.
void Memory::operator delete(void* p, const Context& context) {
  Memory::operator delete(p);
}
//! Appends \a view to the list of sub-buffers of this object.
//! The list is modified while holding the memory-ops lock.
void Memory::addSubBuffer(Memory* view) {
  amd::ScopedLock guard(lockMemoryOps());

  subBuffers_.push_back(view);
}
//! Removes \a view from the list of sub-buffers of this object.
//! The list is modified while holding the memory-ops lock.
void Memory::removeSubBuffer(Memory* view) {
  amd::ScopedLock guard(lockMemoryOps());

  subBuffers_.remove(view);
}
bool Memory::allocHostMemory(void* initFrom, bool allocHostMem, bool forceCopy) {
// Sanity checks (the parameters should have been prevalidated by the API)
assert(!(flags_ & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR) && (initFrom == NULL) &&
!allocHostMem && !isSvmPtrCommited()));
assert(
!((initFrom != NULL) && !forceCopy &&
!(flags_ & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_EXTERNAL_PHYSICAL_AMD))));
assert(!(flags_ & CL_MEM_COPY_HOST_PTR && flags_ & CL_MEM_USE_HOST_PTR));
const std::vector<Device*>& devices = context_().devices();
// Find if a non GPU device was created with the context
for (size_t i = 0; i < devices.size(); i++) {
if (!(devices[i]->info().type_ & CL_DEVICE_TYPE_GPU)) {
allocHostMem = true;
break;
}
}
// This allocation is necessary to use coherency mechanism
// for the initialization
if (getMemFlags() & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)) {
allocHostMem = true;
}
// Did application request to use host memory?
if (getMemFlags() & CL_MEM_USE_HOST_PTR) {
setHostMem(initFrom);
// Recalculate image size according to pitch
Image* image = asImage();
if (image != NULL) {
if (image->getDims() < 3) {
size_ = image->getRowPitch() * image->getHeight();
} else {
size_ = image->getSlicePitch() * image->getDepth();
}
}
}
// Allocate host memory buffer if needed
else if (allocHostMem && !isInterop()) {
if (!hostMemRef_.allocateMemory(size_, context_())) {
return false;
}
// Copy data to the backing store if the app has requested
if (((flags_ & CL_MEM_COPY_HOST_PTR) || forceCopy) && (initFrom != NULL)) {
copyToBackingStore(initFrom);
}
}
if (allocHostMem && type_ == CL_MEM_OBJECT_PIPE) {
// Initialize the pipe for a CPU device
clk_pipe_t* pipe = reinterpret_cast<clk_pipe_t*>(getHostMem());
pipe->read_idx = 0;
pipe->write_idx = 0;
pipe->end_idx = asPipe()->getMaxNumPackets();
}
if ((flags_ & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) && (NULL == lastWriter_)) {
// Signal write, so coherency mechanism will initialize
// memory on all devices
signalWrite(NULL);
}
return true;
}
bool Memory::create(void* initFrom, bool sysMemAlloc) {
static const bool forceAllocHostMem = false;
initDeviceMemory();
// Check if it's a subbuffer allocation
if (parent_ != NULL) {
// Find host memory pointer for subbuffer
if (parent_->getHostMem() != NULL) {
setHostMem((address)parent_->getHostMem() + origin_);
}
// Add a new subbuffer to the list
parent_->addSubBuffer(this);
}
// Allocate host memory if requested
else if (!allocHostMemory(initFrom, forceAllocHostMem)) {
return false;
}
const std::vector<Device*>& devices = context_().devices();
// Forces system memory allocation on the device,
// instead of device memory
forceSysMemAlloc_ = sysMemAlloc;
// Create memory on all available devices
for (size_t i = 0; i < devices.size(); i++) {
deviceAlloced_[devices[i]] = AllocInit;
// Only GPU devices have device memory objects
if (devices[i]->info().type_ & CL_DEVICE_TYPE_GPU) {
deviceMemories_[i].ref_ = devices[i];
deviceMemories_[i].value_ = NULL;
}
if (DISABLE_DEFERRED_ALLOC) {
device::Memory* mem = getDeviceMemory(*devices[i]);
if (NULL == mem) {
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", getSize());
return false;
}
}
}
return true;
}
//! Creates the device memory object for \a dev and appends it to the
//! per-device table.
//!
//! The per-device allocation state acts as a gate: the caller that moves the
//! state from AllocInit to AllocCreate performs the creation, every other
//! caller waits below until the state leaves AllocCreate.
//!
//! \param dev  device to create the memory object on
//! \return true if the state for \a dev ends up as AllocComplete. Any other
//!         final state, including AllocInit after a failed creation, yields false.
bool Memory::addDeviceMemory(const Device* dev) {
bool result = false;
AllocState create = AllocCreate;
AllocState init = AllocInit;
// NOTE(review): presumably an atomic compare-and-swap AllocInit -> AllocCreate,
// so that only one caller enters the creation path - confirm against make_atomic
if (make_atomic(deviceAlloced_[dev]).compareAndSet(init, create)) {
device::Memory* dm = dev->createMemory(*this);
// Add the new memory allocation to the device map
if (NULL != dm) {
// The entry is filled in before the counter is bumped and before the
// state is switched to AllocComplete
deviceMemories_[numDevices_].ref_ = dev;
deviceMemories_[numDevices_].value_ = dm;
numDevices_++;
assert((numDevices() <= context_().devices().size()) && "Too many device objects");
// Mark the allocation with the complete flag
deviceAlloced_[dev] = AllocComplete;
// For objects with an SVM pointer this device memory is recorded in svmBase_
if (getSvmPtr() != nullptr) {
svmBase_ = dm;
}
} else {
// Creation failed: put the state back to AllocInit
deviceAlloced_[dev] = AllocInit;
}
}
// Make sure runtime finished memory allocation.
// Loop (yielding) for as long as the state is AllocCreate
while (deviceAlloced_[dev] == AllocCreate) {
Os::yield();
}
if (deviceAlloced_[dev] == AllocComplete) {
result = true;
}
return result;
}
void Memory::replaceDeviceMemory(const Device* dev, device::Memory* dm) {
uint i;
for (i = 0; i < numDevices_; ++i) {
if (deviceMemories_[i].ref_ == dev) {
delete deviceMemories_[i].value_;
break;
}
}
if (numDevices_ == 0) {
++numDevices_;
deviceMemories_[0].ref_ = dev;
}
deviceMemories_[i].value_ = dm;
deviceAlloced_[dev] = AllocRealloced;
}
//! Returns the device memory object associated with \a dev.
//!
//! \param dev    device to look up
//! \param alloc  when true and no object exists yet, create it through
//!               addDeviceMemory()
//! \return the device memory object, or NULL if none exists and either
//!         \a alloc is false or the allocation failed
device::Memory* Memory::getDeviceMemory(const Device& dev, bool alloc) {
  device::Memory* dm = NULL;
  for (uint i = 0; i < numDevices_; ++i) {
    if (deviceMemories_[i].ref_ == &dev) {
      dm = deviceMemories_[i].value_;
      break;
    }
  }

  if ((NULL == dm) && alloc) {
    if (!addDeviceMemory(&dev)) {
      LogError("Video memory allocation failed!");
      return NULL;
    }
    // Look the entry up by device instead of assuming it occupies the last
    // slot: addDeviceMemory() also succeeds when another thread performed the
    // allocation, and an entry for a different device may have been appended
    // in the meantime. Searching newest-first returns the last slot in the
    // uncontended case, exactly as before.
    for (auto i = numDevices(); i > 0; --i) {
      if (deviceMemories_[i - 1].ref_ == &dev) {
        dm = deviceMemories_[i - 1].value_;
        break;
      }
    }
  }

  return dm;
}
//! Destroys the memory object.
//!
//! Order of operations: run the destructor callbacks, detach from the parent
//! (writing the cache back first when the parent has host memory and vDev_ is
//! NULL), delete the device memory objects, free the callback entries,
//! release the parent and finally free the host memory.
Memory::~Memory() {
  // Notify every registered destructor callback
  for (DestructorCallBackEntry* cb = destructorCallbacks_; cb != NULL; cb = cb->next_) {
    cb->callback_(const_cast<cl_mem>(as_cl(this)), cb->data_);
  }

  if (parent_ != NULL) {
    // Update cache if runtime destroys a subbuffer
    if ((parent_->getHostMem() != NULL) && (vDev_ == NULL)) {
      cacheWriteBack();
    }
    // The parent no longer tracks this view
    parent_->removeSubBuffer(this);
  }

  // Destroy all device memory objects
  if (deviceMemories_ != NULL) {
    for (uint idx = 0; idx < numDevices_; ++idx) {
      delete deviceMemories_[idx].value_;
    }
  }

  // Sanity check: all views should be gone by now
  if (subBuffers_.size() > 0) {
    LogError("Can't have views if parent is destroyed!");
  }

  // Free the destructor callback entries themselves
  for (DestructorCallBackEntry* cb = destructorCallbacks_; cb != NULL;) {
    DestructorCallBackEntry* const following = cb->next_;
    delete cb;
    cb = following;
  }

  // Make sure runtime destroys the parent only after subbuffer destruction
  if (parent_ != NULL) {
    parent_->release();
  }

  hostMemRef_.deallocateMemory(context_());
}
//! Registers a callback to be invoked from the destructor.
//!
//! The new entry is pushed at the head of the callback list with a
//! compare-exchange retry loop.
//!
//! \param callback  function to invoke
//! \param data      user data handed to the callback
//! \return false if the entry couldn't be allocated, true otherwise
bool Memory::setDestructorCallback(DestructorCallBackFunction callback, void* data) {
  DestructorCallBackEntry* const node = new DestructorCallBackEntry(callback, data);
  if (node == NULL) {
    return false;
  }

  node->next_ = destructorCallbacks_;
  do {
    // A failed exchange means someone else updated the head of the linked
    // list; node->next_ was reloaded with the current head, so just retry.
  } while (!destructorCallbacks_.compare_exchange_weak(node->next_, node));

  return true;
}
void Memory::signalWrite(const Device* writer) {
// (the potential race condition below doesn't matter, no critical
// section needed)
++version_;
lastWriter_ = writer;
// Update all subbuffers for this object
for (auto buf : subBuffers_) {
buf->signalWrite(writer);
}
}
void Memory::cacheWriteBack() {
if (NULL != lastWriter_) {
device::Memory* dmem = getDeviceMemory(*lastWriter_);
//! @note It's a special condition, when a subbuffer was created,
//! but never used. Thus dev memory is still NULL and lastWriter_
//! was passed from the parent.
if (NULL != dmem) {
dmem->syncHostFromCache();
}
} else if (isParent()) {
// On CPU parent can't be synchronized, because lastWriter_ could be NULL
// and syncHostFromCache() won't be called.
for (uint i = 0; i < numDevices_; ++i) {
deviceMemories_[i].value_->syncHostFromCache();
}
}
}
//! Copies size_ bytes from \a initFrom into the object's host memory.
void Memory::copyToBackingStore(void* initFrom) {
  memcpy(getHostMem(), initFrom, size_);
}
//! Tells whether the object was created over an SVM pointer.
//!
//! \return true only for CL_MEM_USE_HOST_PTR objects whose host pointer is
//!         reported by SvmBuffer::malloced() or that carry an SVM host address
bool Memory::usesSvmPointer() const {
  const bool usesHostPtr = (flags_ & CL_MEM_USE_HOST_PTR) != 0;

  // If the application host pointer lies within a SVM region, so does the
  // sub-buffer host pointer - so the following check works in both cases
  return usesHostPtr && (SvmBuffer::malloced(getHostMem()) || (svmHostAddress_ != NULL));
}
//! Commits the SVM host range [svmHostAddress_, svmHostAddress_ + size_) with
//! read/write protection, once. The check and the commit both happen under
//! the memory-ops lock.
void Memory::commitSvmMemory() {
  ScopedLock guard(lockMemoryOps_);

  if (svmPtrCommited_) {
    // Already committed by an earlier call
    return;
  }

  amd::Os::commitMemory(svmHostAddress_, size_, amd::Os::MEM_PROT_RW);
  svmPtrCommited_ = true;
}
//! Points deviceMemories_ at the storage located right behind this object
//! (sizeof(Buffer) bytes from its start) and zero-fills one DeviceMemory
//! entry per device of the context.
void Buffer::initDeviceMemory() {
  char* const tableStart = reinterpret_cast<char*>(this) + sizeof(Buffer);
  deviceMemories_ = reinterpret_cast<DeviceMemory*>(tableStart);

  const size_t tableBytes = context_().devices().size() * sizeof(DeviceMemory);
  memset(deviceMemories_, 0, tableBytes);
}
bool Buffer::create(void* initFrom, bool sysMemAlloc) {
if ((getMemFlags() & CL_MEM_EXTERNAL_PHYSICAL_AMD) && (initFrom != NULL)) {
busAddress_ = *(reinterpret_cast<cl_bus_address_amd*>(initFrom));
initFrom = NULL;
} else {
busAddress_.surface_bus_address = 0;
busAddress_.marker_bus_address = 0;
}
return Memory::create(initFrom, sysMemAlloc);
}
//! Tells whether the region spans the whole buffer, i.e. it starts at
//! offset 0 and its first component equals the buffer size. Only the first
//! component of \a origin and \a region is examined.
bool Buffer::isEntirelyCovered(const Coord3D& origin, const Coord3D& region) const {
  const bool startsAtZero = (origin[0] == 0);
  return startsAtZero && (region[0] == getSize());
}
//! Checks that a region lies inside the buffer.
//!
//! Only the first component of \a origin and \a region is examined.
//!
//! \param origin  start offset of the region
//! \param region  extent of the region
//! \return true if the region is non-empty, starts inside the buffer and
//!         doesn't extend past its end
bool Buffer::validateRegion(const Coord3D& origin, const Coord3D& region) const {
  const auto bufferSize = getSize();

  if (!(region[0] > 0) || !(origin[0] < bufferSize)) {
    return false;
  }

  // Compare against the remaining space instead of computing
  // origin[0] + region[0]: that sum can wrap around for a huge region and
  // make an out-of-range request look valid. origin[0] < bufferSize holds
  // here, so the subtraction itself can't wrap.
  return region[0] <= (bufferSize - origin[0]);
}
//! Points deviceMemories_ at the storage located right behind this object
//! (sizeof(Pipe) bytes from its start) and zero-fills one DeviceMemory
//! entry per device of the context.
void Pipe::initDeviceMemory() {
  char* const tableStart = reinterpret_cast<char*>(this) + sizeof(Pipe);
  deviceMemories_ = reinterpret_cast<DeviceMemory*>(tableStart);

  const size_t tableBytes = context_().devices().size() * sizeof(DeviceMemory);
  memset(deviceMemories_, 0, tableBytes);
}
// Size of one image dimension at mip level `mip`: `dim` shifted right by `mip`,
// clamped so that it never drops below 1.
// Both arguments are fully parenthesised so that expression arguments
// (e.g. `a | b`) keep their meaning. Each argument is evaluated more than
// once, so do not pass expressions with side effects.
#define GETMIPDIM(dim, mip) ((((dim) >> (mip)) > 0) ? ((dim) >> (mip)) : 1)
// Constructs an image that is a view of `parent`, reinterpreted with `format`
// and starting at mip level `baseMipLevel` (this is the constructor used by
// Image::createView()).
//
// The view's width is the parent's width rescaled by the ratio of the parent's
// element size to the view's element size; height, depth and pitches are taken
// from the parent. The view always reports a single mip level.
Image::Image(const Format& format, Image& parent, uint baseMipLevel, cl_mem_flags flags)
: Memory(parent, flags, 0,
parent.getWidth() * parent.getHeight() * parent.getDepth() * format.getElementSize()),
impl_(format, Coord3D(parent.getWidth() * parent.getImageFormat().getElementSize() /
format.getElementSize(),
parent.getHeight(), parent.getDepth()),
parent.getRowPitch(), parent.getSlicePitch(), parent.getBytePitch()),
mipLevels_(1),
baseMipLevel_(baseMipLevel) {
if (baseMipLevel > 0) {
// For a non-base level, shrink every dimension to its size at that level
// (never below 1); the width is additionally rescaled for the new format.
impl_.region_.c[0] = GETMIPDIM(parent.getWidth(), baseMipLevel) *
parent.getImageFormat().getElementSize() / format.getElementSize();
impl_.region_.c[1] = GETMIPDIM(parent.getHeight(), baseMipLevel);
impl_.region_.c[2] = GETMIPDIM(parent.getDepth(), baseMipLevel);
// For array images the array dimension is restored to the parent's value
// instead of being reduced with the mip level.
if (parent.getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
impl_.region_.c[1] = parent.getHeight();
} else if (parent.getType() == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
impl_.region_.c[2] = parent.getDepth();
}
// NOTE(review): the size uses the parent's depth, not the reduced depth
// computed above - confirm this is intended for 3D images.
size_ = getWidth() * getHeight() * parent.getDepth() * format.getElementSize();
}
initDimension();
}
// Constructs a standalone image in `context`. The memory size is computed as
// width * height * depth * element size; the row and slice pitches are passed
// through as given and finalised by initDimension() (a zero row pitch is
// derived there; a zero slice pitch is derived for 3D and array images).
Image::Image(Context& context, Type type, Flags flags, const Format& format, size_t width,
size_t height, size_t depth, size_t rowPitch, size_t slicePitch, uint mipLevels)
: Memory(context, type, flags, width * height * depth * format.getElementSize()),
impl_(format, Coord3D(width, height, depth), rowPitch, slicePitch),
mipLevels_(mipLevels),
baseMipLevel_(0) {
initDimension();
}
// Constructs an image on top of an existing buffer. The Memory base is built
// from `buffer` with offset 0 and the buffer's full size; the image itself has
// a single mip level and its pitches are finalised by initDimension().
Image::Image(Buffer& buffer, Type type, Flags flags, const Format& format, size_t width,
size_t height, size_t depth, size_t rowPitch, size_t slicePitch)
: Memory(buffer, flags, 0, buffer.getSize(), type),
impl_(format, Coord3D(width, height, depth), rowPitch, slicePitch),
mipLevels_(1),
baseMipLevel_(0) {
initDimension();
}
// Returns true when the requested image dimensions are non-zero and at least
// one device in `devices` can hold an image of that size for the given type.
// Array types additionally require a non-zero arraySize that at least one
// device supports. Unknown image types are rejected.
bool Image::validateDimensions(const std::vector<amd::Device*>& devices, cl_mem_object_type type,
                               size_t width, size_t height, size_t depth, size_t arraySize) {
  // True when some device accepts an image array with `arraySize` slices.
  const auto arraySizeFits = [&devices, arraySize]() -> bool {
    for (const auto& dev : devices) {
      if (dev->info().imageMaxArraySize_ >= arraySize) {
        return true;
      }
    }
    return false;
  };

  switch (type) {
    case CL_MEM_OBJECT_IMAGE3D: {
      if ((width == 0) || (height == 0) || (depth < 1)) {
        return false;
      }
      for (const auto& dev : devices) {
        const bool fits = (dev->info().image3DMaxWidth_ >= width) &&
            (dev->info().image3DMaxHeight_ >= height) && (dev->info().image3DMaxDepth_ >= depth);
        if (fits) {
          return true;
        }
      }
      return false;
    }

    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
      if ((arraySize == 0) || !arraySizeFits()) {
        return false;
      }
      // Fall through...
    case CL_MEM_OBJECT_IMAGE2D: {
      if ((width == 0) || (height == 0)) {
        return false;
      }
      for (const auto& dev : devices) {
        const bool fits =
            (dev->info().image2DMaxHeight_ >= height) && (dev->info().image2DMaxWidth_ >= width);
        if (fits) {
          return true;
        }
      }
      return false;
    }

    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
      if ((arraySize == 0) || !arraySizeFits()) {
        return false;
      }
      // Fall through...
    case CL_MEM_OBJECT_IMAGE1D: {
      if (width == 0) {
        return false;
      }
      // 1D images are checked against the 2D width limit.
      for (const auto& dev : devices) {
        if (dev->info().image2DMaxWidth_ >= width) {
          return true;
        }
      }
      return false;
    }

    case CL_MEM_OBJECT_IMAGE1D_BUFFER: {
      if (width == 0) {
        return false;
      }
      for (const auto& dev : devices) {
        if (dev->info().imageMaxBufferSize_ >= width) {
          return true;
        }
      }
      return false;
    }

    default:
      return false;
  }
}
// Derives the dimensionality (dim_) from the image type and fills in any pitch
// that was left at zero: the row pitch defaults to width * element size, the
// slice pitch to width * height * element size for 3D / 2D-array images and to
// the row pitch for 1D-array images.
void Image::initDimension() {
  const size_t texelBytes = impl_.format_.getElementSize();

  if (impl_.rp_ == 0) {
    impl_.rp_ = impl_.region_[0] * texelBytes;
  }

  const bool isVolumeLike =
      (type_ == CL_MEM_OBJECT_IMAGE3D) || (type_ == CL_MEM_OBJECT_IMAGE2D_ARRAY);
  const bool isPlaneLike =
      (type_ == CL_MEM_OBJECT_IMAGE2D) || (type_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);

  if (isVolumeLike) {
    dim_ = 3;
    if (impl_.sp_ == 0) {
      impl_.sp_ = impl_.region_[0] * impl_.region_[1] * texelBytes;
    }
  } else if (isPlaneLike) {
    dim_ = 2;
    // Only the 1D array has slices here; each slice is exactly one row.
    if ((type_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (impl_.sp_ == 0)) {
      impl_.sp_ = impl_.rp_;
    }
  } else {
    // CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER and anything else.
    dim_ = 1;
  }
}
// Points deviceMemories_ at the bytes immediately following this Image object
// and zeroes one DeviceMemory slot per device in the context.
// NOTE(review): presumably the allocator reserves that trailing storage - confirm.
void Image::initDeviceMemory() {
  char* const objectEnd = reinterpret_cast<char*>(this) + sizeof(Image);
  deviceMemories_ = reinterpret_cast<DeviceMemory*>(objectEnd);

  const size_t tableBytes = context_().devices().size() * sizeof(DeviceMemory);
  memset(deviceMemories_, 0, tableBytes);
}
// Creates the image by delegating to Memory::create() with the given initial data.
bool Image::create(void* initFrom) {
  const bool created = Memory::create(initFrom);
  return created;
}
// Number of channels implied by the channel order. Orders that are not listed
// explicitly are treated as single-channel.
size_t Image::Format::getNumChannels() const {
  size_t channels = 1;
  switch (image_channel_order) {
    case CL_RG:
    case CL_RA:
      channels = 2;
      break;
    case CL_RGB:
    case CL_sRGB:
    case CL_sRGBx:
      channels = 3;
      break;
    case CL_RGBA:
    case CL_BGRA:
    case CL_ARGB:
    case CL_sRGBA:
    case CL_sBGRA:
      channels = 4;
      break;
    default:
      break;
  }
  return channels;
}
size_t Image::Format::getElementSize() const {
size_t bytesPerPixel = getNumChannels();
switch (image_channel_data_type) {
case CL_SNORM_INT8:
case CL_UNORM_INT8:
case CL_SIGNED_INT8:
case CL_UNSIGNED_INT8:
break;
case CL_UNORM_INT_101010:
bytesPerPixel = 4;
break;
case CL_SIGNED_INT32:
case CL_UNSIGNED_INT32:
case CL_FLOAT:
bytesPerPixel *= 4;
break;
default:
bytesPerPixel *= 2;
break;
}
return bytesPerPixel;
}
// Checks that the channel data type is one of the known types and that it is
// permitted in combination with the channel order. Unknown data types and
// unknown channel orders are rejected.
bool Image::Format::isValid() const {
  const auto dataType = image_channel_data_type;

  // Step 1: the data type itself must be a known one.
  switch (dataType) {
    case CL_SNORM_INT8:
    case CL_SNORM_INT16:
    case CL_UNORM_INT8:
    case CL_UNORM_INT16:
    case CL_UNORM_SHORT_565:
    case CL_UNORM_SHORT_555:
    case CL_UNORM_INT_101010:
    case CL_SIGNED_INT8:
    case CL_SIGNED_INT16:
    case CL_SIGNED_INT32:
    case CL_UNSIGNED_INT8:
    case CL_UNSIGNED_INT16:
    case CL_UNSIGNED_INT32:
    case CL_HALF_FLOAT:
    case CL_FLOAT:
      break;
    default:
      return false;
  }

  // Step 2: the channel order restricts which data types are allowed.
  switch (image_channel_order) {
    case CL_R:
    case CL_A:
    case CL_RG:
    case CL_RA:
    case CL_RGBA:
      // Any known data type is accepted.
      return true;

    case CL_INTENSITY:
    case CL_LUMINANCE:
      return (dataType == CL_SNORM_INT8) || (dataType == CL_SNORM_INT16) ||
          (dataType == CL_UNORM_INT8) || (dataType == CL_UNORM_INT16) ||
          (dataType == CL_HALF_FLOAT) || (dataType == CL_FLOAT);

    case CL_RGB:
      // Packed data types only.
      return (dataType == CL_UNORM_SHORT_565) || (dataType == CL_UNORM_SHORT_555) ||
          (dataType == CL_UNORM_INT_101010);

    case CL_BGRA:
    case CL_ARGB:
      // 8-bit data types only.
      return (dataType == CL_SNORM_INT8) || (dataType == CL_UNORM_INT8) ||
          (dataType == CL_SIGNED_INT8) || (dataType == CL_UNSIGNED_INT8);

    case CL_sRGB:
    case CL_sRGBx:
    case CL_sRGBA:
    case CL_sBGRA:
      return dataType == CL_UNORM_INT8;

    case CL_DEPTH:
      return (dataType == CL_UNORM_INT16) || (dataType == CL_FLOAT);

    default:
      return false;
  }
}
// Definition of the list of supported image formats.
//
// Ordering matters: numSupportedFormats() and getSupportedFormats() exclude
// the RGB, sRGB and DEPTH groups by trimming a count off the END of this
// table, so those groups must stay last, in this order, and their sizes must
// match NUM_CHANNEL_ORDER_OF_RGB / _sRGB / _DEPTH.
cl_image_format Image::supportedFormats[] = {
// R
{CL_R, CL_SNORM_INT8},
{CL_R, CL_SNORM_INT16},
{CL_R, CL_UNORM_INT8},
{CL_R, CL_UNORM_INT16},
{CL_R, CL_SIGNED_INT8},
{CL_R, CL_SIGNED_INT16},
{CL_R, CL_SIGNED_INT32},
{CL_R, CL_UNSIGNED_INT8},
{CL_R, CL_UNSIGNED_INT16},
{CL_R, CL_UNSIGNED_INT32},
{CL_R, CL_HALF_FLOAT},
{CL_R, CL_FLOAT},
// A
{CL_A, CL_SNORM_INT8},
{CL_A, CL_SNORM_INT16},
{CL_A, CL_UNORM_INT8},
{CL_A, CL_UNORM_INT16},
{CL_A, CL_SIGNED_INT8},
{CL_A, CL_SIGNED_INT16},
{CL_A, CL_SIGNED_INT32},
{CL_A, CL_UNSIGNED_INT8},
{CL_A, CL_UNSIGNED_INT16},
{CL_A, CL_UNSIGNED_INT32},
{CL_A, CL_HALF_FLOAT},
{CL_A, CL_FLOAT},
// RG
{CL_RG, CL_SNORM_INT8},
{CL_RG, CL_SNORM_INT16},
{CL_RG, CL_UNORM_INT8},
{CL_RG, CL_UNORM_INT16},
{CL_RG, CL_SIGNED_INT8},
{CL_RG, CL_SIGNED_INT16},
{CL_RG, CL_SIGNED_INT32},
{CL_RG, CL_UNSIGNED_INT8},
{CL_RG, CL_UNSIGNED_INT16},
{CL_RG, CL_UNSIGNED_INT32},
{CL_RG, CL_HALF_FLOAT},
{CL_RG, CL_FLOAT},
// RGBA
{CL_RGBA, CL_SNORM_INT8},
{CL_RGBA, CL_SNORM_INT16},
{CL_RGBA, CL_UNORM_INT8},
{CL_RGBA, CL_UNORM_INT16},
{CL_RGBA, CL_SIGNED_INT8},
{CL_RGBA, CL_SIGNED_INT16},
{CL_RGBA, CL_SIGNED_INT32},
{CL_RGBA, CL_UNSIGNED_INT8},
{CL_RGBA, CL_UNSIGNED_INT16},
{CL_RGBA, CL_UNSIGNED_INT32},
{CL_RGBA, CL_HALF_FLOAT},
{CL_RGBA, CL_FLOAT},
// ARGB
{CL_ARGB, CL_SNORM_INT8},
{CL_ARGB, CL_UNORM_INT8},
{CL_ARGB, CL_SIGNED_INT8},
{CL_ARGB, CL_UNSIGNED_INT8},
// BGRA
{CL_BGRA, CL_SNORM_INT8},
{CL_BGRA, CL_UNORM_INT8},
{CL_BGRA, CL_SIGNED_INT8},
{CL_BGRA, CL_UNSIGNED_INT8},
// LUMINANCE
{CL_LUMINANCE, CL_SNORM_INT8},
{CL_LUMINANCE, CL_SNORM_INT16},
{CL_LUMINANCE, CL_UNORM_INT8},
{CL_LUMINANCE, CL_UNORM_INT16},
{CL_LUMINANCE, CL_HALF_FLOAT},
{CL_LUMINANCE, CL_FLOAT},
// INTENSITY
{CL_INTENSITY, CL_SNORM_INT8},
{CL_INTENSITY, CL_SNORM_INT16},
{CL_INTENSITY, CL_UNORM_INT8},
{CL_INTENSITY, CL_UNORM_INT16},
{CL_INTENSITY, CL_HALF_FLOAT},
{CL_INTENSITY, CL_FLOAT},
// RGB (trailing group, counted by NUM_CHANNEL_ORDER_OF_RGB)
{CL_RGB, CL_UNORM_INT_101010},
// sRGB (trailing group, counted by NUM_CHANNEL_ORDER_OF_sRGB)
{CL_sRGBA, CL_UNORM_INT8},
// DEPTH (trailing group, counted by NUM_CHANNEL_ORDER_OF_DEPTH)
{CL_DEPTH, CL_UNORM_INT16},
{CL_DEPTH, CL_FLOAT},
};
// Sizes of the three trailing groups of the supportedFormats table. They are
// subtracted from the table length to exclude those groups, so each value must
// equal the number of entries of its group in the table.
const cl_uint NUM_CHANNEL_ORDER_OF_RGB = 1; // Number of RGB entries near the end of
// supportedFormats, located before the sRGB and
// DEPTH entries.
const cl_uint NUM_CHANNEL_ORDER_OF_sRGB = 1; // Number of sRGB entries near the end of
// supportedFormats, located before the DEPTH entries.
const cl_uint NUM_CHANNEL_ORDER_OF_DEPTH =
2; // Number of DEPTH entries at the very end of supportedFormats.
// Definition of the list of supported RA formats. These are reported in
// addition to supportedFormats only when at least one device in the context
// has settings().supportRA_ set.
cl_image_format Image::supportedFormatsRA[] = {
{CL_RA, CL_SNORM_INT8}, {CL_RA, CL_SNORM_INT16}, {CL_RA, CL_UNORM_INT8},
{CL_RA, CL_UNORM_INT16}, {CL_RA, CL_SIGNED_INT8}, {CL_RA, CL_SIGNED_INT16},
{CL_RA, CL_SIGNED_INT32}, {CL_RA, CL_UNSIGNED_INT8}, {CL_RA, CL_UNSIGNED_INT16},
{CL_RA, CL_UNSIGNED_INT32}, {CL_RA, CL_HALF_FLOAT}, {CL_RA, CL_FLOAT},
};
// Definition of the list of supported depth-stencil formats. These are
// reported only for CL_MEM_READ_ONLY requests when a device enables
// ClKhrGLDepthImages and the context has the Context::GLDeviceKhr flag.
cl_image_format Image::supportedDepthStencilFormats[] = {
// DEPTH STENCIL
{CL_DEPTH_STENCIL, CL_FLOAT},
{CL_DEPTH_STENCIL, CL_UNORM_INT24}};
// Counts the image formats that getSupportedFormats() reports for the given
// context, image type and memory flags.
//
// The count starts from the full supportedFormats table and is reduced by the
// trailing RGB / sRGB / DEPTH groups that do not apply, then increased by the
// optional RA and depth-stencil tables.
cl_uint Image::numSupportedFormats(const Context& context, cl_mem_object_type image_type,
                                   cl_mem_flags flags) {
  const std::vector<amd::Device*>& devices = context.devices();

  // A capability counts as available when any device in the context has it.
  bool hasRA = false;
  bool hasDepthsRGB = false;
  bool hasDepthStencil = false;
  for (const auto& dev : devices) {
    if (dev->settings().supportRA_) {
      hasRA = true;
    }
    if (dev->settings().supportDepthsRGB_) {
      hasDepthsRGB = true;
    }
    if (dev->settings().checkExtension(ClKhrGLDepthImages) &&
        (context.info().flags_ & Context::GLDeviceKhr)) {
      hasDepthStencil = true;
    }
  }

  uint count = sizeof(supportedFormats) / sizeof(cl_image_format);

  if (!hasDepthsRGB) {
    // None of the trailing RGB, sRGB and DEPTH groups are available.
    count -= NUM_CHANNEL_ORDER_OF_RGB;
    count -= NUM_CHANNEL_ORDER_OF_sRGB;
    count -= NUM_CHANNEL_ORDER_OF_DEPTH;
  } else {
    // DEPTH is kept only for 2D images, 2D arrays and an unspecified type (0).
    const bool depthCapableType = (image_type == CL_MEM_OBJECT_IMAGE2D) ||
        (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) || (image_type == 0);
    if (!depthCapableType) {
      count -= NUM_CHANNEL_ORDER_OF_DEPTH;
    }

    // sRGB is currently not supported for write_imagef
    // (extension cl_khr_srgb_image_writes) nor for 1D image buffers.
    const bool srgbExcluded = (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) ||
        ((flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_KERNEL_READ_AND_WRITE)) != 0);
    if (srgbExcluded) {
      count -= NUM_CHANNEL_ORDER_OF_sRGB;
    }
  }

  // RA formats are optional; RA isn't supported on SI.
  if (hasRA) {
    count += sizeof(supportedFormatsRA) / sizeof(cl_image_format);
  }

  // Depth-stencil formats are exposed only for read-only requests.
  if (hasDepthStencil && (flags & CL_MEM_READ_ONLY)) {
    count += sizeof(supportedDepthStencilFormats) / sizeof(cl_image_format);
  }

  return count;
}
// Writes up to num_entries supported image formats for the given context,
// image type and memory flags into image_formats and returns how many entries
// were written.
//
// Formats come from supportedFormats (minus the trailing groups that do not
// apply, and minus sRGB orders when they are excluded), followed by the RA
// table and the depth-stencil table when those are available.
cl_uint Image::getSupportedFormats(const Context& context, cl_mem_object_type image_type,
                                   const cl_uint num_entries, cl_image_format* image_formats,
                                   cl_mem_flags flags) {
  const std::vector<amd::Device*>& devices = context.devices();

  // A capability counts as available when any device in the context has it.
  bool hasRA = false;
  bool hasDepthsRGB = false;
  bool hasDepthStencil = false;
  for (const auto& dev : devices) {
    if (dev->settings().supportRA_) {
      hasRA = true;
    }
    if (dev->settings().supportDepthsRGB_) {
      hasDepthsRGB = true;
    }
    if (dev->settings().checkExtension(ClKhrGLDepthImages) &&
        (context.info().flags_ & Context::GLDeviceKhr)) {
      hasDepthStencil = true;
    }
  }

  // Number of leading entries of supportedFormats that are candidates.
  uint tableCount = sizeof(supportedFormats) / sizeof(cl_image_format);
  bool skipSrgb = false;

  if (!hasDepthsRGB) {
    // None of the trailing RGB, sRGB and DEPTH groups are available.
    tableCount -= NUM_CHANNEL_ORDER_OF_RGB;
    tableCount -= NUM_CHANNEL_ORDER_OF_sRGB;
    tableCount -= NUM_CHANNEL_ORDER_OF_DEPTH;
  } else {
    // DEPTH is kept only for 2D images, 2D arrays and an unspecified type (0).
    const bool depthCapableType = (image_type == CL_MEM_OBJECT_IMAGE2D) ||
        (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) || (image_type == 0);
    if (!depthCapableType) {
      tableCount -= NUM_CHANNEL_ORDER_OF_DEPTH;
    }

    // sRGB is currently not supported for write_imagef
    // (extension cl_khr_srgb_image_writes) nor for 1D image buffers.
    skipSrgb = (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) ||
        ((flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_KERNEL_READ_AND_WRITE)) != 0);
  }

  cl_image_format* out = image_formats;
  uint written = 0;

  // Base table, stopping as soon as the caller's array is full.
  for (uint i = 0; (i < tableCount) && (written != num_entries); ++i) {
    const cl_image_format& candidate = amd::Image::supportedFormats[i];
    if (skipSrgb) {
      const auto order = candidate.image_channel_order;
      if ((order == CL_sRGBA) || (order == CL_sRGB) || (order == CL_sRGBx) ||
          (order == CL_sBGRA)) {
        continue;
      }
    }
    *out++ = candidate;
    ++written;
  }

  // Optional RA formats.
  if (hasRA) {
    const uint raCount = sizeof(supportedFormatsRA) / sizeof(cl_image_format);
    for (uint i = 0; (i < raCount) && (written != num_entries); ++i) {
      *out++ = amd::Image::supportedFormatsRA[i];
      ++written;
    }
  }

  // Optional depth-stencil formats, exposed only for read-only requests.
  if (hasDepthStencil && (flags & CL_MEM_READ_ONLY)) {
    const uint dsCount = sizeof(supportedDepthStencilFormats) / sizeof(cl_image_format);
    for (uint i = 0; (i < dsCount) && (written != num_entries); ++i) {
      *out++ = amd::Image::supportedDepthStencilFormats[i];
      ++written;
    }
  }

  return written;
}
bool Image::Format::isSupported(const Context& context, cl_mem_object_type image_type,
cl_mem_flags flags) const {
uint numFormats = numSupportedFormats(context, image_type, flags);
std::vector<cl_image_format> image_formats(numFormats);
getSupportedFormats(context, image_type, numFormats, image_formats.data(), flags);
for (uint i = 0; i < numFormats; i++) {
if (*this == image_formats[i]) {
return true;
}
}
return false;
}
// Creates a view of this image that reinterprets it with `format`, starting at
// mip level `baseMipLevel`.
//
// Returns the new view with its virtual device set to `vDev` and its device
// memory table initialized, or NULL when the allocation yields no object.
// The view is only touched after the NULL check; setVirtualDevice() used to
// be called on the pointer before it was tested.
Image* Image::createView(const Context& context, const Format& format, device::VirtualDevice* vDev,
                         uint baseMipLevel, cl_mem_flags flags) {
  // Find the image dimensions and create a corresponding object
  Image* view = new (context) Image(format, *this, baseMipLevel, flags);
  if (view == NULL) {
    return NULL;
  }

  // Set GPU virtual device for this view
  view->setVirtualDevice(vDev);

  // Initialize view
  view->initDeviceMemory();

  return view;
}
// True when the region starts at (0, 0, 0) and its extent equals the image's
// full width, height and depth.
bool Image::isEntirelyCovered(const Coord3D& origin, const Coord3D& region) const {
  const bool startsAtOrigin = (origin[0] == 0) && (origin[1] == 0) && (origin[2] == 0);
  if (!startsAtOrigin) {
    return false;
  }
  return (region[0] == getWidth()) && (region[1] == getHeight()) && (region[2] == getDepth());
}
bool Image::validateRegion(const Coord3D& origin, const Coord3D& region) const {
return ((region[0] > 0) && (region[1] > 0) && (region[2] > 0) && (origin[0] < getWidth()) &&
(region[0] != 0) && (origin[1] < getHeight()) && (region[1] != 0) &&
(origin[2] < getDepth()) && (region[2] != 0) && ((origin[0] + region[0]) <= getWidth()) &&
((origin[1] + region[1]) <= getHeight()) && ((origin[2] + region[2]) <= getDepth()))
? true
: false;
}
// Validates caller-supplied pitches. A pitch of zero is always accepted.
// Otherwise the row pitch must cover `width` elements and the slice pitch must
// cover rowPitch * rows, where a 1D array counts as a single row per slice.
bool Image::isRowSliceValid(size_t rowPitch, size_t slice, size_t width, size_t height) const {
  const size_t rows = (getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? 1 : height;

  const bool rowPitchOk =
      (rowPitch == 0) || (rowPitch >= width * getImageFormat().getElementSize());
  const bool slicePitchOk = (slice == 0) || (slice >= rowPitch * rows);

  return slicePitchOk && rowPitchOk;
}
void Image::copyToBackingStore(void* initFrom) {
char* src;
char* dst = reinterpret_cast<char*>(getHostMem());
size_t cpySize = getWidth() * getImageFormat().getElementSize();
for (uint z = 0; z < getDepth(); ++z) {
src = reinterpret_cast<char*>(initFrom) + z * getSlicePitch();
for (uint y = 0; y < getHeight(); ++y) {
memcpy(dst, src, cpySize);
dst += cpySize;
src += getRowPitch();
}
}
impl_.rp_ = cpySize;
if (impl_.sp_ != 0) {
impl_.sp_ = impl_.rp_;
if (getDims() == 3) {
impl_.sp_ *= getHeight();
}
}
}
// Converts a float to int, rounding halfway cases to the nearest even integer.
//
// - NaN yields 0 (converting NaN to int is undefined behavior).
// - Values at or beyond the int range are clamped to INT_MAX / INT_MIN.
// - Values with |v| < 2^23 are rounded by adding and then subtracting 2^23
//   (with the sign of v): at that magnitude a float has no fractional bits, so
//   the addition rounds v to an integer using the current rounding mode
//   (round-to-nearest-even by default).
// - Values with |v| >= 2^23 are already integral.
//
// The 2^23 constants are written as float literals; the previous version built
// them by reading `unsigned int` objects through a `float*`, which violates the
// strict-aliasing rules.
static int round_to_even(float v) {
  if (v != v) {
    return 0;
  }

  // clamp overflow: 2^31 as a float, the first value that no longer fits an int
  const float intRangeEnd = -static_cast<float>(std::numeric_limits<int>::min());
  if (v >= intRangeEnd) {
    return std::numeric_limits<int>::max();
  }
  if (v <= -intRangeEnd) {
    return std::numeric_limits<int>::min();
  }

  // round fractional values to integer value
  const float twoPow23 = 8388608.0f;  // bit pattern 0x4b000000
  if (fabsf(v) < twoPow23) {
    const float magicVal = (v < 0.0f) ? -twoPow23 : twoPow23;
    v += magicVal;
    v -= magicVal;
  }
  return static_cast<int>(v);
}
// Converts a 32-bit float to the bit pattern of a 16-bit half float, rounding
// toward zero.
//
// - NaN stays NaN (bit 0x0200 is forced on so the payload is never zero).
// - Infinity maps to half infinity; finite values >= 2^16 are clamped to the
//   largest finite half (0x7bff).
// - Values below 2^-24 flush to a signed zero.
// - Values below 2^-14 become half denormals.
// - Everything else is converted by dropping the low 13 mantissa bits and
//   rebiasing the exponent.
//
// The float's bits are obtained with memcpy and the thresholds are written as
// float literals; the previous version read `int` objects through a `float*`
// and the inactive member of a union, both of which are undefined behavior.
static uint16_t float2half_rtz(float f) {
  static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");

  uint32_t bits;
  memcpy(&bits, &f, sizeof(bits));

  const uint32_t sign = (bits >> 16) & 0x8000;
  float x = fabsf(f);

  // Nan
  if (x != x) {
    bits >>= (24 - 11);
    bits &= 0x7fff;
    bits |= 0x0200;  // silence the NaN
    return static_cast<uint16_t>(bits | sign);
  }

  const float halfOverflow = 65536.0f;                  // 2^16,  bits 0x47800000
  const float halfUnderflow = 5.9604644775390625e-8f;   // 2^-24, bits 0x33800000
  const float halfMinNormal = 6.103515625e-5f;          // 2^-14, bits 0x38800000
  const float denormalScale = 16777216.0f;              // 2^24,  bits 0x4b800000

  // overflow
  if (x >= halfOverflow) {
    if (x == std::numeric_limits<float>::infinity()) {
      return static_cast<uint16_t>(0x7c00 | sign);
    }
    return static_cast<uint16_t>(0x7bff | sign);
  }

  // underflow
  if (x < halfUnderflow) {
    return static_cast<uint16_t>(sign);  // The halfway case can return 0x0001 or 0. 0 is even.
  }

  // half denormal: scale so that the integer part is the denormal mantissa
  if (x < halfMinNormal) {
    x *= denormalScale;
    return static_cast<uint16_t>(static_cast<uint32_t>(static_cast<int>(x)) | sign);
  }

  // Normal number: truncate the mantissa and rebias the exponent (127 -> 15).
  // The float sign bit ends up above bit 15 after the shift and is discarded
  // by the conversion to uint16_t; `sign` carries it instead.
  bits &= 0xFFFFE000U;
  bits -= 0x38000000U;
  return static_cast<uint16_t>((bits >> (24 - 11)) | sign);
}
// Fills channelOrder with, for each stored channel, the index of the RGBA
// component it is taken from (R = 0, G = 1, B = 2, A = 3).
// CL_A writes one entry, CL_RA two, every other order four (BGRA and ARGB
// are permuted; anything else is plain R, G, B, A). Entries beyond the ones
// written are left untouched.
void Image::Format::getChannelOrder(uint8_t* channelOrder) const {
  enum { CH_ORDER_R = 0, CH_ORDER_G, CH_ORDER_B, CH_ORDER_A };

  const uint8_t alphaOnly[] = {CH_ORDER_A};
  const uint8_t redAlpha[] = {CH_ORDER_R, CH_ORDER_A};
  const uint8_t bgra[] = {CH_ORDER_B, CH_ORDER_G, CH_ORDER_R, CH_ORDER_A};
  const uint8_t argb[] = {CH_ORDER_A, CH_ORDER_R, CH_ORDER_G, CH_ORDER_B};
  const uint8_t rgba[] = {CH_ORDER_R, CH_ORDER_G, CH_ORDER_B, CH_ORDER_A};

  const uint8_t* layout = rgba;
  size_t entries = 4;

  switch (image_channel_order) {
    case CL_A:
      layout = alphaOnly;
      entries = 1;
      break;
    case CL_RA:
      layout = redAlpha;
      entries = 2;
      break;
    case CL_BGRA:
      layout = bgra;
      break;
    case CL_ARGB:
      layout = argb;
      break;
    default:
      break;
  }

  for (size_t i = 0; i < entries; ++i) {
    channelOrder[i] = layout[i];
  }
}
// Converts an RGBA color into the in-memory representation of one image
// element of this format and writes it to colorFormat.
//
// "colorRGBA" is a four-component RGBA floating-point color value if the image
// channel data type is not an unnormalized signed or unsigned integer type,
// a four-component signed integer value if the image channel data type is
// an unnormalized signed integer type, and a four-component unsigned integer
// value if the image channel data type is an unnormalized unsigned integer type.
//
// For per-channel data types, one output channel is written per loop
// iteration, taken from the RGBA component selected by getChannelOrder().
// For the packed data types (565, 555, 101010) the whole element is written in
// the first iteration directly from components 0..3 (the channel-order table
// is not consulted) and the loop is ended via allChannels.
// Data types not listed in the switch write nothing.
void Image::Format::formatColor(const void* colorRGBA, void* colorFormat) const {
// Bit-field overlays for the packed data types.
union t565 {
struct {
uint16_t r_ : 5;
uint16_t g_ : 6;
uint16_t b_ : 5;
};
uint16_t rgba_;
};
union t555 {
struct {
uint16_t r_ : 5;
uint16_t g_ : 5;
uint16_t b_ : 5;
uint16_t a_ : 1;
};
uint16_t rgba_;
};
union t101010 {
struct {
uint32_t b_ : 10;
uint32_t g_ : 10;
uint32_t r_ : 10;
uint32_t a_ : 2;
};
uint32_t rgba_;
};
// Three typed views of the same input color; which one is read depends on
// the channel data type.
const float* colorRGBAf = reinterpret_cast<const float*>(colorRGBA);
const int32_t* colorRGBAi = reinterpret_cast<const int32_t*>(colorRGBA);
const uint32_t* colorRGBAui = reinterpret_cast<const uint32_t*>(colorRGBA);
size_t chCount = getNumChannels();
uint8_t chOrder[4];
getChannelOrder(chOrder);
// Set by the packed formats, which emit every channel in one iteration.
bool allChannels = false;
for (size_t i = 0; i < chCount && !allChannels; ++i) {
switch (image_channel_data_type) {
// Normalized types: scale by the maximum value of the target type and round
// halfway cases to even.
case CL_SNORM_INT8: {
int8_t* color = reinterpret_cast<int8_t*>(colorFormat);
color[i] = round_to_even(INT8_MAX * colorRGBAf[chOrder[i]]);
} break;
case CL_SNORM_INT16: {
int16_t* color = reinterpret_cast<int16_t*>(colorFormat);
color[i] = round_to_even(INT16_MAX * colorRGBAf[chOrder[i]]);
} break;
case CL_UNORM_INT8: {
uint8_t* color = reinterpret_cast<uint8_t*>(colorFormat);
color[i] = round_to_even(UINT8_MAX * colorRGBAf[chOrder[i]]);
} break;
case CL_UNORM_INT16: {
uint16_t* color = reinterpret_cast<uint16_t*>(colorFormat);
color[i] = round_to_even(UINT16_MAX * colorRGBAf[chOrder[i]]);
} break;
// Packed types: each component is scaled to the width of its bit field.
case CL_UNORM_SHORT_565: {
t565* color = reinterpret_cast<t565*>(colorFormat);
color->r_ = round_to_even(0x1F * colorRGBAf[0]);
color->g_ = round_to_even(0x3F * colorRGBAf[1]);
color->b_ = round_to_even(0x1F * colorRGBAf[2]);
allChannels = true;
} break;
case CL_UNORM_SHORT_555: {
t555* color = reinterpret_cast<t555*>(colorFormat);
color->r_ = round_to_even(0x1F * colorRGBAf[0]);
color->g_ = round_to_even(0x1F * colorRGBAf[1]);
color->b_ = round_to_even(0x1F * colorRGBAf[2]);
color->a_ = round_to_even(colorRGBAf[3]);
allChannels = true;
} break;
case CL_UNORM_INT_101010: {
t101010* color = reinterpret_cast<t101010*>(colorFormat);
color->r_ = round_to_even(0x3FF * colorRGBAf[0]);
color->g_ = round_to_even(0x3FF * colorRGBAf[1]);
color->b_ = round_to_even(0x3FF * colorRGBAf[2]);
color->a_ = round_to_even(0x3 * colorRGBAf[3]);
allChannels = true;
} break;
// Unnormalized integer types: the 32-bit input component is stored as is,
// implicitly narrowed to the target width.
case CL_SIGNED_INT8: {
int8_t* color = reinterpret_cast<int8_t*>(colorFormat);
color[i] = colorRGBAi[chOrder[i]];
} break;
case CL_SIGNED_INT16: {
int16_t* color = reinterpret_cast<int16_t*>(colorFormat);
color[i] = colorRGBAi[chOrder[i]];
} break;
case CL_SIGNED_INT32: {
int32_t* color = reinterpret_cast<int32_t*>(colorFormat);
color[i] = colorRGBAi[chOrder[i]];
} break;
case CL_UNSIGNED_INT8: {
uint8_t* color = reinterpret_cast<uint8_t*>(colorFormat);
color[i] = colorRGBAui[chOrder[i]];
} break;
case CL_UNSIGNED_INT16: {
uint16_t* color = reinterpret_cast<uint16_t*>(colorFormat);
color[i] = colorRGBAui[chOrder[i]];
} break;
case CL_UNSIGNED_INT32: {
uint32_t* color = reinterpret_cast<uint32_t*>(colorFormat);
color[i] = colorRGBAui[chOrder[i]];
} break;
// Floating-point types: half is converted with round-toward-zero, float is
// copied unchanged.
case CL_HALF_FLOAT: {
uint16_t* color = reinterpret_cast<uint16_t*>(colorFormat);
color[i] = float2half_rtz(colorRGBAf[chOrder[i]]);
} break;
case CL_FLOAT: {
float* color = reinterpret_cast<float*>(colorFormat);
color[i] = colorRGBAf[chOrder[i]];
} break;
}
}
}
// Registry of live SVM allocations: maps the start address of each allocation
// to its end address (one past the last byte), as recorded by
// SvmBuffer::malloc().
std::map<uintptr_t, uintptr_t> SvmBuffer::Allocated_;
// Lock taken by Add(), Remove() and Contains() around every access to Allocated_.
Monitor SvmBuffer::AllocatedLock_("Guards SVM allocation list");
// Records an allocation in the registry: key k is the start address, value v
// the end address. An existing entry with the same key is left unchanged.
void SvmBuffer::Add(uintptr_t k, uintptr_t v) {
  ScopedLock lock(AllocatedLock_);
  Allocated_.insert(std::make_pair(k, v));
}
// Removes the registry entry whose start address is k, if there is one.
void SvmBuffer::Remove(uintptr_t k) {
  ScopedLock lock(AllocatedLock_);
  const auto entry = Allocated_.find(k);
  if (entry != Allocated_.end()) {
    Allocated_.erase(entry);
  }
}
// Returns true when ptr falls inside one of the registered ranges
// [start, end).
bool SvmBuffer::Contains(uintptr_t ptr) {
  ScopedLock lock(AllocatedLock_);

  // First entry starting strictly after ptr; the only possible owner of ptr
  // is the entry just before it.
  auto candidate = Allocated_.upper_bound(ptr);
  if (candidate == Allocated_.begin()) {
    return false;
  }
  --candidate;

  const uintptr_t rangeStart = candidate->first;
  const uintptr_t rangeEnd = candidate->second;
  return (rangeStart <= ptr) && (ptr < rangeEnd);
}
// Allocates `size` bytes of SVM memory with the requested alignment through
// context.svmAlloc() (the flags are forwarded unchanged) and records the range
// [ptr, ptr + size) in the allocation registry.
// Returns the allocated pointer, or NULL (after logging an error) when the
// context could not provide the memory.
void* SvmBuffer::malloc(Context& context, cl_svm_mem_flags flags, size_t size, size_t alignment) {
  void* ret = context.svmAlloc(size, alignment, flags);
  if (ret == NULL) {
    LogError("Unable to allocate aligned memory");
    return NULL;
  }

  const uintptr_t rangeStart = reinterpret_cast<uintptr_t>(ret);
  Add(rangeStart, rangeStart + size);
  return ret;
}
// Drops ptr from the allocation registry, then releases the memory through
// context.svmFree().
void SvmBuffer::free(const Context& context, void* ptr) {
  const uintptr_t rangeStart = reinterpret_cast<uintptr_t>(ptr);
  Remove(rangeStart);
  context.svmFree(ptr);
}
void SvmBuffer::memFill(void* dst, const void* src, size_t srcSize, size_t times) {
address dstAddress = reinterpret_cast<address>(dst);
const_address srcAddress = reinterpret_cast<const_address>(src);
for (size_t i = 0; i < times; i++) {
::memcpy(dstAddress + i * srcSize, srcAddress, srcSize);
}
}
// Returns true when ptr lies inside a range recorded by SvmBuffer::malloc().
bool SvmBuffer::malloced(const void* ptr) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
  return Contains(address);
}
} // namespace amd