465c1c0287
SWDEV-102733 - [OCL-LC-ROCm] Cmake build Write CMakeLists.txt to enable building with and without the DK environment - Change the coding convention of the runtime files. Use Google's Style (https://google.github.io/styleguide/cppguide.html). Affected files ... ... //depot/stg/opencl/drivers/opencl/.clang-format#1 add ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_agent_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_command.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#53 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_counter.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.h#7 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#61 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_event.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#53 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd_amd.h#18 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel.h#24 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.h#4 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.h#6 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#81 edit ... 
//depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_object.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#41 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sampler.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_semaphore_amd.h#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#20 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.h#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/blit.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/blit.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/blitcl.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.cpp#66 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.hpp#40 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#280 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#96 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpufeat.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpukernel.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#70 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.hpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.cpp#33 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.hpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cputables.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.hpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#209 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#284 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#58 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.hpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#126 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.hpp#41 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#156 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.cpp#12 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudefs.hpp#147 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#567 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#163 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#318 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#126 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#131 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#50 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#44 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.hpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#232 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#69 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#238 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#87 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#19 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuschedcl.cpp#35 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuscsi.cpp#37 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#350 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#98 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.hpp#7 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputrap.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#410 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#140 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugger.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#45 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d10.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d11.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d9.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevicegl.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#34 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#39 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#28 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsched.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palschedcl.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltrap.hpp#2 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#48 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/mesa_glinterop.h#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdefs.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#48 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#20 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.hpp#5 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#64 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocregisters.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#34 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os.hpp#30 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os_posix.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os_win32.cpp#47 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#78 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#83 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.hpp#18 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/counter.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/interop.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#18 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#127 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#100 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/object.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/object.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#86 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#41 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.cpp#35 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/threadtrace.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/atomic.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.cpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.hpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/top.hpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/concurrent.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#271 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/macros.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/util.hpp#12 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/utils/versions.hpp#2150 edit
1166 行
36 KiB
C++
1166 行
36 KiB
C++
//! Implementation of GPU device memory management
|
|
|
|
#include "top.hpp"
|
|
#include "thread/thread.hpp"
|
|
#include "thread/monitor.hpp"
|
|
#include "device/device.hpp"
|
|
#include "device/gpu/gpudevice.hpp"
|
|
#include "device/gpu/gpublit.hpp"
|
|
|
|
#ifdef _WIN32
|
|
#include <d3d10_1.h>
|
|
#include "amdocl/cl_d3d9_amd.hpp"
|
|
#include "amdocl/cl_d3d10_amd.hpp"
|
|
#include "amdocl/cl_d3d11_amd.hpp"
|
|
#endif //_WIN32
|
|
#include "amdocl/cl_gl_amd.hpp"
|
|
|
|
#include <string>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <iostream>
|
|
|
|
//! Turn this on to enable sanity checks before and after every heap operation.
|
|
#if DEBUG
|
|
#define EXTRA_HEAP_CHECKS 1
|
|
#endif // DEBUG
|
|
|
|
namespace gpu {
|
|
|
|
//! Constructs a GPU memory object that is backed by a runtime memory owner.
//!
//! \param gpuDev  GPU device passed to the underlying Resource
//! \param owner   runtime memory object passed to device::Memory
//! \param size    size of the allocation; divided by Device::Heap::ElementSize
//!                to obtain the element count handed to Resource
//!
//! \note NOTE(review): the element count is not rounded up here, while the
//! owner-less (gpuDev, size) overload aligns size first - confirm intended.
Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t size)
    : device::Memory(owner),
      Resource(gpuDev, size / Device::Heap::ElementSize, Device::Heap::ElementType) {
  init();

  // An owner that has a parent is flagged as a sub memory object
  const bool ownerHasParent = (owner.parent() != NULL);
  if (ownerHasParent) {
    flags_ |= SubMemoryObject;
  }
}
|
|
|
|
//! Constructs a GPU memory object that has no runtime memory owner.
//!
//! \param gpuDev  GPU device passed to the underlying Resource
//! \param size    size of the allocation; rounded up to a multiple of
//!                Device::Heap::ElementSize before being converted into the
//!                element count handed to Resource
Memory::Memory(const Device& gpuDev, size_t size)
    : device::Memory(size),
      Resource(gpuDev,
               amd::alignUp(size, Device::Heap::ElementSize) / Device::Heap::ElementSize,
               Device::Heap::ElementType) {
  init();
}
|
|
|
|
//! Constructs an owner-backed GPU memory object with an explicit width and format.
//!
//! \param gpuDev  GPU device passed to the underlying Resource
//! \param owner   runtime memory object passed to device::Memory
//! \param width   width forwarded unchanged to Resource
//! \param format  surface format forwarded unchanged to Resource
Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t width, cmSurfFmt format)
    : device::Memory(owner), Resource(gpuDev, width, format) {
  init();

  // An owner that has a parent is flagged as a sub memory object
  const bool ownerHasParent = (owner.parent() != NULL);
  if (ownerHasParent) {
    flags_ |= SubMemoryObject;
  }
}
|
|
|
|
//! Constructs an owner-less GPU memory object with an explicit width and format.
//!
//! \param gpuDev  GPU device passed to the underlying Resource
//! \param size    size passed to device::Memory
//! \param width   width forwarded unchanged to Resource
//! \param format  surface format forwarded unchanged to Resource
Memory::Memory(const Device& gpuDev, size_t size, size_t width, cmSurfFmt format)
    : device::Memory(size), Resource(gpuDev, width, format) {
  init();
}
|
|
|
|
//! Constructs an owner-backed GPU memory object described by image dimensions.
//!
//! \param gpuDev     GPU device passed to the underlying Resource
//! \param owner      runtime memory object passed to device::Memory
//! \param width      forwarded unchanged to Resource
//! \param height     forwarded unchanged to Resource
//! \param depth      forwarded unchanged to Resource
//! \param format     surface format forwarded unchanged to Resource
//! \param chOrder    channel order forwarded unchanged to Resource
//! \param imageType  CL memory object type forwarded unchanged to Resource
//! \param mipLevels  mip level count forwarded unchanged to Resource
Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t width, size_t height, size_t depth,
               cmSurfFmt format, gslChannelOrder chOrder, cl_mem_object_type imageType,
               uint mipLevels)
    : device::Memory(owner),
      Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels) {
  init();

  // An owner that has a parent is flagged as a sub memory object
  const bool ownerHasParent = (owner.parent() != NULL);
  if (ownerHasParent) {
    flags_ |= SubMemoryObject;
  }
}
|
|
|
|
//! Constructs an owner-less GPU memory object described by image dimensions.
//!
//! \param gpuDev     GPU device passed to the underlying Resource
//! \param size       size passed to device::Memory
//! \param width      forwarded unchanged to Resource
//! \param height     forwarded unchanged to Resource
//! \param depth      forwarded unchanged to Resource
//! \param format     surface format forwarded unchanged to Resource
//! \param chOrder    channel order forwarded unchanged to Resource
//! \param imageType  CL memory object type forwarded unchanged to Resource
//! \param mipLevels  mip level count forwarded unchanged to Resource
Memory::Memory(const Device& gpuDev, size_t size, size_t width, size_t height, size_t depth,
               cmSurfFmt format, gslChannelOrder chOrder, cl_mem_object_type imageType,
               uint mipLevels)
    : device::Memory(size),
      Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels) {
  init();
}
|
|
|
|
//! Puts the members shared by every constructor into their initial state:
//! no parent, no interop or pinned memory attached, no interop type and
//! an indirect map count of zero.
void Memory::init() {
  // Object links
  parent_ = NULL;
  pinnedMemory_ = NULL;
  interopMemory_ = NULL;

  // Interop and mapping state
  interopType_ = InteropNone;
  indirectMapCount_ = 0;
}
|
|
|
|
#ifdef _WIN32
|
|
//! Retrieves the shared handle of a D3D object through its IDXGIResource interface.
//!
//! \param pIface  COM interface of the D3D resource; must not be NULL
//! \return the shared handle, or a zero handle if the IDXGIResource interface
//!         could not be queried or GetSharedHandle() did not return S_OK
static HANDLE getSharedHandle(IUnknown* pIface) {
  // Sanity checks
  assert(pIface != NULL);

  // Ask the object for its DXGI resource interface
  IDXGIResource* dxgiResource = NULL;
  const HRESULT queryStatus =
      pIface->QueryInterface(__uuidof(IDXGIResource), reinterpret_cast<void**>(&dxgiResource));
  if ((queryStatus != S_OK) || (dxgiResource == NULL)) {
    return static_cast<HANDLE>(0);
  }

  // Fetch the handle, then drop the reference obtained by QueryInterface()
  HANDLE sharedHandle = static_cast<HANDLE>(0);
  const HRESULT handleStatus = dxgiResource->GetSharedHandle(&sharedHandle);
  dxgiResource->Release();

  return (handleStatus == S_OK) ? sharedHandle : static_cast<HANDLE>(0);
}
|
|
#endif //_WIN32
|
|
|
|
//! Creates the underlying resource and derives the host-access and
//! sub-object flags of this memory object from the resulting memory type.
//!
//! The HostMemoryDirectAccess flag is always cleared first; the remaining
//! flags and the parent link are only updated if Resource::create() succeeds.
//!
//! \param memType  memory type passed to Resource::create()
//! \param params   creation parameters passed to Resource::create(); for the
//!                 View, ImageView and ImageBuffer memory types they are
//!                 reinterpreted as the matching parameter structure
//! \return the result of Resource::create()
bool Memory::create(Resource::MemoryType memType, Resource::CreateParams* params) {
  // Reset the flag in case we reallocate the heap in local/remote
  flags_ &= ~HostMemoryDirectAccess;

  // Create a resource in CAL; there is nothing to classify if that failed
  const bool created = Resource::create(memType, params);
  if (!created) {
    return created;
  }

  switch (memoryType()) {
    case Resource::Pinned:
    case Resource::ExternalPhysical:
      // Marks memory object for direct GPU access to the host memory
      flags_ |= HostMemoryDirectAccess;
      break;

    case Resource::Remote:
    case Resource::RemoteUSWC:
      // Only non-tiled remote memory is marked for direct GPU access to the host memory
      if (!cal()->tiled_) {
        flags_ |= HostMemoryDirectAccess;
      }
      break;

    case Resource::View: {
      Resource::ViewParams* viewParams = reinterpret_cast<Resource::ViewParams*>(params);
      const auto parentType = viewParams->resource_->memoryType();

      if (parentType == Resource::Persistent) {
        flags_ |= HostMemoryDirectAccess;
      }

      // Check if parent was allocated in system memory
      bool directHostAccess = (parentType == Resource::Pinned);
      if (!directHostAccess &&
          ((parentType == Resource::Remote) || (parentType == Resource::RemoteUSWC))) {
        // @todo Enable unconditional optimization for remote memory
        // Check for external allocation, to avoid the optimization
        // for non-VM (double copy) mode
        directHostAccess = (owner() != NULL) &&
            ((owner()->getMemFlags() & CL_MEM_ALLOC_HOST_PTR) || dev().settings().remoteAlloc_);
      }
      if (directHostAccess) {
        // Marks memory object for direct GPU access to the host memory
        flags_ |= HostMemoryDirectAccess;
      }

      // A view whose owner has a parent becomes a sub-object of the viewed memory
      if ((viewParams->owner_ != NULL) && (viewParams->owner_->parent() != NULL)) {
        parent_ = reinterpret_cast<const Memory*>(viewParams->memory_);
        flags_ |= SubMemoryObject;
      }
      break;
    }

    case Resource::ImageView: {
      Resource::ImageViewParams* imageViewParams =
          reinterpret_cast<Resource::ImageViewParams*>(params);
      parent_ = reinterpret_cast<const Memory*>(imageViewParams->memory_);
      // Always a sub-object; inherits the direct host access flag of the parent
      flags_ |= SubMemoryObject | (parent_->flags_ & HostMemoryDirectAccess);
      break;
    }

    case Resource::ImageBuffer: {
      Resource::ImageBufferParams* imageBufferParams =
          reinterpret_cast<Resource::ImageBufferParams*>(params);
      parent_ = reinterpret_cast<const Memory*>(imageBufferParams->memory_);
      // Always a sub-object; inherits the direct host access flag of the parent
      flags_ |= SubMemoryObject | (parent_->flags_ & HostMemoryDirectAccess);
      break;
    }

    default:
      break;
  }

  return created;
}
|
|
|
|
//! Dispatches a GL resource operation to the matching GSL call.
//!
//! \param operation  GLDecompressResource calls gslGLAcquire(),
//!                   GLInvalidateFBO calls gslGLRelease()
//! \return the result of the GSL call; false for an unknown operation
//!         (which also trips an assert in debug builds)
bool Memory::processGLResource(GLResourceOP operation) {
  switch (operation) {
    case GLDecompressResource:
      return gslGLAcquire();

    case GLInvalidateFBO:
      return gslGLRelease();

    default:
      assert(false && "unknown GLResourceOP");
      return false;
  }
}
|
|
|
|
|
|
//! Creates the resource that backs an interop (D3D10/D3D11/D3D9 on Windows, or
//! OpenGL) memory object.
//!
//! The interop object of the owner is inspected to select the interop memory
//! type and to fill the matching creation parameters. With InteropDirectAccess
//! this object itself is created as the interop resource; for any other type a
//! separate Memory object is allocated into interopMemory_ and created instead.
//!
//! \param type  requested interop type, stored via setInteropType() on success
//! \return true on success; false if the interop object is of no supported
//!         kind, its D3D dimension / GL object type is not handled, or the
//!         resource creation failed
bool Memory::createInterop(InteropType type) {
  Resource::MemoryType memType = Resource::Empty;
  Resource::OGLInteropParams oglParams;
#ifdef _WIN32
  Resource::D3DInteropParams d3dParams;
#endif  //_WIN32

  // Only external objects support interop
  assert(owner() != NULL);

  Resource::CreateParams* createParams = NULL;

  amd::InteropObject* interop = owner()->getInteropObj();
  assert((interop != NULL) && "An invalid interop object is impossible!");

  amd::GLObject* glObject = interop->asGLObject();

#ifdef _WIN32
  amd::D3D10Object* d3d10Object = interop->asD3D10Object();
  amd::D3D11Object* d3d11Object = interop->asD3D11Object();
  amd::D3D9Object* d3d9Object = interop->asD3D9Object();

  if (d3d10Object != NULL) {
    const amd::D3D10ObjDesc_t* desc = d3d10Object->getObjDesc();

    memType = Resource::D3D10Interop;
    createParams = &d3dParams;
    d3dParams.owner_ = owner();

    // Get shared handle
    // NOTE(review): without a shared handle iDirect3D_ is not assigned here - confirm
    d3dParams.handle_ = getSharedHandle(d3d10Object->getD3D10Resource());
    if (d3dParams.handle_) {
      d3dParams.iDirect3D_ = static_cast<void*>(d3d10Object->getD3D10Resource());
      d3dParams.type_ = Resource::InteropTypeless;
    }

    d3dParams.misc = 0;

    // Find D3D10 object type
    switch (desc->objDim_) {
      case D3D10_RESOURCE_DIMENSION_BUFFER:
        d3dParams.type_ = Resource::InteropVertexBuffer;
        break;

      case D3D10_RESOURCE_DIMENSION_TEXTURE1D:
      case D3D10_RESOURCE_DIMENSION_TEXTURE2D:
      case D3D10_RESOURCE_DIMENSION_TEXTURE3D:
        if (desc->mipLevels_ > 1) {
          // Mipmapped texture: expose a single level (and layer) as a view
          d3dParams.type_ = Resource::InteropTextureViewLevel;

          if (desc->arraySize_ > 1) {
            // Split the subresource index into array layer and mip level
            d3dParams.layer_ = d3d10Object->getSubresource() / desc->mipLevels_;
            d3dParams.mipLevel_ = d3d10Object->getSubresource() % desc->mipLevels_;
          } else {
            d3dParams.layer_ = 0;
            d3dParams.mipLevel_ = d3d10Object->getSubresource();
          }
        } else {
          d3dParams.type_ = Resource::InteropTexture;
        }
        break;

      default:
        return false;
    }
  } else if (d3d11Object != NULL) {
    const amd::D3D11ObjDesc_t* desc = d3d11Object->getObjDesc();

    memType = Resource::D3D11Interop;
    createParams = &d3dParams;
    d3dParams.owner_ = owner();

    // Get shared handle
    // NOTE(review): without a shared handle iDirect3D_ is not assigned here - confirm
    d3dParams.handle_ = getSharedHandle(d3d11Object->getD3D11Resource());
    if (d3dParams.handle_) {
      d3dParams.iDirect3D_ = static_cast<void*>(d3d11Object->getD3D11Resource());
      d3dParams.type_ = Resource::InteropTypeless;
    }

    d3dParams.misc = 0;

    // Find D3D11 object type
    switch (desc->objDim_) {
      case D3D11_RESOURCE_DIMENSION_BUFFER:
        d3dParams.type_ = Resource::InteropVertexBuffer;
        break;

      case D3D11_RESOURCE_DIMENSION_TEXTURE1D:
      case D3D11_RESOURCE_DIMENSION_TEXTURE2D:
      case D3D11_RESOURCE_DIMENSION_TEXTURE3D:
        // Plane and misc flag come from the D3D11 object; the layer is
        // overwritten below for mipmapped textures
        d3dParams.layer_ = d3d11Object->getPlane();
        d3dParams.misc = d3d11Object->getMiscFlag();

        if (desc->mipLevels_ > 1) {
          // Mipmapped texture: expose a single level (and layer) as a view
          d3dParams.type_ = Resource::InteropTextureViewLevel;

          if (desc->arraySize_ > 1) {
            // Split the subresource index into array layer and mip level
            d3dParams.layer_ = d3d11Object->getSubresource() / desc->mipLevels_;
            d3dParams.mipLevel_ = d3d11Object->getSubresource() % desc->mipLevels_;
          } else {
            d3dParams.layer_ = 0;
            d3dParams.mipLevel_ = d3d11Object->getSubresource();
          }
        } else {
          d3dParams.type_ = Resource::InteropTexture;
        }
        break;

      default:
        return false;
    }
  } else if (d3d9Object != NULL) {
    // The descriptor is fetched but not consulted on the D3D9 path
    const amd::D3D9ObjDesc_t* desc = d3d9Object->getObjDesc();

    memType = Resource::D3D9Interop;
    createParams = &d3dParams;
    d3dParams.owner_ = owner();

    // Get shared handle
    // NOTE(review): without a shared handle none of the remaining fields
    // are assigned before the resource is created - confirm
    d3dParams.handle_ = d3d9Object->getD3D9SharedHandle();
    if (d3dParams.handle_) {
      d3dParams.iDirect3D_ = static_cast<void*>(d3d9Object->getD3D9Resource());
      d3dParams.type_ = Resource::InteropSurface;
      d3dParams.mipLevel_ = 0;
      d3dParams.layer_ = d3d9Object->getPlane();
      d3dParams.misc = d3d9Object->getMiscFlag();
    }
  } else
#endif  //_WIN32
      if (glObject != NULL) {
    memType = Resource::OGLInterop;
    createParams = &oglParams;
    oglParams.owner_ = owner();

    // Fill the interop creation parameters
    oglParams.handle_ = static_cast<CALuint>(glObject->getGLName());

    // Find OGL object type
    switch (glObject->getCLGLObjectType()) {
      case CL_GL_OBJECT_BUFFER:
        oglParams.type_ = Resource::InteropVertexBuffer;
        break;

      case CL_GL_OBJECT_TEXTURE_BUFFER:
      case CL_GL_OBJECT_TEXTURE1D:
      case CL_GL_OBJECT_TEXTURE1D_ARRAY:
      case CL_GL_OBJECT_TEXTURE2D:
      case CL_GL_OBJECT_TEXTURE2D_ARRAY:
      case CL_GL_OBJECT_TEXTURE3D:
        oglParams.type_ = Resource::InteropTexture;

        if (GL_TEXTURE_CUBE_MAP == glObject->getGLTarget()) {
          // A cube map face is exposed as a view of that face
          switch (glObject->getCubemapFace()) {
            case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
            case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
            case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
            case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
            case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
            case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
              oglParams.type_ = Resource::InteropTextureViewCube;
              // Layer is the face index relative to the +X face
              oglParams.layer_ = glObject->getCubemapFace() - GL_TEXTURE_CUBE_MAP_POSITIVE_X;
              oglParams.mipLevel_ = glObject->getGLMipLevel();
              break;

            default:
              break;
          }
        } else if (glObject->getGLMipLevel() != 0) {
          // A non-zero mip level is exposed as a view of that level
          oglParams.type_ = Resource::InteropTextureViewLevel;
          oglParams.layer_ = 0;
          oglParams.mipLevel_ = glObject->getGLMipLevel();
        }
        break;

      case CL_GL_OBJECT_RENDERBUFFER:
        oglParams.type_ = Resource::InteropRenderBuffer;
        break;

      default:
        return false;
    }

    oglParams.glPlatformContext_ = owner()->getContext().info().hCtx_;
    oglParams.glDeviceContext_ =
        owner()->getContext().info().hDev_[amd::Context::DeviceFlagIdx::GLDeviceKhrIdx];
    // We dont pass any flags here for the GL Resource.
    oglParams.flags_ = 0;
  } else {
    // The interop object is of no supported kind
    return false;
  }

  // Get the interop settings
  if (type == InteropDirectAccess) {
    // Create memory object
    if (!create(memType, createParams)) {
      return false;
    }
  } else {
    // Allocate Resource object for interop as buffer
    interopMemory_ = new Memory(
        dev(), size(), amd::alignUp(size(), Device::Heap::ElementSize) / Device::Heap::ElementSize,
        Device::Heap::ElementType);

    // Create the interop object in CAL
    if ((interopMemory_ == NULL) || !interopMemory_->create(memType, createParams)) {
      delete interopMemory_;
      interopMemory_ = NULL;
      return false;
    }
  }

  setInteropType(type);

  return true;
}
|
|
|
|
//! Releases everything attached to this memory object: its VA cache entry,
//! the interop memory, the map target, pinned memory that was flagged as
//! allocated, and a direct-access host mapping where one applies.
Memory::~Memory() {
  // Clean VA cache
  dev().removeVACache(this);

  delete interopMemory_;

  // Release associated map target, if any
  if (mapMemory_ != NULL) {
    mapMemory()->unmap(NULL);
    mapMemory_->release();
  }

  // Destroy pinned memory
  if (flags_ & PinnedMemoryAlloced) {
    delete pinnedMemory_;
  }

  // Unmap memory if direct access was requested; sub-objects and
  // external physical memory are left alone
  const bool unmapDirectAccess = (owner() != NULL) && isHostMemDirectAccess() &&
      !(flags_ & SubMemoryObject) && (memoryType() != Resource::ExternalPhysical);
  if (unmapDirectAccess) {
    unmap(NULL);
  }
}
|
|
|
|
//! Brings this device allocation up to date with the owner's host memory,
//! issuing the transfer on \p gpu. \p syncFlags allows the caller to skip
//! the parent sync, the views sync or the data transfer itself.
void Memory::syncCacheFromHost(VirtualGPU& gpu, device::Memory::SyncFlags syncFlags) {
  // The last writer was another device, so make a writeback first
  if (!isHostMemDirectAccess() && (owner()->getLastWriter() != NULL) &&
      (owner()->getLastWriter() != &dev())) {
    mgpuCacheWriteBack();
  }

  // Synchronization is required only when host memory exists and
  // the device has no direct access to it
  if (isHostMemDirectAccess() || (owner()->getHostMem() == NULL)) {
    return;
  }

  // A subbuffer makes sure its parent is up to date first
  if (!syncFlags.skipParent_ && (flags_ & SubMemoryObject)) {
    gpu::Memory* parentMemory = dev().getGpuMemory(owner()->parent());

    //! \note: Skipping the sync for a view doesn't reflect the parent settings,
    //! since a view is a small portion of parent
    device::Memory::SyncFlags parentFlags;

    // The parent is synced from a view, so it has to skip its views
    parentFlags.skipViews_ = true;

    // The app may use multiple subbuffers from multiple queues, hence
    // the parent sync can be requested from multiple threads.
    // The parent lock keeps it a unique operation.
    amd::ScopedLock parentLock(owner()->parent()->lockMemoryOps());
    parentMemory->syncCacheFromHost(gpu, parentFlags);
    //! \note No early exit here, since this view still has to be synced
    //! if the parent sync operation was a NOP.
    //! If parent was synchronized, then this view sync will be a NOP
  }

  // The sync is a NOP when the version matches or this device was the last writer
  const bool hasUpdates =
      (version_ != owner()->getVersion()) && (&dev() != owner()->getLastWriter());

  // Update all available views, since the parent is being synced
  if ((owner()->subBuffers().size() != 0) && (hasUpdates || !syncFlags.skipViews_)) {
    device::Memory::SyncFlags viewFlags;

    // Views are synced from the parent, so they have to skip the parent
    viewFlags.skipParent_ = true;

    // Pass the skip entire flag through to the views, since any view is
    // a submemory of the parent...
    viewFlags.skipEntire_ = syncFlags.skipEntire_;
    if (hasUpdates) {
      // ...unless the parent itself will be synced: then views always skip
      viewFlags.skipEntire_ = true;
    }

    amd::ScopedLock viewsLock(owner()->lockMemoryOps());
    for (auto& view : owner()->subBuffers()) {
      //! \note Don't allow subbuffer's allocation in the worker thread.
      //! It may cause a system lock, because possible resource
      //! destruction, heap reallocation or subbuffer allocation
      static const bool AllocSubBuffer = false;
      device::Memory* devView = view->getDeviceMemory(dev(), AllocSubBuffer);
      if (devView == NULL) {
        continue;
      }
      reinterpret_cast<gpu::Memory*>(devView)->syncCacheFromHost(gpu, viewFlags);
    }
  }

  // Record the latest version, unless this device was the last writer
  if (&dev() != owner()->getLastWriter()) {
    version_ = owner()->getVersion();
  }

  // Nothing to transfer if the sync is a NOP or can be skipped
  if (!hasUpdates || syncFlags.skipEntire_) {
    return;
  }

  bool result = false;
  static const bool Entire = true;
  amd::Coord3D origin(0, 0, 0);
  const bool pinned = (flags_ & PinnedMemoryAlloced) != 0;

  if (cal()->buffer_) {
    amd::Coord3D region(owner()->getSize());
    // Pinned host memory allows a direct transfer
    if (pinned) {
      result = gpu.blitMgr().copyBuffer(*pinnedMemory_, *this, origin, origin, region, Entire);
    }
    // Otherwise, or if that transfer failed, do a basic host write
    if (!result) {
      result = gpu.blitMgr().writeBuffer(owner()->getHostMem(), *this, origin, region, Entire);
    }
  } else {
    amd::Image& image = *static_cast<amd::Image*>(owner());
    // Pinned host memory allows a direct transfer
    if (pinned) {
      result = gpu.blitMgr().copyBufferToImage(*pinnedMemory_, *this, origin, origin,
                                               image.getRegion(), Entire, image.getRowPitch(),
                                               image.getSlicePitch());
    }
    // Otherwise, or if that transfer failed, do a basic host write
    if (!result) {
      result = gpu.blitMgr().writeImage(owner()->getHostMem(), *this, origin, image.getRegion(),
                                        image.getRowPitch(), image.getSlicePitch(), Entire);
    }
  }

  //!@todo A wait isn't really necessary. However
  //! Linux no-VM may have extra random failures.
  wait(gpu);

  // Should never fail
  assert(result && "Memory synchronization failed!");
}
|
|
|
|
//! Brings the owner's host memory up to date with this device allocation.
//! \p syncFlags allows the caller to skip the parent sync, the views sync
//! or the data transfer itself.
void Memory::syncHostFromCache(device::Memory::SyncFlags syncFlags) {
  // Sanity checks
  assert(owner() != NULL);

  // With direct access to host memory there is nothing to synchronize
  if (isHostMemDirectAccess()) {
    return;
  }

  // A subbuffer makes sure its parent is up to date first
  if (!syncFlags.skipParent_ && (flags_ & SubMemoryObject)) {
    device::Memory* parentMemory = owner()->parent()->getDeviceMemory(dev());

    //! \note: Skipping the sync for a view doesn't reflect the parent settings,
    //! since a view is a small portion of parent
    device::Memory::SyncFlags parentFlags;

    // The parent is synced from a view, so it has to skip its views
    parentFlags.skipViews_ = true;

    // The app may use multiple subbuffers from multiple queues, hence
    // the parent sync can be requested from multiple threads.
    // The parent lock keeps it a unique operation.
    amd::ScopedLock parentLock(owner()->parent()->lockMemoryOps());
    parentMemory->syncHostFromCache(parentFlags);
    //! \note No early exit here, since this view still has to be synced
    //! if the parent sync operation was a NOP.
    //! If parent was synchronized, then this view sync will be a NOP
  }

  // The sync is a NOP when there is no last writer or the version matches
  const bool hasUpdates =
      (owner()->getLastWriter() != NULL) && (version_ != owner()->getVersion());

  // Update all available views, since the parent is being synced
  if ((owner()->subBuffers().size() != 0) && (hasUpdates || !syncFlags.skipViews_)) {
    device::Memory::SyncFlags viewFlags;

    // Views are synced from the parent, so they have to skip the parent
    viewFlags.skipParent_ = true;

    // Pass the skip entire flag through to the views, since any view is
    // a submemory of the parent...
    viewFlags.skipEntire_ = syncFlags.skipEntire_;
    if (hasUpdates) {
      // ...unless the parent itself will be synced: then views always skip
      viewFlags.skipEntire_ = true;
    }

    amd::ScopedLock viewsLock(owner()->lockMemoryOps());
    for (auto& view : owner()->subBuffers()) {
      //! \note Don't allow subbuffer's allocation in the worker thread.
      //! It may cause a system lock, because possible resource
      //! destruction, heap reallocation or subbuffer allocation
      static const bool AllocSubBuffer = false;
      device::Memory* devView = view->getDeviceMemory(dev(), AllocSubBuffer);
      if (devView == NULL) {
        continue;
      }
      reinterpret_cast<gpu::Memory*>(devView)->syncHostFromCache(viewFlags);
    }
  }

  // Record the owner's version, unless there is no last writer
  // (the original note: CPU was the last writer)
  if (owner()->getLastWriter() != NULL) {
    version_ = owner()->getVersion();
  }

  // Nothing to transfer if the sync is a NOP or can be skipped
  if (!hasUpdates || syncFlags.skipEntire_) {
    return;
  }

  bool result = false;
  static const bool Entire = true;
  amd::Coord3D origin(0, 0, 0);
  const bool pinned = (flags_ & PinnedMemoryAlloced) != 0;

  if (cal()->buffer_) {
    amd::Coord3D region(owner()->getSize());
    // A pinned backing store allows a direct transfer
    if (pinned) {
      result = dev().xferMgr().copyBuffer(*this, *pinnedMemory_, origin, origin, region, Entire);
    }
    // Otherwise, or if that transfer failed, just do a basic host read
    if (!result) {
      result = dev().xferMgr().readBuffer(*this, owner()->getHostMem(), origin, region, Entire);
    }
  } else {
    amd::Image& image = *static_cast<amd::Image*>(owner());
    // A pinned backing store allows a direct transfer
    if (pinned) {
      result = dev().xferMgr().copyImageToBuffer(*this, *pinnedMemory_, origin, origin,
                                                 image.getRegion(), Entire, image.getRowPitch(),
                                                 image.getSlicePitch());
    }
    // Otherwise, or if that transfer failed, just do a basic host read
    if (!result) {
      result = dev().xferMgr().readImage(*this, owner()->getHostMem(), origin, image.getRegion(),
                                         image.getRowPitch(), image.getSlicePitch(), Entire);
    }
  }

  // Should never fail
  assert(result && "Memory synchronization failed!");
}
|
|
|
|
//! Creates a gpu::Memory view of this object for the subbuffer described by
//! \p subBufferOwner (its origin and size select the viewed range).
//! Also updates the host memory pointer of \p subBufferOwner.
//! \return the new view object, or NULL if the allocation or the view creation failed
gpu::Memory* Memory::createBufferView(amd::Memory& subBufferOwner) {
  const size_t offset = subBufferOwner.getOrigin();
  const size_t size = subBufferOwner.getSize();

  // Create a memory object
  gpu::Memory* viewMemory = new gpu::Memory(dev(), subBufferOwner, size);
  if (NULL == viewMemory) {
    return NULL;
  }

  // Describe the view: a [offset, offset + size) window into this resource
  Resource::ViewParams params;
  params.owner_ = &subBufferOwner;
  params.gpu_ = static_cast<VirtualGPU*>(subBufferOwner.getVirtualDevice());
  params.offset_ = offset;
  params.size_ = size;
  params.resource_ = this;
  params.memory_ = this;
  if (!viewMemory->create(Resource::View, &params)) {
    delete viewMemory;
    return NULL;
  }

  // Explicitly set the host memory location,
  // because the parent location could change after reallocation
  if (NULL != owner()->getHostMem()) {
    subBufferOwner.setHostMem(reinterpret_cast<char*>(owner()->getHostMem()) + offset);
  } else {
    subBufferOwner.setHostMem(NULL);
  }

  return viewMemory;
}
|
|
|
|
void Memory::decIndMapCount() {
|
|
// Map/unmap must be serialized
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
|
|
|
if (indirectMapCount_ == 0) {
|
|
if (!mipMapped()) {
|
|
LogError("decIndMapCount() called when indirectMapCount_ already zero");
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Decrement the counter and release indirect map if it's the last op
|
|
if (--indirectMapCount_ == 0) {
|
|
if (NULL != mapMemory_) {
|
|
amd::Memory* memory = mapMemory_;
|
|
amd::Memory* empty = NULL;
|
|
|
|
// Get GPU memory
|
|
Memory* gpuMemory = mapMemory();
|
|
gpuMemory->unmap(NULL);
|
|
|
|
if (!dev().addMapTarget(memory)) {
|
|
memory->release();
|
|
}
|
|
|
|
// Map/unamp is serialized for the same memory object,
|
|
// so it's safe to clear the pointer
|
|
assert((mapMemory_ != NULL) && "Mapped buffer should be valid");
|
|
mapMemory_ = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Note - must be called by the device under the async lock, so no spinning
|
|
// or long pauses allowed in this function.
|
|
void* Memory::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& region, uint mapFlags,
|
|
size_t* rowPitch, size_t* slicePitch) {
|
|
// Sanity checks
|
|
assert(owner() != NULL);
|
|
|
|
// Map/unmap must be serialized
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
|
|
|
address mapAddress = NULL;
|
|
size_t offset = origin[0];
|
|
|
|
// For SVM implementation, we cannot use cached map. if svm space, use the svm host pointer
|
|
void* initHostPtr = owner()->getSvmPtr();
|
|
if (NULL != initHostPtr) {
|
|
owner()->commitSvmMemory();
|
|
}
|
|
|
|
if (owner()->numDevices() > 1) {
|
|
if ((NULL == initHostPtr) && (owner()->getHostMem() == NULL)) {
|
|
static const bool forceAllocHostMem = true;
|
|
if (!owner()->allocHostMemory(NULL, forceAllocHostMem)) {
|
|
return NULL;
|
|
}
|
|
//! \note Ignore pinning result
|
|
// bool ok = pinSystemMemory(owner()->getHostMem(), owner()->getSize());
|
|
}
|
|
}
|
|
|
|
incIndMapCount();
|
|
// If host memory exists, use it
|
|
if ((owner()->getHostMem() != NULL) && isDirectMap()) {
|
|
mapAddress = reinterpret_cast<address>(owner()->getHostMem());
|
|
}
|
|
// If resource is a persistent allocation, we can use it directly
|
|
else if (isPersistentDirectMap()) {
|
|
if (NULL == map(NULL)) {
|
|
LogError("Could not map target persistent resource");
|
|
decIndMapCount();
|
|
return NULL;
|
|
}
|
|
mapAddress = data();
|
|
}
|
|
// Otherwise we can use a remote resource:
|
|
else {
|
|
// Are we in range?
|
|
size_t elementCount = cal()->width_;
|
|
size_t rSize = elementCount * elementSize();
|
|
if (offset >= rSize || offset + region[0] > rSize) {
|
|
LogWarning("Memory::allocMapTarget() - offset/size out of bounds");
|
|
return NULL;
|
|
}
|
|
|
|
// Allocate a map resource if there isn't any yet
|
|
if (indirectMapCount_ == 1) {
|
|
const static bool SysMem = true;
|
|
bool failed = false;
|
|
amd::Memory* memory = NULL;
|
|
// Search for a possible indirect resource
|
|
cl_mem_flags flag = 0;
|
|
bool canBeCached = true;
|
|
if (NULL != initHostPtr) {
|
|
// make sure the host memory is committed already, or we have a big problem.
|
|
assert(owner()->isSvmPtrCommited() && "The host svm memory not committed yet!");
|
|
flag = CL_MEM_USE_HOST_PTR;
|
|
canBeCached = false;
|
|
} else {
|
|
memory = dev().findMapTarget(owner()->getSize());
|
|
}
|
|
|
|
if (memory == NULL) {
|
|
// for map target of svm buffer , we need use svm host ptr
|
|
memory = new (dev().context()) amd::Buffer(dev().context(), flag, owner()->getSize());
|
|
Memory* gpuMemory;
|
|
|
|
do {
|
|
if ((memory == NULL) || !memory->create(initHostPtr, SysMem)) {
|
|
failed = true;
|
|
break;
|
|
}
|
|
memory->setCacheStatus(canBeCached);
|
|
|
|
gpuMemory = reinterpret_cast<Memory*>(memory->getDeviceMemory(dev()));
|
|
|
|
// Create, Map and get the base pointer for the resource
|
|
if ((gpuMemory == NULL) || (NULL == gpuMemory->map(NULL))) {
|
|
failed = true;
|
|
break;
|
|
}
|
|
} while (false);
|
|
}
|
|
|
|
if (failed) {
|
|
if (memory != NULL) {
|
|
memory->release();
|
|
}
|
|
decIndMapCount();
|
|
LogError("Could not map target resource");
|
|
return NULL;
|
|
}
|
|
|
|
// Map/unamp is serialized for the same memory object,
|
|
// so it's safe to assign the new pointer
|
|
assert((mapMemory_ == NULL) && "Mapped buffer can't be valid");
|
|
mapMemory_ = memory;
|
|
} else {
|
|
// Did the map resource allocation fail?
|
|
if (mapMemory_ == NULL) {
|
|
LogError("Could not map target resource");
|
|
return NULL;
|
|
}
|
|
}
|
|
mapAddress = mapMemory()->data();
|
|
}
|
|
|
|
return mapAddress + offset;
|
|
}
|
|
|
|
bool Memory::pinSystemMemory(void* hostPtr, size_t size) {
|
|
bool result = false;
|
|
|
|
// If memory has a direct access already, then skip the host memory pinning
|
|
if (isHostMemDirectAccess()) {
|
|
return true;
|
|
}
|
|
|
|
// Check if memory is pinned already
|
|
if (flags_ & PinnedMemoryAlloced) {
|
|
return true;
|
|
}
|
|
|
|
// Allocate memory for the pinned object
|
|
pinnedMemory_ = new Memory(dev(), size);
|
|
|
|
if (pinnedMemory_ == NULL) {
|
|
return false;
|
|
}
|
|
|
|
// Check if it's a view
|
|
if (flags_ & SubMemoryObject) {
|
|
const gpu::Memory* gpuMemory;
|
|
if (owner() != NULL) {
|
|
gpuMemory = dev().getGpuMemory(owner()->parent());
|
|
} else {
|
|
gpuMemory = parent();
|
|
}
|
|
|
|
if (gpuMemory->flags_ & PinnedMemoryAlloced) {
|
|
Resource::ViewParams params;
|
|
params.owner_ = owner();
|
|
params.offset_ = owner()->getOrigin();
|
|
params.size_ = owner()->getSize();
|
|
params.resource_ = gpuMemory->pinnedMemory_;
|
|
params.memory_ = NULL;
|
|
result = pinnedMemory_->create(Resource::View, ¶ms);
|
|
}
|
|
} else {
|
|
Resource::PinnedParams params;
|
|
// Fill resource creation parameters
|
|
params.owner_ = owner();
|
|
params.hostMemRef_ = owner()->getHostMemRef();
|
|
params.size_ = size;
|
|
|
|
// Create resource
|
|
result = pinnedMemory_->create(Resource::Pinned, ¶ms);
|
|
}
|
|
|
|
if (!result) {
|
|
delete pinnedMemory_;
|
|
pinnedMemory_ = NULL;
|
|
return false;
|
|
}
|
|
|
|
flags_ |= PinnedMemoryAlloced;
|
|
return true;
|
|
}
|
|
|
|
//! Maps the resource for CPU access through the virtual GPU \p vDev.
//! \param flags      Memory::CpuReadOnly or Memory::CpuWriteOnly select the matching
//!                   Resource map flag; any other value maps with no flags
//! \param startLayer forwarded to map()
//! \param numLayers  forwarded to map()
//! \param rowPitch   receives the row pitch in bytes for non-buffer resources; may be NULL
//! \param slicePitch receives the slice pitch in bytes for non-buffer resources; may be NULL
//! \return the pointer returned by map()
void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, uint numLayers,
                     size_t* rowPitch, size_t* slicePitch) {
  uint resFlags = 0;
  if (flags == Memory::CpuReadOnly) {
    resFlags = Resource::ReadOnly;
  } else if (flags == Memory::CpuWriteOnly) {
    resFlags = Resource::WriteOnly;
  }

  void* ptr = map(&static_cast<VirtualGPU&>(vDev), resFlags, startLayer, numLayers);

  // Pitches are reported for non-buffer resources only,
  // and only through the output pointers the caller provided
  if (!cal()->buffer_) {
    if (rowPitch != NULL) {
      *rowPitch = cal()->pitch_ * elementSize();
    }
    if (slicePitch != NULL) {
      *slicePitch = cal()->slice_ * elementSize();
    }
  }
  return ptr;
}
|
|
|
|
//! Unmaps the resource through the virtual GPU \p vDev (counterpart of cpuMap()).
void Memory::cpuUnmap(device::VirtualDevice& vDev) {
  VirtualGPU& gpu = static_cast<VirtualGPU&>(vDev);
  unmap(&gpu);
}
|
|
|
|
//! \note moveTo() must be called only from outside of
|
|
//! VirtualGPU submit command methods.
|
|
//! Otherwise a deadlock in lockVgpus() is possible.
|
|
//! Also the logic in this function is very specific to
|
|
//! the zero-copy functionality.
|
|
|
|
bool Memory::moveTo(Memory& dst) {
|
|
bool result = false;
|
|
|
|
// Make sure that all virtual devices don't process any commands
|
|
Device::ScopedLockVgpus lock(dev());
|
|
|
|
// Wait for idle on all virtual GPUs
|
|
//!@note It's enough to wait on the active queue only
|
|
for (uint idx = 0; idx < dev().vgpus().size(); ++idx) {
|
|
wait(*(dev().vgpus()[idx]));
|
|
}
|
|
|
|
static const bool Entire = true;
|
|
amd::Coord3D origin(0, 0, 0);
|
|
amd::Coord3D region(size());
|
|
|
|
// Transfer the data from old location to a new one
|
|
if (dev().xferMgr().copyBuffer(*this, dst, origin, origin, region, Entire)) {
|
|
// Move all properties to the new object
|
|
dst.mapMemory_ = mapMemory_;
|
|
mapMemory_ = NULL;
|
|
|
|
dst.flags_ |= flags_ & ~HostMemoryDirectAccess;
|
|
flags_ &= HostMemoryDirectAccess;
|
|
|
|
dst.indirectMapCount_ = indirectMapCount_;
|
|
indirectMapCount_ = 0;
|
|
|
|
dst.pinnedMemory_ = pinnedMemory_;
|
|
pinnedMemory_ = NULL;
|
|
|
|
// Replace the device memory object
|
|
//! @note: current object will be destroyed
|
|
owner()->replaceDeviceMemory(&dev(), &dst);
|
|
result = true;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
//! Returns the device memory object of the map target for this device,
//! or NULL if no map target is attached.
Memory* Memory::mapMemory() const {
  if (mapMemory_ == NULL) {
    return NULL;
  }
  return reinterpret_cast<Memory*>(mapMemory_->getDeviceMemory(dev()));
}
|
|
|
|
void Memory::mgpuCacheWriteBack() {
|
|
// Lock memory object, so only one write back can occur
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
|
|
|
// Attempt to allocate a staging buffer if don't have any
|
|
if (owner()->getHostMem() == NULL) {
|
|
if (nullptr != owner()->getSvmPtr()) {
|
|
owner()->commitSvmMemory();
|
|
owner()->setHostMem(owner()->getSvmPtr());
|
|
} else {
|
|
static const bool forceAllocHostMem = true;
|
|
owner()->allocHostMemory(nullptr, forceAllocHostMem);
|
|
}
|
|
}
|
|
// Make synchronization
|
|
if (owner()->getHostMem() != NULL) {
|
|
//! \note Ignore pinning result
|
|
bool ok = pinSystemMemory(owner()->getHostMem(), owner()->getSize());
|
|
owner()->cacheWriteBack();
|
|
}
|
|
}
|
|
|
|
//! Creates a gpu::Buffer view of this buffer for the subbuffer described by
//! \p subBufferOwner (its origin and size select the viewed range).
//! \return the new view object, or NULL if the allocation or the view creation failed
Memory* Buffer::createBufferView(amd::Memory& subBufferOwner) const {
  const size_t offset = subBufferOwner.getOrigin();
  const size_t size = subBufferOwner.getSize();

  // Create a memory object
  gpu::Memory* subBuffer = new gpu::Buffer(dev(), subBufferOwner, size);
  if (NULL == subBuffer) {
    return NULL;
  }

  // Allocate a view for this buffer object
  Resource::ViewParams params;
  params.owner_ = &subBufferOwner;
  params.offset_ = offset;
  params.size_ = size;
  params.resource_ = this;
  params.memory_ = this;

  if (!subBuffer->create(Resource::View, &params)) {
    delete subBuffer;
    return NULL;
  }

  return subBuffer;
}
|
|
|
|
//! Provides the host address an application map of this image should use and
//! reports the matching pitches. The address comes from, in order of
//! preference: the owner's host memory (direct map), this resource itself
//! (persistent direct map), or a separate 1D system memory buffer kept in
//! mapMemory_ (indirect map; the returned address is the start of that buffer).
//! The indirect map counter is incremented on success only; every failure
//! path restores it before returning.
//! \param origin     origin of the mapped region in elements
//! \param region     size of the mapped region in elements; defines the pitches
//!                   of the indirect map
//! \param mapFlags   not used by this implementation
//! \param rowPitch   receives the row pitch in bytes; may be NULL
//! \param slicePitch receives the slice pitch in bytes; may be NULL
//! \return the mapped address adjusted by the offset, or NULL on failure
void* Image::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& region, uint mapFlags,
                            size_t* rowPitch, size_t* slicePitch) {
  // Sanity checks
  assert(owner() != NULL);
  bool useRemoteResource = true;
  size_t rowPitchTmp = 0;
  size_t slicePitchTmp = 0;
  size_t height = cal()->height_;
  size_t depth = cal()->depth_;

  // Map/unmap must be serialized
  amd::ScopedLock lock(owner()->lockMemoryOps());

  address mapAddress = NULL;
  size_t offset = origin[0];

  incIndMapCount();

  // If host memory exists, use it
  if ((owner()->getHostMem() != NULL) && isDirectMap()) {
    useRemoteResource = false;
    mapAddress = reinterpret_cast<address>(owner()->getHostMem());
    amd::Image* amdImage = owner()->asImage();

    // Calculate the offset in bytes
    offset *= elementSize();

    // Update the row and slice pitches value.
    // A zero pitch in the image object means a tightly packed layout
    rowPitchTmp =
        (amdImage->getRowPitch() == 0) ? (cal()->width_ * elementSize()) : amdImage->getRowPitch();
    slicePitchTmp =
        (amdImage->getSlicePitch() == 0) ? (height * rowPitchTmp) : amdImage->getSlicePitch();

    // Adjust the offset in Y and Z dimensions
    offset += origin[1] * rowPitchTmp;
    offset += origin[2] * slicePitchTmp;
  }
  // If resource is a persistent allocation, we can use it directly
  //! @note Even if resource is a persistent allocation,
  //! runtime can't use it directly,
  //! because CAL volume map doesn't work properly.
  //! @todo arrays can be added for persistent lock with some CAL changes
  else if (isPersistentDirectMap()) {
    if (NULL == map(NULL)) {
      useRemoteResource = true;
      LogError("Could not map target persistent resource, try remote resource");
    } else {
      useRemoteResource = false;
      mapAddress = data();

      // Calculate the offset in bytes
      offset *= elementSize();

      // Update the row pitch value
      rowPitchTmp = cal()->pitch_ * elementSize();

      // Adjust the offset in Y dimension
      offset += origin[1] * rowPitchTmp;
    }
  }

  // Otherwise we can use a remote resource:
  if (useRemoteResource) {
    // Calculate X offset in bytes
    offset *= elementSize();

    // Allocate a map resource if there isn't any yet
    if (indirectMapCount_ == 1) {
      const static bool SysMem = true;
      bool failed = false;

      // Search for a possible indirect resource
      amd::Memory* memory = dev().findMapTarget(owner()->getSize());

      if (memory == NULL) {
        // Allocate a new buffer to use as the map target
        //! @note Allocate a 1D buffer, since CAL issues with 3D
        //! Also HW doesn't support untiled images
        memory = new (dev().context())
            amd::Buffer(dev().context(), 0, cal()->width_ * height * depth * elementSize());

        // Single-pass loop: any failing step breaks out with 'failed' set
        do {
          // The new buffer must be validated before its first use
          if (memory == NULL) {
            failed = true;
            break;
          }
          memory->setVirtualDevice(owner()->getVirtualDevice());

          if (!memory->create(NULL, SysMem)) {
            failed = true;
            break;
          }

          Memory* gpuMemory = reinterpret_cast<Memory*>(memory->getDeviceMemory(dev()));

          // Create, Map and get the base pointer for the resource
          if ((gpuMemory == NULL) || (NULL == gpuMemory->map(NULL))) {
            failed = true;
            break;
          }
        } while (false);
      }

      if (failed) {
        if (memory != NULL) {
          memory->release();
        }
        decIndMapCount();
        LogError("Could not map target resource");
        return NULL;
      }

      // Map/unmap is serialized for the same memory object,
      // so it's safe to assign the new pointer
      assert((mapMemory_ == NULL) && "Mapped buffer can't be valid");
      mapMemory_ = memory;
    } else {
      // Did the map resource allocation fail?
      if (mapMemory_ == NULL) {
        LogError("Could not map target resource");
        // The map didn't happen, so undo the increment made above
        decIndMapCount();
        return NULL;
      }
    }

    mapAddress = mapMemory()->data();

    // Update the row and slice pitches value.
    // The indirect buffer holds just the mapped region, tightly packed
    rowPitchTmp = region[0] * elementSize();
    if (cal()->dimension_ == GSL_MOA_TEXTURE_1D_ARRAY) {
      slicePitchTmp = rowPitchTmp;
    } else {
      slicePitchTmp = rowPitchTmp * region[1];
    }
    // Use start of the indirect buffer
    offset = 0;
  }

  // Report the pitches through the output pointers the caller provided
  if (rowPitch != NULL) {
    *rowPitch = rowPitchTmp;
  }
  if (slicePitch != NULL) {
    *slicePitch = slicePitchTmp;
  }

  return mapAddress + offset;
}
|
|
|
|
} // namespace gpu
|