P4 to Git Change 1381244 by gandryey@gera-w8 on 2017/03/03 17:58:38

SWDEV-107546 - [ROCm CQE][OCL][LC/HSAIL][mGPU][G] WF conf test "Buffers" fails in mGPU configs
	- Add MGPU coherency layer support

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#125 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#99 edit


[ROCm/clr commit: 8a1b72640a]
Этот коммит содержится в:
foreman
2017-03-03 18:02:55 -05:00
родитель 1cb396361b
Коммит b03a26d066
9 изменённых файлов: 542 добавлений и 140 удалений
+18 -17
Просмотреть файл
@@ -953,7 +953,7 @@ KernelBlitManager::copyBufferToImage(
size_t imgSlicePitch = imgRowPitch * size[1];
if (setup_.disableCopyBufferToImage_) {
result = DmaBlitManager::copyBufferToImage(
result = HostBlitManager::copyBufferToImage(
srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
synchronize();
@@ -1061,7 +1061,7 @@ KernelBlitManager::copyBufferToImageKernel(
// todo ROC runtime has a problem with a view for this format
(gpuMem(dstMemory).owner()->asImage()->
getImageFormat().image_channel_data_type != CL_UNORM_INT_101010)) {
dstView = createView(gpuMem(dstMemory), newFormat);
dstView = createView(gpuMem(dstMemory), newFormat, CL_MEM_WRITE_ONLY);
if (dstView != NULL) {
rejected = false;
releaseView = true;
@@ -1189,7 +1189,7 @@ KernelBlitManager::copyImageToBuffer(
size_t imgSlicePitch = imgRowPitch * size[1];
if (setup_.disableCopyImageToBuffer_) {
result = HostBlitManager::copyImageToBuffer(
result = DmaBlitManager::copyImageToBuffer(
srcMemory, dstMemory, srcOrigin, dstOrigin,
size, entire, rowPitch, slicePitch);
synchronize();
@@ -1265,7 +1265,7 @@ KernelBlitManager::copyImageToBufferKernel(
// todo ROC runtime has a problem with a view for this format
(gpuMem(srcMemory).owner()->asImage()->
getImageFormat().image_channel_data_type != CL_UNORM_INT_101010)) {
srcView = createView(gpuMem(srcMemory), newFormat);
srcView = createView(gpuMem(srcMemory), newFormat, CL_MEM_READ_ONLY);
if (srcView != NULL) {
rejected = false;
releaseView = true;
@@ -1417,9 +1417,9 @@ KernelBlitManager::copyImage(
// Attempt to create a view if the format was rejected
if (rejected) {
srcView = createView(gpuMem(srcMemory), newFormat);
srcView = createView(gpuMem(srcMemory), newFormat, CL_MEM_READ_ONLY);
if (srcView != NULL) {
dstView = createView(gpuMem(dstMemory), newFormat);
dstView = createView(gpuMem(dstMemory), newFormat, CL_MEM_WRITE_ONLY);
if (dstView != NULL) {
rejected = false;
releaseView = true;
@@ -1433,7 +1433,7 @@ KernelBlitManager::copyImage(
// Fall into the host path for the entire 2D copy or
// if the image format was rejected
if (rejected) {
result = HostBlitManager::copyImage(srcMemory, dstMemory,
result = DmaBlitManager::copyImage(srcMemory, dstMemory,
srcOrigin, dstOrigin, size, entire);
synchronize();
return result;
@@ -1584,7 +1584,7 @@ KernelBlitManager::readImage(
if (amdMemory == NULL) {
// Force SW copy
result = HostBlitManager::readImage(srcMemory, dstHost,
result = DmaBlitManager::readImage(srcMemory, dstHost,
origin, size, rowPitch, slicePitch, entire);
synchronize();
return result;
@@ -1638,7 +1638,7 @@ KernelBlitManager::writeImage(
if (amdMemory == NULL) {
// Force SW copy
result = HostBlitManager::writeImage(
result = DmaBlitManager::writeImage(
srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire);
synchronize();
return result;
@@ -1679,7 +1679,7 @@ KernelBlitManager::copyBufferRect(
// Fall into the ROC path for rejected transfers
if (setup_.disableCopyBufferRect_ ||
gpuMem(srcMemory).isHostMemDirectAccess() || gpuMem(dstMemory).isHostMemDirectAccess()) {
result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory,
result = HostBlitManager::copyBufferRect(srcMemory, dstMemory,
srcRectIn, dstRectIn, sizeIn, entire);
if (result) {
@@ -1819,7 +1819,7 @@ KernelBlitManager::readBuffer(
if (amdMemory == NULL) {
// Force SW copy
result = HostBlitManager::readBuffer(
result = DmaBlitManager::readBuffer(
srcMemory, dstHost, origin, size, entire);
synchronize();
return result;
@@ -1875,7 +1875,7 @@ KernelBlitManager::readBufferRect(
if (amdMemory == NULL) {
// Force SW copy
result = HostBlitManager::readBufferRect(
result = DmaBlitManager::readBufferRect(
srcMemory, dstHost, bufRect, hostRect, size, entire);
synchronize();
return result;
@@ -1933,7 +1933,7 @@ KernelBlitManager::writeBuffer(
if (amdMemory == NULL) {
// Force SW copy
result = HostBlitManager::writeBuffer(
result = DmaBlitManager::writeBuffer(
srcHost, dstMemory, origin, size, entire);
synchronize();
return result;
@@ -2264,7 +2264,7 @@ KernelBlitManager::fillImage(
}
// If the image format was rejected, then attempt to create a view
if (rejected) {
memView = createView(gpuMem(memory), newFormat);
memView = createView(gpuMem(memory), newFormat, CL_MEM_WRITE_ONLY);
if (memView != NULL) {
rejected = false;
releaseView = true;
@@ -2419,11 +2419,12 @@ DmaBlitManager::pinHostMemory(
Memory*
KernelBlitManager::createView(
const Memory& parent,
const cl_image_format format) const
cl_image_format format,
cl_mem_flags flags) const
{
assert((parent.owner()->asBuffer() == nullptr) && "View supports images only");
amd::Image *image =
parent.owner()->asImage()->createView(parent.owner()->getContext(), format, &gpu());
amd::Image *image = parent.owner()->asImage()->createView(
parent.owner()->getContext(), format, &gpu(), 0, flags);
if (image == NULL) {
LogError("[OCL] Fail to allocate view of image object");
+3 -2
Просмотреть файл
@@ -439,8 +439,9 @@ private:
//! Creates a view memory object
Memory* createView(
const Memory& parent, //!< Parent memory object
const cl_image_format format //!< The new format for a view
const Memory& parent, //!< Parent memory object
cl_image_format format, //!< The new format for a view
cl_mem_flags flags //!< Memory flags
) const;
//! Disable copy constructor
+13 -7
Просмотреть файл
@@ -1382,9 +1382,12 @@ Device::createMemory(amd::Memory &owner) const
return NULL;
}
// Transfer data only if OCL context has one device.
// Cache coherency layer will update data for multiple devices
if (!memory->isHostMemDirectAccess() && owner.asImage() &&
owner.parent() == NULL &&
(owner.getMemFlags() & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR))) {
(owner.parent() == nullptr) &&
(owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
(owner.getContext().devices().size() == 1)) {
// To avoid a recursive call to Device::createMemory, we perform
// data transfer to the view of the image.
amd::Image* imageView = owner.asImage()->createView(
@@ -1417,15 +1420,18 @@ Device::createMemory(amd::Memory &owner) const
amd::Coord3D(0, 0, 0), imageView->getRegion(),
0,
0, true);
// Release host memory for single device, since runtime copied data
if ((owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
(owner.getContext().devices().size() == 1)) {
owner.setHostMem(nullptr);
}
// Release host memory, since runtime copied data
owner.setHostMem(nullptr);
imageView->release();
}
// Prepin sysmem buffer for possible data synchronization between CPU and GPU
if (!memory->isHostMemDirectAccess() && (owner.getHostMem() != nullptr)) {
memory->pinSystemMemory(owner.getHostMem(), owner.getSize());
}
if (!result) {
delete memory;
return NULL;
+4 -2
Просмотреть файл
@@ -411,6 +411,8 @@ public:
amd::Memory* mem //!< Pointer to AMD memory object
) const;
amd::Context& context() const { return *context_; }
private:
static hsa_ven_amd_loader_1_00_pfn_t amd_loader_ext_table;
@@ -431,8 +433,8 @@ private:
size_t gpuvm_segment_max_alloc_;
size_t alloc_granularity_;
static const bool offlineDevice_;
amd::Context *context_; //!< A dummy context for internal data transfer
VirtualGPU *xferQueue_; //!< Transfer queue, created on demand
amd::Context* context_; //!< A dummy context for internal data transfer
VirtualGPU* xferQueue_; //!< Transfer queue, created on demand
VirtualGPU* xferQueue() const;
+385 -46
Просмотреть файл
@@ -29,6 +29,7 @@ Memory::Memory(const roc::Device &dev, amd::Memory &owner)
, dev_(dev)
, deviceMemory_(NULL)
, kind_(MEMORY_KIND_NORMAL)
, pinnedMemory_(nullptr)
{
}
@@ -37,12 +38,18 @@ Memory::Memory(const roc::Device &dev, size_t size)
, dev_(dev)
, deviceMemory_(NULL)
, kind_(MEMORY_KIND_NORMAL)
, pinnedMemory_(nullptr)
{
}
Memory::~Memory()
{
dev_.removeVACache(this);
// Destroy pinned memory
if (flags_ & PinnedMemoryAlloced) {
pinnedMemory_->release();
}
dev().removeVACache(this);
if (nullptr != mapMemory_) {
mapMemory_->release();
}
@@ -55,13 +62,11 @@ Memory::allocateMapMemory(size_t allocationSize)
void *mapData = NULL;
amd::Memory* mapMemory = dev_.findMapTarget(owner()->getSize());
amd::Memory* mapMemory = dev().findMapTarget(owner()->getSize());
if (mapMemory == nullptr) {
// Create buffer object to contain the map target.
mapMemory =
new(owner()->getContext()) amd::Buffer(
owner()->getContext(), CL_MEM_ALLOC_HOST_PTR, owner()->getSize());
mapMemory = new (dev().context()) amd::Buffer(
dev().context(), CL_MEM_ALLOC_HOST_PTR, owner()->getSize());
if ((mapMemory == NULL) || (!mapMemory->create())) {
LogError("[OCL] Fail to allocate map target object");
@@ -96,7 +101,6 @@ Memory::allocMapTarget(
amd::ScopedLock lock(owner()->lockMemoryOps());
incIndMapCount();
// If the device backing storage is direct accessible, use it.
if (isHostMemDirectAccess()) {
if (owner()->getHostMem() != nullptr) {
@@ -126,7 +130,6 @@ Memory::allocMapTarget(
return NULL;
}
}
return reinterpret_cast<address>(mapMemory_->getHostMem()) + origin[0];
}
@@ -144,7 +147,7 @@ Memory::decIndMapCount()
// Decrement the counter and release indirect map if it's the last op
if (--indirectMapCount_ == 0 &&
mapMemory_ != NULL) {
if (!dev_.addMapTarget(mapMemory_)) {
if (!dev().addMapTarget(mapMemory_)) {
// Release the buffer object containing the map data.
mapMemory_->release();
}
@@ -219,11 +222,11 @@ bool Memory::createInteropBuffer(GLenum targetType, int miplevel, size_t* metada
in.out_driver_data_size=0;
in.out_driver_data=NULL;
if(!dev_.mesa().Export(in, out))
if(!dev().mesa().Export(in, out))
return false;
size_t size;
hsa_agent_t agent=dev_.getBackendDevice();
hsa_agent_t agent=dev().getBackendDevice();
hsa_status_t status=hsa_amd_interop_map_buffer(1, &agent, out.dmabuf_fd, 0, &size, &deviceMemory_, metadata_size, (const void**)metadata);
close(out.dmabuf_fd);
@@ -244,6 +247,344 @@ void Memory::destroyInteropBuffer()
deviceMemory_=NULL;
}
//! Pins the system memory associated with this memory object.
//!
//! Nothing is done when the object already has direct host access or was
//! pinned before. For an object with a parent, a view on the parent's pinned
//! buffer is tried first; otherwise a CL_MEM_USE_HOST_PTR buffer is created
//! over \p hostPtr in the device's internal context. Requesting the device
//! memory of that buffer is what forces the actual pinning.
//!
//! \param hostPtr  System memory address
//! \param size     Size of the allocated system memory
//! \return true if the memory is directly accessible, was already pinned or
//!         has been pinned by this call; false on any allocation failure
bool
Memory::pinSystemMemory(void* hostPtr, size_t size)
{
    const static bool SysMem = true;
    amd::Memory* amdMemory = nullptr;
    amd::Memory* amdParent = owner()->parent();

    // If memory has a direct access already, then skip the host memory pinning
    if (isHostMemDirectAccess()) {
        return true;
    }

    // Memory was pinned already
    if (flags_ & PinnedMemoryAlloced) {
        return true;
    }

    // Check if runtime allocates a parent object
    if (amdParent != nullptr) {
        Memory* parent = dev().getRocMemory(amdParent);
        // The parent's device memory may be unavailable, so don't dereference
        // it blindly; fall back to pinning this object's own host pointer
        amd::Memory* amdPinned =
            (parent != nullptr) ? parent->pinnedMemory_ : nullptr;
        if (amdPinned != nullptr) {
            // Create view on the parent's pinned memory
            amdMemory = new (amdPinned->getContext()) amd::Buffer(
                *amdPinned, 0, owner()->getOrigin(), owner()->getSize());
            if ((amdMemory != nullptr) && !amdMemory->create()) {
                amdMemory->release();
                amdMemory = nullptr;
            }
        }
    }

    if (amdMemory == nullptr) {
        amdMemory = new (dev().context())
            amd::Buffer(dev().context(), CL_MEM_USE_HOST_PTR, size);
        // Allocation of the buffer object itself failed: nothing to release
        if (amdMemory == nullptr) {
            return false;
        }
        if (!amdMemory->create(hostPtr, SysMem)) {
            amdMemory->release();
            return false;
        }
    }

    // Get device memory for this virtual device
    // @note: This will force real memory pinning
    Memory* srcMemory = dev().getRocMemory(amdMemory);
    if (srcMemory == nullptr) {
        // Release memory
        amdMemory->release();
        return false;
    }

    pinnedMemory_ = amdMemory;
    flags_ |= PinnedMemoryAlloced;
    return true;
}
//! Updates this device memory from the owner's host allocation.
//!
//! If a different device was the last writer, mgpuCacheWriteBack() is called
//! first. Then, when the object has no direct host access and the owner has
//! host memory, the parent and the sub-buffers are visited recursively
//! (controlled by \p syncFlags) and, unless the sync is a NOP or skipEntire_
//! is set, the entire object is transferred through the blit manager of \p gpu.
//!
//! \param gpu        Virtual GPU whose blit manager issues the transfer and
//!                   which is waited on afterwards
//! \param syncFlags  Synchronization flags; skipParent_, skipViews_ and
//!                   skipEntire_ are read here
void
Memory::syncCacheFromHost(VirtualGPU& gpu, device::Memory::SyncFlags syncFlags)
{
// If the last writer was another GPU, then make a writeback
if (!isHostMemDirectAccess() &&
(owner()->getLastWriter() != nullptr) &&
(&dev() != owner()->getLastWriter())) {
mgpuCacheWriteBack();
}
// If host memory doesn't have direct access, then we have to synchronize
if (!isHostMemDirectAccess() && (nullptr != owner()->getHostMem())) {
bool hasUpdates = true;
amd::Memory* amdParent = owner()->parent();
// Make sure the parent of subbuffer is up to date
if (!syncFlags.skipParent_ && (amdParent != nullptr)) {
Memory* gpuMemory = dev().getRocMemory(amdParent);
//! \note: Skipping the sync for a view doesn't reflect the parent settings,
//! since a view is a small portion of parent
device::Memory::SyncFlags syncFlagsTmp;
// Sync parent from a view, so views have to be skipped
syncFlagsTmp.skipViews_ = true;
// Make sure the parent sync is a unique operation.
// If the app uses multiple subbuffers from multiple queues,
// then the parent sync can be called from multiple threads
amd::ScopedLock lock(owner()->parent()->lockMemoryOps());
gpuMemory->syncCacheFromHost(gpu, syncFlagsTmp);
//! \note Don't do early exit here, since we still have to sync
//! this view, if the parent sync operation was a NOP.
//! If parent was synchronized, then this view sync will be a NOP
}
// Is this a NOP? It is when this copy already carries the owner's version
// or when this device itself was the last writer
if ((version_ == owner()->getVersion()) ||
(&dev() == owner()->getLastWriter())) {
hasUpdates = false;
}
// Update all available views, since we sync the parent
if ((owner()->subBuffers().size() != 0) &&
(hasUpdates || !syncFlags.skipViews_)) {
device::Memory::SyncFlags syncFlagsTmp;
// Sync views from parent, so parent has to be skipped
syncFlagsTmp.skipParent_ = true;
if (hasUpdates) {
// Parent will be synced so update all views with a skip
syncFlagsTmp.skipEntire_ = true;
}
else {
// Passthrough the skip entire flag to the views, since
// any view is a submemory of the parent
syncFlagsTmp.skipEntire_ = syncFlags.skipEntire_;
}
// Hold the owner's lock while the sub-buffer list is walked
amd::ScopedLock lock(owner()->lockMemoryOps());
for (auto& sub : owner()->subBuffers()) {
//! \note Don't allow subbuffer's allocation in the worker thread.
//! It may cause a system lock, because possible resource
//! destruction, heap reallocation or subbuffer allocation
static const bool AllocSubBuffer = false;
device::Memory* devSub =
sub->getDeviceMemory(dev(), AllocSubBuffer);
// Only sub-buffers that already have device memory are synced
if (nullptr != devSub) {
Memory* gpuSub = reinterpret_cast<Memory*>(devSub);
gpuSub->syncCacheFromHost(gpu, syncFlagsTmp);
}
}
}
// Make sure we didn't have a NOP,
// because this GPU device was the last writer
if (&dev() != owner()->getLastWriter()) {
// Update the latest version
version_ = owner()->getVersion();
}
// Exit if sync is a NOP or sync can be skipped
if (!hasUpdates || syncFlags.skipEntire_) {
return;
}
bool result = false;
static const bool Entire = true;
amd::Coord3D origin(0, 0, 0);
// If host memory was pinned then make a transfer
if (flags_ & PinnedMemoryAlloced) {
Memory& pinned = *dev().getRocMemory(pinnedMemory_);
if (owner()->getType() == CL_MEM_OBJECT_BUFFER) {
amd::Coord3D region(owner()->getSize());
result = gpu.blitMgr().copyBuffer(pinned,
*this, origin, origin, region, Entire);
}
else {
amd::Image& image = static_cast<amd::Image&>(*owner());
result = gpu.blitMgr().copyBufferToImage(pinned,
*this, origin, origin, image.getRegion(), Entire,
image.getRowPitch(), image.getSlicePitch());
}
}
// No pinned memory, or the pinned transfer failed:
// fall back to a plain write from the host pointer
if (!result) {
if (owner()->getType() == CL_MEM_OBJECT_BUFFER) {
amd::Coord3D region(owner()->getSize());
result = gpu.blitMgr().writeBuffer(owner()->getHostMem(),
*this, origin, region, Entire);
}
else {
amd::Image& image = static_cast<amd::Image&>(*owner());
result = gpu.blitMgr().writeImage(owner()->getHostMem(),
*this, origin, image.getRegion(),
image.getRowPitch(), image.getSlicePitch(), Entire);
}
}
//!@todo A wait isn't really necessary. However processMemObjects()
// may lose the track of dependencies with a compute transfer(if sdma failed).
wait(gpu);
// Should never fail
assert(result && "Memory synchronization failed!");
}
}
//! Writes this device memory back to the owner's host allocation.
//!
//! Does nothing for objects with direct host access. Otherwise the parent and
//! the sub-buffers are visited recursively (controlled by \p syncFlags) and,
//! unless the sync is a NOP or skipEntire_ is set, the entire object is read
//! back through the device transfer manager (dev().xferMgr()).
//!
//! \param syncFlags  Synchronization flags; skipParent_, skipViews_ and
//!                   skipEntire_ are read here
void
Memory::syncHostFromCache(device::Memory::SyncFlags syncFlags)
{
// Sanity checks
assert(owner() != nullptr);
// If host memory doesn't have direct access, then we have to synchronize
if (!isHostMemDirectAccess()) {
bool hasUpdates = true;
amd::Memory* amdParent = owner()->parent();
// Make sure the parent of subbuffer is up to date
if (!syncFlags.skipParent_ && (amdParent != nullptr)) {
device::Memory* m = dev().getRocMemory(amdParent);
//! \note: Skipping the sync for a view doesn't reflect the parent settings,
//! since a view is a small portion of parent
device::Memory::SyncFlags syncFlagsTmp;
// Sync parent from a view, so views have to be skipped
syncFlagsTmp.skipViews_ = true;
// Make sure the parent sync is a unique operation.
// If the app uses multiple subbuffers from multiple queues,
// then the parent sync can be called from multiple threads
amd::ScopedLock lock(owner()->parent()->lockMemoryOps());
m->syncHostFromCache(syncFlagsTmp);
//! \note Don't do early exit here, since we still have to sync
//! this view, if the parent sync operation was a NOP.
//! If parent was synchronized, then this view sync will be a NOP
}
// Is this a NOP? It is when no device is recorded as the last writer
// or when this copy already carries the owner's version
if ((nullptr == owner()->getLastWriter()) ||
(version_ == owner()->getVersion())) {
hasUpdates = false;
}
// Update all available views, since we sync the parent
if ((owner()->subBuffers().size() != 0) &&
(hasUpdates || !syncFlags.skipViews_)) {
device::Memory::SyncFlags syncFlagsTmp;
// Sync views from parent, so parent has to be skipped
syncFlagsTmp.skipParent_ = true;
if (hasUpdates) {
// Parent will be synced so update all views with a skip
syncFlagsTmp.skipEntire_ = true;
}
else {
// Passthrough the skip entire flag to the views, since
// any view is a submemory of the parent
syncFlagsTmp.skipEntire_ = syncFlags.skipEntire_;
}
// Hold the owner's lock while the sub-buffer list is walked
amd::ScopedLock lock(owner()->lockMemoryOps());
for (auto& sub : owner()->subBuffers()) {
//! \note Don't allow subbuffer's allocation in the worker thread.
//! It may cause a system lock, because possible resource
//! destruction, heap reallocation or subbuffer allocation
static const bool AllocSubBuffer = false;
device::Memory* devSub =
sub->getDeviceMemory(dev(), AllocSubBuffer);
// Only sub-buffers that already have device memory are synced
if (nullptr != devSub) {
Memory* gpuSub = reinterpret_cast<Memory*>(devSub);
gpuSub->syncHostFromCache(syncFlagsTmp);
}
}
}
// Make sure we didn't have a NOP,
// because CPU was the last writer
if (nullptr != owner()->getLastWriter()) {
// Mark parent as up to date, set our version accordingly
version_ = owner()->getVersion();
}
// Exit if sync is a NOP or sync can be skipped
if (!hasUpdates || syncFlags.skipEntire_) {
return;
}
bool result = false;
static const bool Entire = true;
amd::Coord3D origin(0, 0, 0);
// If backing store was pinned then make a transfer
if (flags_ & PinnedMemoryAlloced) {
Memory& pinned = *dev().getRocMemory(pinnedMemory_);
if (owner()->getType() == CL_MEM_OBJECT_BUFFER) {
amd::Coord3D region(owner()->getSize());
result = dev().xferMgr().copyBuffer(*this,
pinned, origin, origin, region, Entire);
}
else {
amd::Image& image = static_cast<amd::Image&>(*owner());
result = dev().xferMgr().copyImageToBuffer(*this,
pinned, origin, origin, image.getRegion(), Entire,
image.getRowPitch(), image.getSlicePitch());
}
}
// Just do a basic host read
// (no pinned memory, or the pinned transfer failed)
if (!result) {
if (owner()->getType() == CL_MEM_OBJECT_BUFFER) {
amd::Coord3D region(owner()->getSize());
result = dev().xferMgr().readBuffer(*this,
owner()->getHostMem(), origin, region, Entire);
}
else {
amd::Image& image = static_cast<amd::Image&>(*owner());
result = dev().xferMgr().readImage(*this,
owner()->getHostMem(), origin, image.getRegion(),
image.getRowPitch(), image.getSlicePitch(), Entire);
}
}
// Should never fail
assert(result && "Memory synchronization failed!");
}
}
//! Makes sure the owner has host memory to act as a staging buffer and then
//! requests a cache write-back on the owner.
//!
//! When the owner has no host memory yet, its SVM pointer is committed and
//! reused if one exists; otherwise a host allocation is forced. If host
//! memory is still missing afterwards, the function does nothing further.
void
Memory::mgpuCacheWriteBack()
{
    // Lock memory object, so only one write back can occur
    amd::ScopedLock lock(owner()->lockMemoryOps());

    // Attempt to allocate a staging buffer if don't have any
    if (owner()->getHostMem() == nullptr) {
        if (nullptr != owner()->getSvmPtr()) {
            owner()->commitSvmMemory();
            owner()->setHostMem(owner()->getSvmPtr());
        }
        else {
            static const bool forceAllocHostMem = true;
            owner()->allocHostMemory(nullptr, forceAllocHostMem);
        }
    }

    // Make synchronization
    if (owner()->getHostMem() != nullptr) {
        //! \note The pinning result is deliberately ignored:
        //! cacheWriteBack() is issued whether or not pinning succeeded
        static_cast<void>(
            pinSystemMemory(owner()->getHostMem(), owner()->getSize()));
        owner()->cacheWriteBack();
    }
}
/////////////////////////////////roc::Buffer//////////////////////////////
Buffer::Buffer(const roc::Device &dev, amd::Memory &owner)
@@ -257,7 +598,7 @@ Buffer::Buffer(const roc::Device &dev, size_t size)
Buffer::~Buffer()
{
if (owner() == nullptr) {
dev_.hostFree(deviceMemory_, size());
dev().hostFree(deviceMemory_, size());
}
else {
destroy();
@@ -285,18 +626,18 @@ Buffer::destroy()
// deallocated later on => avoid double deallocation
if (isHostMemDirectAccess()) {
if (memFlags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)) {
if (dev_.agent_profile() != HSA_PROFILE_FULL) {
if (dev().agent_profile() != HSA_PROFILE_FULL) {
hsa_amd_memory_unlock(owner()->getHostMem());
}
}
}
else {
dev_.memFree(deviceMemory_, size());
dev().memFree(deviceMemory_, size());
}
}
if (memFlags & CL_MEM_USE_HOST_PTR) {
if (dev_.agent_profile() == HSA_PROFILE_FULL) {
if (dev().agent_profile() == HSA_PROFILE_FULL) {
hsa_memory_deregister(owner()->getHostMem(), size());
}
}
@@ -306,7 +647,7 @@ bool
Buffer::create()
{
if (owner() == nullptr) {
deviceMemory_ = dev_.hostAlloc(size(), 1, false);
deviceMemory_ = dev().hostAlloc(size(), 1, false);
if (deviceMemory_ != nullptr) {
flags_ |= HostMemoryDirectAccess;
return true;
@@ -332,7 +673,6 @@ Buffer::create()
const size_t offset = owner()->getOrigin();
deviceMemory_ = parentBuffer->getDeviceMemory() + offset;
flags_ |= SubMemoryObject;
flags_ |= parentBuffer->isHostMemDirectAccess() ?
HostMemoryDirectAccess : 0;
@@ -352,32 +692,35 @@ Buffer::create()
// Allocate backing storage in device local memory unless UHP or AHP are set
const cl_mem_flags memFlags = owner()->getMemFlags();
if (!(memFlags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) {
deviceMemory_ = dev_.deviceLocalAlloc(size());
deviceMemory_ = dev().deviceLocalAlloc(size());
if (deviceMemory_ == NULL) {
// TODO: device memory is not enabled yet.
// Fallback to system memory if exist.
flags_ |= HostMemoryDirectAccess;
if (dev_.agent_profile() == HSA_PROFILE_FULL &&
if (dev().agent_profile() == HSA_PROFILE_FULL &&
owner()->getHostMem() != NULL) {
deviceMemory_ = owner()->getHostMem();
assert(
amd::isMultipleOf(
deviceMemory_,
static_cast<size_t>(dev_.info().memBaseAddrAlign_)));
static_cast<size_t>(dev().info().memBaseAddrAlign_)));
return true;
}
deviceMemory_ = dev_.hostAlloc(size(), 1, false);
deviceMemory_ = dev().hostAlloc(size(), 1, false);
owner()->setHostMem(deviceMemory_);
}
assert(
amd::isMultipleOf(
deviceMemory_,
static_cast<size_t>(dev_.info().memBaseAddrAlign_)));
static_cast<size_t>(dev().info().memBaseAddrAlign_)));
if (deviceMemory_ && (memFlags & CL_MEM_COPY_HOST_PTR)) {
// Transfer data only if OCL context has one device.
// Cache coherency layer will update data for multiple devices
if (deviceMemory_ && (memFlags & CL_MEM_COPY_HOST_PTR) &&
(owner()->getContext().devices().size() == 1) ) {
// To avoid a recursive call to Device::createMemory, we perform
// data transfer to the view of the buffer.
amd::Buffer *bufferView = new (owner()->getContext()) amd::Buffer(
@@ -390,16 +733,12 @@ Buffer::create()
bufferView->replaceDeviceMemory(&dev_, devBufferView);
bool ret = dev_.xferMgr().writeBuffer(
bool ret = dev().xferMgr().writeBuffer(
owner()->getHostMem(), *devBufferView, amd::Coord3D(0),
amd::Coord3D(size()), true);
// Release host memory for single device,
// since runtime copied data
if (owner()->getContext().devices().size() == 1) {
owner()->setHostMem(nullptr);
}
// Release host memory, since runtime copied data
owner()->setHostMem(nullptr);
bufferView->release();
return ret;
}
@@ -410,7 +749,7 @@ Buffer::create()
flags_ |= HostMemoryDirectAccess;
if (dev_.agent_profile() == HSA_PROFILE_FULL) {
if (dev().agent_profile() == HSA_PROFILE_FULL) {
deviceMemory_ = owner()->getHostMem();
if (memFlags & CL_MEM_USE_HOST_PTR) {
@@ -422,9 +761,8 @@ Buffer::create()
if (owner()->getSvmPtr() != owner()->getHostMem()) {
if (memFlags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)) {
hsa_agent_t agent = dev_.getBackendDevice();
hsa_status_t status = hsa_amd_memory_lock(
owner()->getHostMem(), owner()->getSize(), &agent, 1, &deviceMemory_);
owner()->getHostMem(), owner()->getSize(), nullptr, 0, &deviceMemory_);
if (status != HSA_STATUS_SUCCESS) {
deviceMemory_ = nullptr;
}
@@ -622,7 +960,7 @@ Image::createInteropImage()
originalDeviceMemory_=deviceMemory_;
hsa_status_t err=hsa_amd_image_create(dev_.getBackendDevice(), &imageDescriptor_, amdImageDesc_, originalDeviceMemory_, permission_, &hsaImageObject_);
hsa_status_t err=hsa_amd_image_create(dev().getBackendDevice(), &imageDescriptor_, amdImageDesc_, originalDeviceMemory_, permission_, &hsaImageObject_);
if(err!=HSA_STATUS_SUCCESS)
return false;
@@ -654,7 +992,7 @@ Image::create()
// Get memory size requirement for device specific image.
hsa_status_t status = hsa_ext_image_data_get_info(
dev_.getBackendDevice(), &imageDescriptor_,
dev().getBackendDevice(), &imageDescriptor_,
permission_, &deviceImageInfo_);
if (status != HSA_STATUS_SUCCESS) {
@@ -666,16 +1004,16 @@ Image::create()
// support alignment larger than HSA memory region allocation granularity.
// In this case, the user manages the alignment.
const size_t alloc_size =
(deviceImageInfo_.alignment <= dev_.alloc_granularity())
(deviceImageInfo_.alignment <= dev().alloc_granularity())
? deviceImageInfo_.size
: deviceImageInfo_.size + deviceImageInfo_.alignment;
if (!(owner()->getMemFlags() & CL_MEM_ALLOC_HOST_PTR)) {
originalDeviceMemory_ = dev_.deviceLocalAlloc(alloc_size);
originalDeviceMemory_ = dev().deviceLocalAlloc(alloc_size);
}
if (originalDeviceMemory_ == NULL) {
originalDeviceMemory_ = dev_.hostAlloc(alloc_size, 1, false);
originalDeviceMemory_ = dev().hostAlloc(alloc_size, 1, false);
}
deviceMemory_ = reinterpret_cast<void *>(
@@ -686,7 +1024,7 @@ Image::create()
deviceMemory_, static_cast<size_t>(deviceImageInfo_.alignment)));
status = hsa_ext_image_create(
dev_.getBackendDevice(), &imageDescriptor_, deviceMemory_,
dev().getBackendDevice(), &imageDescriptor_, deviceMemory_,
permission_, &hsaImageObject_);
if (status != HSA_STATUS_SUCCESS) {
@@ -712,10 +1050,11 @@ Image::createView(const Memory &parent)
}
kind_ = parent.getKind();
version_ = parent.version();
hsa_status_t status;
if (kind_ == MEMORY_KIND_INTEROP) {
status = hsa_amd_image_create(dev_.getBackendDevice(), &imageDescriptor_,
status = hsa_amd_image_create(dev().getBackendDevice(), &imageDescriptor_,
amdImageDesc_, deviceMemory_, permission_, &hsaImageObject_);
}
else if (oldestParent->asBuffer()) {
@@ -732,15 +1071,15 @@ Image::createView(const Memory &parent)
// Make sure the row pitch is aligned to pixels
rowPitch = elementSize *
amd::alignUp(rowPitch, dev_.info().imagePitchAlignment_);
amd::alignUp(rowPitch, dev().info().imagePitchAlignment_);
status = hsa_ext_image_create_with_layout(dev_.getBackendDevice(),
status = hsa_ext_image_create_with_layout(dev().getBackendDevice(),
&imageDescriptor_, deviceMemory_, permission_,
HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, rowPitch, 0,
&hsaImageObject_);
}
else {
status= hsa_ext_image_create(dev_.getBackendDevice(), &imageDescriptor_,
status= hsa_ext_image_create(dev().getBackendDevice(), &imageDescriptor_,
deviceMemory_, permission_, &hsaImageObject_);
}
@@ -830,7 +1169,7 @@ Image::destroy()
{
if (hsaImageObject_.handle != 0) {
hsa_status_t status =
hsa_ext_image_destroy(dev_.getBackendDevice(), hsaImageObject_);
hsa_ext_image_destroy(dev().getBackendDevice(), hsaImageObject_);
assert(status == HSA_STATUS_SUCCESS);
}
@@ -847,7 +1186,7 @@ Image::destroy()
}
if (originalDeviceMemory_ != NULL) {
dev_.memFree(originalDeviceMemory_, deviceImageInfo_.size);
dev().memFree(originalDeviceMemory_, deviceImageInfo_.size);
}
}
}
+18 -9
Просмотреть файл
@@ -39,17 +39,21 @@ class Memory : public device::Memory {
// Pins system memory associated with this memory object.
virtual bool pinSystemMemory(void *hostPtr, // System memory address
size_t size // Size of allocated system memory
) {
Unimplemented();
return true;
}
);
//! Updates device memory from the owner's host allocation
void syncCacheFromHost(
VirtualGPU& gpu, //!< Virtual GPU device object
//! Synchronization flags
device::Memory::SyncFlags syncFlags = device::Memory::SyncFlags()
);
// Immediate blocking write from device cache to owners's backing store.
// Marks owner as "current" by resetting the last writer to NULL.
virtual void syncHostFromCache(SyncFlags syncFlags = SyncFlags())
{
// Need to revisit this when multi-devices is supported.
}
virtual void syncHostFromCache(SyncFlags syncFlags = SyncFlags());
//! Allocates host memory for synchronization with MGPU context
void mgpuCacheWriteBack();
// Releases indirect map surface
void releaseIndirectMap() { decIndMapCount(); }
@@ -78,6 +82,10 @@ class Memory : public device::Memory {
MEMORY_KIND getKind() const { return kind_; }
const roc::Device& dev() const { return dev_; }
size_t version() const { return version_; }
protected:
bool allocateMapMemory(size_t allocationSize);
@@ -102,13 +110,14 @@ class Memory : public device::Memory {
// Track if this memory is interop, lock, gart, or normal.
MEMORY_KIND kind_;
private:
private:
// Disable copy constructor
Memory(const Memory &);
// Disable operator=
Memory &operator=(const Memory &);
amd::Memory* pinnedMemory_; //!< Memory used as pinned system memory
};
class Buffer : public roc::Memory {
+86 -49
Просмотреть файл
@@ -261,11 +261,14 @@ VirtualGPU::processMemObjects(
}
}
else {
Memory* gpuMemory = static_cast<Memory*>(memory->getDeviceMemory(dev()));
if (NULL != gpuMemory) {
Memory* rocMemory = static_cast<Memory*>(memory->getDeviceMemory(dev()));
if (NULL != rocMemory) {
// Synchronize data with other memory instances if necessary
rocMemory->syncCacheFromHost(*this);
const static bool IsReadOnly = false;
// Validate SVM passed in the non argument list
memoryDependency().validate(*this, gpuMemory, IsReadOnly);
memoryDependency().validate(*this, rocMemory, IsReadOnly);
}
else {
return false;
@@ -305,6 +308,12 @@ VirtualGPU::processMemObjects(
else {
memory = static_cast<Memory*>(svmMem->getDeviceMemory(dev()));
}
// Don't sync for internal objects,
// since they are not shared between devices
if (memory->owner()->getVirtualDevice() == nullptr) {
// Synchronize data with other memory instances if necessary
memory->syncCacheFromHost(*this);
}
}
if (memory != NULL) {
@@ -480,6 +489,8 @@ VirtualGPU::VirtualGPU(Device &device)
VirtualGPU::~VirtualGPU()
{
releasePinnedMem();
if (timestamp_ != NULL) {
delete timestamp_;
timestamp_ = NULL;
@@ -821,7 +832,10 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand &cmd)
// Find if virtual address is a CL allocation
device::Memory* hostMemory = dev().findMemoryFromVA(cmd.destination(), &offset);
device::Memory *devMem = cmd.source().getDeviceMemory(dev());
Memory* devMem = dev().getRocMemory(&cmd.source());
// Synchronize data with other memory instances if necessary
devMem->syncCacheFromHost(*this);
void *dst = cmd.destination();
amd::Coord3D size = cmd.size();
@@ -896,8 +910,14 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand &cmd)
// Find if virtual address is a CL allocation
device::Memory* hostMemory = dev().findMemoryFromVA(cmd.source(), &offset);
device::Memory *devMem = cmd.destination().getDeviceMemory(dev());
const char *src = static_cast<const char *>(cmd.source());
Memory* devMem = dev().getRocMemory(&cmd.destination());
// Synchronize memory from host if necessary
device::Memory::SyncFlags syncFlags;
syncFlags.skipEntire_ = cmd.isEntireMemory();
devMem->syncCacheFromHost(*this, syncFlags);
const char* src = static_cast<const char*>(cmd.source());
amd::Coord3D size = cmd.size();
//! @todo add multi-devices synchronization when supported.
@@ -1008,11 +1028,16 @@ void VirtualGPU::submitCopyMemory(amd::CopyMemoryCommand &cmd)
profilingBegin(cmd);
device::Memory *srcDevMem = cmd.source().getDeviceMemory(dev());
device::Memory *destDevMem = cmd.destination().getDeviceMemory(dev());
amd::Coord3D size = cmd.size();
Memory* srcDevMem = dev().getRocMemory(&cmd.source());
Memory* dstDevMem = dev().getRocMemory(&cmd.destination());
//! @todo add multi-devices synchronization when supported.
// Synchronize source and destination memory
device::Memory::SyncFlags syncFlags;
syncFlags.skipEntire_ = cmd.isEntireMemory();
dstDevMem->syncCacheFromHost(*this, syncFlags);
srcDevMem->syncCacheFromHost(*this);
amd::Coord3D size = cmd.size();
cl_command_type type = cmd.type();
bool result = false;
@@ -1051,31 +1076,31 @@ void VirtualGPU::submitCopyMemory(amd::CopyMemoryCommand &cmd)
}
result = blitMgr().copyBuffer(
*srcDevMem, *destDevMem, srcOrigin,
*srcDevMem, *dstDevMem, srcOrigin,
dstOrigin, size, cmd.isEntireMemory());
break;
}
case CL_COMMAND_COPY_BUFFER_RECT: {
result = blitMgr().copyBufferRect(
*srcDevMem, *destDevMem, cmd.srcRect(),
*srcDevMem, *dstDevMem, cmd.srcRect(),
cmd.dstRect(), size, cmd.isEntireMemory());
break;
}
case CL_COMMAND_COPY_IMAGE: {
result = blitMgr().copyImage(
*srcDevMem, *destDevMem, cmd.srcOrigin(),
*srcDevMem, *dstDevMem, cmd.srcOrigin(),
cmd.dstOrigin(), size, cmd.isEntireMemory());
break;
}
case CL_COMMAND_COPY_IMAGE_TO_BUFFER: {
result = blitMgr().copyImageToBuffer(
*srcDevMem, *destDevMem, cmd.srcOrigin(),
*srcDevMem, *dstDevMem, cmd.srcOrigin(),
cmd.dstOrigin(), size, cmd.isEntireMemory());
break;
}
case CL_COMMAND_COPY_BUFFER_TO_IMAGE: {
result = blitMgr().copyBufferToImage(
*srcDevMem, *destDevMem, cmd.srcOrigin(),
*srcDevMem, *dstDevMem, cmd.srcOrigin(),
cmd.dstOrigin(), size, cmd.isEntireMemory());
break;
}
@@ -1121,7 +1146,7 @@ void VirtualGPU::submitMapMemory(amd::MapMemoryCommand &cmd)
//! @todo add multi-devices synchronization when supported.
roc::Memory *devMemory = reinterpret_cast<roc::Memory *>(
roc::Memory* devMemory = reinterpret_cast<roc::Memory *>(
cmd.memory().getDeviceMemory(dev(), false));
cl_command_type type = cmd.type();
@@ -1139,12 +1164,17 @@ void VirtualGPU::submitMapMemory(amd::MapMemoryCommand &cmd)
mapFlag, cmd.isEntireMemory());
// Sync to the map target.
if (devMemory->isHostMemDirectAccess()) {
// Add memory to VA cache, so runtime can detect direct access to VA
dev().addVACache(devMemory);
// If we have host memory, use it
if (devMemory->owner()->getHostMem() != nullptr) {
// Target is the backing store, so just ensure that owner is up-to-date
devMemory->owner()->cacheWriteBack();
if (devMemory->isHostMemDirectAccess()) {
// Add memory to VA cache, so runtime can detect direct access to VA
dev().addVACache(devMemory);
}
}
if ((!devMemory->isHostMemDirectAccess()) &&
(mapFlag & (CL_MAP_READ | CL_MAP_WRITE))) {
else if (mapFlag & (CL_MAP_READ | CL_MAP_WRITE)) {
bool result = false;
roc::Memory *hsaMemory = static_cast<roc::Memory *>(devMemory);
@@ -1176,7 +1206,6 @@ void VirtualGPU::submitMapMemory(amd::MapMemoryCommand &cmd)
*hsaMemory, static_cast<char *>(hostPtr)+origin[0],
origin, size, cmd.isEntireMemory());
}
}
else if (type == CL_COMMAND_MAP_IMAGE) {
amd::Image* image = cmd.memory().asImage();
@@ -1225,11 +1254,19 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand &cmd)
// Force buffer write for IMAGE1D_BUFFER
bool imageBuffer = (cmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER);
if (devMemory->isHostMemDirectAccess()) {
// Remove memory from VA cache
dev().removeVACache(devMemory);
// We used host memory
if (devMemory->owner()->getHostMem() != nullptr) {
if (mapInfo->isUnmapWrite()) {
// Target is the backing store, so sync
devMemory->owner()->signalWrite(nullptr);
devMemory->syncCacheFromHost(*this);
}
if (devMemory->isHostMemDirectAccess()) {
// Remove memory from VA cache
dev().removeVACache(devMemory);
}
}
if (mapInfo->isUnmapWrite()) {
else if (mapInfo->isUnmapWrite()) {
// Commit the changes made by the user.
if (!devMemory->isHostMemDirectAccess()) {
bool result = false;
@@ -1299,9 +1336,13 @@ void VirtualGPU::submitFillMemory(amd::FillMemoryCommand &cmd)
profilingBegin(cmd);
device::Memory *devMemory = cmd.memory().getDeviceMemory(dev(), false);
Memory* memory = dev().getRocMemory(&cmd.memory());
//! @todo add multi-devices synchronization when supported.
bool entire = cmd.isEntireMemory();
// Synchronize memory from host if necessary
device::Memory::SyncFlags syncFlags;
syncFlags.skipEntire_ = entire;
memory->syncCacheFromHost(*this, syncFlags);
cl_command_type type = cmd.type();
bool result = false;
@@ -1335,14 +1376,12 @@ void VirtualGPU::submitFillMemory(amd::FillMemoryCommand &cmd)
patternSize = elemSize;
}
result = blitMgr().fillBuffer(
*devMemory, pattern, patternSize, origin, size,
cmd.isEntireMemory());
*memory, pattern, patternSize, origin, size, entire);
break;
}
case CL_COMMAND_FILL_IMAGE: {
result = blitMgr().fillImage(
*devMemory, cmd.pattern(), cmd.origin(), cmd.size(),
cmd.isEntireMemory());
*memory, cmd.pattern(), cmd.origin(), cmd.size(), entire);
break;
}
default:
@@ -1367,21 +1406,21 @@ void VirtualGPU::submitMigrateMemObjects(amd::MigrateMemObjectsCommand &vcmd)
profilingBegin(vcmd);
std::vector<amd::Memory *>::const_iterator itr;
for (itr = vcmd.memObjects().begin();
itr != vcmd.memObjects().end();
itr++) {
for (auto itr : vcmd.memObjects()) {
// Find device memory
device::Memory *m = (*itr)->getDeviceMemory(dev());
roc::Memory *memory = static_cast<roc::Memory *>(m);
Memory* memory = dev().getRocMemory(&(*itr));
if (vcmd.migrationFlags() & CL_MIGRATE_MEM_OBJECT_HOST) {
//! @todo revisit this when multi devices is supported.
} else if (vcmd.migrationFlags() &
CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) {
//! @todo revisit this when multi devices is supported.
} else {
memory->mgpuCacheWriteBack();
}
else if (vcmd.migrationFlags() & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) {
// Synchronize memory from host if necessary.
// The sync function will perform memory migration from
// another device if necessary
device::Memory::SyncFlags syncFlags;
memory->syncCacheFromHost(*this, syncFlags);
}
else {
LogWarning("Unknown operation for memory migration!");
}
}
@@ -1638,8 +1677,7 @@ VirtualGPU::submitKernelInternal(
argPtr = addArg(argPtr, &globalAddress, arg->size_, arg->alignment_);
//! @todo Compiler has to return read/write attributes
const cl_mem_flags flags = mem->getMemFlags();
if (!flags || (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) {
if ((mem->getMemFlags() & CL_MEM_READ_ONLY) == 0) {
mem->signalWrite(&dev());
}
break;
@@ -1677,8 +1715,7 @@ VirtualGPU::submitKernelInternal(
}
//! @todo Compiler has to return read/write attributes
const cl_mem_flags flags = mem->getMemFlags();
if (!flags || (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) {
if ((mem->getMemFlags() & CL_MEM_READ_ONLY) == 0) {
mem->signalWrite(&dev());
}
break;
@@ -1828,7 +1865,7 @@ void VirtualGPU::flush(amd::Command *list, bool wait)
{
releaseGpuMemoryFence();
updateCommandsState(list);
// Rlease all pinned memory
// Release all pinned memory
releasePinnedMem();
}
+10 -5
Просмотреть файл
@@ -125,6 +125,9 @@ Memory::Memory(
parent_->retain();
parent_->isParent_ = true;
if (parent.getHostMem() != nullptr) {
setHostMem(reinterpret_cast<address>(parent.getHostMem()) + origin);
}
// Inherit memory flags from the parent
if ((flags_ & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY |
CL_MEM_WRITE_ONLY)) == 0) {
@@ -407,7 +410,7 @@ Memory::~Memory()
// Release the parent.
if (NULL != parent_) {
// Update cache if runtime destroys a subbuffer
if (NULL != parent_->getHostMem()) {
if (NULL != parent_->getHostMem() && (vDev_ == NULL)) {
cacheWriteBack();
}
parent_->removeSubBuffer(this);
@@ -567,8 +570,9 @@ Pipe::initDeviceMemory()
Image::Image(
const Format& format,
Image& parent,
uint baseMipLevel)
: Memory(parent, 0, 0, parent.getWidth() * parent.getHeight() *
uint baseMipLevel,
cl_mem_flags flags)
: Memory(parent, flags, 0, parent.getWidth() * parent.getHeight() *
parent.getDepth() * format.getElementSize())
, impl_(format, Coord3D(parent.getWidth() *
parent.getImageFormat().getElementSize() /
@@ -1193,12 +1197,13 @@ Image::createView(
const Context& context,
const Format& format,
device::VirtualDevice* vDev,
uint baseMipLevel)
uint baseMipLevel,
cl_mem_flags flags)
{
Image* view = NULL;
// Find the image dimensions and create a corresponding object
view = new (context) Image(format, *this, baseMipLevel);
view = new (context) Image(format, *this, baseMipLevel, flags);
// Set GPU virtual device for this view
view->setVirtualDevice(vDev);
+5 -3
Просмотреть файл
@@ -170,7 +170,7 @@ protected:
bool isParent_; //!< This object is a parent
device::VirtualDevice* vDev_; //!< Memory object belongs to a virtual device only
bool forceSysMemAlloc_; //!< Forces system memory allocation
std::atomic_uint mapCount_; //!< Keep track of number of mappings for a memory object
std::atomic_uint mapCount_; //!< Keep track of number of mappings for a memory object
void * svmHostAddress_; //!< svm host address;
bool svmPtrCommited_; //!< svm host address committed flag;
bool canBeCached_; //!< flag indicating whether the object can be cached
@@ -516,7 +516,8 @@ protected:
Image(
const Format& format,
Image& parent,
uint baseMipLevel = 0);
uint baseMipLevel = 0,
cl_mem_flags flags = 0);
///! Initializes the device memory array which is nested
// after 'Image' object in memory layout.
@@ -593,7 +594,8 @@ public:
const Context& context, //!< Context for a view creation
const Format& format, //!< The new format for a view
device::VirtualDevice* vDev, //!< Virtual device object
uint baseMipLevel = 0 //!< Base mip level for a view
uint baseMipLevel = 0, //!< Base mip level for a view
cl_mem_flags flags = 0 //!< Memory allocation flags
);
//! Returns the impl for this image.