From 953d3c86e06e1ca655e43c8a475e19eaab4ab115 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 8 Mar 2018 17:37:53 -0500
Subject: [PATCH] P4 to Git Change 1524674 by gandryey@gera-w8 on 2018/03/08
17:26:08
SWDEV-79445 - OCL generic changes and code clean-up
- Remove pinOffset_ field, since the pinning offset can be combined with the global offset_ field
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#75 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#44 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#50 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#16 edit
---
rocclr/runtime/device/pal/paldevice.cpp | 60 ++--------------
rocclr/runtime/device/pal/palkernel.cpp | 9 +--
rocclr/runtime/device/pal/palresource.cpp | 84 +++++++++--------------
rocclr/runtime/device/pal/palresource.hpp | 4 --
4 files changed, 41 insertions(+), 116 deletions(-)
diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp
index 916e369d09..b8c59aa5df 100644
--- a/rocclr/runtime/device/pal/paldevice.cpp
+++ b/rocclr/runtime/device/pal/paldevice.cpp
@@ -1538,63 +1538,11 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler)
return true;
}
-//! \note reallocMemory() must be called only from outside of
-//! VirtualGPU submit commands methods.
-//! Otherwise a deadlock in lockVgpus() is possible
-
+//! \note reallocMemory() must be called only from outside of VirtualGPU submit
+//! commands methods. Otherwise a deadlock in lockVgpus() is possible
bool Device::reallocMemory(amd::Memory& owner) const {
- // For now we have to serialize reallocation code
- amd::ScopedLock lk(*lockAsyncOps_);
-
- // Read device memory after the lock,
- // since realloc from another thread can replace the pointer
- pal::Memory* gpuMemory = getGpuMemory(&owner);
- if (gpuMemory == nullptr) {
- return false;
- }
-
- if (gpuMemory->pinOffset() == 0) {
- return true;
- } else if (nullptr != owner.parent()) {
- if (!reallocMemory(*owner.parent())) {
- return false;
- }
- }
-
- if (owner.asBuffer()) {
- gpuMemory = createBuffer(owner, false);
- } else if (owner.asImage()) {
- return true;
- } else {
- LogError("Unknown memory type!");
- }
-
- if (gpuMemory != nullptr) {
- pal::Memory* newMemory = gpuMemory;
- pal::Memory* oldMemory = getGpuMemory(&owner);
-
- // Transfer the object
- if (oldMemory != nullptr) {
- if (!oldMemory->moveTo(*newMemory)) {
- delete newMemory;
- return false;
- }
- }
-
- // Attempt to pin system memory
- if ((newMemory->memoryType() != Resource::Pinned) &&
- ((owner.getHostMem() != nullptr) ||
- ((nullptr != owner.parent()) && (owner.getHostMem() != nullptr)))) {
- bool ok = newMemory->pinSystemMemory(owner.getHostMem(), (owner.getHostMemRef()->size())
- ? owner.getHostMemRef()->size()
- : owner.getSize());
- //! \note: Ignore the pinning result for now
- }
-
- return true;
- }
-
- return false;
+ // Empty body, since HSAIL path doesn't require memory reallocations
+ return true;
}
device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const {
diff --git a/rocclr/runtime/device/pal/palkernel.cpp b/rocclr/runtime/device/pal/palkernel.cpp
index e0369c1c0a..b7e9302cd7 100644
--- a/rocclr/runtime/device/pal/palkernel.cpp
+++ b/rocclr/runtime/device/pal/palkernel.cpp
@@ -749,11 +749,8 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
}
bool HSAILKernel::validateMemory(uint idx, amd::Memory* amdMem) const {
- // Check if memory doesn't require reallocation
- bool noRealloc = true;
- // amdMem->reallocedDeviceMemory(&dev()));
-
- return noRealloc;
+ // HSAIL path doesn't require memory reallocations
+ return true;
}
const Device& HSAILKernel::dev() const { return reinterpret_cast<const Device&>(dev_); }
@@ -1026,7 +1023,7 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
}
//! 64 bit isn't supported with 32 bit binary
- uint64_t globalAddress = gpuMem->vmAddress() + gpuMem->pinOffset();
+ uint64_t globalAddress = gpuMem->vmAddress();
WriteAqlArg(&aqlArgBuf, &globalAddress, arg->size_, arg->alignment_);
// Wait for resource if it was used on an inactive engine
diff --git a/rocclr/runtime/device/pal/palresource.cpp b/rocclr/runtime/device/pal/palresource.cpp
index 1af120863d..9e67e72b7b 100644
--- a/rocclr/runtime/device/pal/palresource.cpp
+++ b/rocclr/runtime/device/pal/palresource.cpp
@@ -189,7 +189,6 @@ Resource::Resource(const Device& gpuDev, size_t size)
curRename_(0),
memRef_(nullptr),
viewOwner_(nullptr),
- pinOffset_(0),
image_(nullptr),
hwSrd_(0) {
// Fill resource descriptor fields
@@ -226,7 +225,6 @@ Resource::Resource(const Device& gpuDev, size_t width, size_t height, size_t dep
curRename_(0),
memRef_(nullptr),
viewOwner_(nullptr),
- pinOffset_(0),
image_(nullptr),
hwSrd_(0) {
// Fill resource descriptor fields
@@ -949,7 +947,6 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
if (viewOwner_->data() != nullptr) {
address_ = viewOwner_->data() + view->offset_;
}
- pinOffset_ = viewOwner_->pinOffset();
memRef_ = viewOwner_->memRef_;
memRef_->retain();
desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
@@ -974,7 +971,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
// Find the partial size for unaligned copy
hostMemOffset = static_cast(reinterpret_cast(address_) - tmpHost);
- pinOffset_ = hostMemOffset;
+ offset_ = hostMemOffset;
pinAddress = tmpHost;
@@ -1012,7 +1009,6 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
memRef_ = GpuMemoryReference::Create(dev(), createInfo);
if (nullptr == memRef_) {
LogError("Failed PAL memory allocation!");
- pinOffset_ = 0;
return false;
}
desc_.cardMemory_ = false;
@@ -1218,17 +1214,6 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
assert(!(desc().cardMemory_ && dstResource.desc().cardMemory_) && "Unsupported configuraiton!");
- size_t calSrcOrigin[3], calDstOrigin[3], calSize[3];
- calSrcOrigin[0] = srcOrigin[0] + pinOffset();
- calSrcOrigin[1] = srcOrigin[1];
- calSrcOrigin[2] = srcOrigin[2];
- calDstOrigin[0] = dstOrigin[0] + dstResource.pinOffset();
- calDstOrigin[1] = dstOrigin[1];
- calDstOrigin[2] = dstOrigin[2];
- calSize[0] = size[0];
- calSize[1] = size[1];
- calSize[2] = size[2];
-
uint64_t gpuMemoryOffset = 0;
uint64_t gpuMemoryRowPitch = 0;
uint64_t imageOffsetx = 0;
@@ -1236,16 +1221,16 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
bool img2Darray = false;
if (desc().buffer_ && !dstResource.desc().buffer_) {
- imageOffsetx = calDstOrigin[0] % dstResource.elementSize();
- gpuMemoryOffset = calSrcOrigin[0] + offset();
+ imageOffsetx = dstOrigin[0] % dstResource.elementSize();
+ gpuMemoryOffset = srcOrigin[0] + offset();
gpuMemoryRowPitch =
- (calSrcOrigin[1]) ? calSrcOrigin[1] : calSize[0] * dstResource.elementSize();
+ (srcOrigin[1]) ? srcOrigin[1] : size[0] * dstResource.elementSize();
img1Darray = (dstResource.desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
img2Darray = (dstResource.desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY);
} else if (!desc().buffer_ && dstResource.desc().buffer_) {
- imageOffsetx = calSrcOrigin[0] % elementSize();
- gpuMemoryOffset = calDstOrigin[0] + dstResource.offset();
- gpuMemoryRowPitch = (calDstOrigin[1]) ? calDstOrigin[1] : calSize[0] * elementSize();
+ imageOffsetx = srcOrigin[0] % elementSize();
+ gpuMemoryOffset = dstOrigin[0] + dstResource.offset();
+ gpuMemoryRowPitch = (dstOrigin[1]) ? dstOrigin[1] : size[0] * elementSize();
img1Darray = (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
img2Darray = (desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY);
}
@@ -1280,12 +1265,12 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, dstResource.desc().baseLevel_, 0};
Pal::MemoryImageCopyRegion copyRegion = {};
copyRegion.imageSubres = ImgSubresId;
- copyRegion.imageOffset.x = calDstOrigin[0];
- copyRegion.imageOffset.y = calDstOrigin[1];
- copyRegion.imageOffset.z = calDstOrigin[2];
- copyRegion.imageExtent.width = calSize[0];
- copyRegion.imageExtent.height = calSize[1];
- copyRegion.imageExtent.depth = calSize[2];
+ copyRegion.imageOffset.x = dstOrigin[0];
+ copyRegion.imageOffset.y = dstOrigin[1];
+ copyRegion.imageOffset.z = dstOrigin[2];
+ copyRegion.imageExtent.width = size[0];
+ copyRegion.imageExtent.height = size[1];
+ copyRegion.imageExtent.depth = size[2];
copyRegion.numSlices = 1;
if (img1Darray) {
copyRegion.numSlices = copyRegion.imageExtent.height;
@@ -1296,20 +1281,20 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
}
copyRegion.gpuMemoryOffset = gpuMemoryOffset;
copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
- copyRegion.gpuMemoryDepthPitch = (calSrcOrigin[2])
- ? calSrcOrigin[2]
+ copyRegion.gpuMemoryDepthPitch = (srcOrigin[2])
+ ? srcOrigin[2]
: copyRegion.gpuMemoryRowPitch * copyRegion.imageExtent.height;
gpu.iCmd()->CmdCopyMemoryToImage(*iMem(), *dstResource.image_, imgLayout, 1, &copyRegion);
} else if (!desc().buffer_ && dstResource.desc().buffer_) {
Pal::MemoryImageCopyRegion copyRegion = {};
Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, desc().baseLevel_, 0};
copyRegion.imageSubres = ImgSubresId;
- copyRegion.imageOffset.x = calSrcOrigin[0];
- copyRegion.imageOffset.y = calSrcOrigin[1];
- copyRegion.imageOffset.z = calSrcOrigin[2];
- copyRegion.imageExtent.width = calSize[0];
- copyRegion.imageExtent.height = calSize[1];
- copyRegion.imageExtent.depth = calSize[2];
+ copyRegion.imageOffset.x = srcOrigin[0];
+ copyRegion.imageOffset.y = srcOrigin[1];
+ copyRegion.imageOffset.z = srcOrigin[2];
+ copyRegion.imageExtent.width = size[0];
+ copyRegion.imageExtent.height = size[1];
+ copyRegion.imageExtent.depth = size[2];
copyRegion.numSlices = 1;
if (img1Darray) {
copyRegion.numSlices = copyRegion.imageExtent.height;
@@ -1320,8 +1305,7 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
}
copyRegion.gpuMemoryOffset = gpuMemoryOffset;
copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
- copyRegion.gpuMemoryDepthPitch = (calDstOrigin[2])
- ? calDstOrigin[2]
+ copyRegion.gpuMemoryDepthPitch = (dstOrigin[2]) ? dstOrigin[2]
: copyRegion.gpuMemoryRowPitch * copyRegion.imageExtent.height;
gpu.iCmd()->CmdCopyImageToMemory(*image_, imgLayout, *dstResource.iMem(), 1, &copyRegion);
} else {
@@ -1331,23 +1315,23 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
Pal::ChannelSwizzle::Z, Pal::ChannelSwizzle::W};
copyRegion.srcBuffer.swizzledFormat.format = ChannelFmt(bytesPerElement);
copyRegion.srcBuffer.swizzledFormat.swizzle = channels;
- copyRegion.srcBuffer.offset = calSrcOrigin[0] + offset();
- copyRegion.srcBuffer.rowPitch = calSrcOrigin[1];
- copyRegion.srcBuffer.depthPitch = calSrcOrigin[2];
- copyRegion.extent.width = calSize[0] / bytesPerElement;
- copyRegion.extent.height = calSize[1];
- copyRegion.extent.depth = calSize[2];
+ copyRegion.srcBuffer.offset = srcOrigin[0] + offset();
+ copyRegion.srcBuffer.rowPitch = srcOrigin[1];
+ copyRegion.srcBuffer.depthPitch = srcOrigin[2];
+ copyRegion.extent.width = size[0] / bytesPerElement;
+ copyRegion.extent.height = size[1];
+ copyRegion.extent.depth = size[2];
copyRegion.dstBuffer.swizzledFormat.format = ChannelFmt(bytesPerElement);
copyRegion.dstBuffer.swizzledFormat.swizzle = channels;
- copyRegion.dstBuffer.offset = calDstOrigin[0] + dstResource.offset();
- copyRegion.dstBuffer.rowPitch = calDstOrigin[1];
- copyRegion.dstBuffer.depthPitch = calDstOrigin[2];
+ copyRegion.dstBuffer.offset = dstOrigin[0] + dstResource.offset();
+ copyRegion.dstBuffer.rowPitch = dstOrigin[1];
+ copyRegion.dstBuffer.depthPitch = dstOrigin[2];
gpu.iCmd()->CmdCopyTypedBuffer(*iMem(), *dstResource.iMem(), 1, &copyRegion);
} else {
Pal::MemoryCopyRegion copyRegion = {};
- copyRegion.srcOffset = calSrcOrigin[0] + offset();
- copyRegion.dstOffset = calDstOrigin[0] + dstResource.offset();
- copyRegion.copySize = calSize[0];
+ copyRegion.srcOffset = srcOrigin[0] + offset();
+ copyRegion.dstOffset = dstOrigin[0] + dstResource.offset();
+ copyRegion.copySize = size[0];
gpu.iCmd()->CmdCopyMemory(*iMem(), *dstResource.iMem(), 1, &copyRegion);
}
}
diff --git a/rocclr/runtime/device/pal/palresource.hpp b/rocclr/runtime/device/pal/palresource.hpp
index b315fbcae4..0118681006 100644
--- a/rocclr/runtime/device/pal/palresource.hpp
+++ b/rocclr/runtime/device/pal/palresource.hpp
@@ -247,9 +247,6 @@ class Resource : public amd::HeapObject {
//! Returns the offset in GPU memory for aliases
size_t offset() const { return offset_; }
- //! Returns the pinned memory offset
- uint64_t pinOffset() const { return pinOffset_; }
-
//! Returns the GPU device that owns this resource
const Device& dev() const { return gpuDevice_; }
@@ -428,7 +425,6 @@ class Resource : public amd::HeapObject {
RenameList renames_; //!< Rename resource list
GpuMemoryReference* memRef_; //!< PAL resource reference
const Resource* viewOwner_; //!< GPU resource, which owns this view
- uint64_t pinOffset_; //!< Pinned memory offset
void* glInteropMbRes_; //!< Mb Res handle
uint32_t glType_; //!< GL interop type
void* glPlatformContext_;