P4 to Git Change 1524674 by gandryey@gera-w8 on 2018/03/08 17:26:08

SWDEV-79445 - OCL generic changes and code clean-up
	- Remove pinOffset_ field, since the pinning offset can be combined with global offset_ field

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#75 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#44 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#50 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#16 edit
Этот коммит содержится в:
foreman
2018-03-08 17:37:53 -05:00
родитель 3c0e80074b
Коммит 953d3c86e0
4 изменённых файлов: 41 добавлений и 116 удалений
+4 -56
Просмотреть файл
@@ -1538,63 +1538,11 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler)
return true;
}
//! \note reallocMemory() must be called only from outside of
//! VirtualGPU submit commands methods.
//! Otherwise a deadlock in lockVgpus() is possible
//! \note reallocMemory() must be called only from outside of VirtualGPU submit
//! commands methods. Otherwise a deadlock in lockVgpus() is possible
bool Device::reallocMemory(amd::Memory& owner) const {
// For now we have to serialize reallocation code
amd::ScopedLock lk(*lockAsyncOps_);
// Read device memory after the lock,
// since realloc from another thread can replace the pointer
pal::Memory* gpuMemory = getGpuMemory(&owner);
if (gpuMemory == nullptr) {
return false;
}
if (gpuMemory->pinOffset() == 0) {
return true;
} else if (nullptr != owner.parent()) {
if (!reallocMemory(*owner.parent())) {
return false;
}
}
if (owner.asBuffer()) {
gpuMemory = createBuffer(owner, false);
} else if (owner.asImage()) {
return true;
} else {
LogError("Unknown memory type!");
}
if (gpuMemory != nullptr) {
pal::Memory* newMemory = gpuMemory;
pal::Memory* oldMemory = getGpuMemory(&owner);
// Transfer the object
if (oldMemory != nullptr) {
if (!oldMemory->moveTo(*newMemory)) {
delete newMemory;
return false;
}
}
// Attempt to pin system memory
if ((newMemory->memoryType() != Resource::Pinned) &&
((owner.getHostMem() != nullptr) ||
((nullptr != owner.parent()) && (owner.getHostMem() != nullptr)))) {
bool ok = newMemory->pinSystemMemory(owner.getHostMem(), (owner.getHostMemRef()->size())
? owner.getHostMemRef()->size()
: owner.getSize());
//! \note: Ignore the pinning result for now
}
return true;
}
return false;
// Empty body, since HSAIL path doesn't require memory reallocations
return true;
}
device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const {
+3 -6
Просмотреть файл
@@ -749,11 +749,8 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
}
bool HSAILKernel::validateMemory(uint idx, amd::Memory* amdMem) const {
// Check if memory doesn't require reallocation
bool noRealloc = true;
// amdMem->reallocedDeviceMemory(&dev()));
return noRealloc;
// HSAIL path doesn't require memory reallocations
return true;
}
const Device& HSAILKernel::dev() const { return reinterpret_cast<const Device&>(dev_); }
@@ -1026,7 +1023,7 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
}
//! 64 bit isn't supported with 32 bit binary
uint64_t globalAddress = gpuMem->vmAddress() + gpuMem->pinOffset();
uint64_t globalAddress = gpuMem->vmAddress();
WriteAqlArg(&aqlArgBuf, &globalAddress, arg->size_, arg->alignment_);
// Wait for resource if it was used on an inactive engine
+34 -50
Просмотреть файл
@@ -189,7 +189,6 @@ Resource::Resource(const Device& gpuDev, size_t size)
curRename_(0),
memRef_(nullptr),
viewOwner_(nullptr),
pinOffset_(0),
image_(nullptr),
hwSrd_(0) {
// Fill resource descriptor fields
@@ -226,7 +225,6 @@ Resource::Resource(const Device& gpuDev, size_t width, size_t height, size_t dep
curRename_(0),
memRef_(nullptr),
viewOwner_(nullptr),
pinOffset_(0),
image_(nullptr),
hwSrd_(0) {
// Fill resource descriptor fields
@@ -949,7 +947,6 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
if (viewOwner_->data() != nullptr) {
address_ = viewOwner_->data() + view->offset_;
}
pinOffset_ = viewOwner_->pinOffset();
memRef_ = viewOwner_->memRef_;
memRef_->retain();
desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
@@ -974,7 +971,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
// Find the partial size for unaligned copy
hostMemOffset = static_cast<uint>(reinterpret_cast<const char*>(address_) - tmpHost);
pinOffset_ = hostMemOffset;
offset_ = hostMemOffset;
pinAddress = tmpHost;
@@ -1012,7 +1009,6 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
memRef_ = GpuMemoryReference::Create(dev(), createInfo);
if (nullptr == memRef_) {
LogError("Failed PAL memory allocation!");
pinOffset_ = 0;
return false;
}
desc_.cardMemory_ = false;
@@ -1218,17 +1214,6 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
assert(!(desc().cardMemory_ && dstResource.desc().cardMemory_) && "Unsupported configuraiton!");
size_t calSrcOrigin[3], calDstOrigin[3], calSize[3];
calSrcOrigin[0] = srcOrigin[0] + pinOffset();
calSrcOrigin[1] = srcOrigin[1];
calSrcOrigin[2] = srcOrigin[2];
calDstOrigin[0] = dstOrigin[0] + dstResource.pinOffset();
calDstOrigin[1] = dstOrigin[1];
calDstOrigin[2] = dstOrigin[2];
calSize[0] = size[0];
calSize[1] = size[1];
calSize[2] = size[2];
uint64_t gpuMemoryOffset = 0;
uint64_t gpuMemoryRowPitch = 0;
uint64_t imageOffsetx = 0;
@@ -1236,16 +1221,16 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
bool img2Darray = false;
if (desc().buffer_ && !dstResource.desc().buffer_) {
imageOffsetx = calDstOrigin[0] % dstResource.elementSize();
gpuMemoryOffset = calSrcOrigin[0] + offset();
imageOffsetx = dstOrigin[0] % dstResource.elementSize();
gpuMemoryOffset = srcOrigin[0] + offset();
gpuMemoryRowPitch =
(calSrcOrigin[1]) ? calSrcOrigin[1] : calSize[0] * dstResource.elementSize();
(srcOrigin[1]) ? srcOrigin[1] : size[0] * dstResource.elementSize();
img1Darray = (dstResource.desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
img2Darray = (dstResource.desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY);
} else if (!desc().buffer_ && dstResource.desc().buffer_) {
imageOffsetx = calSrcOrigin[0] % elementSize();
gpuMemoryOffset = calDstOrigin[0] + dstResource.offset();
gpuMemoryRowPitch = (calDstOrigin[1]) ? calDstOrigin[1] : calSize[0] * elementSize();
imageOffsetx = srcOrigin[0] % elementSize();
gpuMemoryOffset = dstOrigin[0] + dstResource.offset();
gpuMemoryRowPitch = (dstOrigin[1]) ? dstOrigin[1] : size[0] * elementSize();
img1Darray = (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
img2Darray = (desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY);
}
@@ -1280,12 +1265,12 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, dstResource.desc().baseLevel_, 0};
Pal::MemoryImageCopyRegion copyRegion = {};
copyRegion.imageSubres = ImgSubresId;
copyRegion.imageOffset.x = calDstOrigin[0];
copyRegion.imageOffset.y = calDstOrigin[1];
copyRegion.imageOffset.z = calDstOrigin[2];
copyRegion.imageExtent.width = calSize[0];
copyRegion.imageExtent.height = calSize[1];
copyRegion.imageExtent.depth = calSize[2];
copyRegion.imageOffset.x = dstOrigin[0];
copyRegion.imageOffset.y = dstOrigin[1];
copyRegion.imageOffset.z = dstOrigin[2];
copyRegion.imageExtent.width = size[0];
copyRegion.imageExtent.height = size[1];
copyRegion.imageExtent.depth = size[2];
copyRegion.numSlices = 1;
if (img1Darray) {
copyRegion.numSlices = copyRegion.imageExtent.height;
@@ -1296,20 +1281,20 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
}
copyRegion.gpuMemoryOffset = gpuMemoryOffset;
copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
copyRegion.gpuMemoryDepthPitch = (calSrcOrigin[2])
? calSrcOrigin[2]
copyRegion.gpuMemoryDepthPitch = (srcOrigin[2])
? srcOrigin[2]
: copyRegion.gpuMemoryRowPitch * copyRegion.imageExtent.height;
gpu.iCmd()->CmdCopyMemoryToImage(*iMem(), *dstResource.image_, imgLayout, 1, &copyRegion);
} else if (!desc().buffer_ && dstResource.desc().buffer_) {
Pal::MemoryImageCopyRegion copyRegion = {};
Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, desc().baseLevel_, 0};
copyRegion.imageSubres = ImgSubresId;
copyRegion.imageOffset.x = calSrcOrigin[0];
copyRegion.imageOffset.y = calSrcOrigin[1];
copyRegion.imageOffset.z = calSrcOrigin[2];
copyRegion.imageExtent.width = calSize[0];
copyRegion.imageExtent.height = calSize[1];
copyRegion.imageExtent.depth = calSize[2];
copyRegion.imageOffset.x = srcOrigin[0];
copyRegion.imageOffset.y = srcOrigin[1];
copyRegion.imageOffset.z = srcOrigin[2];
copyRegion.imageExtent.width = size[0];
copyRegion.imageExtent.height = size[1];
copyRegion.imageExtent.depth = size[2];
copyRegion.numSlices = 1;
if (img1Darray) {
copyRegion.numSlices = copyRegion.imageExtent.height;
@@ -1320,8 +1305,7 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
}
copyRegion.gpuMemoryOffset = gpuMemoryOffset;
copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
copyRegion.gpuMemoryDepthPitch = (calDstOrigin[2])
? calDstOrigin[2]
copyRegion.gpuMemoryDepthPitch = (dstOrigin[2]) ? dstOrigin[2]
: copyRegion.gpuMemoryRowPitch * copyRegion.imageExtent.height;
gpu.iCmd()->CmdCopyImageToMemory(*image_, imgLayout, *dstResource.iMem(), 1, &copyRegion);
} else {
@@ -1331,23 +1315,23 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
Pal::ChannelSwizzle::Z, Pal::ChannelSwizzle::W};
copyRegion.srcBuffer.swizzledFormat.format = ChannelFmt(bytesPerElement);
copyRegion.srcBuffer.swizzledFormat.swizzle = channels;
copyRegion.srcBuffer.offset = calSrcOrigin[0] + offset();
copyRegion.srcBuffer.rowPitch = calSrcOrigin[1];
copyRegion.srcBuffer.depthPitch = calSrcOrigin[2];
copyRegion.extent.width = calSize[0] / bytesPerElement;
copyRegion.extent.height = calSize[1];
copyRegion.extent.depth = calSize[2];
copyRegion.srcBuffer.offset = srcOrigin[0] + offset();
copyRegion.srcBuffer.rowPitch = srcOrigin[1];
copyRegion.srcBuffer.depthPitch = srcOrigin[2];
copyRegion.extent.width = size[0] / bytesPerElement;
copyRegion.extent.height = size[1];
copyRegion.extent.depth = size[2];
copyRegion.dstBuffer.swizzledFormat.format = ChannelFmt(bytesPerElement);
copyRegion.dstBuffer.swizzledFormat.swizzle = channels;
copyRegion.dstBuffer.offset = calDstOrigin[0] + dstResource.offset();
copyRegion.dstBuffer.rowPitch = calDstOrigin[1];
copyRegion.dstBuffer.depthPitch = calDstOrigin[2];
copyRegion.dstBuffer.offset = dstOrigin[0] + dstResource.offset();
copyRegion.dstBuffer.rowPitch = dstOrigin[1];
copyRegion.dstBuffer.depthPitch = dstOrigin[2];
gpu.iCmd()->CmdCopyTypedBuffer(*iMem(), *dstResource.iMem(), 1, &copyRegion);
} else {
Pal::MemoryCopyRegion copyRegion = {};
copyRegion.srcOffset = calSrcOrigin[0] + offset();
copyRegion.dstOffset = calDstOrigin[0] + dstResource.offset();
copyRegion.copySize = calSize[0];
copyRegion.srcOffset = srcOrigin[0] + offset();
copyRegion.dstOffset = dstOrigin[0] + dstResource.offset();
copyRegion.copySize = size[0];
gpu.iCmd()->CmdCopyMemory(*iMem(), *dstResource.iMem(), 1, &copyRegion);
}
}
-4
Просмотреть файл
@@ -247,9 +247,6 @@ class Resource : public amd::HeapObject {
//! Returns the offset in GPU memory for aliases
size_t offset() const { return offset_; }
//! Returns the pinned memory offset
uint64_t pinOffset() const { return pinOffset_; }
//! Returns the GPU device that owns this resource
const Device& dev() const { return gpuDevice_; }
@@ -428,7 +425,6 @@ class Resource : public amd::HeapObject {
RenameList renames_; //!< Rename resource list
GpuMemoryReference* memRef_; //!< PAL resource reference
const Resource* viewOwner_; //!< GPU resource, which owns this view
uint64_t pinOffset_; //!< Pinned memory offset
void* glInteropMbRes_; //!< Mb Res handle
uint32_t glType_; //!< GL interop type
void* glPlatformContext_;