P4 to Git Change 1524674 by gandryey@gera-w8 on 2018/03/08 17:26:08
SWDEV-79445 - OCL generic changes and code clean-up - Remove pinOffset_ field, since the pinning offset can be combined with global offset_ field Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#75 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#44 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#50 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#16 edit
Этот коммит содержится в:
@@ -1538,63 +1538,11 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler)
|
||||
return true;
|
||||
}
|
||||
|
||||
//! \note reallocMemory() must be called only from outside of
|
||||
//! VirtualGPU submit commands methods.
|
||||
//! Otherwise a deadlock in lockVgpus() is possible
|
||||
|
||||
//! \note reallocMemory() must be called only from outside of VirtualGPU submit
|
||||
//! commands methods. Otherwise a deadlock in lockVgpus() is possible
|
||||
bool Device::reallocMemory(amd::Memory& owner) const {
|
||||
// For now we have to serialize reallocation code
|
||||
amd::ScopedLock lk(*lockAsyncOps_);
|
||||
|
||||
// Read device memory after the lock,
|
||||
// since realloc from another thread can replace the pointer
|
||||
pal::Memory* gpuMemory = getGpuMemory(&owner);
|
||||
if (gpuMemory == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (gpuMemory->pinOffset() == 0) {
|
||||
return true;
|
||||
} else if (nullptr != owner.parent()) {
|
||||
if (!reallocMemory(*owner.parent())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (owner.asBuffer()) {
|
||||
gpuMemory = createBuffer(owner, false);
|
||||
} else if (owner.asImage()) {
|
||||
return true;
|
||||
} else {
|
||||
LogError("Unknown memory type!");
|
||||
}
|
||||
|
||||
if (gpuMemory != nullptr) {
|
||||
pal::Memory* newMemory = gpuMemory;
|
||||
pal::Memory* oldMemory = getGpuMemory(&owner);
|
||||
|
||||
// Transfer the object
|
||||
if (oldMemory != nullptr) {
|
||||
if (!oldMemory->moveTo(*newMemory)) {
|
||||
delete newMemory;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt to pin system memory
|
||||
if ((newMemory->memoryType() != Resource::Pinned) &&
|
||||
((owner.getHostMem() != nullptr) ||
|
||||
((nullptr != owner.parent()) && (owner.getHostMem() != nullptr)))) {
|
||||
bool ok = newMemory->pinSystemMemory(owner.getHostMem(), (owner.getHostMemRef()->size())
|
||||
? owner.getHostMemRef()->size()
|
||||
: owner.getSize());
|
||||
//! \note: Ignore the pinning result for now
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
// Empty body, since HSAIL path doesn't require memory reallocations
|
||||
return true;
|
||||
}
|
||||
|
||||
device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const {
|
||||
|
||||
@@ -749,11 +749,8 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
}
|
||||
|
||||
bool HSAILKernel::validateMemory(uint idx, amd::Memory* amdMem) const {
|
||||
// Check if memory doesn't require reallocation
|
||||
bool noRealloc = true;
|
||||
// amdMem->reallocedDeviceMemory(&dev()));
|
||||
|
||||
return noRealloc;
|
||||
// HSAIL path doesn't require memory reallocations
|
||||
return true;
|
||||
}
|
||||
|
||||
const Device& HSAILKernel::dev() const { return reinterpret_cast<const Device&>(dev_); }
|
||||
@@ -1026,7 +1023,7 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
|
||||
}
|
||||
|
||||
//! 64 bit isn't supported with 32 bit binary
|
||||
uint64_t globalAddress = gpuMem->vmAddress() + gpuMem->pinOffset();
|
||||
uint64_t globalAddress = gpuMem->vmAddress();
|
||||
WriteAqlArg(&aqlArgBuf, &globalAddress, arg->size_, arg->alignment_);
|
||||
|
||||
// Wait for resource if it was used on an inactive engine
|
||||
|
||||
@@ -189,7 +189,6 @@ Resource::Resource(const Device& gpuDev, size_t size)
|
||||
curRename_(0),
|
||||
memRef_(nullptr),
|
||||
viewOwner_(nullptr),
|
||||
pinOffset_(0),
|
||||
image_(nullptr),
|
||||
hwSrd_(0) {
|
||||
// Fill resource descriptor fields
|
||||
@@ -226,7 +225,6 @@ Resource::Resource(const Device& gpuDev, size_t width, size_t height, size_t dep
|
||||
curRename_(0),
|
||||
memRef_(nullptr),
|
||||
viewOwner_(nullptr),
|
||||
pinOffset_(0),
|
||||
image_(nullptr),
|
||||
hwSrd_(0) {
|
||||
// Fill resource descriptor fields
|
||||
@@ -949,7 +947,6 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
|
||||
if (viewOwner_->data() != nullptr) {
|
||||
address_ = viewOwner_->data() + view->offset_;
|
||||
}
|
||||
pinOffset_ = viewOwner_->pinOffset();
|
||||
memRef_ = viewOwner_->memRef_;
|
||||
memRef_->retain();
|
||||
desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
|
||||
@@ -974,7 +971,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
|
||||
// Find the partial size for unaligned copy
|
||||
hostMemOffset = static_cast<uint>(reinterpret_cast<const char*>(address_) - tmpHost);
|
||||
|
||||
pinOffset_ = hostMemOffset;
|
||||
offset_ = hostMemOffset;
|
||||
|
||||
pinAddress = tmpHost;
|
||||
|
||||
@@ -1012,7 +1009,6 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
|
||||
memRef_ = GpuMemoryReference::Create(dev(), createInfo);
|
||||
if (nullptr == memRef_) {
|
||||
LogError("Failed PAL memory allocation!");
|
||||
pinOffset_ = 0;
|
||||
return false;
|
||||
}
|
||||
desc_.cardMemory_ = false;
|
||||
@@ -1218,17 +1214,6 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
|
||||
|
||||
assert(!(desc().cardMemory_ && dstResource.desc().cardMemory_) && "Unsupported configuraiton!");
|
||||
|
||||
size_t calSrcOrigin[3], calDstOrigin[3], calSize[3];
|
||||
calSrcOrigin[0] = srcOrigin[0] + pinOffset();
|
||||
calSrcOrigin[1] = srcOrigin[1];
|
||||
calSrcOrigin[2] = srcOrigin[2];
|
||||
calDstOrigin[0] = dstOrigin[0] + dstResource.pinOffset();
|
||||
calDstOrigin[1] = dstOrigin[1];
|
||||
calDstOrigin[2] = dstOrigin[2];
|
||||
calSize[0] = size[0];
|
||||
calSize[1] = size[1];
|
||||
calSize[2] = size[2];
|
||||
|
||||
uint64_t gpuMemoryOffset = 0;
|
||||
uint64_t gpuMemoryRowPitch = 0;
|
||||
uint64_t imageOffsetx = 0;
|
||||
@@ -1236,16 +1221,16 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
|
||||
bool img2Darray = false;
|
||||
|
||||
if (desc().buffer_ && !dstResource.desc().buffer_) {
|
||||
imageOffsetx = calDstOrigin[0] % dstResource.elementSize();
|
||||
gpuMemoryOffset = calSrcOrigin[0] + offset();
|
||||
imageOffsetx = dstOrigin[0] % dstResource.elementSize();
|
||||
gpuMemoryOffset = srcOrigin[0] + offset();
|
||||
gpuMemoryRowPitch =
|
||||
(calSrcOrigin[1]) ? calSrcOrigin[1] : calSize[0] * dstResource.elementSize();
|
||||
(srcOrigin[1]) ? srcOrigin[1] : size[0] * dstResource.elementSize();
|
||||
img1Darray = (dstResource.desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
|
||||
img2Darray = (dstResource.desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY);
|
||||
} else if (!desc().buffer_ && dstResource.desc().buffer_) {
|
||||
imageOffsetx = calSrcOrigin[0] % elementSize();
|
||||
gpuMemoryOffset = calDstOrigin[0] + dstResource.offset();
|
||||
gpuMemoryRowPitch = (calDstOrigin[1]) ? calDstOrigin[1] : calSize[0] * elementSize();
|
||||
imageOffsetx = srcOrigin[0] % elementSize();
|
||||
gpuMemoryOffset = dstOrigin[0] + dstResource.offset();
|
||||
gpuMemoryRowPitch = (dstOrigin[1]) ? dstOrigin[1] : size[0] * elementSize();
|
||||
img1Darray = (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
|
||||
img2Darray = (desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY);
|
||||
}
|
||||
@@ -1280,12 +1265,12 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
|
||||
Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, dstResource.desc().baseLevel_, 0};
|
||||
Pal::MemoryImageCopyRegion copyRegion = {};
|
||||
copyRegion.imageSubres = ImgSubresId;
|
||||
copyRegion.imageOffset.x = calDstOrigin[0];
|
||||
copyRegion.imageOffset.y = calDstOrigin[1];
|
||||
copyRegion.imageOffset.z = calDstOrigin[2];
|
||||
copyRegion.imageExtent.width = calSize[0];
|
||||
copyRegion.imageExtent.height = calSize[1];
|
||||
copyRegion.imageExtent.depth = calSize[2];
|
||||
copyRegion.imageOffset.x = dstOrigin[0];
|
||||
copyRegion.imageOffset.y = dstOrigin[1];
|
||||
copyRegion.imageOffset.z = dstOrigin[2];
|
||||
copyRegion.imageExtent.width = size[0];
|
||||
copyRegion.imageExtent.height = size[1];
|
||||
copyRegion.imageExtent.depth = size[2];
|
||||
copyRegion.numSlices = 1;
|
||||
if (img1Darray) {
|
||||
copyRegion.numSlices = copyRegion.imageExtent.height;
|
||||
@@ -1296,20 +1281,20 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
|
||||
}
|
||||
copyRegion.gpuMemoryOffset = gpuMemoryOffset;
|
||||
copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
|
||||
copyRegion.gpuMemoryDepthPitch = (calSrcOrigin[2])
|
||||
? calSrcOrigin[2]
|
||||
copyRegion.gpuMemoryDepthPitch = (srcOrigin[2])
|
||||
? srcOrigin[2]
|
||||
: copyRegion.gpuMemoryRowPitch * copyRegion.imageExtent.height;
|
||||
gpu.iCmd()->CmdCopyMemoryToImage(*iMem(), *dstResource.image_, imgLayout, 1, &copyRegion);
|
||||
} else if (!desc().buffer_ && dstResource.desc().buffer_) {
|
||||
Pal::MemoryImageCopyRegion copyRegion = {};
|
||||
Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, desc().baseLevel_, 0};
|
||||
copyRegion.imageSubres = ImgSubresId;
|
||||
copyRegion.imageOffset.x = calSrcOrigin[0];
|
||||
copyRegion.imageOffset.y = calSrcOrigin[1];
|
||||
copyRegion.imageOffset.z = calSrcOrigin[2];
|
||||
copyRegion.imageExtent.width = calSize[0];
|
||||
copyRegion.imageExtent.height = calSize[1];
|
||||
copyRegion.imageExtent.depth = calSize[2];
|
||||
copyRegion.imageOffset.x = srcOrigin[0];
|
||||
copyRegion.imageOffset.y = srcOrigin[1];
|
||||
copyRegion.imageOffset.z = srcOrigin[2];
|
||||
copyRegion.imageExtent.width = size[0];
|
||||
copyRegion.imageExtent.height = size[1];
|
||||
copyRegion.imageExtent.depth = size[2];
|
||||
copyRegion.numSlices = 1;
|
||||
if (img1Darray) {
|
||||
copyRegion.numSlices = copyRegion.imageExtent.height;
|
||||
@@ -1320,8 +1305,7 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
|
||||
}
|
||||
copyRegion.gpuMemoryOffset = gpuMemoryOffset;
|
||||
copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
|
||||
copyRegion.gpuMemoryDepthPitch = (calDstOrigin[2])
|
||||
? calDstOrigin[2]
|
||||
copyRegion.gpuMemoryDepthPitch = (dstOrigin[2]) ? dstOrigin[2]
|
||||
: copyRegion.gpuMemoryRowPitch * copyRegion.imageExtent.height;
|
||||
gpu.iCmd()->CmdCopyImageToMemory(*image_, imgLayout, *dstResource.iMem(), 1, &copyRegion);
|
||||
} else {
|
||||
@@ -1331,23 +1315,23 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
|
||||
Pal::ChannelSwizzle::Z, Pal::ChannelSwizzle::W};
|
||||
copyRegion.srcBuffer.swizzledFormat.format = ChannelFmt(bytesPerElement);
|
||||
copyRegion.srcBuffer.swizzledFormat.swizzle = channels;
|
||||
copyRegion.srcBuffer.offset = calSrcOrigin[0] + offset();
|
||||
copyRegion.srcBuffer.rowPitch = calSrcOrigin[1];
|
||||
copyRegion.srcBuffer.depthPitch = calSrcOrigin[2];
|
||||
copyRegion.extent.width = calSize[0] / bytesPerElement;
|
||||
copyRegion.extent.height = calSize[1];
|
||||
copyRegion.extent.depth = calSize[2];
|
||||
copyRegion.srcBuffer.offset = srcOrigin[0] + offset();
|
||||
copyRegion.srcBuffer.rowPitch = srcOrigin[1];
|
||||
copyRegion.srcBuffer.depthPitch = srcOrigin[2];
|
||||
copyRegion.extent.width = size[0] / bytesPerElement;
|
||||
copyRegion.extent.height = size[1];
|
||||
copyRegion.extent.depth = size[2];
|
||||
copyRegion.dstBuffer.swizzledFormat.format = ChannelFmt(bytesPerElement);
|
||||
copyRegion.dstBuffer.swizzledFormat.swizzle = channels;
|
||||
copyRegion.dstBuffer.offset = calDstOrigin[0] + dstResource.offset();
|
||||
copyRegion.dstBuffer.rowPitch = calDstOrigin[1];
|
||||
copyRegion.dstBuffer.depthPitch = calDstOrigin[2];
|
||||
copyRegion.dstBuffer.offset = dstOrigin[0] + dstResource.offset();
|
||||
copyRegion.dstBuffer.rowPitch = dstOrigin[1];
|
||||
copyRegion.dstBuffer.depthPitch = dstOrigin[2];
|
||||
gpu.iCmd()->CmdCopyTypedBuffer(*iMem(), *dstResource.iMem(), 1, &copyRegion);
|
||||
} else {
|
||||
Pal::MemoryCopyRegion copyRegion = {};
|
||||
copyRegion.srcOffset = calSrcOrigin[0] + offset();
|
||||
copyRegion.dstOffset = calDstOrigin[0] + dstResource.offset();
|
||||
copyRegion.copySize = calSize[0];
|
||||
copyRegion.srcOffset = srcOrigin[0] + offset();
|
||||
copyRegion.dstOffset = dstOrigin[0] + dstResource.offset();
|
||||
copyRegion.copySize = size[0];
|
||||
gpu.iCmd()->CmdCopyMemory(*iMem(), *dstResource.iMem(), 1, &copyRegion);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -247,9 +247,6 @@ class Resource : public amd::HeapObject {
|
||||
//! Returns the offset in GPU memory for aliases
|
||||
size_t offset() const { return offset_; }
|
||||
|
||||
//! Returns the pinned memory offset
|
||||
uint64_t pinOffset() const { return pinOffset_; }
|
||||
|
||||
//! Returns the GPU device that owns this resource
|
||||
const Device& dev() const { return gpuDevice_; }
|
||||
|
||||
@@ -428,7 +425,6 @@ class Resource : public amd::HeapObject {
|
||||
RenameList renames_; //!< Rename resource list
|
||||
GpuMemoryReference* memRef_; //!< PAL resource reference
|
||||
const Resource* viewOwner_; //!< GPU resource, which owns this view
|
||||
uint64_t pinOffset_; //!< Pinned memory offset
|
||||
void* glInteropMbRes_; //!< Mb Res handle
|
||||
uint32_t glType_; //!< GL interop type
|
||||
void* glPlatformContext_;
|
||||
|
||||
Ссылка в новой задаче
Block a user