Fix a deadlock in ROCr backend
When the OCL ROCr backend handles CL_MEM_COPY_HOST_PTR, it may attempt
to access the amd::Memory object it is currently creating, which is
not ready yet. The logic creates a temporary dummy object
to perform the copy transfer. This change makes sure the runtime
skips allocating the same device::Memory object a second time.
Change-Id: I14c6a00a3941fdcaa6aea299e9f096e4c3f5cadf
[ROCm/clr commit: 1fde842703]
此提交包含在:
@@ -1681,7 +1681,7 @@ Memory* Device::getRocMemory(amd::Memory* mem) const {
|
||||
return static_cast<roc::Memory*>(mem->getDeviceMemory(*this));
|
||||
}
|
||||
|
||||
|
||||
// ================================================================================================
|
||||
device::Memory* Device::createMemory(amd::Memory& owner) const {
|
||||
roc::Memory* memory = nullptr;
|
||||
if (owner.asBuffer()) {
|
||||
@@ -1712,17 +1712,18 @@ device::Memory* Device::createMemory(amd::Memory& owner) const {
|
||||
// Pipe initialize in order read_idx, write_idx, end_idx. Refer clk_pipe_t structure.
|
||||
// Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit
|
||||
size_t pipeInit[3] = { 0, 0, owner.asPipe()->getMaxNumPackets() };
|
||||
xferMgr().writeBuffer((void *)pipeInit, *memory, amd::Coord3D(0), amd::Coord3D(sizeof(pipeInit)));
|
||||
xferMgr().writeBuffer(pipeInit, *memory, amd::Coord3D(0), amd::Coord3D(sizeof(pipeInit)));
|
||||
}
|
||||
|
||||
// Transfer data only if OCL context has one device.
|
||||
// Cache coherency layer will update data for multiple devices
|
||||
if (!memory->isHostMemDirectAccess() && owner.asImage() && (owner.parent() == nullptr) &&
|
||||
(owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) && (owner.getContext().devices().size() == 1)) {
|
||||
(owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
|
||||
(owner.getContext().devices().size() == 1)) {
|
||||
// To avoid recursive call to Device::createMemory, we perform
|
||||
// data transfer to the view of the image.
|
||||
amd::Image* imageView = owner.asImage()->createView(
|
||||
owner.getContext(), owner.asImage()->getImageFormat(), xferQueue());
|
||||
// data transfer to the view of the image
|
||||
amd::Image* imageView = owner.asImage()->createView(owner.getContext(),
|
||||
owner.asImage()->getImageFormat(), xferQueue());
|
||||
|
||||
if (imageView == nullptr) {
|
||||
LogError("[OCL] Fail to allocate view of image object");
|
||||
|
||||
@@ -1167,10 +1167,10 @@ Image* Image::createView(const Context& context, const Format& format, device::V
|
||||
// Find the image dimensions and create a corresponding object
|
||||
view = new (context) Image(format, *this, baseMipLevel, flags);
|
||||
|
||||
// Set GPU virtual device for this view
|
||||
view->setVirtualDevice(vDev);
|
||||
|
||||
if (view != nullptr) {
|
||||
// Set GPU virtual device for this view
|
||||
view->setVirtualDevice(vDev);
|
||||
|
||||
view->resetAllocationState();
|
||||
|
||||
// Initialize array of the device memory pointers
|
||||
@@ -1178,7 +1178,14 @@ Image* Image::createView(const Context& context, const Format& format, device::V
|
||||
|
||||
// Check if runtime has to allocate memory
|
||||
if ((context.devices().size() == 1) || DISABLE_DEFERRED_ALLOC) {
|
||||
device::Memory* mem = view->getDeviceMemory(*context.devices()[0]);
|
||||
for (uint i = 0; i < numDevices_; ++i) {
|
||||
// Make sure the parent's device memory is available
|
||||
if ((deviceMemories_[i].ref_ == context.devices()[i]) &&
|
||||
(deviceMemories_[i].value_ != nullptr)) {
|
||||
device::Memory* mem = view->getDeviceMemory(*context.devices()[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
新增問題並參考
封鎖使用者