diff --git a/projects/clr/rocclr/runtime/device/gpu/gpumemory.cpp b/projects/clr/rocclr/runtime/device/gpu/gpumemory.cpp
index 1cf44ba8bd..ac6272daba 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpumemory.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpumemory.cpp
@@ -171,7 +171,6 @@ Memory::create(
     // Check if CAL created a resource
     if (result) {
         switch (memoryType()) {
-        case Resource::Persistent:
         case Resource::Pinned:
         case Resource::ExternalPhysical:
             // Marks memory object for direct GPU access to the host memory
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp
index d601553123..a8fb50bf6d 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp
@@ -1567,7 +1567,9 @@ Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers)
         // guarantee that the address will be valid,
         // since GSL could still process the first map
         if (address_ == NULL) {
-            amd::Os::sleep(10);
+            for (uint i = 0; address_ == NULL && i < 10; ++i) {
+                amd::Os::sleep(1);
+            }
             assert((address_ != NULL) && "Multiple maps failed!");
         }
 
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
index dcace89e26..85c9718977 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -3562,31 +3562,72 @@ VirtualGPU::submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd)
 {
     size_t copySize = cmd.size()[0];
     size_t fileOffset = cmd.fileOffset();
-    size_t srcDstOffset = cmd.origin()[0];
     Memory* mem = dev().getGpuMemory(&cmd.memory());
     uint idx = 0;
 
     assert((cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD) ||
-        (cmd.type() == CL_COMMAND_WRITE_SSG_FILE_AMD));
-    bool writeBuffer(cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD);
+           (cmd.type() == CL_COMMAND_WRITE_SSG_FILE_AMD));
+    const bool writeBuffer(cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD);
 
-    while (copySize > 0) {
-        Memory* staging = dev().getGpuMemory(&cmd.staging(idx));
-        size_t srcDstSize = amd::TransferBufferFileCommand::StagingBufferSize;
-        srcDstSize = std::min(srcDstSize, copySize);
-        void* srcDstBuffer = staging->cpuMap(*this);
-        if (!cmd.file()->transferBlock(writeBuffer,
-            srcDstBuffer, staging->size(), fileOffset, 0, srcDstSize)) {
-            cmd.setStatus(CL_INVALID_OPERATION);
-            return;
+    if (writeBuffer) {
+        // READ_SSG_FILE: run transferBlock on the mapped staging buffer, then blit staging into the buffer
+        size_t dstOffset = cmd.origin()[0];
+        while (copySize > 0) {
+            Memory* staging = dev().getGpuMemory(&cmd.staging(idx));
+            size_t dstSize = amd::TransferBufferFileCommand::StagingBufferSize;
+            dstSize = std::min(dstSize, copySize);
+            void* dstBuffer = staging->cpuMap(*this);
+            const bool transferred = cmd.file()->transferBlock(writeBuffer,
+                dstBuffer, staging->size(), fileOffset, 0, dstSize);
+            // Unmap before checking the result, so a failure can't leave staging mapped
+            staging->cpuUnmap(*this);
+            if (!transferred) {
+                cmd.setStatus(CL_INVALID_OPERATION);
+                return;
+            }
+
+            bool result = blitMgr().copyBuffer(*staging, *mem,
+                0, dstOffset, dstSize, false);
+            flushDMA(getGpuEvent(staging->gslResource())->engineId_);
+            if (!result) {
+                // Don't report success when the blit into the buffer failed
+                cmd.setStatus(CL_INVALID_OPERATION);
+                return;
+            }
+            fileOffset += dstSize;
+            dstOffset += dstSize;
+            copySize -= dstSize;
         }
-        staging->cpuUnmap(*this);
+    }
+    else {
+        // WRITE_SSG_FILE: blit the buffer into staging, then run transferBlock on the mapped staging buffer
+        size_t srcOffset = cmd.origin()[0];
+        while (copySize > 0) {
+            Memory* staging = dev().getGpuMemory(&cmd.staging(idx));
+            size_t srcSize = amd::TransferBufferFileCommand::StagingBufferSize;
+            srcSize = std::min(srcSize, copySize);
+            bool result = blitMgr().copyBuffer(*mem, *staging,
+                srcOffset, 0, srcSize, false);
+            if (!result) {
+                // Staging doesn't hold the requested data when the blit failed
+                cmd.setStatus(CL_INVALID_OPERATION);
+                return;
+            }
 
-        bool result = blitMgr().copyBuffer(*staging, *mem,
-            fileOffset, srcDstOffset, srcDstSize, false);
-        flushDMA(getGpuEvent(staging->gslResource())->engineId_);
-        srcDstOffset += srcDstSize;
-        copySize -= srcDstSize;
+            void* srcBuffer = staging->cpuMap(*this);
+            const bool transferred = cmd.file()->transferBlock(writeBuffer,
+                srcBuffer, staging->size(), fileOffset, 0, srcSize);
+            // Unmap before checking the result, so a failure can't leave staging mapped
+            staging->cpuUnmap(*this);
+            if (!transferred) {
+                cmd.setStatus(CL_INVALID_OPERATION);
+                return;
+            }
+
+            fileOffset += srcSize;
+            srcOffset += srcSize;
+            copySize -= srcSize;
+        }
     }
 }
 
diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.cpp b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
index 6a87c2a6c3..0b890d9ab6 100644
--- a/projects/clr/rocclr/runtime/device/pal/palresource.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
@@ -1654,9 +1654,11 @@ Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers)
 
         //! \note the atomic operation with counter doesn't
         // guarantee that the address will be valid,
-        // since GSL could still process the first map
+        // since PAL could still process the first map
         if (address_ == nullptr) {
-            amd::Os::sleep(10);
+            for (uint i = 0; address_ == nullptr && i < 10; ++i) {
+                amd::Os::sleep(1);
+            }
             assert((address_ != nullptr) && "Multiple maps failed!");
         }
 
diff --git a/projects/clr/rocclr/runtime/platform/command.cpp b/projects/clr/rocclr/runtime/platform/command.cpp
index 3bee36f48e..003fa38302 100644
--- a/projects/clr/rocclr/runtime/platform/command.cpp
+++ b/projects/clr/rocclr/runtime/platform/command.cpp
@@ -592,7 +592,15 @@ TransferBufferFileCommand::submit(device::VirtualDevice& device)
     device::Memory* mem = memory_->getDeviceMemory(queue()->device());
     if (memory_->getMemFlags() & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
         CL_MEM_USE_PERSISTENT_MEM_AMD)) {
-        void* srcDstBuffer = mem->cpuMap(device);
+        void* srcDstBuffer = nullptr;
+        if (memory_->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
+            // Multiple maps of persistent memory are serialized under the owner's memory-ops lock
+            amd::ScopedLock lock(mem->owner()->lockMemoryOps());
+            srcDstBuffer = mem->cpuMap(device);
+        }
+        else {
+            srcDstBuffer = mem->cpuMap(device);
+        }
         // Make HD transfer to the host accessible memory
         bool writeBuffer(type() == CL_COMMAND_READ_SSG_FILE_AMD);
         if (!file()->transferBlock(writeBuffer, srcDstBuffer, mem->size(),
@@ -600,7 +608,14 @@ TransferBufferFileCommand::submit(device::VirtualDevice& device)
             setStatus(CL_INVALID_OPERATION);
             return;
         }
-        mem->cpuUnmap(device);
+        if (memory_->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
+            // The matching unmap of persistent memory takes the same memory-ops lock as the map
+            amd::ScopedLock lock(mem->owner()->lockMemoryOps());
+            mem->cpuUnmap(device);
+        }
+        else {
+            mem->cpuUnmap(device);
+        }
     }
     else {
         device.submitTransferBufferFromFile(*this);