diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index 7bc494d8c0..7c7edecd0d 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -58,7 +58,7 @@ class SvmCopyMemoryCommand; class SvmFillMemoryCommand; class SvmMapMemoryCommand; class SvmUnmapMemoryCommand; -class WriteBufferFromFileCommand; +class TransferBufferFileCommand; class HwDebugManager; class Device; struct KernelParameterDescriptor; @@ -1452,7 +1452,7 @@ public: /// Optional extensions virtual void submitSignal(amd::SignalCommand & cmd) = 0; virtual void submitMakeBuffersResident(amd::MakeBuffersResidentCommand & cmd) = 0; - virtual void submitWriteBufferFromFile(amd::WriteBufferFromFileCommand& cmd) { ShouldNotReachHere(); } + virtual void submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd) { ShouldNotReachHere(); } //! Get the blit manager object device::BlitManager& blitMgr() const { return *blitMgr_; } diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp index 1f867f711c..1e9813461d 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -3515,32 +3515,36 @@ VirtualGPU::assignDebugTrapHandler(const DebugToolInfo& dbgSetting, addVmMemory(trapBufferMem); addVmMemory(rtTrapHandlerMem); addVmMemory(rtTrapBufferMem); - } void -VirtualGPU::submitWriteBufferFromFile(amd::WriteBufferFromFileCommand& cmd) +VirtualGPU::submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd) { size_t copySize = cmd.size()[0]; size_t fileOffset = cmd.fileOffset(); - size_t dstOffset = cmd.origin()[0]; + size_t srcDstOffset = cmd.origin()[0]; Memory* mem = dev().getGpuMemory(&cmd.memory()); uint idx = 0; + + assert((cmd.type() == CL_COMMAND_WRITE_BUFFER_FROM_FILE_AMD) || + (cmd.type() == CL_COMMAND_READ_BUFFER_FROM_FILE_AMD)); + bool writeBuffer(cmd.type() == CL_COMMAND_WRITE_BUFFER_FROM_FILE_AMD); + while (copySize > 0) { Memory* staging = dev().getGpuMemory(&cmd.staging(idx)); - size_t dstSize = amd::WriteBufferFromFileCommand::StagingBufferSize; - dstSize = std::min(dstSize, copySize); - void* dstBuffer = staging->cpuMap(*this); - if (!cmd.file()->readBlock(dstBuffer, fileOffset, 0, dstSize)) { + size_t srcDstSize = amd::TransferBufferFileCommand::StagingBufferSize; + srcDstSize = std::min(srcDstSize, copySize); + void* srcDstBuffer = staging->cpuMap(*this); + if (!cmd.file()->transferBlock(writeBuffer, srcDstBuffer, fileOffset, 0, srcDstSize)) { return; } staging->cpuUnmap(*this); bool result = blitMgr().copyBuffer(*staging, *mem, - fileOffset, dstOffset, dstSize, false); + fileOffset, srcDstOffset, srcDstSize, false); flushDMA(getGpuEvent(staging->gslResource())->engineId_); - dstOffset += dstSize; - copySize -= dstSize; + srcDstOffset += srcDstSize; + copySize -= srcDstSize; } } diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp index 1aabaf5f04..c8dc7a9bc0 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.hpp @@ -243,7 +243,7 @@ public: virtual void submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd); virtual void submitSvmMapMemory(amd::SvmMapMemoryCommand& cmd); virtual void submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd); - virtual void submitWriteBufferFromFile(amd::WriteBufferFromFileCommand& cmd); + virtual void submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd); void releaseMemory(gslMemObject gslResource, bool wait = true); void releaseKernel(CALimage calImage); diff --git a/projects/clr/rocclr/runtime/platform/command.cpp b/projects/clr/rocclr/runtime/platform/command.cpp index 6a250dffc1..194a04f7b9 100644 --- a/projects/clr/rocclr/runtime/platform/command.cpp +++ b/projects/clr/rocclr/runtime/platform/command.cpp @@ -574,7 +574,7 @@ ThreadTraceMemObjectsCommand::validateMemory() } void -WriteBufferFromFileCommand::releaseResources() +TransferBufferFileCommand::releaseResources() { for (uint i = 0; i < NumStagingBuffers; ++i) { if (NULL != staging_[i]) { @@ -587,25 +587,27 @@ WriteBufferFromFileCommand::releaseResources() } void -WriteBufferFromFileCommand::submit(device::VirtualDevice& device) +TransferBufferFileCommand::submit(device::VirtualDevice& device) { device::Memory* mem = memory_->getDeviceMemory(queue()->device()); if (memory_->getMemFlags() & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_PERSISTENT_MEM_AMD)) { - void* dstBuffer = mem->cpuMap(device); + void* srcDstBuffer = mem->cpuMap(device); // Make HD transfer to the host accessible memory - if (!file()->readBlock(dstBuffer, fileOffset(), origin()[0], size()[0])) { + bool writeBuffer(type() == CL_COMMAND_WRITE_BUFFER_FROM_FILE_AMD); + if (!file()->transferBlock(writeBuffer, srcDstBuffer, + fileOffset(), origin()[0], size()[0])) { return; } mem->cpuUnmap(device); } else { - device.submitWriteBufferFromFile(*this); + device.submitTransferBufferFromFile(*this); } } bool -WriteBufferFromFileCommand::validateMemory() +TransferBufferFileCommand::validateMemory() { if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) { // Check if the destination buffer has direct host access diff --git a/projects/clr/rocclr/runtime/platform/command.hpp b/projects/clr/rocclr/runtime/platform/command.hpp index 3ae4d4c379..7d03925990 100644 --- a/projects/clr/rocclr/runtime/platform/command.hpp +++ b/projects/clr/rocclr/runtime/platform/command.hpp @@ -1459,20 +1459,20 @@ public: Memory* getSvmMem() const {return svmMem_;} }; -/*! \brief A generic write memory from file command. +/*! \brief A generic transfer memory from/to file command. * * \details Currently supports buffers only. Buffers * are treated as 1D structures so origin_[0] and size_[0] * are equivalent to offset_ and count_ respectively. */ -class WriteBufferFromFileCommand : public OneMemoryArgCommand +class TransferBufferFileCommand : public OneMemoryArgCommand { public: static const uint NumStagingBuffers = 2; static const size_t StagingBufferSize = 4 * Mi; static const uint StagingBufferMemType = CL_MEM_USE_PERSISTENT_MEM_AMD; -private: +protected: const Coord3D origin_; //!< Origin of the region to write to const Coord3D size_; //!< Size of the region to write to LiquidFlashFile* file_; //!< The file object for data read @@ -1480,12 +1480,13 @@ private: amd::Memory* staging_[NumStagingBuffers]; //!< Staging buffers for transfer public: - WriteBufferFromFileCommand( + TransferBufferFileCommand( + cl_command_type type, HostQueue& queue, const EventWaitList& eventWaitList, Memory& memory, const Coord3D& origin, const Coord3D& size, LiquidFlashFile* file, size_t fileOffset) - : OneMemoryArgCommand(queue, CL_COMMAND_WRITE_BUFFER_FROM_FILE_AMD, + : OneMemoryArgCommand(queue, type, eventWaitList, memory) , origin_(origin) , size_(size) diff --git a/projects/clr/rocclr/runtime/platform/memory.hpp b/projects/clr/rocclr/runtime/platform/memory.hpp index 62b95bc7a3..1dbab69cd6 100644 --- a/projects/clr/rocclr/runtime/platform/memory.hpp +++ b/projects/clr/rocclr/runtime/platform/memory.hpp @@ -692,7 +692,8 @@ public: uint32_t blockSize() const { return blockSize_; }; uint64_t fileSize() const { return fileSize_; }; - bool readBlock( + bool transferBlock( + bool read, void* dst, uint64_t fileOffset, uint64_t bufferOffset,