diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
index e29b86741d..eb4e4dc907 100644
--- a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
@@ -281,6 +281,10 @@ Settings::create(
     enableExtension(ClAmdVec3);
     enableExtension(ClAmdPrintf);
     enableExtension(ClKhrImage2dFromBuffer);
+    // Enable some platform extensions
+    enableExtension(ClAmdDeviceAttributeQuery);
+    enableExtension(ClKhrSpir);
+    enableExtension(ClAMDLiquidFlash);
 
     hwLDSSize_ = 32 * Ki;
 
@@ -324,10 +328,6 @@ Settings::create(
     svmAtomics_ = (calAttr.svmAtomics || calAttr.isSVMFineGrainSystem)
         ? true : false;
 */
-    // Enable some platform extensions
-    enableExtension(ClAmdDeviceAttributeQuery);
-
-    enableExtension(ClKhrSpir);
 
     // SVM is not currently supported for DX Interop
 #if defined(_WIN32)
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index 5954396bed..a0972dcd0c 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -833,11 +833,10 @@ VirtualGPU::allocHsaQueueMem()
 
     // Provide private and local heap addresses
     const static uint addressShift = LP64_SWITCH(0, 32);
-    LogWarning("Private/Shared aperture isn't set");
-/*  queue->private_segment_aperture_base_hi =
-        static_cast(dev().gslCtx()->getPrivateApertureBase()>>addressShift);
-    queue->group_segment_aperture_base_hi =
-        static_cast(dev().gslCtx()->getSharedApertureBase()>>addressShift);
+/*  queue->private_segment_aperture_base_hi = static_cast(
+        dev().properties().gpuMemoryProperties.privateApertureBase >> addressShift);
+    queue->group_segment_aperture_base_hi = static_cast(
+        dev().properties().gpuMemoryProperties.sharedApertureBase >> addressShift);
 */
     hsaQueueMem_->unmap(nullptr);
     return true;
@@ -3438,4 +3437,61 @@ VirtualGPU::validateSdmaOverlap(const Resource& src, const Resource& dst)
     return false;
 }
 
+// Transfers cmd.size()[0] bytes between the command's file and its buffer
+// object in chunks of at most StagingBufferSize bytes. Every chunk is moved by
+// transferBlock() through the CPU mapping of staging buffer 0 and then blitted
+// from that staging buffer into the buffer, starting at cmd.origin()[0].
+// Stops at the first chunk that cannot be mapped, transferred or copied.
+// NOTE(review): the blit always runs staging -> buffer, including for
+// CL_COMMAND_READ_BUFFER_FROM_FILE_AMD - confirm that this is intended.
+void
+VirtualGPU::submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd)
+{
+    size_t copySize = cmd.size()[0];
+    size_t fileOffset = cmd.fileOffset();
+    size_t srcDstOffset = cmd.origin()[0];
+    Memory* mem = dev().getGpuMemory(&cmd.memory());
+    uint idx = 0;
+
+    assert((cmd.type() == CL_COMMAND_WRITE_BUFFER_FROM_FILE_AMD) ||
+           (cmd.type() == CL_COMMAND_READ_BUFFER_FROM_FILE_AMD));
+    const bool writeBuffer(cmd.type() == CL_COMMAND_WRITE_BUFFER_FROM_FILE_AMD);
+
+    while (copySize > 0) {
+        Memory* staging = dev().getGpuMemory(&cmd.staging(idx));
+        // Never move more than one staging buffer's worth per iteration
+        size_t srcDstSize = amd::TransferBufferFileCommand::StagingBufferSize;
+        srcDstSize = std::min(srcDstSize, copySize);
+
+        void* srcDstBuffer = staging->cpuMap(*this);
+        if (srcDstBuffer == nullptr) {
+            LogWarning("Couldn't map the staging buffer for a file transfer");
+            return;
+        }
+        const bool transferred = cmd.file()->transferBlock(
+            writeBuffer, srcDstBuffer, fileOffset, 0, srcDstSize);
+        // Drop the CPU mapping on the failure path as well
+        staging->cpuUnmap(*this);
+        if (!transferred) {
+            LogWarning("Transfer between the file and the staging buffer failed");
+            return;
+        }
+
+        // transferBlock() got the mapped staging pointer and a 0 offset argument,
+        // so the chunk is presumably at the start of the staging buffer; the blit
+        // source offset is therefore 0 rather than the offset into the file
+        const size_t stagingOffset = 0;
+        if (!blitMgr().copyBuffer(*staging, *mem,
+                stagingOffset, srcDstOffset, srcDstSize, false)) {
+            LogWarning("Copy from the staging buffer into the buffer failed");
+            return;
+        }
+        flushDMA(getGpuEvent(staging->iMem())->engineId_);
+
+        // Advance the file position in step with the buffer position
+        fileOffset += srcDstSize;
+        srcDstOffset += srcDstSize;
+        copySize -= srcDstSize;
+    }
+}
 } // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
index 98c23c79d4..c566a3f646 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
@@ -253,6 +253,7 @@ public:
     virtual void submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd);
     virtual void submitSvmMapMemory(amd::SvmMapMemoryCommand& cmd);
     virtual void submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd);
+    virtual void submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd);
 
     void releaseMemory(Pal::IGpuMemory* iMem, bool wait = true);
 