diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp index 0a5e1484c5..dfbd198b4b 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/device/pal/palvirtual.cpp @@ -1872,11 +1872,21 @@ void VirtualGPU::submitFillMemory(amd::FillMemoryCommand& cmd) { amd::BufferRect rect; rect.create(static_cast(origin), static_cast(region), pitch, 0); + + bool force_blit = false; + if (amd::IS_HIP) { + constexpr uint32_t kManagedAlloc = (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_ALLOC_HOST_PTR); + // In case of HMM, use blit kernel instead of CPU memcpy + if ((cmd.memory().getMemFlags() & kManagedAlloc) == kManagedAlloc) { + force_blit = true; + } + } + for (size_t slice = 0; slice < depth; slice++) { for (size_t row = 0; row < height; row++) { const size_t rowOffset = rect.offset(0, row, slice); if (!fillMemory(cmd.type(), &cmd.memory(), cmd.pattern(), cmd.patternSize(), - amd::Coord3D{rowOffset, 0, 0}, amd::Coord3D{width, 1, 1})) { + amd::Coord3D{rowOffset, 0, 0}, amd::Coord3D{width, 1, 1}, force_blit)) { cmd.setStatus(CL_INVALID_OPERATION); } } diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 61daa52240..bba29a253b 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -2296,8 +2296,18 @@ void VirtualGPU::submitFillMemory(amd::FillMemoryCommand& cmd) { amd::ScopedLock lock(execution()); profilingBegin(cmd); + + bool force_blit = false; + if (amd::IS_HIP) { + constexpr uint32_t kManagedAlloc = (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_ALLOC_HOST_PTR); + // In case of HMM, use blit kernel instead of CPU memcpy + if ((cmd.memory().getMemFlags() & kManagedAlloc) == kManagedAlloc) { + force_blit = true; + } + } + if (!fillMemory(cmd.type(), &cmd.memory(), cmd.pattern(), cmd.patternSize(), - cmd.surface(), cmd.origin(), cmd.size())) { + cmd.surface(), cmd.origin(), cmd.size(), force_blit)) { cmd.setStatus(CL_INVALID_OPERATION); } profilingEnd(cmd);