diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index 7db9a686ec..225d949895 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -1789,9 +1789,11 @@ pal::Memory* Device::createImage(amd::Memory& owner, bool directAccess) const { owner.setHostMem(nullptr); } else { amd::Coord3D origin(0, 0, 0); - static const bool Entire = true; - if (xferMgr().writeImage(owner.getHostMem(), *gpuImage, origin, image.getRegion(), 0, 0, - Entire)) { + // Copy data with the original pitch values, since runtime doesn't perform + // extra sysmem allocation for one device + constexpr bool kEntire = true; + if (xferMgr().writeImage(owner.getHostMem(), *gpuImage, origin, image.getRegion(), + image.getRowPitch(), image.getSlicePitch(), kEntire)) { // Clear CHP memory owner.setHostMem(nullptr); } diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 5c9d383435..02e27e14ce 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -2086,8 +2086,11 @@ device::Memory* Device::createMemory(amd::Memory& owner) const { imageView->replaceDeviceMemory(this, devImageView); + // Copy data with the original pitch values, since runtime doesn't perform + // extra sysmem allocation for one device + const auto image = owner.asImage(); result = xferMgr().writeImage(owner.getHostMem(), *devImageView, amd::Coord3D(0, 0, 0), - imageView->getRegion(), 0, 0, true); + imageView->getRegion(), image->getRowPitch(), image->getSlicePitch(), true); // Release host memory, since runtime copied data owner.setHostMem(nullptr);