diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index f5957f6851..cfec1ecbd2 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -3115,6 +3115,13 @@ static inline void nontemporalMemcpy( _mm_stream_si32(reinterpret_cast(dst)++, *reinterpret_cast(src)++); } + + size = size % sizeof(int); + // Copy remaining bytes for unaligned size + std::memcpy(dst, src, size); + + // Add memory fence + _mm_sfence(); #else std::memcpy(dst, src, size); #endif