// Patch summary (review notes; this leading text sits before the "diff --git"
// header and is not part of any hunk).
//
// Target file: projects/clr/rocclr/device/rocm/rocvirtual.cpp
//
// Hunk 1 (@@ -3411,49 +3411,7 @@) - IS_LINUX definition of nontemporalMemcpy():
//   * Removes the whole ATI_ARCH_X86-guarded body: the _mm512_stream_si512
//     (under __AVX512F__), _mm256_stream_si256 (under __AVX__),
//     _mm_stream_si128, _mm_stream_si64 and _mm_stream_si32 copy loops, the
//     trailing _mm_sfence(), and the #else fallback.
//   * The only statement left in the function is std::memcpy(dst, src, size).
//   * The function name and the optimize("unroll-all-loops")/always_inline
//     attribute are left unchanged by the hunk, even though the body no longer
//     contains a loop or a streaming store.
//     NOTE(review): confirm whether the name/attribute should be updated.
//
// Hunk 2 (@@ -3710,10 +3668,10 @@) - VirtualGPU::submitKernelInternal(), branch
// taken when kernArgImpl == KernelArgImpl::DeviceKernelArgsReadback and
// argSize != 0:
//   * The _mm_sfence() before, and the _mm_mfence() plus the kSentinel
//     read-back after, the last-byte store
//       *(argBuffer + argSize - 1) = *(parameters + argSize - 1);
//     are commented out rather than deleted; only the store itself remains.
//   * NOTE(review): the fences and the read-back presumably ordered the kernel
//     argument writes before the dispatch became visible to the device - confirm
//     that dropping them is intended on every target. If it is, delete the
//     commented-out lines instead of leaving dead code behind.
//
// NOTE(review): several reinterpret_cast expressions below carry no template
// argument list (e.g. "reinterpret_cast(src)++"). That looks like an extraction
// artifact, so this text is probably not directly applicable as a patch - verify
// against the original commit.
diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 5abfd73284..ddabe0c192 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -3411,49 +3411,7 @@ bool VirtualGPU::createVirtualQueue(uint deviceQueueSize) { #if IS_LINUX __attribute__((optimize("unroll-all-loops"), always_inline)) static inline void nontemporalMemcpy( void* __restrict dst, const void* __restrict src, size_t size) { -#if defined(ATI_ARCH_X86) -#if defined(__AVX512F__) - for (auto i = 0u; i != size / sizeof(__m512i); ++i) { - _mm512_stream_si512(reinterpret_cast<__m512i* __restrict&>(dst)++, - *reinterpret_cast(src)++); - } - size = size % sizeof(__m512i); -#endif - -#if defined(__AVX__) - for (auto i = 0u; i != size / sizeof(__m256i); ++i) { - _mm256_stream_si256(reinterpret_cast<__m256i* __restrict&>(dst)++, - *reinterpret_cast(src)++); - } - size = size % sizeof(__m256i); -#endif - - for (auto i = 0u; i != size / sizeof(__m128i); ++i) { - _mm_stream_si128(reinterpret_cast<__m128i* __restrict&>(dst)++, - *(reinterpret_cast(src)++)); - } - size = size % sizeof(__m128i); - - for (auto i = 0u; i != size / sizeof(long long); ++i) { - _mm_stream_si64(reinterpret_cast(dst)++, - *reinterpret_cast(src)++); - } - size = size % sizeof(long long); - - for (auto i = 0u; i != size / sizeof(int); ++i) { - _mm_stream_si32(reinterpret_cast(dst)++, - *reinterpret_cast(src)++); - } - - size = size % sizeof(int); - // Copy remaining bytes for unaligned size std::memcpy(dst, src, size); - - // Add memory fence - _mm_sfence(); -#else - std::memcpy(dst, src, size); -#endif } #else static inline void nontemporalMemcpy(void* __restrict dst, const void* __restrict src, @@ -3710,10 +3668,10 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const *dev().info().hdpMemFlushCntl = 1u; auto kSentinel = *reinterpret_cast(dev().info().hdpMemFlushCntl); } else if (kernArgImpl == 
KernelArgImpl::DeviceKernelArgsReadback && argSize != 0) { - _mm_sfence(); + //_mm_sfence(); *(argBuffer + argSize - 1) = *(parameters + argSize - 1); - _mm_mfence(); - auto kSentinel = *reinterpret_cast(argBuffer + argSize - 1); + //_mm_mfence(); + //auto kSentinel = *reinterpret_cast(argBuffer + argSize - 1); } } }