From 4f5bdb071cef41d1e35082d43453fb147bb5d8c3 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Fri, 18 May 2018 21:09:50 +0530 Subject: [PATCH] Fix for memcpy2DAsync for pinned host memory case --- hipamd/src/hip_memory.cpp | 93 ++++++++++++++++++++++++++++++--------- 1 file changed, 72 insertions(+), 21 deletions(-) diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 8f4b64c51e..e2202e7860 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -1303,27 +1303,6 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { return ihipLogStatus(e); } -hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream); - if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue); - hipError_t e = hipSuccess; - if((width == dpitch) && (width == spitch)) { - hip_internal::memcpyAsync(dst, src, width*height, kind, stream); - } else { - try { - for (int i = 0; i < height; ++i) { - e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch, - (unsigned char*)src + i * spitch, width, kind, stream); - } - } catch (ihipException& ex) { - e = ex._code; - } - } - - return ihipLogStatus(e); -} - hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, wOffset, hOffset, src, spitch, width, height, kind); @@ -1554,6 +1533,19 @@ inline const T& clamp_integer(const T& x, const T& lower, const T& upper) { return std::min(upper, std::max(x, lower)); } + +template +__global__ void hip_copy_n(T* dst, const T* src, size_t n) { + const uint32_t grid_dim = gridDim.x * blockDim.x; + + size_t idx = blockIdx.x * block_dim + threadIdx.x; + while (idx < n) { + // __builtin_memcpy(reinterpret_cast(dst+idx), reinterpret_cast(src+idx), + // sizeof(T)); + dst[idx] = src[idx]; + idx += grid_dim; + } +} } // namespace template @@ -1566,6 +1558,16 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) { sizeBytes, std::move(val)); } +template +void ihipMemcpyKernel(hipStream_t stream, T* dst, const T* src, size_t sizeBytes) { + static constexpr uint32_t block_dim_ = 256; + + const uint32_t grid_dim = clamp_integer(sizeBytes / block_dim_, 1, UINT32_MAX); + + hipLaunchKernelGGL(hip_copy_n, dim3(grid_dim), dim3{block_dim_}, 0u, stream, dst, src, + sizeBytes); +} + typedef enum ihipMemsetDataType { ihipMemsetDataTypeChar = 0, ihipMemsetDataTypeShort = 1, @@ -1623,6 +1625,55 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t strea return e; }; +int isLockedPointer(const void *ptr) +{ + hsa_amd_pointer_info_t info; + int isLocked = 0; + + info.size = sizeof(info); + hsa_status_t hsa_status = hsa_amd_pointer_info(const_cast(ptr), &info, nullptr, nullptr, nullptr); + if(hsa_status != HSA_STATUS_SUCCESS) { + return -1; + } + + if((info.type == HSA_EXT_POINTER_TYPE_HSA) || (info.type == HSA_EXT_POINTER_TYPE_LOCKED)) { + isLocked = 1; + } + + return isLocked; +} + +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream); + if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue); + hipError_t e = hipSuccess; + int isLocked = 0; + if(kind == hipMemcpyHostToDevice) { + isLocked = isLockedPointer(src); + } else if(kind == hipMemcpyDeviceToHost) { + isLocked = isLockedPointer(dst); + } + if((width == dpitch) && (width == spitch)) { + hip_internal::memcpyAsync(dst, src, width*height, kind, stream); + } else { + try { + for (int i = 0; i < height; ++i) { + if(!isLocked) { + e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch, + (unsigned char*)src + i * spitch, width, kind, stream); + } else{ + size_t sizeBytes = width*height; + ihipMemcpyKernel (stream, static_cast (dst), static_cast (src), sizeBytes/sizeof(uint32_t)); + } + } + } catch (ihipException& ex) { + e = ex._code; + } + } + + return ihipLogStatus(e); +} // TODO-sync: function is async unless target is pinned host memory - then these are fully sync. hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) {