diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 8a291f3ea4..2955db5e37 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -2117,8 +2117,9 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, } } -void ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind, +bool ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind, bool resolveOn) { + bool retStatus = true; ihipCtx_t* ctx = this->getCtx(); const ihipDevice_t* device = ctx->getDevice(); @@ -2163,11 +2164,20 @@ void ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, s sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy); printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo); printPointerInfo(DB_COPY, " src", src, srcPtrInfo); - +#if (__hcc_workweek__ >= 19101) + if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo, + copyDevice ? ©Device->getDevice()->_acc : nullptr, + forceUnpinnedCopy)) { + tprintf(DB_COPY,"locked_copy2DSync failed to use SDMA\n"); + retStatus = false; + } +#else crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo, copyDevice ? ©Device->getDevice()->_acc : nullptr, forceUnpinnedCopy); +#endif } + return retStatus; } void ihipStream_t::addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes) { @@ -2340,8 +2350,9 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes } } -void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind) +bool ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind) { + bool retStatus = true; const ihipCtx_t* ctx = this->getCtx(); if ((ctx == nullptr) || (ctx->getDevice() == nullptr)) { @@ -2376,13 +2387,26 @@ void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, try { if (HIP_FORCE_SYNC_COPY) { +#if (__hcc_workweek__ >= 19101) + if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo, + ©Device->getDevice()->_acc, + forceUnpinnedCopy)){ + tprintf(DB_COPY,"locked_copy2DASync with HIP_FORCE_SYNC_COPY failed to use SDMA\n"); + retStatus = false; + } +#else crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo, ©Device->getDevice()->_acc, forceUnpinnedCopy); +#endif } else { - crit->_av.copy2d_async_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo, + const auto& future = crit->_av.copy2d_async_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo, ©Device->getDevice()->_acc); + if(!future.valid()) { + tprintf(DB_COPY, "locked_copy2DAsync failed to use SDMA\n"); + retStatus = false; + } } } catch (Kalmar::runtime_exception) { throw ihipException(hipErrorRuntimeOther); @@ -2397,10 +2421,20 @@ void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, } else { //Do sync 2D copy LockedAccessor_StreamCrit_t crit(_criticalData); +#if (__hcc_workweek__ >= 19101) + if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo, + copyDevice ? ©Device->getDevice()->_acc : nullptr, + forceUnpinnedCopy)){ + tprintf(DB_COPY, "locked_copy2DAsync Sync copy failed to use SDMA\n"); + retStatus = false; + } +#else crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo, copyDevice ? ©Device->getDevice()->_acc : nullptr, forceUnpinnedCopy); +#endif } + return retStatus; } //------------------------------------------------------------------------------------------------- diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index 95a65b55c9..fb1588cc54 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -529,12 +529,12 @@ class ihipStream_t { void locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn = true); - void locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind, + bool locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind, bool resolveOn = true); void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind); - void locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind); + bool locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind); void lockedSymbolCopySync(hc::accelerator& acc, void* dst, void* src, size_t sizeBytes, size_t offset, unsigned kind); diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 9eebcfb28c..cdd56ae2f8 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1573,7 +1573,10 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch stream->locked_copySync((unsigned char*)dst + i * dpitch, (unsigned char*)src + i * spitch, width, kind); } else { - stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind); + if(!stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind)){ + ihipMemcpy2dKernel (stream, static_cast (dst), static_cast (src), width, height, dpitch, spitch); + stream->locked_wait(); + } } } catch (ihipException& ex) { e = ex._code; @@ -1621,7 +1624,9 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch, (unsigned char*)src + i * spitch, width, kind, stream); } else{ - stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind); + if(!stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind)){ + ihipMemcpy2dKernel (stream, static_cast (dst), static_cast (src), width, height, dpitch, spitch); + } } } catch (ihipException& ex) { e = ex._code;