Merge pull request #962 from gargrahul/add_2d_copy_fallback
Add 2D fallback to use copy kernel
This commit is contained in:
+38
-4
@@ -2117,8 +2117,9 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
}
|
||||
}
|
||||
|
||||
void ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
|
||||
bool ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
|
||||
bool resolveOn) {
|
||||
bool retStatus = true;
|
||||
ihipCtx_t* ctx = this->getCtx();
|
||||
const ihipDevice_t* device = ctx->getDevice();
|
||||
|
||||
@@ -2163,11 +2164,20 @@ void ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, s
|
||||
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
|
||||
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
|
||||
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
|
||||
|
||||
#if (__hcc_workweek__ >= 19101)
|
||||
if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
copyDevice ? ©Device->getDevice()->_acc : nullptr,
|
||||
forceUnpinnedCopy)) {
|
||||
tprintf(DB_COPY,"locked_copy2DSync failed to use SDMA\n");
|
||||
retStatus = false;
|
||||
}
|
||||
#else
|
||||
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
copyDevice ? ©Device->getDevice()->_acc : nullptr,
|
||||
forceUnpinnedCopy);
|
||||
#endif
|
||||
}
|
||||
return retStatus;
|
||||
}
|
||||
|
||||
void ihipStream_t::addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes) {
|
||||
@@ -2340,8 +2350,9 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
}
|
||||
}
|
||||
|
||||
void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind)
|
||||
bool ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind)
|
||||
{
|
||||
bool retStatus = true;
|
||||
const ihipCtx_t* ctx = this->getCtx();
|
||||
|
||||
if ((ctx == nullptr) || (ctx->getDevice() == nullptr)) {
|
||||
@@ -2376,13 +2387,26 @@ void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width,
|
||||
|
||||
try {
|
||||
if (HIP_FORCE_SYNC_COPY) {
|
||||
#if (__hcc_workweek__ >= 19101)
|
||||
if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
©Device->getDevice()->_acc,
|
||||
forceUnpinnedCopy)){
|
||||
tprintf(DB_COPY,"locked_copy2DASync with HIP_FORCE_SYNC_COPY failed to use SDMA\n");
|
||||
retStatus = false;
|
||||
}
|
||||
#else
|
||||
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
©Device->getDevice()->_acc,
|
||||
forceUnpinnedCopy);
|
||||
#endif
|
||||
|
||||
} else {
|
||||
crit->_av.copy2d_async_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
const auto& future = crit->_av.copy2d_async_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
©Device->getDevice()->_acc);
|
||||
if(!future.valid()) {
|
||||
tprintf(DB_COPY, "locked_copy2DAsync failed to use SDMA\n");
|
||||
retStatus = false;
|
||||
}
|
||||
}
|
||||
} catch (Kalmar::runtime_exception) {
|
||||
throw ihipException(hipErrorRuntimeOther);
|
||||
@@ -2397,10 +2421,20 @@ void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width,
|
||||
} else {
|
||||
//Do sync 2D copy
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
#if (__hcc_workweek__ >= 19101)
|
||||
if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
copyDevice ? ©Device->getDevice()->_acc : nullptr,
|
||||
forceUnpinnedCopy)){
|
||||
tprintf(DB_COPY, "locked_copy2DAsync Sync copy failed to use SDMA\n");
|
||||
retStatus = false;
|
||||
}
|
||||
#else
|
||||
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
copyDevice ? ©Device->getDevice()->_acc : nullptr,
|
||||
forceUnpinnedCopy);
|
||||
#endif
|
||||
}
|
||||
return retStatus;
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
|
||||
@@ -529,12 +529,12 @@ class ihipStream_t {
|
||||
void locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind,
|
||||
bool resolveOn = true);
|
||||
|
||||
void locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
|
||||
bool locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
|
||||
bool resolveOn = true);
|
||||
|
||||
void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
|
||||
|
||||
void locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind);
|
||||
bool locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind);
|
||||
|
||||
void lockedSymbolCopySync(hc::accelerator& acc, void* dst, void* src, size_t sizeBytes,
|
||||
size_t offset, unsigned kind);
|
||||
|
||||
+7
-2
@@ -1573,7 +1573,10 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch
|
||||
stream->locked_copySync((unsigned char*)dst + i * dpitch,
|
||||
(unsigned char*)src + i * spitch, width, kind);
|
||||
} else {
|
||||
stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind);
|
||||
if(!stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind)){
|
||||
ihipMemcpy2dKernel<uint8_t> (stream, static_cast<uint8_t*> (dst), static_cast<const uint8_t*> (src), width, height, dpitch, spitch);
|
||||
stream->locked_wait();
|
||||
}
|
||||
}
|
||||
} catch (ihipException& ex) {
|
||||
e = ex._code;
|
||||
@@ -1621,7 +1624,9 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
|
||||
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
|
||||
(unsigned char*)src + i * spitch, width, kind, stream);
|
||||
} else{
|
||||
stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind);
|
||||
if(!stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind)){
|
||||
ihipMemcpy2dKernel<uint8_t> (stream, static_cast<uint8_t*> (dst), static_cast<const uint8_t*> (src), width, height, dpitch, spitch);
|
||||
}
|
||||
}
|
||||
} catch (ihipException& ex) {
|
||||
e = ex._code;
|
||||
|
||||
Reference in New Issue
Block a user