Merge pull request #962 from gargrahul/add_2d_copy_fallback

Add 2D fallback to use copy kernel
This commit is contained in:
Maneesh Gupta
2019-03-25 07:46:43 +00:00
committed by GitHub
3 changed files with 47 additions and 8 deletions
+38 -4
View File
@@ -2117,8 +2117,9 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
}
}
void ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
bool ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
bool resolveOn) {
bool retStatus = true;
ihipCtx_t* ctx = this->getCtx();
const ihipDevice_t* device = ctx->getDevice();
@@ -2163,11 +2164,20 @@ void ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, s
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
#if (__hcc_workweek__ >= 19101)
if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
copyDevice ? &copyDevice->getDevice()->_acc : nullptr,
forceUnpinnedCopy)) {
tprintf(DB_COPY,"locked_copy2DSync failed to use SDMA\n");
retStatus = false;
}
#else
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
copyDevice ? &copyDevice->getDevice()->_acc : nullptr,
forceUnpinnedCopy);
#endif
}
return retStatus;
}
void ihipStream_t::addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes) {
@@ -2340,8 +2350,9 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
}
}
void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind)
bool ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind)
{
bool retStatus = true;
const ihipCtx_t* ctx = this->getCtx();
if ((ctx == nullptr) || (ctx->getDevice() == nullptr)) {
@@ -2376,13 +2387,26 @@ void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width,
try {
if (HIP_FORCE_SYNC_COPY) {
#if (__hcc_workweek__ >= 19101)
if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
&copyDevice->getDevice()->_acc,
forceUnpinnedCopy)){
tprintf(DB_COPY,"locked_copy2DASync with HIP_FORCE_SYNC_COPY failed to use SDMA\n");
retStatus = false;
}
#else
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
&copyDevice->getDevice()->_acc,
forceUnpinnedCopy);
#endif
} else {
crit->_av.copy2d_async_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
const auto& future = crit->_av.copy2d_async_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
&copyDevice->getDevice()->_acc);
if(!future.valid()) {
tprintf(DB_COPY, "locked_copy2DAsync failed to use SDMA\n");
retStatus = false;
}
}
} catch (Kalmar::runtime_exception) {
throw ihipException(hipErrorRuntimeOther);
@@ -2397,10 +2421,20 @@ void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width,
} else {
//Do sync 2D copy
LockedAccessor_StreamCrit_t crit(_criticalData);
#if (__hcc_workweek__ >= 19101)
if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
copyDevice ? &copyDevice->getDevice()->_acc : nullptr,
forceUnpinnedCopy)){
tprintf(DB_COPY, "locked_copy2DAsync Sync copy failed to use SDMA\n");
retStatus = false;
}
#else
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
copyDevice ? &copyDevice->getDevice()->_acc : nullptr,
forceUnpinnedCopy);
#endif
}
return retStatus;
}
//-------------------------------------------------------------------------------------------------
+2 -2
View File
@@ -529,12 +529,12 @@ class ihipStream_t {
void locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind,
bool resolveOn = true);
void locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
bool locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
bool resolveOn = true);
void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
void locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind);
bool locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind);
void lockedSymbolCopySync(hc::accelerator& acc, void* dst, void* src, size_t sizeBytes,
size_t offset, unsigned kind);
+7 -2
View File
@@ -1573,7 +1573,10 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch
stream->locked_copySync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind);
} else {
stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind);
if(!stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind)){
ihipMemcpy2dKernel<uint8_t> (stream, static_cast<uint8_t*> (dst), static_cast<const uint8_t*> (src), width, height, dpitch, spitch);
stream->locked_wait();
}
}
} catch (ihipException& ex) {
e = ex._code;
@@ -1621,7 +1624,9 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind, stream);
} else{
stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind);
if(!stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind)){
ihipMemcpy2dKernel<uint8_t> (stream, static_cast<uint8_t*> (dst), static_cast<const uint8_t*> (src), width, height, dpitch, spitch);
}
}
} catch (ihipException& ex) {
e = ex._code;