Add 2D fallback to use copy kernel

[ROCm/clr commit: af72cde0a1]
Этот коммит содержится в:
Rahul Garg
2019-03-14 13:03:06 +05:30
родитель 32fc302ba1
Коммит da6653482d
3 изменённых файлов: 48 добавлений и 9 удалений
+39 -5
Просмотреть файл
@@ -2115,8 +2115,9 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
}
}
void ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
bool ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
bool resolveOn) {
bool retStatus = true;
ihipCtx_t* ctx = this->getCtx();
const ihipDevice_t* device = ctx->getDevice();
@@ -2161,11 +2162,20 @@ void ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, s
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
#if (__hcc_workweek__ >= 19062)
if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
copyDevice ? &copyDevice->getDevice()->_acc : nullptr,
forceUnpinnedCopy)) {
tprintf(DB_COPY,"locked_copy2DSync failed to use SDMA\n");
retStatus = false;
}
#else
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
copyDevice ? &copyDevice->getDevice()->_acc : nullptr,
forceUnpinnedCopy);
#endif
}
return retStatus;
}
void ihipStream_t::addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes) {
@@ -2338,8 +2348,9 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
}
}
void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind)
bool ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind)
{
bool retStatus = true;
const ihipCtx_t* ctx = this->getCtx();
if ((ctx == nullptr) || (ctx->getDevice() == nullptr)) {
@@ -2374,13 +2385,26 @@ void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width,
try {
if (HIP_FORCE_SYNC_COPY) {
#if (__hcc_workweek__ >= 19062)
if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
&copyDevice->getDevice()->_acc,
forceUnpinnedCopy)){
tprintf(DB_COPY,"locked_copy2DASync with HIP_FORCE_SYNC_COPY failed to use SDMA\n");
retStatus = false;
}
#else
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
&copyDevice->getDevice()->_acc,
forceUnpinnedCopy);
#endif
} else {
crit->_av.copy2d_async_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
const auto& future = crit->_av.copy2d_async_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
&copyDevice->getDevice()->_acc);
if(!future.valid()) {
tprintf(DB_COPY, "locked_copy2DAsync failed to use SDMA\n");
retStatus = false;
}
}
} catch (Kalmar::runtime_exception) {
throw ihipException(hipErrorRuntimeOther);
@@ -2395,10 +2419,20 @@ void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width,
} else {
//Do sync 2D copy
LockedAccessor_StreamCrit_t crit(_criticalData);
#if (__hcc_workweek__ >= 19062)
if(!crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
copyDevice ? &copyDevice->getDevice()->_acc : nullptr,
forceUnpinnedCopy)){
tprintf(DB_COPY, "locked_copy2DAsync Sync copy failed to use SDMA\n");
retStatus = false;
}
#else
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
copyDevice ? &copyDevice->getDevice()->_acc : nullptr,
forceUnpinnedCopy);
#endif
}
return retStatus;
}
//-------------------------------------------------------------------------------------------------
@@ -2485,4 +2519,4 @@ namespace hip_impl {
std::terminate();
#endif
}
} // Namespace hip_impl.
} // Namespace hip_impl.
+2 -2
Просмотреть файл
@@ -535,12 +535,12 @@ class ihipStream_t {
void locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind,
bool resolveOn = true);
void locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
bool locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
bool resolveOn = true);
void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
void locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind);
bool locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind);
void lockedSymbolCopySync(hc::accelerator& acc, void* dst, void* src, size_t sizeBytes,
size_t offset, unsigned kind);
+7 -2
Просмотреть файл
@@ -1573,7 +1573,10 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch
stream->locked_copySync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind);
} else {
stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind);
if(!stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind)){
ihipMemcpy2dKernel<uint8_t> (stream, static_cast<uint8_t*> (dst), static_cast<const uint8_t*> (src), width, height, dpitch, spitch);
stream->locked_wait();
}
}
} catch (ihipException& ex) {
e = ex._code;
@@ -1621,7 +1624,9 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind, stream);
} else{
stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind);
if(!stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind)){
ihipMemcpy2dKernel<uint8_t> (stream, static_cast<uint8_t*> (dst), static_cast<const uint8_t*> (src), width, height, dpitch, spitch);
}
}
} catch (ihipException& ex) {
e = ex._code;