Merge pull request #608 from gargrahul/add_pinned_2d_sdma_copy
Added support for pinned 2D SDMA copy
Tá an tiomantas seo le fáil i:
@@ -2114,6 +2114,59 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
}
|
||||
}
|
||||
|
||||
void ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
|
||||
bool resolveOn) {
|
||||
ihipCtx_t* ctx = this->getCtx();
|
||||
const ihipDevice_t* device = ctx->getDevice();
|
||||
|
||||
if (device == NULL) {
|
||||
throw ihipException(hipErrorInvalidDevice);
|
||||
}
|
||||
size_t sizeBytes = width*height;
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
|
||||
|
||||
// Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not
|
||||
// // valid, so check it here:
|
||||
if (!dstTracked) {
|
||||
assert(dstPtrInfo._sizeBytes == 0);
|
||||
}
|
||||
if (!srcTracked) {
|
||||
assert(srcPtrInfo._sizeBytes == 0);
|
||||
}
|
||||
|
||||
|
||||
hc::hcCommandKind hcCopyDir;
|
||||
ihipCtx_t* copyDevice;
|
||||
bool forceUnpinnedCopy;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, ©Device,
|
||||
&forceUnpinnedCopy);
|
||||
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
tprintf(DB_COPY,
|
||||
"copy2DSync copyDev:%d dst=%p (phys_dev:%d, isDevMem:%d) src=%p(phys_dev:%d, "
|
||||
"isDevMem:%d) sz=%zu dir=%s forceUnpinnedCopy=%d\n",
|
||||
copyDevice ? copyDevice->getDeviceNum() : -1, dst, dstPtrInfo._appId,
|
||||
dstPtrInfo._isInDeviceMem, src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem,
|
||||
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
|
||||
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
|
||||
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
|
||||
|
||||
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
copyDevice ? ©Device->getDevice()->_acc : nullptr,
|
||||
forceUnpinnedCopy);
|
||||
}
|
||||
}
|
||||
|
||||
void ihipStream_t::addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes) {
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo ptrInfo(NULL, ptr, ptr, sizeBytes, acc, true, false);
|
||||
@@ -2284,6 +2337,68 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
}
|
||||
}
|
||||
|
||||
void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind)
|
||||
{
|
||||
const ihipCtx_t* ctx = this->getCtx();
|
||||
|
||||
if ((ctx == nullptr) || (ctx->getDevice() == nullptr)) {
|
||||
tprintf(DB_COPY, "locked_copy2DAsync bad ctx or device\n");
|
||||
throw ihipException(hipErrorInvalidDevice);
|
||||
}
|
||||
hc::accelerator acc;
|
||||
size_t sizeBytes = width*height;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
tprintf(DB_COPY, "copy2DAsync dst=%p src=%p, sz=%zu\n", dst, src, sizeBytes);
|
||||
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
|
||||
|
||||
|
||||
hc::hcCommandKind hcCopyDir;
|
||||
ihipCtx_t* copyDevice;
|
||||
bool forceUnpinnedCopy;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, ©Device,
|
||||
&forceUnpinnedCopy);
|
||||
tprintf(DB_COPY, " copyDev:%d dir=%s forceUnpinnedCopy=%d\n",
|
||||
copyDevice ? copyDevice->getDeviceNum() : -1, hcMemcpyStr(hcCopyDir),
|
||||
forceUnpinnedCopy);
|
||||
if (dstTracked && srcTracked && !forceUnpinnedCopy &&
|
||||
copyDevice /*code below assumes this is !nullptr*/) {
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
try {
|
||||
if (HIP_FORCE_SYNC_COPY) {
|
||||
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
©Device->getDevice()->_acc,
|
||||
forceUnpinnedCopy);
|
||||
|
||||
} else {
|
||||
crit->_av.copy2d_async_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
©Device->getDevice()->_acc);
|
||||
}
|
||||
} catch (Kalmar::runtime_exception) {
|
||||
throw ihipException(hipErrorRuntimeOther);
|
||||
};
|
||||
|
||||
if (HIP_API_BLOCKING) {
|
||||
tprintf(DB_SYNC, "%s LAUNCH_BLOCKING for completion of hipMemcpy2DAsync(sz=%zu)\n",
|
||||
ToString(this).c_str(), sizeBytes);
|
||||
this->wait(crit);
|
||||
}
|
||||
|
||||
} else {
|
||||
//Do sync 2D copy
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
|
||||
copyDevice ? ©Device->getDevice()->_acc : nullptr,
|
||||
forceUnpinnedCopy);
|
||||
}
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
|
||||
@@ -533,8 +533,14 @@ class ihipStream_t {
|
||||
// kind is hipMemcpyKind
|
||||
void locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind,
|
||||
bool resolveOn = true);
|
||||
|
||||
void locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
|
||||
bool resolveOn = true);
|
||||
|
||||
void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
|
||||
|
||||
void locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind);
|
||||
|
||||
void lockedSymbolCopySync(hc::accelerator& acc, void* dst, void* src, size_t sizeBytes,
|
||||
size_t offset, unsigned kind);
|
||||
void lockedSymbolCopyAsync(hc::accelerator& acc, void* dst, void* src, size_t sizeBytes,
|
||||
|
||||
@@ -1637,8 +1637,7 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch
|
||||
stream->locked_copySync((unsigned char*)dst + i * dpitch,
|
||||
(unsigned char*)src + i * spitch, width, kind);
|
||||
} else {
|
||||
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (actualDest), static_cast<const uint32_t*> (actualSrc), width, height, dpitch, spitch);
|
||||
stream->locked_wait();
|
||||
stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind);
|
||||
}
|
||||
} catch (ihipException& ex) {
|
||||
e = ex._code;
|
||||
@@ -1686,7 +1685,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
|
||||
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
|
||||
(unsigned char*)src + i * spitch, width, kind, stream);
|
||||
} else{
|
||||
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (actualDest), static_cast<const uint32_t*> (actualSrc), width, height, dpitch, spitch);
|
||||
stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind);
|
||||
}
|
||||
} catch (ihipException& ex) {
|
||||
e = ex._code;
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir