Merge pull request #608 from gargrahul/add_pinned_2d_sdma_copy

Added support for pinned 2D SDMA copy
Tá an tiomantas seo le fáil i:
Maneesh Gupta
2018-12-12 07:44:16 +05:30
tiomanta ag GitHub
tuismitheoir f3d8d3d989 1e57764378
tiomantas 0dd26b4f63
D'athraigh 3 comhad le 123 breiseanna agus 3 scriosta
+115
Féach ar an gComhad
@@ -2114,6 +2114,59 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
}
}
void ihipStream_t::locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
bool resolveOn) {
ihipCtx_t* ctx = this->getCtx();
const ihipDevice_t* device = ctx->getDevice();
if (device == NULL) {
throw ihipException(hipErrorInvalidDevice);
}
size_t sizeBytes = width*height;
hc::accelerator acc;
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo dstPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
hc::AmPointerInfo srcPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
#else
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
#endif
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
// Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not
// // valid, so check it here:
if (!dstTracked) {
assert(dstPtrInfo._sizeBytes == 0);
}
if (!srcTracked) {
assert(srcPtrInfo._sizeBytes == 0);
}
hc::hcCommandKind hcCopyDir;
ihipCtx_t* copyDevice;
bool forceUnpinnedCopy;
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &copyDevice,
&forceUnpinnedCopy);
{
LockedAccessor_StreamCrit_t crit(_criticalData);
tprintf(DB_COPY,
"copy2DSync copyDev:%d dst=%p (phys_dev:%d, isDevMem:%d) src=%p(phys_dev:%d, "
"isDevMem:%d) sz=%zu dir=%s forceUnpinnedCopy=%d\n",
copyDevice ? copyDevice->getDeviceNum() : -1, dst, dstPtrInfo._appId,
dstPtrInfo._isInDeviceMem, src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem,
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
copyDevice ? &copyDevice->getDevice()->_acc : nullptr,
forceUnpinnedCopy);
}
}
void ihipStream_t::addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes) {
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo ptrInfo(NULL, ptr, ptr, sizeBytes, acc, true, false);
@@ -2284,6 +2337,68 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
}
}
void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind)
{
const ihipCtx_t* ctx = this->getCtx();
if ((ctx == nullptr) || (ctx->getDevice() == nullptr)) {
tprintf(DB_COPY, "locked_copy2DAsync bad ctx or device\n");
throw ihipException(hipErrorInvalidDevice);
}
hc::accelerator acc;
size_t sizeBytes = width*height;
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo dstPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
hc::AmPointerInfo srcPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0);
#else
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
#endif
tprintf(DB_COPY, "copy2DAsync dst=%p src=%p, sz=%zu\n", dst, src, sizeBytes);
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
hc::hcCommandKind hcCopyDir;
ihipCtx_t* copyDevice;
bool forceUnpinnedCopy;
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &copyDevice,
&forceUnpinnedCopy);
tprintf(DB_COPY, " copyDev:%d dir=%s forceUnpinnedCopy=%d\n",
copyDevice ? copyDevice->getDeviceNum() : -1, hcMemcpyStr(hcCopyDir),
forceUnpinnedCopy);
if (dstTracked && srcTracked && !forceUnpinnedCopy &&
copyDevice /*code below assumes this is !nullptr*/) {
LockedAccessor_StreamCrit_t crit(_criticalData);
try {
if (HIP_FORCE_SYNC_COPY) {
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
&copyDevice->getDevice()->_acc,
forceUnpinnedCopy);
} else {
crit->_av.copy2d_async_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
&copyDevice->getDevice()->_acc);
}
} catch (Kalmar::runtime_exception) {
throw ihipException(hipErrorRuntimeOther);
};
if (HIP_API_BLOCKING) {
tprintf(DB_SYNC, "%s LAUNCH_BLOCKING for completion of hipMemcpy2DAsync(sz=%zu)\n",
ToString(this).c_str(), sizeBytes);
this->wait(crit);
}
} else {
//Do sync 2D copy
LockedAccessor_StreamCrit_t crit(_criticalData);
crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo,
copyDevice ? &copyDevice->getDevice()->_acc : nullptr,
forceUnpinnedCopy);
}
}
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
+6
Féach ar an gComhad
@@ -533,8 +533,14 @@ class ihipStream_t {
// kind is hipMemcpyKind
void locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind,
bool resolveOn = true);
void locked_copy2DSync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind,
bool resolveOn = true);
void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
void locked_copy2DAsync(void* dst, const void* src, size_t width, size_t height, size_t srcPitch, size_t dstPitch, unsigned kind);
void lockedSymbolCopySync(hc::accelerator& acc, void* dst, void* src, size_t sizeBytes,
size_t offset, unsigned kind);
void lockedSymbolCopyAsync(hc::accelerator& acc, void* dst, void* src, size_t sizeBytes,
+2 -3
Féach ar an gComhad
@@ -1637,8 +1637,7 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch
stream->locked_copySync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind);
} else {
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (actualDest), static_cast<const uint32_t*> (actualSrc), width, height, dpitch, spitch);
stream->locked_wait();
stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind);
}
} catch (ihipException& ex) {
e = ex._code;
@@ -1686,7 +1685,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind, stream);
} else{
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (actualDest), static_cast<const uint32_t*> (actualSrc), width, height, dpitch, spitch);
stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind);
}
} catch (ihipException& ex) {
e = ex._code;