Refactor copy and P2P logic.
Prefer use of source-engine for DMA copies, even if user submits copy in a stream attached to a different device. The stream is now used only for synchronization, and HIP makes the most optimal decision for which engine to perform the copy - typically the source copy engine. HIP now makes decision on which engine should perform the copy and passes this to HCC using new apis. HIP has additional information about peer visibility and will make a decision which agent should perform the copy . Change-Id: I0cf4cfebeae256e6ca795f08a7ed7130f4857d1f
このコミットが含まれているのは:
+1
-4
@@ -1,4 +1 @@
|
||||
:set tabstop=4
|
||||
:set shiftwidth=4
|
||||
:set expandtab
|
||||
:set smartindent
|
||||
:set makeprg=make\ -C\ build.hcc-LC.db
|
||||
|
||||
+70
-71
@@ -1731,35 +1731,34 @@ void ihipSetTs(hipEvent_t e)
|
||||
// Returns true if thisCtx can see the memory allocated on dstCtx and srcCtx.
|
||||
// The peer-list for a context controls which contexts have access to the memory allocated on that context.
|
||||
// So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx.
|
||||
bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo)
|
||||
// TODO- change these to use dst and src ptr info.
|
||||
bool ihipStream_t::chooseDirectPeerToPeer(const ihipCtx_t *copyEngineCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo)
|
||||
{
|
||||
|
||||
if (dstPtrInfo->_appId != -1) {
|
||||
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
|
||||
ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId);
|
||||
if (thisCtx != dstCtx) {
|
||||
// Only checks peer list if contexts are different
|
||||
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
|
||||
//tprintf(DB_SYNC, "dstCrit lock succeeded\n");
|
||||
if (!ctxCrit->isPeerWatcher(thisCtx)) {
|
||||
return false;
|
||||
};
|
||||
}
|
||||
// Make sure this is a device-to-device copy with all memory available to the requested copy engine
|
||||
//
|
||||
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
|
||||
ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId);
|
||||
if (copyEngineCtx != dstCtx) {
|
||||
// Only checks peer list if contexts are different
|
||||
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
|
||||
//tprintf(DB_SYNC, "dstCrit lock succeeded\n");
|
||||
if (!ctxCrit->isPeerWatcher(copyEngineCtx)) {
|
||||
return false;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (srcPtrInfo->_appId != -1) {
|
||||
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
|
||||
ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId);
|
||||
if (thisCtx != srcCtx) {
|
||||
// Only checks peer list if contexts are different
|
||||
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
|
||||
//tprintf(DB_SYNC, "srcCrit lock succeeded\n");
|
||||
if (!ctxCrit->isPeerWatcher(thisCtx)) {
|
||||
return false;
|
||||
};
|
||||
}
|
||||
|
||||
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
|
||||
ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId);
|
||||
if (copyEngineCtx != srcCtx) {
|
||||
// Only checks peer list if contexts are different
|
||||
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
|
||||
//tprintf(DB_SYNC, "srcCrit lock succeeded\n");
|
||||
if (!ctxCrit->isPeerWatcher(copyEngineCtx)) {
|
||||
return false;
|
||||
};
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -1812,14 +1811,16 @@ unsigned ihipStream_t::resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDev
|
||||
|
||||
|
||||
// hipMemKind must be "resolved" to a specific direction - cannot be default.
|
||||
void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo,
|
||||
hc::hcCommandKind *hcCopyDir, bool *forceP2PCopyEngine)
|
||||
void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
|
||||
const hc::AmPointerInfo *dstPtrInfo,
|
||||
const hc::AmPointerInfo *srcPtrInfo,
|
||||
hc::hcCommandKind *hcCopyDir,
|
||||
ihipCtx_t **copyDevice)
|
||||
{
|
||||
ihipCtx_t *ctx = this->getCtx();
|
||||
// Ignore what the user tells us and always resolve the direction:
|
||||
// Some apps apparently rely on this.
|
||||
hipMemKind = resolveMemcpyDirection(srcPtrInfo->_isInDeviceMem, dstPtrInfo->_isInDeviceMem);
|
||||
|
||||
if (hipMemKind == hipMemcpyDefault) {
|
||||
hipMemKind = resolveMemcpyDirection(srcPtrInfo->_isInDeviceMem, dstPtrInfo->_isInDeviceMem);
|
||||
}
|
||||
|
||||
switch (hipMemKind) {
|
||||
case hipMemcpyHostToHost: *hcCopyDir = hc::hcMemcpyHostToHost; break;
|
||||
@@ -1829,20 +1830,24 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPoi
|
||||
default: throw ihipException(hipErrorRuntimeOther);
|
||||
};
|
||||
|
||||
|
||||
// If this is P2P access, we need to check to see if the copy agent (specified by the stream where the copy is enqueued)
|
||||
// has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers
|
||||
// and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (*forceP2PCopyEngine=true).
|
||||
*forceP2PCopyEngine = false;
|
||||
if (!canSeePeerMemory(ctx, dstPtrInfo, srcPtrInfo)) {
|
||||
*forceP2PCopyEngine = true;
|
||||
tprintf (DB_COPY, "P2P: copy engine(dev:%d) cannot see both host and device pointers - forcing copy through staging buffers.\n", ctx->getDeviceNum());
|
||||
if (srcPtrInfo->_isInDeviceMem) {
|
||||
*copyDevice = ihipGetPrimaryCtx(srcPtrInfo->_appId);
|
||||
} else if (dstPtrInfo->_isInDeviceMem) {
|
||||
*copyDevice = ihipGetPrimaryCtx(dstPtrInfo->_appId);
|
||||
} else {
|
||||
if (HIP_FORCE_P2P_HOST ) {
|
||||
*forceP2PCopyEngine = true;
|
||||
tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n", ctx->getDeviceNum());
|
||||
*copyDevice = nullptr;
|
||||
}
|
||||
|
||||
if (hipMemKind == hipMemcpyDeviceToDevice) {
|
||||
if (chooseDirectPeerToPeer(*copyDevice, dstPtrInfo, srcPtrInfo)) {
|
||||
if (HIP_FORCE_P2P_HOST ) {
|
||||
tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n", (*copyDevice)->getDeviceNum());
|
||||
} else {
|
||||
tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst.\n", (*copyDevice)->getDeviceNum());
|
||||
}
|
||||
} else {
|
||||
tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst.\n", ctx->getDeviceNum());
|
||||
*copyDevice = nullptr;
|
||||
tprintf (DB_COPY, "P2P: copy engine(dev:%d) cannot see both host and device pointers - forcing copy through staging buffers.\n", (*copyDevice)->getDeviceNum());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1866,25 +1871,20 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
|
||||
|
||||
hc::hcCommandKind hcCopyDir;
|
||||
bool forceP2PCopyEngine;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceP2PCopyEngine);
|
||||
ihipCtx_t *copyDevice;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, ©Device);
|
||||
|
||||
|
||||
// copy_ext will use copy-engine to perform the copy. nullptr then
|
||||
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit (_criticalData);
|
||||
#if DISABLE_COPY_EXT
|
||||
#warning ("Disabled copy_ext path, P2P host staging copies will not work")
|
||||
tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d. Call HCC copy\n",
|
||||
ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine);
|
||||
// Note - peer-to-peer copies which require host staging will not work in this path.
|
||||
crit->_av.copy(src, dst, sizeBytes);
|
||||
#else
|
||||
// If srcTracked == dstTracked =1 and forceP2PCopyEngine=0 then we wil use async SDMA. (assuming HCC implementation doesn't override somehow)
|
||||
tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d. Call HCC copy_ext.\n",
|
||||
ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine);
|
||||
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceP2PCopyEngine);
|
||||
#endif
|
||||
tprintf (DB_COPY, "copySync copyDev:%d dst=%p(home_dev:%d, tracked:%d, isDevMem:%d) src=%p(home_dev:%d, tracked:%d, isDevMem:%d) sz=%zu dir=%s\n",
|
||||
copyDevice ? copyDevice->getDeviceNum():-1,
|
||||
dst, dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem,
|
||||
src, srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem,
|
||||
sizeBytes, hcMemcpyStr(hcCopyDir));
|
||||
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, copyDevice ? ©Device->getDevice()->_acc : nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1921,22 +1921,26 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
|
||||
|
||||
hc::hcCommandKind hcCopyDir;
|
||||
bool forceP2PCopyEngine;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceP2PCopyEngine);
|
||||
ihipCtx_t *copyDevice;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, ©Device);
|
||||
|
||||
|
||||
tprintf (DB_COPY, "copyAsync dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d\n",
|
||||
dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine);
|
||||
|
||||
tprintf (DB_COPY, "copyASync copyEngine_dev:%d dst=%p(home_dev:%d, tracked:%d, isDevMem:%d) src=%p(home_dev:%d, tracked:%d, isDevMem:%d) sz=%zu dir=%s . \n",
|
||||
copyDevice->getDeviceNum(),
|
||||
dst, dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem,
|
||||
src, srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem,
|
||||
sizeBytes, hcMemcpyStr(hcCopyDir));
|
||||
|
||||
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
|
||||
// If both pointers are not tracked, we need to fall back to a sync copy.
|
||||
if (dstTracked && srcTracked && !forceP2PCopyEngine) {
|
||||
if (dstTracked && srcTracked && copyDevice) {
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
// Perform asynchronous copy:
|
||||
// Perform fast asynchronous copy:
|
||||
try {
|
||||
crit->_av.copy_async(src, dst, sizeBytes);
|
||||
printf ("forcing copy to use synchronous path: !!!!!\n");
|
||||
//crit->_av.copy_async_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, copyDevice);
|
||||
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, ©Device->getDevice()->_acc );
|
||||
} catch (Kalmar::runtime_exception) {
|
||||
throw ihipException(hipErrorRuntimeOther);
|
||||
};
|
||||
@@ -1949,17 +1953,12 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
|
||||
} else {
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
#if DISABLE_COPY_EXT
|
||||
#warning ("Disabled copy_ext path, P2P host staging copies will not work")
|
||||
// Note - peer-to-peer copies which require host staging will not work in this path.
|
||||
crit->_av.copy(src, dst, sizeBytes);
|
||||
#else
|
||||
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceP2PCopyEngine);
|
||||
#endif
|
||||
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, copyDevice ? ©Device->getDevice()->_acc : nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
//Profiler, really these should live elsewhere:
|
||||
|
||||
+9
-3
@@ -496,10 +496,10 @@ private:
|
||||
|
||||
// The unsigned return is hipMemcpyKind
|
||||
unsigned resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem);
|
||||
void resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo,
|
||||
hc::hcCommandKind *hcCopyDir, bool *forceHostCopyEngine);
|
||||
void resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo,
|
||||
hc::hcCommandKind *hcCopyDir, ihipCtx_t **copyDevice);
|
||||
|
||||
bool canSeePeerMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo);
|
||||
bool chooseDirectPeerToPeer(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo);
|
||||
|
||||
|
||||
private: // Data
|
||||
@@ -764,4 +764,10 @@ inline std::ostream& operator<<(std::ostream& os, const ihipCtx_t* c)
|
||||
}
|
||||
|
||||
|
||||
// Helper functions that are used across src files:
|
||||
namespace hip_internal {
|
||||
hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream);
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
+49
-90
@@ -164,47 +164,50 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if(ctx){
|
||||
// am_alloc requires writeable __acc, perhaps could be refactored?
|
||||
// TODO-P1 - Review and test this logic. Seems :
|
||||
// hipHostMallocPortable should map the host memory into all contexts, regardless of peer status.
|
||||
// peer mappings should always be honored.
|
||||
// hipHostMallocMapped should be ignored on ROCM - all memory is mapped to host.
|
||||
auto device = ctx->getWriteableDevice();
|
||||
// If HIP_COHERENT_HOST_ALLOC is defined, we always alloc coherent host system memroy
|
||||
if (sizeBytes == 0) {
|
||||
hip_status = hipSuccess;
|
||||
// TODO - should size of 0 return err or be siliently ignored?
|
||||
} else if ((ctx==nullptr) || (ptr == nullptr)) {
|
||||
hip_status = hipErrorInvalidValue;
|
||||
} else {
|
||||
unsigned trueFlags = flags;
|
||||
if (flags == hipHostMallocDefault) {
|
||||
trueFlags = hipHostMallocMapped | hipHostMallocWriteCombined;
|
||||
}
|
||||
|
||||
const unsigned supportedFlags = hipHostMallocPortable | hipHostMallocMapped | hipHostMallocWriteCombined;
|
||||
|
||||
if (flags & ~supportedFlags) {
|
||||
hip_status = hipErrorInvalidValue;
|
||||
} else {
|
||||
#if HIP_COHERENT_HOST_ALLOC
|
||||
// TODOD - let's make this an environment variable
|
||||
*ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
|
||||
if(sizeBytes < 1 && (*ptr == NULL)){
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
} else {
|
||||
hc::am_memtracker_update(*ptr, device->_deviceId, amHostCoherent);
|
||||
}
|
||||
tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr);
|
||||
#else
|
||||
if ((flags == hipHostMallocDefault) || (flags == hipHostMallocPortable)) {
|
||||
// TODO - let's make this an environment variable
|
||||
*ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
|
||||
if (sizeBytes < 1 && (*ptr == NULL)) {
|
||||
if(sizeBytes < 1 && (*ptr == NULL)){
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
} else {
|
||||
hc::am_memtracker_update(*ptr, device->_deviceId, amHostPinned);
|
||||
hc::am_memtracker_update(*ptr, device->_deviceId, amHostCoherent);
|
||||
}
|
||||
tprintf(DB_MEM, "allocated pinned_host ptr:%p size:%zu on dev:%d\n", *ptr, sizeBytes, device->_deviceId);
|
||||
} else if(flags & hipHostMallocMapped) {
|
||||
tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr);
|
||||
#else
|
||||
// TODO - am_alloc requires writeable __acc, perhaps could be refactored?
|
||||
// TODO - hipHostMallocMapped is be ignored on ROCM - all memory is mapped to host address space as WC.
|
||||
auto device = ctx->getWriteableDevice();
|
||||
*ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
|
||||
if (sizeBytes && (*ptr == NULL)) {
|
||||
if (*ptr == NULL) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
} else {
|
||||
hc::am_memtracker_update(*ptr, device->_deviceId, flags);
|
||||
// TODO-hipHostMallocPortable should map the host memory into all contexts, regardless of peer status.
|
||||
int peerCnt=0;
|
||||
{
|
||||
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
||||
peerCnt = crit->peerCnt();
|
||||
if (peerCnt) {
|
||||
if (peerCnt > 1) {
|
||||
hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
|
||||
}
|
||||
}
|
||||
tprintf(DB_MEM, "allocated pinned_host ptr:%p size:%zu on dev:%d, allow access to %d peer(s)\n", *ptr, sizeBytes, device->_deviceId, peerCnt);
|
||||
tprintf(DB_MEM, "allocated pinned_host ptr:%p size:%zu on dev:%d and allow access to %d other peer(s)\n", *ptr, sizeBytes, device->_deviceId, peerCnt-1);
|
||||
}
|
||||
}
|
||||
#endif //HIP_COHERENT_HOST_ALLOC
|
||||
@@ -595,10 +598,13 @@ hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes)
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes, kind, stream);
|
||||
|
||||
|
||||
// Internal copy sync:
|
||||
namespace hip_internal {
|
||||
|
||||
hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
|
||||
{
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
@@ -617,86 +623,39 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
return e;
|
||||
}
|
||||
} // end namespace hip_internal
|
||||
|
||||
|
||||
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes, kind, stream);
|
||||
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, kind, stream));
|
||||
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes, stream);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
|
||||
hipMemcpyKind kind = hipMemcpyHostToDevice;
|
||||
|
||||
if ((dst == NULL) || (src == NULL)) {
|
||||
e= hipErrorInvalidValue;
|
||||
} else if (stream) {
|
||||
try {
|
||||
stream->locked_copyAsync((void*)dst, src, sizeBytes, kind);
|
||||
}
|
||||
catch (ihipException ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes, stream);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
hipMemcpyKind kind = hipMemcpyDeviceToDevice;
|
||||
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
|
||||
|
||||
if ((dst == NULL) || (src == NULL)) {
|
||||
e= hipErrorInvalidValue;
|
||||
} else if (stream) {
|
||||
try {
|
||||
stream->locked_copyAsync((void*)dst, (void*)src, sizeBytes, kind);
|
||||
}
|
||||
catch (ihipException ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes, stream);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
|
||||
hipMemcpyKind kind = hipMemcpyDeviceToHost;
|
||||
|
||||
if ((dst == NULL) || (src == NULL)) {
|
||||
e= hipErrorInvalidValue;
|
||||
} else if (stream) {
|
||||
try {
|
||||
stream->locked_copyAsync(dst, (void*)src, sizeBytes, kind);
|
||||
}
|
||||
catch (ihipException ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToHost, stream));
|
||||
}
|
||||
|
||||
// TODO - review and optimize
|
||||
|
||||
@@ -149,7 +149,7 @@ hipError_t hipMemcpyPeer (void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t
|
||||
|
||||
// TODO - move to ihip memory copy implementaion.
|
||||
// HCC has a unified memory architecture so device specifiers are not required.
|
||||
return hipMemcpy(dst, src, sizeBytes, hipMemcpyDefault);
|
||||
return ihipLogStatus(hipMemcpy(dst, src, sizeBytes, hipMemcpyDefault));
|
||||
};
|
||||
|
||||
|
||||
@@ -160,7 +160,7 @@ hipError_t hipMemcpyPeerAsync (void* dst, hipCtx_t dstDevice, const void* src, h
|
||||
|
||||
// TODO - move to ihip memory copy implementaion.
|
||||
// HCC has a unified memory architecture so device specifiers are not required.
|
||||
return hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream);
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream));
|
||||
};
|
||||
|
||||
|
||||
@@ -173,7 +173,7 @@ hipError_t hipMemcpyPeerAsync (void* dst, hipCtx_t dstDevice, const void* src, h
|
||||
hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDeviceId)
|
||||
{
|
||||
HIP_INIT_API(canAccessPeer, deviceId, peerDeviceId);
|
||||
return ihipDeviceCanAccessPeer(canAccessPeer, ihipGetPrimaryCtx(deviceId), ihipGetPrimaryCtx(peerDeviceId));
|
||||
return ihipLogStatus(ihipDeviceCanAccessPeer(canAccessPeer, ihipGetPrimaryCtx(deviceId), ihipGetPrimaryCtx(peerDeviceId)));
|
||||
}
|
||||
|
||||
|
||||
@@ -196,14 +196,14 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags)
|
||||
hipError_t hipMemcpyPeer (void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes);
|
||||
return hipMemcpyPeer(dst, ihipGetPrimaryCtx(dstDevice), src, ihipGetPrimaryCtx(srcDevice), sizeBytes);
|
||||
return ihipLogStatus(hipMemcpyPeer(dst, ihipGetPrimaryCtx(dstDevice), src, ihipGetPrimaryCtx(srcDevice), sizeBytes));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipMemcpyPeerAsync (void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream);
|
||||
return hipMemcpyPeerAsync(dst, ihipGetPrimaryCtx(dstDevice), src, ihipGetPrimaryCtx(srcDevice), sizeBytes, stream);
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream));
|
||||
}
|
||||
|
||||
hipError_t hipCtxEnablePeerAccess (hipCtx_t peerCtx, unsigned int flags)
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする