diff --git a/hipamd/.vimrc b/hipamd/.vimrc index ed64acd347..019afa57e6 100644 --- a/hipamd/.vimrc +++ b/hipamd/.vimrc @@ -1,4 +1 @@ -:set tabstop=4 -:set shiftwidth=4 -:set expandtab -:set smartindent +:set makeprg=make\ -C\ build.hcc-LC.db diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 278ada0c94..b35c2db7ec 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -1731,35 +1731,34 @@ void ihipSetTs(hipEvent_t e) // Returns true if thisCtx can see the memory allocated on dstCtx and srcCtx. // The peer-list for a context controls which contexts have access to the memory allocated on that context. // So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx. -bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo) +// TODO- change these to use dst and src ptr info. +bool ihipStream_t::chooseDirectPeerToPeer(const ihipCtx_t *copyEngineCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo) { - if (dstPtrInfo->_appId != -1) { - // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: - ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId); - if (thisCtx != dstCtx) { - // Only checks peer list if contexts are different - LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData()); - //tprintf(DB_SYNC, "dstCrit lock succeeded\n"); - if (!ctxCrit->isPeerWatcher(thisCtx)) { - return false; - }; - } + // Make sure this is a device-to-device copy with all memory available to the requested copy engine + // + // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: + ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId); + if (copyEngineCtx != dstCtx) { + // Only checks peer list if contexts are different + LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData()); + //tprintf(DB_SYNC, "dstCrit lock succeeded\n"); + if (!ctxCrit->isPeerWatcher(copyEngineCtx)) { + return false; + }; } - - if (srcPtrInfo->_appId != -1) { - // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: - ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId); - if (thisCtx != srcCtx) { - // Only checks peer list if contexts are different - LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData()); - //tprintf(DB_SYNC, "srcCrit lock succeeded\n"); - if (!ctxCrit->isPeerWatcher(thisCtx)) { - return false; - }; - } + + // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: + ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId); + if (copyEngineCtx != srcCtx) { + // Only checks peer list if contexts are different + LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData()); + //tprintf(DB_SYNC, "srcCrit lock succeeded\n"); + if (!ctxCrit->isPeerWatcher(copyEngineCtx)) { + return false; + }; } return true; @@ -1812,14 +1811,16 @@ unsigned ihipStream_t::resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDev // hipMemKind must be "resolved" to a specific direction - cannot be default. -void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo, - hc::hcCommandKind *hcCopyDir, bool *forceP2PCopyEngine) +void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, + const hc::AmPointerInfo *dstPtrInfo, + const hc::AmPointerInfo *srcPtrInfo, + hc::hcCommandKind *hcCopyDir, + ihipCtx_t **copyDevice) { - ihipCtx_t *ctx = this->getCtx(); + // Ignore what the user tells us and always resolve the direction: + // Some apps apparently rely on this. + hipMemKind = resolveMemcpyDirection(srcPtrInfo->_isInDeviceMem, dstPtrInfo->_isInDeviceMem); - if (hipMemKind == hipMemcpyDefault) { - hipMemKind = resolveMemcpyDirection(srcPtrInfo->_isInDeviceMem, dstPtrInfo->_isInDeviceMem); - } switch (hipMemKind) { case hipMemcpyHostToHost: *hcCopyDir = hc::hcMemcpyHostToHost; break; @@ -1829,20 +1830,24 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPoi default: throw ihipException(hipErrorRuntimeOther); }; - - // If this is P2P access, we need to check to see if the copy agent (specified by the stream where the copy is enqueued) - // has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers - // and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (*forceP2PCopyEngine=true). - *forceP2PCopyEngine = false; - if (!canSeePeerMemory(ctx, dstPtrInfo, srcPtrInfo)) { - *forceP2PCopyEngine = true; - tprintf (DB_COPY, "P2P: copy engine(dev:%d) cannot see both host and device pointers - forcing copy through staging buffers.\n", ctx->getDeviceNum()); + if (srcPtrInfo->_isInDeviceMem) { + *copyDevice = ihipGetPrimaryCtx(srcPtrInfo->_appId); + } else if (dstPtrInfo->_isInDeviceMem) { + *copyDevice = ihipGetPrimaryCtx(dstPtrInfo->_appId); } else { - if (HIP_FORCE_P2P_HOST ) { - *forceP2PCopyEngine = true; - tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n", ctx->getDeviceNum()); + *copyDevice = nullptr; + } + + if (hipMemKind == hipMemcpyDeviceToDevice) { + if (chooseDirectPeerToPeer(*copyDevice, dstPtrInfo, srcPtrInfo)) { + if (HIP_FORCE_P2P_HOST ) { + tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n", (*copyDevice)->getDeviceNum()); + } else { + tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst.\n", (*copyDevice)->getDeviceNum()); + } } else { - tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst.\n", ctx->getDeviceNum()); + *copyDevice = nullptr; + tprintf (DB_COPY, "P2P: copy engine(dev:%d) cannot see both host and device pointers - forcing copy through staging buffers.\n", (*copyDevice)->getDeviceNum()); } } } @@ -1866,25 +1871,20 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, hc::hcCommandKind hcCopyDir; - bool forceP2PCopyEngine; - resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceP2PCopyEngine); + ihipCtx_t *copyDevice; + resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, ©Device); + // copy_ext will use copy-engine to perform the copy. nullptr then { LockedAccessor_StreamCrit_t crit (_criticalData); -#if DISABLE_COPY_EXT -#warning ("Disabled copy_ext path, P2P host staging copies will not work") - tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d. Call HCC copy\n", - ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine); - // Note - peer-to-peer copies which require host staging will not work in this path. - crit->_av.copy(src, dst, sizeBytes); -#else - // If srcTracked == dstTracked =1 and forceP2PCopyEngine=0 then we wil use async SDMA. (assuming HCC implementation doesn't override somehow) - tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d. Call HCC copy_ext.\n", - ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine); - crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceP2PCopyEngine); -#endif + tprintf (DB_COPY, "copySync copyDev:%d dst=%p(home_dev:%d, tracked:%d, isDevMem:%d) src=%p(home_dev:%d, tracked:%d, isDevMem:%d) sz=%zu dir=%s\n", + copyDevice ? copyDevice->getDeviceNum():-1, + dst, dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem, + src, srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem, + sizeBytes, hcMemcpyStr(hcCopyDir)); + crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, copyDevice ? ©Device->getDevice()->_acc : nullptr); } } @@ -1921,22 +1921,26 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes hc::hcCommandKind hcCopyDir; - bool forceP2PCopyEngine; - resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceP2PCopyEngine); + ihipCtx_t *copyDevice; + resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, ©Device); - tprintf (DB_COPY, "copyAsync dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d\n", - dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine); - + tprintf (DB_COPY, "copyASync copyEngine_dev:%d dst=%p(home_dev:%d, tracked:%d, isDevMem:%d) src=%p(home_dev:%d, tracked:%d, isDevMem:%d) sz=%zu dir=%s . \n", + copyDevice->getDeviceNum(), + dst, dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem, + src, srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem, + sizeBytes, hcMemcpyStr(hcCopyDir)); // "tracked" really indicates if the pointer's virtual address is available in the GPU address space. // If both pointers are not tracked, we need to fall back to a sync copy. - if (dstTracked && srcTracked && !forceP2PCopyEngine) { + if (dstTracked && srcTracked && copyDevice) { LockedAccessor_StreamCrit_t crit(_criticalData); - // Perform asynchronous copy: + // Perform fast asynchronous copy: try { - crit->_av.copy_async(src, dst, sizeBytes); + printf ("forcing copy to use synchronous path: !!!!!\n"); + //crit->_av.copy_async_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, copyDevice); + crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, ©Device->getDevice()->_acc ); } catch (Kalmar::runtime_exception) { throw ihipException(hipErrorRuntimeOther); }; @@ -1949,17 +1953,12 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes } else { LockedAccessor_StreamCrit_t crit(_criticalData); -#if DISABLE_COPY_EXT -#warning ("Disabled copy_ext path, P2P host staging copies will not work") - // Note - peer-to-peer copies which require host staging will not work in this path. - crit->_av.copy(src, dst, sizeBytes); -#else - crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceP2PCopyEngine); -#endif + crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, copyDevice ? ©Device->getDevice()->_acc : nullptr); } } } + //------------------------------------------------------------------------------------------------- //------------------------------------------------------------------------------------------------- //Profiler, really these should live elsewhere: diff --git a/hipamd/src/hip_hcc.h b/hipamd/src/hip_hcc.h index d8b7030e4f..ca759ba78d 100644 --- a/hipamd/src/hip_hcc.h +++ b/hipamd/src/hip_hcc.h @@ -496,10 +496,10 @@ private: // The unsigned return is hipMemcpyKind unsigned resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem); - void resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo, - hc::hcCommandKind *hcCopyDir, bool *forceHostCopyEngine); + void resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo, + hc::hcCommandKind *hcCopyDir, ihipCtx_t **copyDevice); - bool canSeePeerMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo); + bool chooseDirectPeerToPeer(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo); private: // Data @@ -764,4 +764,10 @@ inline std::ostream& operator<<(std::ostream& os, const ihipCtx_t* c) } +// Helper functions that are used across src files: +namespace hip_internal { + hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream); +}; + + #endif diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 5be319d9ed..2f1eb1e27f 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -164,47 +164,50 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) auto ctx = ihipGetTlsDefaultCtx(); - if(ctx){ - // am_alloc requires writeable __acc, perhaps could be refactored? - // TODO-P1 - Review and test this logic. Seems : - // hipHostMallocPortable should map the host memory into all contexts, regardless of peer status. - // peer mappings should always be honored. - // hipHostMallocMapped should be ignored on ROCM - all memory is mapped to host. - auto device = ctx->getWriteableDevice(); - // If HIP_COHERENT_HOST_ALLOC is defined, we always alloc coherent host system memroy + if (sizeBytes == 0) { + hip_status = hipSuccess; + // TODO - should size of 0 return err or be siliently ignored? + } else if ((ctx==nullptr) || (ptr == nullptr)) { + hip_status = hipErrorInvalidValue; + } else { + unsigned trueFlags = flags; + if (flags == hipHostMallocDefault) { + trueFlags = hipHostMallocMapped | hipHostMallocWriteCombined; + } + + const unsigned supportedFlags = hipHostMallocPortable | hipHostMallocMapped | hipHostMallocWriteCombined; + + if (flags & ~supportedFlags) { + hip_status = hipErrorInvalidValue; + } else { #if HIP_COHERENT_HOST_ALLOC - // TODOD - let's make this an environment variable - *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); - if(sizeBytes < 1 && (*ptr == NULL)){ - hip_status = hipErrorMemoryAllocation; - } else { - hc::am_memtracker_update(*ptr, device->_deviceId, amHostCoherent); - } - tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); -#else - if ((flags == hipHostMallocDefault) || (flags == hipHostMallocPortable)) { + // TODO - let's make this an environment variable *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); - if (sizeBytes < 1 && (*ptr == NULL)) { + if(sizeBytes < 1 && (*ptr == NULL)){ hip_status = hipErrorMemoryAllocation; } else { - hc::am_memtracker_update(*ptr, device->_deviceId, amHostPinned); + hc::am_memtracker_update(*ptr, device->_deviceId, amHostCoherent); } - tprintf(DB_MEM, "allocated pinned_host ptr:%p size:%zu on dev:%d\n", *ptr, sizeBytes, device->_deviceId); - } else if(flags & hipHostMallocMapped) { + tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); +#else + // TODO - am_alloc requires writeable __acc, perhaps could be refactored? + // TODO - hipHostMallocMapped is be ignored on ROCM - all memory is mapped to host address space as WC. + auto device = ctx->getWriteableDevice(); *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); - if (sizeBytes && (*ptr == NULL)) { + if (*ptr == NULL) { hip_status = hipErrorMemoryAllocation; } else { hc::am_memtracker_update(*ptr, device->_deviceId, flags); + // TODO-hipHostMallocPortable should map the host memory into all contexts, regardless of peer status. int peerCnt=0; { LockedAccessor_CtxCrit_t crit(ctx->criticalData()); peerCnt = crit->peerCnt(); - if (peerCnt) { + if (peerCnt > 1) { hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); } } - tprintf(DB_MEM, "allocated pinned_host ptr:%p size:%zu on dev:%d, allow access to %d peer(s)\n", *ptr, sizeBytes, device->_deviceId, peerCnt); + tprintf(DB_MEM, "allocated pinned_host ptr:%p size:%zu on dev:%d and allow access to %d other peer(s)\n", *ptr, sizeBytes, device->_deviceId, peerCnt-1); } } #endif //HIP_COHERENT_HOST_ALLOC @@ -595,10 +598,13 @@ hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) return ihipLogStatus(e); } -hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) -{ - HIP_INIT_API(dst, src, sizeBytes, kind, stream); + +// Internal copy sync: +namespace hip_internal { + +hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) +{ hipError_t e = hipSuccess; stream = ihipSyncAndResolveStream(stream); @@ -617,86 +623,39 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp e = hipErrorInvalidValue; } - return ihipLogStatus(e); + return e; } +} // end namespace hip_internal + + +hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) +{ + HIP_INIT_API(dst, src, sizeBytes, kind, stream); + + return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, kind, stream)); + +} + hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); - hipError_t e = hipSuccess; - - stream = ihipSyncAndResolveStream(stream); - - hipMemcpyKind kind = hipMemcpyHostToDevice; - - if ((dst == NULL) || (src == NULL)) { - e= hipErrorInvalidValue; - } else if (stream) { - try { - stream->locked_copyAsync((void*)dst, src, sizeBytes, kind); - } - catch (ihipException ex) { - e = ex._code; - } - } else { - e = hipErrorInvalidValue; - } - - return ihipLogStatus(e); + return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyHostToDevice, stream)); } hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); - hipError_t e = hipSuccess; - - hipMemcpyKind kind = hipMemcpyDeviceToDevice; - - stream = ihipSyncAndResolveStream(stream); - - - if ((dst == NULL) || (src == NULL)) { - e= hipErrorInvalidValue; - } else if (stream) { - try { - stream->locked_copyAsync((void*)dst, (void*)src, sizeBytes, kind); - } - catch (ihipException ex) { - e = ex._code; - } - } else { - e = hipErrorInvalidValue; - } - - return ihipLogStatus(e); + return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream)); } hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, src, sizeBytes, stream); - hipError_t e = hipSuccess; - - stream = ihipSyncAndResolveStream(stream); - - hipMemcpyKind kind = hipMemcpyDeviceToHost; - - if ((dst == NULL) || (src == NULL)) { - e= hipErrorInvalidValue; - } else if (stream) { - try { - stream->locked_copyAsync(dst, (void*)src, sizeBytes, kind); - } - catch (ihipException ex) { - e = ex._code; - } - } else { - e = hipErrorInvalidValue; - } - - return ihipLogStatus(e); + return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToHost, stream)); } // TODO - review and optimize diff --git a/hipamd/src/hip_peer.cpp b/hipamd/src/hip_peer.cpp index 95ea4719a9..b0e4eeef52 100644 --- a/hipamd/src/hip_peer.cpp +++ b/hipamd/src/hip_peer.cpp @@ -149,7 +149,7 @@ hipError_t hipMemcpyPeer (void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t // TODO - move to ihip memory copy implementaion. // HCC has a unified memory architecture so device specifiers are not required. - return hipMemcpy(dst, src, sizeBytes, hipMemcpyDefault); + return ihipLogStatus(hipMemcpy(dst, src, sizeBytes, hipMemcpyDefault)); }; @@ -160,7 +160,7 @@ hipError_t hipMemcpyPeerAsync (void* dst, hipCtx_t dstDevice, const void* src, h // TODO - move to ihip memory copy implementaion. // HCC has a unified memory architecture so device specifiers are not required. - return hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream); + return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream)); }; @@ -173,7 +173,7 @@ hipError_t hipMemcpyPeerAsync (void* dst, hipCtx_t dstDevice, const void* src, h hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDeviceId) { HIP_INIT_API(canAccessPeer, deviceId, peerDeviceId); - return ihipDeviceCanAccessPeer(canAccessPeer, ihipGetPrimaryCtx(deviceId), ihipGetPrimaryCtx(peerDeviceId)); + return ihipLogStatus(ihipDeviceCanAccessPeer(canAccessPeer, ihipGetPrimaryCtx(deviceId), ihipGetPrimaryCtx(peerDeviceId))); } @@ -196,14 +196,14 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) hipError_t hipMemcpyPeer (void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes) { HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes); - return hipMemcpyPeer(dst, ihipGetPrimaryCtx(dstDevice), src, ihipGetPrimaryCtx(srcDevice), sizeBytes); + return ihipLogStatus(hipMemcpyPeer(dst, ihipGetPrimaryCtx(dstDevice), src, ihipGetPrimaryCtx(srcDevice), sizeBytes)); } hipError_t hipMemcpyPeerAsync (void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream) { HIP_INIT_API(dst, dstDevice, src, srcDevice, sizeBytes, stream); - return hipMemcpyPeerAsync(dst, ihipGetPrimaryCtx(dstDevice), src, ihipGetPrimaryCtx(srcDevice), sizeBytes, stream); + return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream)); } hipError_t hipCtxEnablePeerAccess (hipCtx_t peerCtx, unsigned int flags)