diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp index fec5bb6a8a..760f46046a 100644 --- a/projects/clr/hipamd/src/hip_hcc.cpp +++ b/projects/clr/hipamd/src/hip_hcc.cpp @@ -1803,6 +1803,20 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, } +void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo) +{ + tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n", + tag, ptr, + ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes, + ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged); +} + + +// TODO : For registered and host memory, if the portable flag is set, we need to recognize that and perform appropriate copy operation. +// What can happen now is that Portable memory is mapped into multiple devices but Peer access is not enabled. i +// The peer detection logic doesn't see that the memory is already mapped and so tries to use an unpinned copy algorithm. If this is PinInPlace, then an error can occur. +// Need to track Portable flag correctly or use new RT functionality to query the peer status for the pointer. +// // TODO - remove kind parm from here or use it below? void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn) { @@ -1819,6 +1833,16 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS); bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS); + + // Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not valid, so check it here: + if (!dstTracked) { + assert (dstPtrInfo._sizeBytes == 0); + } + if (!srcTracked) { + assert (srcPtrInfo._sizeBytes == 0); + } + + hc::hcCommandKind hcCopyDir; ihipCtx_t *copyDevice; bool forceUnpinnedCopy; @@ -1831,12 +1855,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem, src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem, sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy); - tprintf (DB_COPY, " dst=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n", - dst, dstPtrInfo._hostPointer, dstPtrInfo._devicePointer, dstPtrInfo._sizeBytes, - dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem); - tprintf (DB_COPY, " src=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n", - src, srcPtrInfo._hostPointer, srcPtrInfo._devicePointer, srcPtrInfo._sizeBytes, - srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem); + printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo); + printPointerInfo(DB_COPY, " src", src, srcPtrInfo); this->ensureHaveQueue(crit); @@ -1921,12 +1941,8 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem, src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem, sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy); - tprintf (DB_COPY, " dst=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n", - dst, dstPtrInfo._hostPointer, dstPtrInfo._devicePointer, dstPtrInfo._sizeBytes, - dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem); - tprintf (DB_COPY, " src=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n", - src, srcPtrInfo._hostPointer, srcPtrInfo._devicePointer, srcPtrInfo._sizeBytes, - srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem); + printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo); + printPointerInfo(DB_COPY, " src", src, srcPtrInfo); // "tracked" really indicates if the pointer's virtual address is available in the GPU address space. // If both pointers are not tracked, we need to fall back to a sync copy. diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index 314dee97dc..b8a1d1646a 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -35,6 +35,14 @@ THE SOFTWARE. // Memory // // +// +//HIP uses several "app*" fields HC memory tracker to track state necessary for the HIP API. +//_appId : DeviceID. For device mem, this is device where the memory is physically allocated. +// For host or registered mem, this is the current device when the memory is allocated or registered. This device will have a GPUVM mapping for the host mem. +// +//_appAllocationFlags : These are flags provided by the user when allocation is performed. They are returned to user in hipHostGetFlags and other APIs. +// TODO - add more info here when available. +// hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr) { HIP_INIT_API(attributes, ptr); @@ -78,6 +86,7 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr) return ihipLogStatus(e); } + hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsigned flags) { HIP_INIT_API(devicePointer, hostPointer, flags); @@ -102,6 +111,7 @@ hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsi return ihipLogStatus(e); } + hipError_t hipMalloc(void** ptr, size_t sizeBytes) { HIP_INIT_API(ptr, sizeBytes); @@ -227,16 +237,20 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) return ihipLogStatus(hip_status); } +// Deprecated function: hipError_t hipMallocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); } + +// Deprecated function: hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { return hipHostMalloc(ptr, sizeBytes, flags); }; + // width in bytes hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { @@ -374,6 +388,8 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) return ihipLogStatus(hip_status); } + +// TODO - need to fix several issues here related to P2P access, host memory fallback. hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hostPtr, sizeBytes, flags); @@ -406,7 +422,7 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size()); hc::am_memtracker_update(hostPtr, device->_deviceId, flags); - tprintf(DB_MEM, " %s registered ptr=%p\n", __func__, hostPtr); + tprintf(DB_MEM, " %s registered ptr=%p and allowed access to %zu peers\n", __func__, hostPtr, vecAcc.size()); if(am_status == AM_SUCCESS){ hip_status = hipSuccess; } else { @@ -605,6 +621,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind return ihipLogStatus(e); } + hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { HIP_INIT_CMD_API(dst, src, sizeBytes); @@ -626,6 +643,7 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) return ihipLogStatus(e); } + hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) { HIP_INIT_CMD_API(dst, src, sizeBytes); @@ -647,6 +665,7 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) return ihipLogStatus(e); } + hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) { HIP_INIT_CMD_API(dst, src, sizeBytes); @@ -668,6 +687,7 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte return ihipLogStatus(e); } + hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { HIP_INIT_CMD_API(dst, src, sizeBytes); diff --git a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp index 6c81ec0d91..eae73e1a65 100644 --- a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp +++ b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostRegister.cpp @@ -45,17 +45,13 @@ int main(){ A[i] = float(1); } - // Copy to B, this should be optimal pinned malloc copy: - float *B; - HIPCHECK(hipMalloc(&B, size)); - HIPCHECK(hipMemcpy(B, A, size, hipMemcpyHostToDevice)); - for(int i=0;i