Update hipHostRegister debug and pointerTracker debug and notes
[ROCm/clr commit: e43592721e]
This commit is contained in:
@@ -1803,6 +1803,20 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
|
||||
}
|
||||
|
||||
|
||||
void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo)
|
||||
{
|
||||
tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n",
|
||||
tag, ptr,
|
||||
ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes,
|
||||
ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged);
|
||||
}
|
||||
|
||||
|
||||
// TODO : For registered and host memory, if the portable flag is set, we need to recognize that and perform appropriate copy operation.
|
||||
// What can happen now is that Portable memory is mapped into multiple devices but Peer access is not enabled. i
|
||||
// The peer detection logic doesn't see that the memory is already mapped and so tries to use an unpinned copy algorithm. If this is PinInPlace, then an error can occur.
|
||||
// Need to track Portable flag correctly or use new RT functionality to query the peer status for the pointer.
|
||||
//
|
||||
// TODO - remove kind parm from here or use it below?
|
||||
void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn)
|
||||
{
|
||||
@@ -1819,6 +1833,16 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS);
|
||||
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
|
||||
|
||||
|
||||
// Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not valid, so check it here:
|
||||
if (!dstTracked) {
|
||||
assert (dstPtrInfo._sizeBytes == 0);
|
||||
}
|
||||
if (!srcTracked) {
|
||||
assert (srcPtrInfo._sizeBytes == 0);
|
||||
}
|
||||
|
||||
|
||||
hc::hcCommandKind hcCopyDir;
|
||||
ihipCtx_t *copyDevice;
|
||||
bool forceUnpinnedCopy;
|
||||
@@ -1831,12 +1855,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem,
|
||||
src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem,
|
||||
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
|
||||
tprintf (DB_COPY, " dst=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n",
|
||||
dst, dstPtrInfo._hostPointer, dstPtrInfo._devicePointer, dstPtrInfo._sizeBytes,
|
||||
dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem);
|
||||
tprintf (DB_COPY, " src=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n",
|
||||
src, srcPtrInfo._hostPointer, srcPtrInfo._devicePointer, srcPtrInfo._sizeBytes,
|
||||
srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem);
|
||||
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
|
||||
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
|
||||
|
||||
this->ensureHaveQueue(crit);
|
||||
|
||||
@@ -1921,12 +1941,8 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem,
|
||||
src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem,
|
||||
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
|
||||
tprintf (DB_COPY, " dst=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n",
|
||||
dst, dstPtrInfo._hostPointer, dstPtrInfo._devicePointer, dstPtrInfo._sizeBytes,
|
||||
dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem);
|
||||
tprintf (DB_COPY, " src=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n",
|
||||
src, srcPtrInfo._hostPointer, srcPtrInfo._devicePointer, srcPtrInfo._sizeBytes,
|
||||
srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem);
|
||||
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
|
||||
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
|
||||
|
||||
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
|
||||
// If both pointers are not tracked, we need to fall back to a sync copy.
|
||||
|
||||
@@ -35,6 +35,14 @@ THE SOFTWARE.
|
||||
// Memory
|
||||
//
|
||||
//
|
||||
//
|
||||
//HIP uses several "app*" fields HC memory tracker to track state necessary for the HIP API.
|
||||
//_appId : DeviceID. For device mem, this is device where the memory is physically allocated.
|
||||
// For host or registered mem, this is the current device when the memory is allocated or registered. This device will have a GPUVM mapping for the host mem.
|
||||
//
|
||||
//_appAllocationFlags : These are flags provided by the user when allocation is performed. They are returned to user in hipHostGetFlags and other APIs.
|
||||
// TODO - add more info here when available.
|
||||
//
|
||||
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr)
|
||||
{
|
||||
HIP_INIT_API(attributes, ptr);
|
||||
@@ -78,6 +86,7 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr)
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsigned flags)
|
||||
{
|
||||
HIP_INIT_API(devicePointer, hostPointer, flags);
|
||||
@@ -102,6 +111,7 @@ hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsi
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipMalloc(void** ptr, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_API(ptr, sizeBytes);
|
||||
@@ -227,16 +237,20 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
// Deprecated function:
|
||||
hipError_t hipMallocHost(void** ptr, size_t sizeBytes)
|
||||
{
|
||||
return hipHostMalloc(ptr, sizeBytes, 0);
|
||||
}
|
||||
|
||||
|
||||
// Deprecated function:
|
||||
hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
{
|
||||
return hipHostMalloc(ptr, sizeBytes, flags);
|
||||
};
|
||||
|
||||
|
||||
// width in bytes
|
||||
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height)
|
||||
{
|
||||
@@ -374,6 +388,8 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr)
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
|
||||
// TODO - need to fix several issues here related to P2P access, host memory fallback.
|
||||
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(hostPtr, sizeBytes, flags);
|
||||
@@ -406,7 +422,7 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
|
||||
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size());
|
||||
hc::am_memtracker_update(hostPtr, device->_deviceId, flags);
|
||||
|
||||
tprintf(DB_MEM, " %s registered ptr=%p\n", __func__, hostPtr);
|
||||
tprintf(DB_MEM, " %s registered ptr=%p and allowed access to %zu peers\n", __func__, hostPtr, vecAcc.size());
|
||||
if(am_status == AM_SUCCESS){
|
||||
hip_status = hipSuccess;
|
||||
} else {
|
||||
@@ -605,6 +621,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes);
|
||||
@@ -626,6 +643,7 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes)
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes);
|
||||
@@ -647,6 +665,7 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes)
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes);
|
||||
@@ -668,6 +687,7 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes);
|
||||
|
||||
@@ -45,17 +45,13 @@ int main(){
|
||||
A[i] = float(1);
|
||||
}
|
||||
|
||||
// Copy to B, this should be optimal pinned malloc copy:
|
||||
float *B;
|
||||
HIPCHECK(hipMalloc(&B, size));
|
||||
HIPCHECK(hipMemcpy(B, A, size, hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
for(int i=0;i<num_devices;i++){
|
||||
HIPCHECK(hipSetDevice(i));
|
||||
HIPCHECK(hipHostGetDevicePointer((void**)&Ad[i], A, 0));
|
||||
}
|
||||
|
||||
// Use device pointer inside a kernel:
|
||||
for(int i=0;i<num_devices;i++){
|
||||
HIPCHECK(hipSetDevice(i));
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, 0, Ad[i]);
|
||||
@@ -63,6 +59,43 @@ int main(){
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
// Senstizes HIP bug if device does not match where the memory was registered.
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
|
||||
// Copy to B, this should be optimal pinned malloc copy:
|
||||
// Note we are using the host pointer here:
|
||||
float *Bh, *Bd;
|
||||
Bh = (float*)malloc(size);
|
||||
HIPCHECK(hipMalloc(&Bd, size));
|
||||
|
||||
for(int i=0;i<N;i++){
|
||||
A[i] = float(i);
|
||||
Bh[i] = 0.0f;
|
||||
}
|
||||
|
||||
HIPCHECK(hipMemcpy(Bd, A, size, hipMemcpyHostToDevice));
|
||||
|
||||
HIPCHECK(hipMemcpy(Bh, Bd, size, hipMemcpyDeviceToHost));
|
||||
|
||||
#if 0
|
||||
//TODO - disable check until we update HCC to deal with registered memory pointers.
|
||||
for(int i=0;i<N;i++){
|
||||
if (Bh[i] != A[i]) {
|
||||
printf ("mismatch at Bh[%d]=%f, A[%d]=%f\n", i, Bh[i], i, A[i]);
|
||||
failed("mismatch");
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Make sure the copy worked
|
||||
}
|
||||
|
||||
|
||||
|
||||
HIPASSERT(A[10] == 1.0f + float(num_devices));
|
||||
HIPCHECK(hipHostUnregister(A));
|
||||
passed();
|
||||
|
||||
Reference in New Issue
Block a user