Update hipHostRegister debug and pointerTracker debug and notes

[ROCm/clr commit: e43592721e]
This commit is contained in:
Ben Sander
2017-02-17 15:43:09 -06:00
parent c8595b0373
commit a0932fbcc3
3 changed files with 87 additions and 18 deletions
+28 -12
View File
@@ -1803,6 +1803,20 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
}
void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo)
{
tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n",
tag, ptr,
ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes,
ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged);
}
// TODO : For registered and host memory, if the portable flag is set, we need to recognize that and perform appropriate copy operation.
// What can happen now is that Portable memory is mapped into multiple devices but Peer access is not enabled. i
// The peer detection logic doesn't see that the memory is already mapped and so tries to use an unpinned copy algorithm. If this is PinInPlace, then an error can occur.
// Need to track Portable flag correctly or use new RT functionality to query the peer status for the pointer.
//
// TODO - remove kind parm from here or use it below?
void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn)
{
@@ -1819,6 +1833,16 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS);
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
// Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not valid, so check it here:
if (!dstTracked) {
assert (dstPtrInfo._sizeBytes == 0);
}
if (!srcTracked) {
assert (srcPtrInfo._sizeBytes == 0);
}
hc::hcCommandKind hcCopyDir;
ihipCtx_t *copyDevice;
bool forceUnpinnedCopy;
@@ -1831,12 +1855,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem,
src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem,
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
tprintf (DB_COPY, " dst=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n",
dst, dstPtrInfo._hostPointer, dstPtrInfo._devicePointer, dstPtrInfo._sizeBytes,
dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem);
tprintf (DB_COPY, " src=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n",
src, srcPtrInfo._hostPointer, srcPtrInfo._devicePointer, srcPtrInfo._sizeBytes,
srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem);
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
this->ensureHaveQueue(crit);
@@ -1921,12 +1941,8 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem,
src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem,
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
tprintf (DB_COPY, " dst=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n",
dst, dstPtrInfo._hostPointer, dstPtrInfo._devicePointer, dstPtrInfo._sizeBytes,
dstPtrInfo._appId, dstTracked, dstPtrInfo._isInDeviceMem);
tprintf (DB_COPY, " src=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d\n",
src, srcPtrInfo._hostPointer, srcPtrInfo._devicePointer, srcPtrInfo._sizeBytes,
srcPtrInfo._appId, srcTracked, srcPtrInfo._isInDeviceMem);
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
// If both pointers are not tracked, we need to fall back to a sync copy.
+21 -1
View File
@@ -35,6 +35,14 @@ THE SOFTWARE.
// Memory
//
//
//
//HIP uses several "app*" fields HC memory tracker to track state necessary for the HIP API.
//_appId : DeviceID. For device mem, this is device where the memory is physically allocated.
// For host or registered mem, this is the current device when the memory is allocated or registered. This device will have a GPUVM mapping for the host mem.
//
//_appAllocationFlags : These are flags provided by the user when allocation is performed. They are returned to user in hipHostGetFlags and other APIs.
// TODO - add more info here when available.
//
hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr)
{
HIP_INIT_API(attributes, ptr);
@@ -78,6 +86,7 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr)
return ihipLogStatus(e);
}
hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsigned flags)
{
HIP_INIT_API(devicePointer, hostPointer, flags);
@@ -102,6 +111,7 @@ hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsi
return ihipLogStatus(e);
}
hipError_t hipMalloc(void** ptr, size_t sizeBytes)
{
HIP_INIT_API(ptr, sizeBytes);
@@ -227,16 +237,20 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
return ihipLogStatus(hip_status);
}
// Deprecated function:
hipError_t hipMallocHost(void** ptr, size_t sizeBytes)
{
return hipHostMalloc(ptr, sizeBytes, 0);
}
// Deprecated function:
hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags)
{
return hipHostMalloc(ptr, sizeBytes, flags);
};
// width in bytes
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height)
{
@@ -374,6 +388,8 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr)
return ihipLogStatus(hip_status);
}
// TODO - need to fix several issues here related to P2P access, host memory fallback.
hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
{
HIP_INIT_API(hostPtr, sizeBytes, flags);
@@ -406,7 +422,7 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size());
hc::am_memtracker_update(hostPtr, device->_deviceId, flags);
tprintf(DB_MEM, " %s registered ptr=%p\n", __func__, hostPtr);
tprintf(DB_MEM, " %s registered ptr=%p and allowed access to %zu peers\n", __func__, hostPtr, vecAcc.size());
if(am_status == AM_SUCCESS){
hip_status = hipSuccess;
} else {
@@ -605,6 +621,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind
return ihipLogStatus(e);
}
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes)
{
HIP_INIT_CMD_API(dst, src, sizeBytes);
@@ -626,6 +643,7 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes)
return ihipLogStatus(e);
}
hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes)
{
HIP_INIT_CMD_API(dst, src, sizeBytes);
@@ -647,6 +665,7 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes)
return ihipLogStatus(e);
}
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
{
HIP_INIT_CMD_API(dst, src, sizeBytes);
@@ -668,6 +687,7 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte
return ihipLogStatus(e);
}
hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes)
{
HIP_INIT_CMD_API(dst, src, sizeBytes);
@@ -45,17 +45,13 @@ int main(){
A[i] = float(1);
}
// Copy to B, this should be optimal pinned malloc copy:
float *B;
HIPCHECK(hipMalloc(&B, size));
HIPCHECK(hipMemcpy(B, A, size, hipMemcpyHostToDevice));
for(int i=0;i<num_devices;i++){
HIPCHECK(hipSetDevice(i));
HIPCHECK(hipHostGetDevicePointer((void**)&Ad[i], A, 0));
}
// Use device pointer inside a kernel:
for(int i=0;i<num_devices;i++){
HIPCHECK(hipSetDevice(i));
hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, 0, Ad[i]);
@@ -63,6 +59,43 @@ int main(){
HIPCHECK(hipDeviceSynchronize());
}
{
// Senstizes HIP bug if device does not match where the memory was registered.
HIPCHECK(hipSetDevice(0));
// Copy to B, this should be optimal pinned malloc copy:
// Note we are using the host pointer here:
float *Bh, *Bd;
Bh = (float*)malloc(size);
HIPCHECK(hipMalloc(&Bd, size));
for(int i=0;i<N;i++){
A[i] = float(i);
Bh[i] = 0.0f;
}
HIPCHECK(hipMemcpy(Bd, A, size, hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy(Bh, Bd, size, hipMemcpyDeviceToHost));
#if 0
//TODO - disable check until we update HCC to deal with registered memory pointers.
for(int i=0;i<N;i++){
if (Bh[i] != A[i]) {
printf ("mismatch at Bh[%d]=%f, A[%d]=%f\n", i, Bh[i], i, A[i]);
failed("mismatch");
};
}
#endif
// Make sure the copy worked
}
HIPASSERT(A[10] == 1.0f + float(num_devices));
HIPCHECK(hipHostUnregister(A));
passed();