Refactor asyncCopy and syncCopy to fix deadlock case.

- Minimize time that locks are held.
- Eliminate copy code that locked stream and ctx at same time.
    - Stream was locked to ensure thread-safe enqueue to the queue.
    - Devices were locked to query peer-lists.

Change-Id: Ibe8880bb7fb995a3da8f90ff911f212d81525018


[ROCm/hip commit: 4ff6dc8f38]
このコミットが含まれているのは:
Ben Sander
2016-09-27 15:27:21 -05:00
コミット 7fc988bc45
3個のファイルの変更26行の追加34行の削除
+9 -3
ファイルの表示
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Link errors represented as this:Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -371,10 +371,17 @@ public:
};
// if HIP code needs to acquire locks for both ihipCtx_t and ihipStream_t, it should first acquire the lock
// for the ihipCtx_t and then for the individual streams. The locks should not be acquired in reverse order
// or deadlock may occur. In some cases, it may be possible to reduce the range where the locks must be held.
// HIP routines should avoid acquiring and releasing the same lock during the execution of a single HIP API.
typedef ihipStreamCriticalBase_t<StreamMutex> ihipStreamCritical_t;
typedef LockedAccessor<ihipStreamCritical_t> LockedAccessor_StreamCrit_t;
//---
// Internal stream structure.
class ihipStream_t {
public:
@@ -383,11 +390,10 @@ typedef uint64_t SeqNum_t ;
~ihipStream_t();
// kind is hipMemcpyKind
void copySync (LockedAccessor_StreamCrit_t &crit, void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn = true);
void locked_copySync (void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn = true);
void copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
//---
+13 -27
ファイルの表示
@@ -1409,7 +1409,7 @@ unsigned ihipStream_t::resolveMemcpyDirection(bool srcTracked, bool dstTracked,
// TODO - remove kind parm from here or use it below?
void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn)
void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn)
{
ihipCtx_t *ctx = this->getCtx();
const ihipDevice_t *device = ctx->getDevice();
@@ -1436,7 +1436,7 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
};
// If this is P2P accessi, we need to check to see if the copy agent (specified by the stream where the copy is enqueued)
// If this is P2P access, we need to check to see if the copy agent (specified by the stream where the copy is enqueued)
// has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers
// and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (forceHostCopyEngine=true).
bool forceHostCopyEngine = false;
@@ -1449,21 +1449,15 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
}
};
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine);
{
LockedAccessor_StreamCrit_t crit (_criticalData);
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine);
}
}
// Sync copy that acquires lock:
void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn)
void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind)
{
LockedAccessor_StreamCrit_t crit (_criticalData);
copySync(crit, dst, src, sizeBytes, kind, resolveOn);
}
void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind)
{
LockedAccessor_StreamCrit_t crit(_criticalData);
const ihipCtx_t *ctx = this->getCtx();
@@ -1478,12 +1472,12 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
/* As this is a CPU op, we need to wait until all
the commands in current stream are finished.
*/
LockedAccessor_StreamCrit_t crit(_criticalData);
this->wait(crit);
memcpy(dst, src, sizeBytes);
} else {
bool trueAsync = true;
hc::accelerator acc;
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
@@ -1498,17 +1492,12 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
}
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
// If both pointers are not tracked, we need to fall back to a sync copy.
if (!dstTracked || !srcTracked || !copyEngineCanSeeSrcAndDest) {
trueAsync = false;
}
if (dstTracked && srcTracked && copyEngineCanSeeSrcAndDest) {
LockedAccessor_StreamCrit_t crit(_criticalData);
if (trueAsync == true) {
// Perform a synchronous copy:
// Perform asynchronous copy:
try {
crit->_av.copy_async(src, dst, sizeBytes);
} catch (Kalmar::runtime_exception) {
@@ -1520,12 +1509,9 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
tprintf(DB_SYNC, "LAUNCH_BLOCKING for completion of hipMemcpyAsync(%zu)\n", sizeBytes);
this->wait(crit);
}
} else {
// Perform a synchronous copy:
if (kind == hipMemcpyDefault) {
kind = resolveMemcpyDirection(srcTracked, dstTracked, srcPtrInfo._isInDeviceMem, dstPtrInfo._isInDeviceMem);
}
copySync(crit, dst, src, sizeBytes, kind);
locked_copySync(dst, src, sizeBytes, kind);
}
}
}
+4 -4
ファイルの表示
@@ -508,7 +508,7 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp
e= hipErrorInvalidValue;
} else if (stream) {
try {
stream->copyAsync(dst, src, sizeBytes, kind);
stream->locked_copyAsync(dst, src, sizeBytes, kind);
}
catch (ihipException ex) {
e = ex._code;
@@ -534,7 +534,7 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, h
e= hipErrorInvalidValue;
} else if (stream) {
try {
stream->copyAsync((void*)dst, src, sizeBytes, kind);
stream->locked_copyAsync((void*)dst, src, sizeBytes, kind);
}
catch (ihipException ex) {
e = ex._code;
@@ -561,7 +561,7 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz
e= hipErrorInvalidValue;
} else if (stream) {
try {
stream->copyAsync((void*)dst, (void*)src, sizeBytes, kind);
stream->locked_copyAsync((void*)dst, (void*)src, sizeBytes, kind);
}
catch (ihipException ex) {
e = ex._code;
@@ -587,7 +587,7 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, h
e= hipErrorInvalidValue;
} else if (stream) {
try {
stream->copyAsync(dst, (void*)src, sizeBytes, kind);
stream->locked_copyAsync(dst, (void*)src, sizeBytes, kind);
}
catch (ihipException ex) {
e = ex._code;