Refactor asyncCopy and syncCopy to fix deadlock case.
- Minimize time that locks are held.
- Eliminate copy code that locked stream and ctx at same time.
- Stream was locked to ensure thread-safe enqueue to the queue.
- Devices were locked to query peer-lists.
Change-Id: Ibe8880bb7fb995a3da8f90ff911f212d81525018
[ROCm/hip commit: 4ff6dc8f38]
このコミットが含まれているのは:
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Link errors represented as this:Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
@@ -371,10 +371,17 @@ public:
|
||||
};
|
||||
|
||||
|
||||
// if HIP code needs to acquire locks for both ihipCtx_t and ihipStream_t, it should first acquire the lock
|
||||
// for the ihipCtx_t and then for the individual streams. The locks should not be acquired in reverse order
|
||||
// or deadlock may occur. In some cases, it may be possible to reduce the range where the locks must be held.
|
||||
// HIP routines should avoid acquiring and releasing the same lock during the execution of a single HIP API.
|
||||
|
||||
|
||||
typedef ihipStreamCriticalBase_t<StreamMutex> ihipStreamCritical_t;
|
||||
typedef LockedAccessor<ihipStreamCritical_t> LockedAccessor_StreamCrit_t;
|
||||
|
||||
|
||||
//---
|
||||
// Internal stream structure.
|
||||
class ihipStream_t {
|
||||
public:
|
||||
@@ -383,11 +390,10 @@ typedef uint64_t SeqNum_t ;
|
||||
~ihipStream_t();
|
||||
|
||||
// kind is hipMemcpyKind
|
||||
void copySync (LockedAccessor_StreamCrit_t &crit, void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn = true);
|
||||
void locked_copySync (void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn = true);
|
||||
|
||||
|
||||
void copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
|
||||
void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
|
||||
|
||||
|
||||
//---
|
||||
|
||||
+13
-27
@@ -1409,7 +1409,7 @@ unsigned ihipStream_t::resolveMemcpyDirection(bool srcTracked, bool dstTracked,
|
||||
|
||||
|
||||
// TODO - remove kind parm from here or use it below?
|
||||
void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn)
|
||||
void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn)
|
||||
{
|
||||
ihipCtx_t *ctx = this->getCtx();
|
||||
const ihipDevice_t *device = ctx->getDevice();
|
||||
@@ -1436,7 +1436,7 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
|
||||
};
|
||||
|
||||
|
||||
// If this is P2P accessi, we need to check to see if the copy agent (specified by the stream where the copy is enqueued)
|
||||
// If this is P2P access, we need to check to see if the copy agent (specified by the stream where the copy is enqueued)
|
||||
// has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers
|
||||
// and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (forceHostCopyEngine=true).
|
||||
bool forceHostCopyEngine = false;
|
||||
@@ -1449,21 +1449,15 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
|
||||
}
|
||||
};
|
||||
|
||||
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine);
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit (_criticalData);
|
||||
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Sync copy that acquires lock:
|
||||
void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn)
|
||||
void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind)
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit (_criticalData);
|
||||
copySync(crit, dst, src, sizeBytes, kind, resolveOn);
|
||||
}
|
||||
|
||||
|
||||
void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind)
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
const ihipCtx_t *ctx = this->getCtx();
|
||||
|
||||
@@ -1478,12 +1472,12 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
|
||||
/* As this is a CPU op, we need to wait until all
|
||||
the commands in current stream are finished.
|
||||
*/
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
this->wait(crit);
|
||||
|
||||
memcpy(dst, src, sizeBytes);
|
||||
|
||||
} else {
|
||||
bool trueAsync = true;
|
||||
|
||||
hc::accelerator acc;
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
@@ -1498,17 +1492,12 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
|
||||
// If both pointers are not tracked, we need to fall back to a sync copy.
|
||||
if (!dstTracked || !srcTracked || !copyEngineCanSeeSrcAndDest) {
|
||||
trueAsync = false;
|
||||
}
|
||||
if (dstTracked && srcTracked && copyEngineCanSeeSrcAndDest) {
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
|
||||
if (trueAsync == true) {
|
||||
// Perform a synchronous copy:
|
||||
// Perform asynchronous copy:
|
||||
try {
|
||||
crit->_av.copy_async(src, dst, sizeBytes);
|
||||
} catch (Kalmar::runtime_exception) {
|
||||
@@ -1520,12 +1509,9 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
|
||||
tprintf(DB_SYNC, "LAUNCH_BLOCKING for completion of hipMemcpyAsync(%zu)\n", sizeBytes);
|
||||
this->wait(crit);
|
||||
}
|
||||
|
||||
} else {
|
||||
// Perform a synchronous copy:
|
||||
if (kind == hipMemcpyDefault) {
|
||||
kind = resolveMemcpyDirection(srcTracked, dstTracked, srcPtrInfo._isInDeviceMem, dstPtrInfo._isInDeviceMem);
|
||||
}
|
||||
copySync(crit, dst, src, sizeBytes, kind);
|
||||
locked_copySync(dst, src, sizeBytes, kind);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -508,7 +508,7 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp
|
||||
e= hipErrorInvalidValue;
|
||||
} else if (stream) {
|
||||
try {
|
||||
stream->copyAsync(dst, src, sizeBytes, kind);
|
||||
stream->locked_copyAsync(dst, src, sizeBytes, kind);
|
||||
}
|
||||
catch (ihipException ex) {
|
||||
e = ex._code;
|
||||
@@ -534,7 +534,7 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, h
|
||||
e= hipErrorInvalidValue;
|
||||
} else if (stream) {
|
||||
try {
|
||||
stream->copyAsync((void*)dst, src, sizeBytes, kind);
|
||||
stream->locked_copyAsync((void*)dst, src, sizeBytes, kind);
|
||||
}
|
||||
catch (ihipException ex) {
|
||||
e = ex._code;
|
||||
@@ -561,7 +561,7 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz
|
||||
e= hipErrorInvalidValue;
|
||||
} else if (stream) {
|
||||
try {
|
||||
stream->copyAsync((void*)dst, (void*)src, sizeBytes, kind);
|
||||
stream->locked_copyAsync((void*)dst, (void*)src, sizeBytes, kind);
|
||||
}
|
||||
catch (ihipException ex) {
|
||||
e = ex._code;
|
||||
@@ -587,7 +587,7 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, h
|
||||
e= hipErrorInvalidValue;
|
||||
} else if (stream) {
|
||||
try {
|
||||
stream->copyAsync(dst, (void*)src, sizeBytes, kind);
|
||||
stream->locked_copyAsync(dst, (void*)src, sizeBytes, kind);
|
||||
}
|
||||
catch (ihipException ex) {
|
||||
e = ex._code;
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする