Merge pull request #172 from bensander/streamwaitevent

Streamwaitevent
Этот коммит содержится в:
Maneesh Gupta
2017-09-14 16:07:19 +05:30
коммит произвёл GitHub
родитель f6fda276ce fff74eee21
Коммит bf8f23b2a4
4 изменённых файлов: 73 добавлений и 29 удалений
+7
Просмотреть файл
@@ -88,6 +88,10 @@ int HIP_HOST_COHERENT = 1;
int HIP_SYNC_HOST_ALLOC = 1;
int HIP_SYNC_STREAM_WAIT = 0;
int HIP_FORCE_NULL_STREAM=0;
#if (__hcc_workweek__ >= 17300)
// Make sure we have required bug fix in HCC
@@ -1249,6 +1253,9 @@ void HipReadEnv()
READ_ENV_I(release, HIP_SYNC_HOST_ALLOC, 0, "Sync before and after all host memory allocations. May help stability");
READ_ENV_I(release, HIP_SYNC_NULL_STREAM, 0, "Synchronize on host for null stream submissions");
READ_ENV_I(release, HIP_FORCE_NULL_STREAM, 0, "Force all stream allocations to secretly return the null stream");
READ_ENV_I(release, HIP_SYNC_STREAM_WAIT, 0, "hipStreamWaitEvent will synchronize to host");
READ_ENV_I(release, HIP_HOST_COHERENT, 0, "If set, all host memory will be allocated as fine-grained system memory. This allows threadfence_system to work but prevents host memory from being cached on GPU which may have performance impact.");
+2
Просмотреть файл
@@ -65,8 +65,10 @@ extern int HIP_HIDDEN_FREE_MEM;
//---
// Chicken bits for disabling functionality to work around potential issues:
extern int HIP_SYNC_HOST_ALLOC;
extern int HIP_SYNC_STREAM_WAIT;
extern int HIP_SYNC_NULL_STREAM;
extern int HIP_FORCE_NULL_STREAM;
// TODO - remove when this is standard behavior.
extern int HCC_OPT_FLUSH;
+28 -25
Просмотреть файл
@@ -38,21 +38,26 @@ hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags)
hipError_t e = hipSuccess;
if (ctx) {
hc::accelerator acc = ctx->getWriteableDevice()->_acc;
// TODO - use try-catch block to detect memory exception?
//
//Note this is an execute_in_order queue, so all kernels submitted will automatically wait for the previous one to complete:
//This matches CUDA stream behavior:
if (HIP_FORCE_NULL_STREAM) {
*stream = 0;
} else {
hc::accelerator acc = ctx->getWriteableDevice()->_acc;
{
// Obtain mutex access to the device critical data, release by destructor
LockedAccessor_CtxCrit_t ctxCrit(ctx->criticalData());
// TODO - use try-catch block to detect memory exception?
//
//Note this is an execute_in_order queue, so all kernels submitted will automatically wait for the previous one to complete:
//This matches CUDA stream behavior:
auto istream = new ihipStream_t(ctx, acc.create_view(), flags);
{
// Obtain mutex access to the device critical data, release by destructor
LockedAccessor_CtxCrit_t ctxCrit(ctx->criticalData());
ctxCrit->addStream(istream);
*stream = istream;
auto istream = new ihipStream_t(ctx, acc.create_view(), flags);
ctxCrit->addStream(istream);
*stream = istream;
}
}
tprintf(DB_SYNC, "hipStreamCreate, %s\n", ToString(*stream).c_str());
@@ -93,18 +98,15 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
} else if (event->_state != hipEventStatusUnitialized) {
if (stream != hipStreamNull) {
if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) {
// conservative wait on host for the specified event to complete:
event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
} else {
stream = ihipSyncAndResolveStream(stream);
// This will use create_blocking_marker to wait on the specified queue.
stream->locked_streamWaitEvent(event);
} else {
// TODO-hcc Convert to use create_blocking_marker(...) functionality.
// Currently we have a super-conservative version of this - block on host, and drain the queue.
// This should create a barrier packet in the target queue.
// TODO-HIP_SYNC_NULL_STREAM
stream->locked_wait();
}
} // else event not recorded, return immediately and don't create marker.
return ihipLogStatus(e);
@@ -122,15 +124,14 @@ hipError_t hipStreamQuery(hipStream_t stream)
stream = device->_defaultStream;
}
int pendingOps = 0;
bool isEmpty = 0;
{
LockedAccessor_StreamCrit_t crit(stream->_criticalData);
pendingOps = crit->_av.get_pending_async_ops();
isEmpty = crit->_av.get_is_empty();
}
hipError_t e = (pendingOps > 0) ? hipErrorNotReady : hipSuccess;
hipError_t e = isEmpty ? hipSuccess : hipErrorNotReady ;
return ihipLogStatus(e);
}
@@ -170,7 +171,9 @@ hipError_t hipStreamDestroy(hipStream_t stream)
//--- Drain the stream:
if (stream == NULL) {
e = hipErrorInvalidResourceHandle; // TODO - review - what happens if try to destroy null stream
if (!HIP_FORCE_NULL_STREAM) {
e = hipErrorInvalidResourceHandle;
}
} else {
stream->locked_wait();
+36 -4
Просмотреть файл
@@ -446,9 +446,41 @@ int main(int argc, char *argv[])
if (p_tests & 0x1000) {
printf ("==> Test 0x1000 try null stream\n");
hipStreamQuery(0/* try null stream*/);
printf ("==> Test 0x1000 simple null stream tests\n");
// try some null stream:
hipStreamQuery(0);
hipStream_t s1;
hipEvent_t e1;
{
// stream null waits on event in s1 stream:
HIPCHECK(hipStreamCreate(&s1));
HIPCHECK(hipEventCreate(&e1));
HIPCHECK(hipEventRecord(e1, s1))
HIPCHECK(hipStreamWaitEvent(hipStream_t(0), e1, 0/*flags*/));
HIPCHECK(hipStreamDestroy(s1));
HIPCHECK(hipEventDestroy(e1));
}
{
// stream s1 waits on event in null stream:
HIPCHECK(hipStreamCreate(&s1));
HIPCHECK(hipEventCreate(&e1));
HIPCHECK(hipEventRecord(e1, hipStream_t(0)))
HIPCHECK(hipStreamWaitEvent(s1, e1, 0/*flags*/));
HIPCHECK(hipStreamDestroy(s1));
HIPCHECK(hipEventDestroy(e1));
}
}
@@ -471,8 +503,8 @@ int main(int argc, char *argv[])
}
{
printf ("test: alternating memcpy/count-reverse followed by event\n");
if (p_tests & 0x4000 ) {
printf ("test: %x alternating memcpy/count-reverse followed by event\n", p_tests);
RUN_SYNC_TEST(0x4000, streamersDev0, sync_queryAllUntilComplete(streamersDev0), true);
RUN_SYNC_TEST(0x8000, streamersDev0, sync_streamWaitEvent(streamersDev0.back()->event(), 0, sideStreams[0], false), true);
}