diff --git a/rocclr/hip_event.cpp b/rocclr/hip_event.cpp index 665a4c2148..6bd2056cbc 100755 --- a/rocclr/hip_event.cpp +++ b/rocclr/hip_event.cpp @@ -229,12 +229,10 @@ hipError_t ihipEventQuery(hipEvent_t event) { hip::Event* e = reinterpret_cast(event); if ((e->flags & hipEventInterprocess) && (e->ipc_evt_.ipc_shmem_)) { int prev_read_idx = e->ipc_evt_.ipc_shmem_->read_index; - if (prev_read_idx > 0) { - int offset = (prev_read_idx % IPC_SIGNALS_PER_EVENT); - while ((e->ipc_evt_.ipc_shmem_->read_index < prev_read_idx + IPC_SIGNALS_PER_EVENT) - && (e->ipc_evt_.ipc_shmem_->signal[offset] != 0)) { - } - } + int offset = (prev_read_idx % IPC_SIGNALS_PER_EVENT); + if (e->ipc_evt_.ipc_shmem_->read_index < prev_read_idx+IPC_SIGNALS_PER_EVENT && e->ipc_evt_.ipc_shmem_->signal[offset] != 0) { + return hipErrorNotReady; + } return hipSuccess; } else { return e->query(); @@ -261,6 +259,8 @@ hipError_t hipEventDestroy(hipEvent_t event) { hip::Event* e = reinterpret_cast(event); if ((e->flags & hipEventInterprocess) && (e->ipc_evt_.ipc_shmem_)) { int owners = -- e->ipc_evt_.ipc_shmem_->owners; + // Make sure event is synchronized + hipEventSynchronize(event); if (!amd::Os::MemoryUnmapFile(e->ipc_evt_.ipc_shmem_,sizeof(hip::ihipIpcEventShmem_t))) { HIP_RETURN(hipErrorInvalidHandle); } @@ -337,12 +337,7 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { bool isRecorded = e->isRecorded(); if ((e->flags & hipEventInterprocess) && !isRecorded) { - amd::Command* command = queue->getLastQueuedCommand(true); - if (command == nullptr) { - command = new amd::Marker(*queue, kMarkerDisableFlush); - command->enqueue(); - } - e->addMarker(queue, command, true); + amd::Command* command = new amd::Marker(*queue, kMarkerDisableFlush); amd::Event& tEvent = command->event(); createIpcEventShmemIfNeeded(e->ipc_evt_); int write_index = e->ipc_evt_.ipc_shmem_->write_index++; @@ -357,11 +352,11 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { std::atomic *signal = &e->ipc_evt_.ipc_shmem_->signal[offset]; StreamCallback* cbo = new StreamCallback(stream, reinterpret_cast (ipcEventCallback), signal, command); - command->enqueue(); if (!tEvent.setCallback(CL_COMPLETE, ihipStreamCallback,cbo)) { command->release(); return hipErrorInvalidHandle; } + command->enqueue(); tEvent.notifyCmdQueue(); // Update read index to indicate new signal. int expected = write_index - 1; @@ -385,7 +380,7 @@ hipError_t hipEventSynchronize(hipEvent_t event) { hip::Event* e = reinterpret_cast(event); if ((e->flags & hipEventInterprocess) && (e->ipc_evt_.ipc_shmem_)) { int prev_read_idx = e->ipc_evt_.ipc_shmem_->read_index; - if (prev_read_idx > 0) { + if (prev_read_idx >= 0) { int offset = (prev_read_idx % IPC_SIGNALS_PER_EVENT); while ((e->ipc_evt_.ipc_shmem_->read_index < prev_read_idx + IPC_SIGNALS_PER_EVENT) && (e->ipc_evt_.ipc_shmem_->signal[offset] != 0)) { diff --git a/rocclr/hip_stream.cpp b/rocclr/hip_stream.cpp index f4f6196a4d..b80fa28e17 100755 --- a/rocclr/hip_stream.cpp +++ b/rocclr/hip_stream.cpp @@ -350,19 +350,15 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int hip::Event* e = reinterpret_cast(event); if (e->flags & hipEventInterprocess) { - amd::Command* command = queue->getLastQueuedCommand(true); - if (command == nullptr) { - command = new amd::Marker(*queue, false); - command->enqueue(); - } + amd::Command* command = new amd::Marker(*queue, false); auto t{new CallbackData{e->ipc_evt_.ipc_shmem_->read_index, e->ipc_evt_.ipc_shmem_}}; StreamCallback* cbo = new StreamCallback(stream, reinterpret_cast (WaitThenDecrementSignal), t, command); - command->enqueue(); if (!command->setCallback(CL_COMPLETE, ihipStreamCallback,cbo)) { command->release(); return hipErrorInvalidHandle; } + command->enqueue(); command->awaitCompletion(); HIP_RETURN(hipSuccess); } else { diff --git a/tests/src/runtimeApi/event/hipEventIpc.cpp b/tests/src/runtimeApi/event/hipEventIpc.cpp index cb2883fe03..2b18cf1db2 100644 --- a/tests/src/runtimeApi/event/hipEventIpc.cpp +++ b/tests/src/runtimeApi/event/hipEventIpc.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. // forces synchronization : set /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia * TEST: %t --iterations 10 * HIT_END */ @@ -96,29 +96,13 @@ int main(int argc, char* argv[]) { HIPCHECK(hipEventSynchronize(ipc_event)); - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - -#ifndef __HIP_PLATFORM_AMD__ HIPCHECK(hipEventDestroy(ipc_event)); HIPCHECK(hipEventDestroy(start)); HIPCHECK(hipEventDestroy(stop)); -#endif HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - printf("check:\n"); -#ifdef __HIP_PLATFORM_AMD__ - // Due to implementation bug of hipEventInterprocess, as a workaround, - // we have to move hipEventDestroy here. Otherwise sporadic crash will - // happen in above hipMemcpy(). If hipEventInterprocess is officially - // implemented, we should revisit here and move these back to match cuda - // event behavior. - HIPCHECK(hipEventDestroy(ipc_event)); - HIPCHECK(hipEventDestroy(start)); - HIPCHECK(hipEventDestroy(stop)); -#endif - HipTest::checkVectorADD(A_h, B_h, C_h, N, true); passed();