diff --git a/hipamd/src/hip_event.hpp b/hipamd/src/hip_event.hpp index 8758eb1c3d..4f50f1d523 100644 --- a/hipamd/src/hip_event.hpp +++ b/hipamd/src/hip_event.hpp @@ -48,7 +48,7 @@ typedef struct ihipIpcEventShmem_s { std::atomic owners_process_id; std::atomic read_index; std::atomic write_index; - std::atomic signal[IPC_SIGNALS_PER_EVENT]; + uint32_t signal[IPC_SIGNALS_PER_EVENT]; } ihipIpcEventShmem_t; class EventMarker : public amd::Marker { @@ -186,6 +186,7 @@ class IPCEvent : public Event { int owners = --ipc_evt_.ipc_shmem_->owners; // Make sure event is synchronized hipError_t status = synchronize(); + status = ihipHostUnregister(&ipc_evt_.ipc_shmem_->signal); if (!amd::Os::MemoryUnmapFile(ipc_evt_.ipc_shmem_, sizeof(hip::ihipIpcEventShmem_t))) { // print hipErrorInvalidHandle; } diff --git a/hipamd/src/hip_event_ipc.cpp b/hipamd/src/hip_event_ipc.cpp index 24807ebbf2..7daf249aba 100644 --- a/hipamd/src/hip_event_ipc.cpp +++ b/hipamd/src/hip_event_ipc.cpp @@ -25,11 +25,6 @@ #include #endif -void ipcEventCallback(hipStream_t stream, hipError_t status, void* user_data) { - std::atomic* signal = reinterpret_cast*>(user_data); - signal->store(0); - return; -} // ================================================================================================ hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags); @@ -53,6 +48,7 @@ bool IPCEvent::createIpcEventShmemIfNeeded() { sizeof(hip::ihipIpcEventShmem_t))) { return false; } + close(temp_fd); ipc_evt_.ipc_shmem_->owners = 1; ipc_evt_.ipc_shmem_->read_index = -1; ipc_evt_.ipc_shmem_->write_index = 0; @@ -60,7 +56,13 @@ bool IPCEvent::createIpcEventShmemIfNeeded() { ipc_evt_.ipc_shmem_->signal[sig_idx] = 0; } - close(temp_fd); + // device sets 0 to this ptr when the ipc event is completed + hipError_t status = ihipHostRegister(&ipc_evt_.ipc_shmem_->signal, + sizeof(uint32_t) * IPC_SIGNALS_PER_EVENT, + 0); + if (status != hipSuccess) { + return false; + } return true; #else return false; @@ -154,17 +156,17 @@ hipError_t IPCEvent::enqueueRecordCommand(hipStream_t stream, amd::Command* comm // Lock signal. ipc_evt_.ipc_shmem_->signal[offset] = 1; ipc_evt_.ipc_shmem_->owners_device_id = deviceId(); - - std::atomic* signal = &ipc_evt_.ipc_shmem_->signal[offset]; - StreamCallback* cbo = new StreamCallback( - stream, reinterpret_cast(ipcEventCallback), signal, command); - if (!tEvent.setCallback(CL_COMPLETE, ihipStreamCallback, cbo)) { - command->release(); - return hipErrorInvalidHandle; - } command->enqueue(); - // waiting for the call back to be called - command->awaitCompletion(); + + // device writes 0 to signal after the hipEventRecord command is completed + // the signal value is checked by WaitThenDecrementSignal cb + hipError_t status = ihipStreamOperation(stream, ROCCLR_COMMAND_STREAM_WRITE_VALUE, + &(ipc_evt_.ipc_shmem_->signal[offset]), + 0, + 0, 0, sizeof(uint32_t)); + if (status != hipSuccess) { + return status; + } // Update read index to indicate new signal. int expected = write_index - 1; @@ -203,8 +205,12 @@ hipError_t IPCEvent::OpenHandle(ihipIpcEventHandle_t* handle) { ipc_evt_.ipc_shmem_->owners += 1; setDeviceId(ipc_evt_.ipc_shmem_->owners_device_id.load()); - - return hipSuccess; + // device sets 0 to this ptr when the ipc event is completed + hipError_t status = hipSuccess; + status = ihipHostRegister(&ipc_evt_.ipc_shmem_->signal, + sizeof(uint32_t) * IPC_SIGNALS_PER_EVENT, + 0); + return status; } } // namespace hip diff --git a/hipamd/src/hip_internal.hpp b/hipamd/src/hip_internal.hpp index 38b5310d4c..0a88265218 100644 --- a/hipamd/src/hip_internal.hpp +++ b/hipamd/src/hip_internal.hpp @@ -493,10 +493,13 @@ extern amd::Memory* getMemoryObjectWithOffset(const void* ptr, const size_t size extern void getStreamPerThread(hipStream_t& stream); extern hipStream_t getPerThreadDefaultStream(); extern hipError_t ihipUnbindTexture(textureReference* texRef); - +extern hipError_t ihipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags); +extern hipError_t ihipHostUnregister(void* hostPtr); extern hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device); extern hipError_t ihipDeviceGet(hipDevice_t* device, int deviceId); +extern hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void* ptr, + uint64_t value, uint64_t mask, unsigned int flags, size_t sizeBytes); constexpr bool kOptionChangeable = true; constexpr bool kNewDevProg = false; diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index bfe7a53cc2..3259d3c138 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -997,11 +997,9 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { HIP_RETURN(hipSuccess); } -hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { - HIP_INIT_API(hipHostRegister, hostPtr, sizeBytes, flags); - CHECK_STREAM_CAPTURE_SUPPORTED(); +hipError_t ihipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { if (hostPtr == nullptr) { - HIP_RETURN(hipErrorInvalidValue); + return hipErrorInvalidValue; } else { amd::Memory* mem = new (*hip::host_device->asContext()) amd::Buffer(*hip::host_device->asContext(), CL_MEM_USE_HOST_PTR | CL_MEM_SVM_ATOMICS, sizeBytes); @@ -1012,7 +1010,7 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) if (!mem->create(hostPtr, sysMemAlloc, skipAlloc, forceAlloc)) { mem->release(); LogPrintfError("Cannot create memory for size: %u with flags: %d \n", sizeBytes, flags); - HIP_RETURN(hipErrorOutOfMemory); + return hipErrorOutOfMemory; } for (const auto& device : g_devices) { @@ -1029,14 +1027,17 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) if (mem != nullptr) { mem->getUserData().deviceId = hip::getCurrentDevice()->deviceId(); } - HIP_RETURN(hipSuccess); + return hipSuccess; } } -hipError_t hipHostUnregister(void* hostPtr) { - HIP_INIT_API(hipHostUnregister, hostPtr); +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipHostRegister, hostPtr, sizeBytes, flags); CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipHostRegister(hostPtr, sizeBytes,flags)); +} +hipError_t ihipHostUnregister(void* hostPtr) { size_t offset = 0; amd::Memory* mem = getMemoryObject(hostPtr, offset); @@ -1056,11 +1057,18 @@ hipError_t hipHostUnregister(void* hostPtr) { } amd::MemObjMap::RemoveMemObj(hostPtr); mem->release(); - HIP_RETURN(hipSuccess); + return hipSuccess; } LogPrintfError("Cannot unregister host_ptr: 0x%x \n", hostPtr); - HIP_RETURN(hipErrorHostMemoryNotRegistered); + return hipErrorHostMemoryNotRegistered; +} + + +hipError_t hipHostUnregister(void* hostPtr) { + HIP_INIT_API(hipHostUnregister, hostPtr); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipHostUnregister(hostPtr)); } // Deprecated function: