From ef596cd088b82331a314350f04e64ed3f97eb4b1 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Thu, 16 Apr 2020 22:01:22 -0700 Subject: [PATCH] add IPC event support (#1996) --- include/hip/hcc_detail/hip_runtime_api.h | 17 +- src/hip_event.cpp | 310 ++++++++++++++++++--- src/hip_hcc.cpp | 45 ++- src/hip_hcc_internal.h | 38 ++- src/hip_stream.cpp | 31 ++- src/trace_helper.h | 5 + tests/src/runtimeApi/event/hipEventIpc.cpp | 112 ++++++++ 7 files changed, 486 insertions(+), 72 deletions(-) create mode 100644 tests/src/runtimeApi/event/hipEventIpc.cpp diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 12fd9b7a91..b0d1c3570d 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -97,8 +97,6 @@ typedef int hipDevice_t; typedef struct ihipStream_t* hipStream_t; -// TODO: IPC implementation - #define hipIpcMemLazyEnablePeerAccess 0 #define HIP_IPC_HANDLE_SIZE 64 @@ -107,12 +105,9 @@ typedef struct hipIpcMemHandle_st { char reserved[HIP_IPC_HANDLE_SIZE]; } hipIpcMemHandle_t; -// TODO: IPC event handle currently unsupported -struct ihipIpcEventHandle_t; -typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t; - - -// END TODO +typedef struct hipIpcEventHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcEventHandle_t; typedef struct ihipModule_t* hipModule_t; @@ -3154,10 +3149,8 @@ hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned hipError_t hipIpcCloseMemHandle(void* devPtr); -// hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr); -// hipError_t hipIpcCloseMemHandle(void *devPtr); -// // hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); -// hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags); +hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event); +hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); /** diff --git a/src/hip_event.cpp b/src/hip_event.cpp index b297fabbd9..c626f7956d 100644 --- a/src/hip_event.cpp +++ b/src/hip_event.cpp @@ -24,6 +24,74 @@ THE SOFTWARE. #include "hip_hcc_internal.h" #include "trace_helper.h" +#include // errno, ENOENT +#include // O_RDWR, O_CREATE +#include // shm_open, shm_unlink, mmap, munmap, PROT_READ, PROT_WRITE, MAP_SHARED, MAP_FAILED +#include // ftruncate, close + +namespace { + + inline + const char* hsa_to_string(hsa_status_t err) noexcept + { + const char* r{}; + + if (hsa_status_string(err, &r) == HSA_STATUS_SUCCESS) return r; + + return "Unknown."; + } + + template + inline + void throwing_result_check(hsa_status_t res, const char (&file)[m], + const char (&function)[n], int line) { + if (res == HSA_STATUS_SUCCESS) return; + + throw std::runtime_error{"Failed in file " + (file + + (", in function \"" + (function + + ("\", on line " + std::to_string(line))))) + + ", with error: " + hsa_to_string(res)}; + } + + template + inline + void throwing_retval_check(int good, int retval, const char (&file)[m], + const char (&function)[n], int line) { + if (retval == good) return; + + throw std::runtime_error{"Failed in file " + (file + + (", in function \"" + (function + + ("\", on line " + std::to_string(line))))) + + ", with error: " + strerror(retval)}; + } + + template + inline + void throwing_msg_check(bool bad, const char (&msg)[o], + const char (&file)[m], + const char (&function)[n], int line) { + if (!bad) return; + + throw std::runtime_error{"Failed in file " + (file + + (", in function \"" + (function + + ("\", on line " + std::to_string(line))))) + + ", with error: " + msg}; + } + + template + inline + void throwing_errno_check(bool bad, const char (&file)[m], + const char (&function)[n], int line) { + if (!bad) return; + + throw std::runtime_error{"Failed in file " + (file + + (", in function \"" + (function + + ("\", on line " + std::to_string(line))))) + + ", with error: " + strerror(errno)}; + } + +} // Unnamed namespace. + //------------------------------------------------------------------------------------------------- //------------------------------------------------------------------------------------------------- // Events @@ -49,6 +117,43 @@ void ihipEvent_t::attachToCompletionFuture(const hc::completion_future* cf, hipS } +static void createIpcEventShmemIfNeeded(ihipEventData_t &ecd) { + if (!ecd._ipc_name.empty()) return; + + // create random shmem name + char name_template[] = "/tmp/eventXXXXXX"; + int temp_fd = mkstemp(name_template); + throwing_errno_check(-1 == temp_fd, __FILE__, __func__, __LINE__); + + // copy shmem name into event data, reformat to use a single slash + ecd._ipc_name = name_template; + ecd._ipc_name.replace(0, 5, "/hip_"); + + // open shmem + ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR | O_CREAT, 0777); + throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__); + + // size it + throwing_retval_check(0, ftruncate(ecd._ipc_fd, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__); + + // mmap it + ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0); + throwing_errno_check(NULL == ecd._ipc_shmem, __FILE__, __func__, __LINE__); + + // initialize shared state + ecd._ipc_shmem->owners = 1; + ecd._ipc_shmem->read_index = -1; + ecd._ipc_shmem->write_index = 0; + for (int i=0; i < IPC_SIGNALS_PER_EVENT; i++) { + ecd._ipc_shmem->signal[i] = 0; + } + + // remove temp file + throwing_errno_check(-1 == close(temp_fd), __FILE__, __func__, __LINE__); + throwing_errno_check(-1 == unlink(name_template), __FILE__, __func__, __LINE__); +} + + static std::pair refreshEventStatus(ihipEventData_t &ecd) { if (ecd._state == hipEventStatusRecording && ecd.marker().is_ready()) { if ((ecd._type == hipEventTypeIndependent) || @@ -75,9 +180,9 @@ static std::pair refreshEventStatus(ihipEventData_t hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) { hipError_t e = hipSuccess; - // TODO-IPC - support hipEventInterprocess. unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming | - hipEventReleaseToDevice | hipEventReleaseToSystem; + hipEventReleaseToDevice | hipEventReleaseToSystem | + hipEventInterprocess; const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem); const bool illegalFlags = @@ -105,29 +210,66 @@ hipError_t hipEventCreate(hipEvent_t* event) { return ihipLogStatus(ihipEventCreate(event, 0)); } - hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { HIP_INIT_SPECIAL_API(hipEventRecord, TRACE_SYNC, event, stream); if (!event) return ihipLogStatus(hipErrorInvalidHandle); stream = ihipSyncAndResolveStream(stream); LockedAccessor_EventCrit_t eCrit(event->criticalData()); - if (eCrit->_eventData._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle); + auto &ecd{eCrit->_eventData}; + if (ecd._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle); if (HIP_SYNC_NULL_STREAM && stream->isDefaultStream()) { // TODO-HIP_SYNC_NULL_STREAM : can remove this code when HIP_SYNC_NULL_STREAM = 0 // If default stream , then wait on all queues. ihipCtx_t* ctx = ihipGetTlsDefaultCtx(); ctx->locked_syncDefaultStream(true, true); - eCrit->_eventData.marker(hc::completion_future()); // reset event - eCrit->_eventData._stream = stream; - eCrit->_eventData._timestamp = hc::get_system_ticks(); - eCrit->_eventData._state = hipEventStatusComplete; + ecd.marker(hc::completion_future()); // reset event + ecd._stream = stream; + ecd._timestamp = hc::get_system_ticks(); + ecd._state = hipEventStatusComplete; + // TODO handle IPC case? } else { // Record the event in the stream: - eCrit->_eventData.marker(stream->locked_recordEvent(event)); - eCrit->_eventData._stream = stream; - eCrit->_eventData._timestamp = 0; - eCrit->_eventData._state = hipEventStatusRecording; + ecd.marker(stream->locked_recordEvent(event)); + ecd._stream = stream; + ecd._timestamp = 0; + ecd._state = hipEventStatusRecording; + if (event->_flags & hipEventInterprocess) { + createIpcEventShmemIfNeeded(ecd); + int write_index = ecd._ipc_shmem->write_index++; // fetch add + int offset = write_index % IPC_SIGNALS_PER_EVENT; + // While event still valid and still locked, spin. + while (ecd._ipc_shmem->signal[offset] != 0) { + // TODO backoff + } + // Lock signal. + ecd._ipc_shmem->signal[offset] = 1; + // forward signal state from local signal to IPC signal via host callback + // create callback that can be passed to hsa_amd_signal_async_handler + // this function decrements the IPC signal by 1 to indicate completion + std::atomic *signal = &ecd._ipc_shmem->signal[offset]; + auto t{new std::function{[=]() { + signal->store(0); + }}}; + // register above callback with HSA runtime to be called when local signal + // is decremented from 1 to 0 by CP + auto local_signal = *reinterpret_cast(eCrit->_eventData.marker().get_native_handle()); + hsa_amd_signal_async_handler(local_signal, HSA_SIGNAL_CONDITION_LT, 1, + [](hsa_signal_value_t x, void* p) { + (*static_cast(p))(); + delete static_cast(p); + return false; + }, t); + // Update read index to indicate new signal. + int expected = write_index-1; + while (!ecd._ipc_shmem->read_index.compare_exchange_weak(expected, write_index)) { + throwing_msg_check( + expected >= write_index, + "IPC event record update read index failure", + __FILE__, __func__, __LINE__); + expected = write_index-1; + } + } } return ihipLogStatus(hipSuccess); } @@ -137,8 +279,18 @@ hipError_t hipEventDestroy(hipEvent_t event) { HIP_INIT_API(hipEventDestroy, event); if (event) { + { + LockedAccessor_EventCrit_t crit(event->criticalData()); + auto &ecd{crit->_eventData}; + if (ecd._ipc_shmem) { + int owners = --ecd._ipc_shmem->owners; + throwing_errno_check(-1 == munmap(ecd._ipc_shmem, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__); + throwing_errno_check(-1 == close(ecd._ipc_fd), __FILE__, __func__, __LINE__); + if (0 == owners) + throwing_errno_check(-1 == shm_unlink(ecd._ipc_name.c_str()), __FILE__, __func__, __LINE__); + } + } delete event; - return ihipLogStatus(hipSuccess); } else { return ihipLogStatus(hipErrorInvalidHandle); @@ -148,31 +300,44 @@ hipError_t hipEventDestroy(hipEvent_t event) { hipError_t hipEventSynchronize(hipEvent_t event) { HIP_INIT_SPECIAL_API(hipEventSynchronize, TRACE_SYNC, event); - if (event){ - if (!(event->_flags & hipEventReleaseToSystem)) { - tprintf(DB_WARN, - "hipEventSynchronize on event without system-scope fence ; consider creating with " - "hipEventReleaseToSystem\n"); - } - auto ecd = event->locked_copyCrit(); + if (!event) return ihipLogStatus(hipErrorInvalidHandle); - if (ecd._state == hipEventStatusUnitialized) { - return ihipLogStatus(hipErrorInvalidHandle); - } else if (ecd._state == hipEventStatusCreated) { - // Created but not actually recorded on any device: - return ihipLogStatus(hipSuccess); - } else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) { - auto* ctx = ihipGetTlsDefaultCtx(); - // TODO-HIP_SYNC_NULL_STREAM - can remove this code - ctx->locked_syncDefaultStream(true, true); - return ihipLogStatus(hipSuccess); - } else { - ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked - : hc::hcWaitModeActive); - return ihipLogStatus(hipSuccess); + if (!(event->_flags & hipEventReleaseToSystem)) { + tprintf(DB_WARN, + "hipEventSynchronize on event without system-scope fence ; consider creating with " + "hipEventReleaseToSystem\n"); + } + + auto ecd = event->locked_copyCrit(); + + if (event->_flags & hipEventInterprocess) { + // this is an IPC event + int previous_read_index = ecd._ipc_shmem->read_index; + if (previous_read_index >= 0) { + // we have at least one recorded event, so proceed + int offset = previous_read_index % IPC_SIGNALS_PER_EVENT; + // While event still valid and still locked, spin. + while (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) { + // TODO backoff + } } - } else { + return ihipLogStatus(hipSuccess); + } + + if (ecd._state == hipEventStatusUnitialized) { return ihipLogStatus(hipErrorInvalidHandle); + } else if (ecd._state == hipEventStatusCreated) { + // Created but not actually recorded on any device: + return ihipLogStatus(hipSuccess); + } else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) { + auto* ctx = ihipGetTlsDefaultCtx(); + // TODO-HIP_SYNC_NULL_STREAM - can remove this code + ctx->locked_syncDefaultStream(true, true); + return ihipLogStatus(hipSuccess); + } else { + ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked + : hc::hcWaitModeActive); + return ihipLogStatus(hipSuccess); } } @@ -239,9 +404,80 @@ hipError_t hipEventQuery(hipEvent_t event) { auto ecd = event->locked_copyCrit(); - if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) { - return ihipLogStatus(hipErrorNotReady); + // this event is either from an ipc handle, or the owner of a local ipc event + if (event->_flags & hipEventInterprocess) { + if (ecd._ipc_shmem) { + int previous_read_index = ecd._ipc_shmem->read_index; + int offset = previous_read_index % IPC_SIGNALS_PER_EVENT; + if (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) { + return ihipLogStatus(hipErrorNotReady); + } + else { + return ihipLogStatus(hipSuccess); + } + } + } + // normal event + else { + if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) { + return ihipLogStatus(hipErrorNotReady); + } } return ihipLogStatus(hipSuccess); } + +hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) +{ + HIP_INIT_API(hipIpcGetEventHandle, handle, event); + +#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2 + if (!handle) return ihipLogStatus(hipErrorInvalidHandle); + if (!event) return ihipLogStatus(hipErrorInvalidHandle); + if (!(event->_flags & hipEventInterprocess)) return ihipLogStatus(hipErrorInvalidHandle); + if (!(event->_flags & hipEventDisableTiming)) return ihipLogStatus(hipErrorInvalidHandle); + + LockedAccessor_EventCrit_t crit(event->criticalData()); + + auto &ecd{crit->_eventData}; + createIpcEventShmemIfNeeded(ecd); + // copy name into handle + ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)handle; + memset(iHandle->shmem_name, 0, HIP_IPC_HANDLE_SIZE); + ecd._ipc_name.copy(iHandle->shmem_name, std::string::npos); + + return ihipLogStatus(hipSuccess); +#else + return ihipLogStatus(hipErrorNotSupported); +#endif +} + +hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) +{ + HIP_INIT_API(hipIpcOpenEventHandle, event, &handle); + +#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2 + if (!event) return ihipLogStatus(hipErrorInvalidHandle); + + // create a new event with timing disabled, per spec + auto hip_status = ihipEventCreate(event, hipEventDisableTiming | hipEventInterprocess); + if (hip_status != hipSuccess) return ihipLogStatus(hip_status); + + LockedAccessor_EventCrit_t crit((*event)->criticalData()); + auto &ecd{crit->_eventData}; + ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)&handle; + ecd._ipc_name = iHandle->shmem_name; + // open shmem + ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR, 0777); + throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__); + // mmap it + ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0); + throwing_errno_check(NULL == ecd._ipc_shmem, __FILE__, __func__, __LINE__); + // update shared state + ecd._ipc_shmem->owners += 1; + + return ihipLogStatus(hipSuccess); +#else + return ihipLogStatus(hipErrorNotSupported); +#endif +} diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 2fd40903d7..5159254d57 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -332,12 +332,55 @@ void ihipStream_t::locked_wait() { locked_wait(waited); }; +typedef struct { + int previous_read_index; + ihipIpcEventShmem_t *shmem; + hsa_signal_t signal; +} callback_data_t; + +static void WaitThenDecrementSignal(callback_data_t *data) { + int offset = data->previous_read_index % IPC_SIGNALS_PER_EVENT; + // While event valid and locked, spin. + while (data->shmem->read_index < data->previous_read_index+IPC_SIGNALS_PER_EVENT && data->shmem->signal[offset] != 0) { + } + hsa_signal_store_relaxed(data->signal, 0); + delete data; +} + // Causes current stream to wait for specified event to complete: // Note this does not provide any kind of host serialization. void ihipStream_t::locked_streamWaitEvent(ihipEventData_t& ecd) { LockedAccessor_StreamCrit_t crit(_criticalData); - crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope); + // if event is an IPC event, it doesn't have a marker + // we use a host callback to block stream with a signal wait + if (ecd._ipc_shmem) { + // create first marker + auto cf = crit->_av.create_marker(hc::no_scope); + // get its signal + auto signal = *reinterpret_cast(cf.get_native_handle()); + // increment its signal value + hsa_signal_add_relaxed(signal, 1); + + // create callback that can be passed to hsa_amd_signal_async_handler + // this function will host wait on IPC signal, then sets first packet's signal to 0 to indicate completion + auto t{new callback_data_t{ecd._ipc_shmem->read_index, ecd._ipc_shmem, signal}}; + + // register above callback with HSA runtime to be called when first packet's signal + // is decremented from 2 to 1 by CP (or it is already at 1) + // the HSA async handler is single threaded, so we can't block, therefore use a detached thread + hsa_amd_signal_async_handler(signal, HSA_SIGNAL_CONDITION_EQ, 1, + [](hsa_signal_value_t x, void* p) { + std::thread(WaitThenDecrementSignal, static_cast(p)).detach(); + return false; + }, t); + + // create additional marker that blocks on the first one + crit->_av.create_blocking_marker(cf, hc::accelerator_scope); + } + else { + crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope); + } } diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index 2c3fb25b3a..b1777955aa 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -389,18 +389,28 @@ const hipStream_t hipStreamNull = 0x0; /** - * HIP IPC Handle Size + * HIP IPC Mem Handle Size */ -#define HIP_IPC_RESERVED_SIZE 24 +#define HIP_IPC_MEM_RESERVED_SIZE 24 class ihipIpcMemHandle_t { public: #if USE_IPC hsa_amd_ipc_memory_t ipc_handle; ///< ipc memory handle on ROCr #endif size_t psize; - char reserved[HIP_IPC_RESERVED_SIZE]; + char reserved[HIP_IPC_MEM_RESERVED_SIZE]; }; +/** + * HIP IPC Event Handle Size + */ +#define HIP_IPC_EVENT_RESERVED_SIZE 32 +class ihipIpcEventHandle_t { + public: +#if USE_IPC + char shmem_name[HIP_IPC_HANDLE_SIZE]; +#endif +}; struct ihipModule_t { std::string fileName; @@ -686,6 +696,14 @@ enum ihipEventType_t { hipEventTypeStopCommand, }; +#define IPC_SIGNALS_PER_EVENT 32 +typedef struct ihipIpcEventShmem_s { + std::atomic owners; + std::atomic read_index; + std::atomic write_index; + std::atomic signal[IPC_SIGNALS_PER_EVENT]; +} ihipIpcEventShmem_t; + struct ihipEventData_t { ihipEventData_t() { @@ -693,18 +711,24 @@ struct ihipEventData_t { _stream = NULL; _timestamp = 0; _type = hipEventTypeIndependent; + _ipc_name = ""; + _ipc_fd = 0; + _ipc_shmem = NULL; }; - void marker(const hc::completion_future& marker) { _marker = marker; }; + void marker(const hc::completion_future& marker) { _marker = marker; } hc::completion_future& marker() { return _marker; } - uint64_t timestamp() const { return _timestamp; }; - ihipEventType_t type() const { return _type; }; + uint64_t timestamp() const { return _timestamp; } + ihipEventType_t type() const { return _type; } ihipEventType_t _type; hipEventStatus_t _state; hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual // stream when recorded uint64_t _timestamp; // store timestamp, may be set on host or by marker. + std::string _ipc_name; + int _ipc_fd; + ihipIpcEventShmem_t *_ipc_shmem; private: hc::completion_future _marker; }; @@ -716,7 +740,7 @@ template class ihipEventCriticalBase_t : LockedBase { public: explicit ihipEventCriticalBase_t(const ihipEvent_t* parentEvent) : _parent(parentEvent) {} - ~ihipEventCriticalBase_t(){}; + ~ihipEventCriticalBase_t() {} // Keep data in structure so it can be easily copied into snapshots // (used to reduce lock contention and preserve correct lock order) diff --git a/src/hip_stream.cpp b/src/hip_stream.cpp index 63551d1204..5b56b71cd8 100644 --- a/src/hip_stream.cpp +++ b/src/hip_stream.cpp @@ -63,11 +63,11 @@ hipError_t ihipStreamCreate(TlsData *tls, hipStream_t* stream, unsigned int flag // TODO - se try-catch loop to detect memory exception? // - // Note this is an execute_any_order queue, + // Note this is an execute_any_order queue, // CUDA stream behavior is that all kernels submitted will automatically - // wait for prev to complete, this behaviour will be mainatined by - // hipModuleLaunchKernel. execute_any_order will help - // hipExtModuleLaunchKernel , which uses a special flag + // wait for prev to complete, this behaviour will be mainatined by + // hipModuleLaunchKernel. execute_any_order will help + // hipExtModuleLaunchKernel , which uses a special flag { // Obtain mutex access to the device critical data, release by destructor @@ -130,18 +130,19 @@ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPrio hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_SPECIAL_API(hipStreamWaitEvent, TRACE_SYNC, stream, event, flags); - hipError_t e = hipSuccess; + if (!event) return ihipLogStatus(hipErrorInvalidHandle); - if (event == nullptr) { - e = hipErrorInvalidHandle; - - } else { - auto ecd = event->locked_copyCrit(); + auto ecd = event->locked_copyCrit(); + if (event->_flags & hipEventInterprocess) { + // this is an IPC event + if (ecd._ipc_shmem->read_index >= 0) { + // we have at least one recorded event, so proceed + stream->locked_streamWaitEvent(ecd); + } + } + else { if ((ecd._state != hipEventStatusUnitialized) && (ecd._state != hipEventStatusCreated)) { if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) { - // conservative wait on host for the specified event to complete: - // return _stream->locked_eventWaitComplete(this, waitMode); - // ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); } else { @@ -150,9 +151,9 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int stream->locked_streamWaitEvent(ecd); } } - } // else event not recorded, return immediately and don't create marker. + } - return ihipLogStatus(e); + return ihipLogStatus(hipSuccess); }; diff --git a/src/trace_helper.h b/src/trace_helper.h index 84f218a438..202a302f70 100644 --- a/src/trace_helper.h +++ b/src/trace_helper.h @@ -71,6 +71,11 @@ inline std::string ToString(hipEvent_t v) { ss << v; return ss.str(); }; +// hipIpcEventHandle_t specialization. TODO +template <> +inline std::string ToString(hipIpcEventHandle_t v) { + return std::string{}; +}; // hipStream_t template <> inline std::string ToString(hipStream_t v) { diff --git a/tests/src/runtimeApi/event/hipEventIpc.cpp b/tests/src/runtimeApi/event/hipEventIpc.cpp new file mode 100644 index 0000000000..b62e0a16aa --- /dev/null +++ b/tests/src/runtimeApi/event/hipEventIpc.cpp @@ -0,0 +1,112 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Test hipEventRecord serialization behavior. +// Through manual inspection of the reported timestamps, can determine if recording a NULL event +// forces synchronization : set + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t --iterations 10 + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "test_common.h" + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock; + if (blocks > 1024) blocks = 1024; + if (blocks == 0) blocks = 1; + + printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N, + ((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations); + printf("iterations=%d\n", iterations); + + size_t Nbytes = N * sizeof(float); + + float *A_h, *B_h, *C_h; + float *A_d, *B_d, *C_d; + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); + + hipEvent_t start, stop; + + // NULL stream check: + HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess)); + HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess)); + + + HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + + + for (int i = 0; i < iterations; i++) { + //--- START TIMED REGION + long long hostStart = HipTest::get_time(); + // Record the start event + HIPCHECK(hipEventRecord(start, NULL)); + + hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, + static_cast(A_d), static_cast(B_d), C_d, N); + + + HIPCHECK(hipEventRecord(stop, NULL)); + HIPCHECK(hipEventSynchronize(stop)); + HIPCHECK(hipEventQuery(stop)); + long long hostStop = HipTest::get_time(); + //--- STOP TIMED REGION + + + float eventMs = 1.0f; + // should fail + HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop)); + float hostMs = HipTest::elapsed_time(hostStart, hostStop); + + printf("host_time (gettimeofday) =%6.3fms\n", hostMs); + printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); + printf("\n"); + + } + + hipIpcEventHandle_t ipc_handle; + HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start)); + + hipEvent_t ipc_event; + HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle)); + + HIPCHECK(hipEventSynchronize(ipc_event)); + + HIPCHECK(hipEventDestroy(ipc_event)); + HIPCHECK(hipEventDestroy(start)); + HIPCHECK(hipEventDestroy(stop)); + + HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + + + printf("check:\n"); + + HipTest::checkVectorADD(A_h, B_h, C_h, N, true); + + + passed(); +}