add IPC event support (#1996)

Este commit está contenido en:
Jeff Daily
2020-04-16 22:01:22 -07:00
cometido por GitHub
padre 0ccb7f08a5
commit ef596cd088
Se han modificado 7 ficheros con 486 adiciones y 72 borrados
+5 -12
Ver fichero
@@ -97,8 +97,6 @@ typedef int hipDevice_t;
typedef struct ihipStream_t* hipStream_t;
// TODO: IPC implementation
#define hipIpcMemLazyEnablePeerAccess 0
#define HIP_IPC_HANDLE_SIZE 64
@@ -107,12 +105,9 @@ typedef struct hipIpcMemHandle_st {
char reserved[HIP_IPC_HANDLE_SIZE];
} hipIpcMemHandle_t;
// TODO: IPC event handle currently unsupported
struct ihipIpcEventHandle_t;
typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t;
// END TODO
typedef struct hipIpcEventHandle_st {
char reserved[HIP_IPC_HANDLE_SIZE];
} hipIpcEventHandle_t;
typedef struct ihipModule_t* hipModule_t;
@@ -3154,10 +3149,8 @@ hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned
hipError_t hipIpcCloseMemHandle(void* devPtr);
// hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr);
// hipError_t hipIpcCloseMemHandle(void *devPtr);
// // hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle);
// hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags);
hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event);
hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle);
/**
+273 -37
Ver fichero
@@ -24,6 +24,74 @@ THE SOFTWARE.
#include "hip_hcc_internal.h"
#include "trace_helper.h"
#include <errno.h> // errno, ENOENT
#include <fcntl.h> // O_RDWR, O_CREATE
#include <sys/mman.h> // shm_open, shm_unlink, mmap, munmap, PROT_READ, PROT_WRITE, MAP_SHARED, MAP_FAILED
#include <unistd.h> // ftruncate, close
namespace {
inline
const char* hsa_to_string(hsa_status_t err) noexcept
{
const char* r{};
if (hsa_status_string(err, &r) == HSA_STATUS_SUCCESS) return r;
return "Unknown.";
}
template<std::size_t m, std::size_t n>
inline
void throwing_result_check(hsa_status_t res, const char (&file)[m],
const char (&function)[n], int line) {
if (res == HSA_STATUS_SUCCESS) return;
throw std::runtime_error{"Failed in file " + (file +
(", in function \"" + (function +
("\", on line " + std::to_string(line))))) +
", with error: " + hsa_to_string(res)};
}
template<std::size_t m, std::size_t n>
inline
void throwing_retval_check(int good, int retval, const char (&file)[m],
const char (&function)[n], int line) {
if (retval == good) return;
throw std::runtime_error{"Failed in file " + (file +
(", in function \"" + (function +
("\", on line " + std::to_string(line))))) +
", with error: " + strerror(retval)};
}
template<std::size_t m, std::size_t n, std::size_t o>
inline
void throwing_msg_check(bool bad, const char (&msg)[o],
const char (&file)[m],
const char (&function)[n], int line) {
if (!bad) return;
throw std::runtime_error{"Failed in file " + (file +
(", in function \"" + (function +
("\", on line " + std::to_string(line))))) +
", with error: " + msg};
}
template<std::size_t m, std::size_t n>
inline
void throwing_errno_check(bool bad, const char (&file)[m],
const char (&function)[n], int line) {
if (!bad) return;
throw std::runtime_error{"Failed in file " + (file +
(", in function \"" + (function +
("\", on line " + std::to_string(line))))) +
", with error: " + strerror(errno)};
}
} // Unnamed namespace.
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
// Events
@@ -49,6 +117,43 @@ void ihipEvent_t::attachToCompletionFuture(const hc::completion_future* cf, hipS
}
static void createIpcEventShmemIfNeeded(ihipEventData_t &ecd) {
if (!ecd._ipc_name.empty()) return;
// create random shmem name
char name_template[] = "/tmp/eventXXXXXX";
int temp_fd = mkstemp(name_template);
throwing_errno_check(-1 == temp_fd, __FILE__, __func__, __LINE__);
// copy shmem name into event data, reformat to use a single slash
ecd._ipc_name = name_template;
ecd._ipc_name.replace(0, 5, "/hip_");
// open shmem
ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR | O_CREAT, 0777);
throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__);
// size it
throwing_retval_check(0, ftruncate(ecd._ipc_fd, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__);
// mmap it
ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0);
throwing_errno_check(NULL == ecd._ipc_shmem, __FILE__, __func__, __LINE__);
// initialize shared state
ecd._ipc_shmem->owners = 1;
ecd._ipc_shmem->read_index = -1;
ecd._ipc_shmem->write_index = 0;
for (int i=0; i < IPC_SIGNALS_PER_EVENT; i++) {
ecd._ipc_shmem->signal[i] = 0;
}
// remove temp file
throwing_errno_check(-1 == close(temp_fd), __FILE__, __func__, __LINE__);
throwing_errno_check(-1 == unlink(name_template), __FILE__, __func__, __LINE__);
}
static std::pair<hipEventStatus_t, uint64_t> refreshEventStatus(ihipEventData_t &ecd) {
if (ecd._state == hipEventStatusRecording && ecd.marker().is_ready()) {
if ((ecd._type == hipEventTypeIndependent) ||
@@ -75,9 +180,9 @@ static std::pair<hipEventStatus_t, uint64_t> refreshEventStatus(ihipEventData_t
hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) {
hipError_t e = hipSuccess;
// TODO-IPC - support hipEventInterprocess.
unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming |
hipEventReleaseToDevice | hipEventReleaseToSystem;
hipEventReleaseToDevice | hipEventReleaseToSystem |
hipEventInterprocess;
const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem);
const bool illegalFlags =
@@ -105,29 +210,66 @@ hipError_t hipEventCreate(hipEvent_t* event) {
return ihipLogStatus(ihipEventCreate(event, 0));
}
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) {
HIP_INIT_SPECIAL_API(hipEventRecord, TRACE_SYNC, event, stream);
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
stream = ihipSyncAndResolveStream(stream);
LockedAccessor_EventCrit_t eCrit(event->criticalData());
if (eCrit->_eventData._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle);
auto &ecd{eCrit->_eventData};
if (ecd._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle);
if (HIP_SYNC_NULL_STREAM && stream->isDefaultStream()) {
// TODO-HIP_SYNC_NULL_STREAM : can remove this code when HIP_SYNC_NULL_STREAM = 0
// If default stream , then wait on all queues.
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
ctx->locked_syncDefaultStream(true, true);
eCrit->_eventData.marker(hc::completion_future()); // reset event
eCrit->_eventData._stream = stream;
eCrit->_eventData._timestamp = hc::get_system_ticks();
eCrit->_eventData._state = hipEventStatusComplete;
ecd.marker(hc::completion_future()); // reset event
ecd._stream = stream;
ecd._timestamp = hc::get_system_ticks();
ecd._state = hipEventStatusComplete;
// TODO handle IPC case?
}
else {
// Record the event in the stream:
eCrit->_eventData.marker(stream->locked_recordEvent(event));
eCrit->_eventData._stream = stream;
eCrit->_eventData._timestamp = 0;
eCrit->_eventData._state = hipEventStatusRecording;
ecd.marker(stream->locked_recordEvent(event));
ecd._stream = stream;
ecd._timestamp = 0;
ecd._state = hipEventStatusRecording;
if (event->_flags & hipEventInterprocess) {
createIpcEventShmemIfNeeded(ecd);
int write_index = ecd._ipc_shmem->write_index++; // fetch add
int offset = write_index % IPC_SIGNALS_PER_EVENT;
// While event still valid and still locked, spin.
while (ecd._ipc_shmem->signal[offset] != 0) {
// TODO backoff
}
// Lock signal.
ecd._ipc_shmem->signal[offset] = 1;
// forward signal state from local signal to IPC signal via host callback
// create callback that can be passed to hsa_amd_signal_async_handler
// this function decrements the IPC signal by 1 to indicate completion
std::atomic<int> *signal = &ecd._ipc_shmem->signal[offset];
auto t{new std::function<void()>{[=]() {
signal->store(0);
}}};
// register above callback with HSA runtime to be called when local signal
// is decremented from 1 to 0 by CP
auto local_signal = *reinterpret_cast<hsa_signal_t*>(eCrit->_eventData.marker().get_native_handle());
hsa_amd_signal_async_handler(local_signal, HSA_SIGNAL_CONDITION_LT, 1,
[](hsa_signal_value_t x, void* p) {
(*static_cast<decltype(t)>(p))();
delete static_cast<decltype(t)>(p);
return false;
}, t);
// Update read index to indicate new signal.
int expected = write_index-1;
while (!ecd._ipc_shmem->read_index.compare_exchange_weak(expected, write_index)) {
throwing_msg_check(
expected >= write_index,
"IPC event record update read index failure",
__FILE__, __func__, __LINE__);
expected = write_index-1;
}
}
}
return ihipLogStatus(hipSuccess);
}
@@ -137,8 +279,18 @@ hipError_t hipEventDestroy(hipEvent_t event) {
HIP_INIT_API(hipEventDestroy, event);
if (event) {
{
LockedAccessor_EventCrit_t crit(event->criticalData());
auto &ecd{crit->_eventData};
if (ecd._ipc_shmem) {
int owners = --ecd._ipc_shmem->owners;
throwing_errno_check(-1 == munmap(ecd._ipc_shmem, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__);
throwing_errno_check(-1 == close(ecd._ipc_fd), __FILE__, __func__, __LINE__);
if (0 == owners)
throwing_errno_check(-1 == shm_unlink(ecd._ipc_name.c_str()), __FILE__, __func__, __LINE__);
}
}
delete event;
return ihipLogStatus(hipSuccess);
} else {
return ihipLogStatus(hipErrorInvalidHandle);
@@ -148,31 +300,44 @@ hipError_t hipEventDestroy(hipEvent_t event) {
hipError_t hipEventSynchronize(hipEvent_t event) {
HIP_INIT_SPECIAL_API(hipEventSynchronize, TRACE_SYNC, event);
if (event){
if (!(event->_flags & hipEventReleaseToSystem)) {
tprintf(DB_WARN,
"hipEventSynchronize on event without system-scope fence ; consider creating with "
"hipEventReleaseToSystem\n");
}
auto ecd = event->locked_copyCrit();
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
if (ecd._state == hipEventStatusUnitialized) {
return ihipLogStatus(hipErrorInvalidHandle);
} else if (ecd._state == hipEventStatusCreated) {
// Created but not actually recorded on any device:
return ihipLogStatus(hipSuccess);
} else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) {
auto* ctx = ihipGetTlsDefaultCtx();
// TODO-HIP_SYNC_NULL_STREAM - can remove this code
ctx->locked_syncDefaultStream(true, true);
return ihipLogStatus(hipSuccess);
} else {
ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
: hc::hcWaitModeActive);
return ihipLogStatus(hipSuccess);
if (!(event->_flags & hipEventReleaseToSystem)) {
tprintf(DB_WARN,
"hipEventSynchronize on event without system-scope fence ; consider creating with "
"hipEventReleaseToSystem\n");
}
auto ecd = event->locked_copyCrit();
if (event->_flags & hipEventInterprocess) {
// this is an IPC event
int previous_read_index = ecd._ipc_shmem->read_index;
if (previous_read_index >= 0) {
// we have at least one recorded event, so proceed
int offset = previous_read_index % IPC_SIGNALS_PER_EVENT;
// While event still valid and still locked, spin.
while (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) {
// TODO backoff
}
}
} else {
return ihipLogStatus(hipSuccess);
}
if (ecd._state == hipEventStatusUnitialized) {
return ihipLogStatus(hipErrorInvalidHandle);
} else if (ecd._state == hipEventStatusCreated) {
// Created but not actually recorded on any device:
return ihipLogStatus(hipSuccess);
} else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) {
auto* ctx = ihipGetTlsDefaultCtx();
// TODO-HIP_SYNC_NULL_STREAM - can remove this code
ctx->locked_syncDefaultStream(true, true);
return ihipLogStatus(hipSuccess);
} else {
ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
: hc::hcWaitModeActive);
return ihipLogStatus(hipSuccess);
}
}
@@ -239,9 +404,80 @@ hipError_t hipEventQuery(hipEvent_t event) {
auto ecd = event->locked_copyCrit();
if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) {
return ihipLogStatus(hipErrorNotReady);
// this event is either from an ipc handle, or the owner of a local ipc event
if (event->_flags & hipEventInterprocess) {
if (ecd._ipc_shmem) {
int previous_read_index = ecd._ipc_shmem->read_index;
int offset = previous_read_index % IPC_SIGNALS_PER_EVENT;
if (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) {
return ihipLogStatus(hipErrorNotReady);
}
else {
return ihipLogStatus(hipSuccess);
}
}
}
// normal event
else {
if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) {
return ihipLogStatus(hipErrorNotReady);
}
}
return ihipLogStatus(hipSuccess);
}
hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event)
{
HIP_INIT_API(hipIpcGetEventHandle, handle, event);
#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2
if (!handle) return ihipLogStatus(hipErrorInvalidHandle);
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
if (!(event->_flags & hipEventInterprocess)) return ihipLogStatus(hipErrorInvalidHandle);
if (!(event->_flags & hipEventDisableTiming)) return ihipLogStatus(hipErrorInvalidHandle);
LockedAccessor_EventCrit_t crit(event->criticalData());
auto &ecd{crit->_eventData};
createIpcEventShmemIfNeeded(ecd);
// copy name into handle
ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)handle;
memset(iHandle->shmem_name, 0, HIP_IPC_HANDLE_SIZE);
ecd._ipc_name.copy(iHandle->shmem_name, std::string::npos);
return ihipLogStatus(hipSuccess);
#else
return ihipLogStatus(hipErrorNotSupported);
#endif
}
hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle)
{
HIP_INIT_API(hipIpcOpenEventHandle, event, &handle);
#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
// create a new event with timing disabled, per spec
auto hip_status = ihipEventCreate(event, hipEventDisableTiming | hipEventInterprocess);
if (hip_status != hipSuccess) return ihipLogStatus(hip_status);
LockedAccessor_EventCrit_t crit((*event)->criticalData());
auto &ecd{crit->_eventData};
ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)&handle;
ecd._ipc_name = iHandle->shmem_name;
// open shmem
ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR, 0777);
throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__);
// mmap it
ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0);
throwing_errno_check(NULL == ecd._ipc_shmem, __FILE__, __func__, __LINE__);
// update shared state
ecd._ipc_shmem->owners += 1;
return ihipLogStatus(hipSuccess);
#else
return ihipLogStatus(hipErrorNotSupported);
#endif
}
+44 -1
Ver fichero
@@ -332,12 +332,55 @@ void ihipStream_t::locked_wait() {
locked_wait(waited);
};
typedef struct {
int previous_read_index;
ihipIpcEventShmem_t *shmem;
hsa_signal_t signal;
} callback_data_t;
static void WaitThenDecrementSignal(callback_data_t *data) {
int offset = data->previous_read_index % IPC_SIGNALS_PER_EVENT;
// While event valid and locked, spin.
while (data->shmem->read_index < data->previous_read_index+IPC_SIGNALS_PER_EVENT && data->shmem->signal[offset] != 0) {
}
hsa_signal_store_relaxed(data->signal, 0);
delete data;
}
// Causes current stream to wait for specified event to complete:
// Note this does not provide any kind of host serialization.
void ihipStream_t::locked_streamWaitEvent(ihipEventData_t& ecd) {
LockedAccessor_StreamCrit_t crit(_criticalData);
crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope);
// if event is an IPC event, it doesn't have a marker
// we use a host callback to block stream with a signal wait
if (ecd._ipc_shmem) {
// create first marker
auto cf = crit->_av.create_marker(hc::no_scope);
// get its signal
auto signal = *reinterpret_cast<hsa_signal_t*>(cf.get_native_handle());
// increment its signal value
hsa_signal_add_relaxed(signal, 1);
// create callback that can be passed to hsa_amd_signal_async_handler
// this function will host wait on IPC signal, then sets first packet's signal to 0 to indicate completion
auto t{new callback_data_t{ecd._ipc_shmem->read_index, ecd._ipc_shmem, signal}};
// register above callback with HSA runtime to be called when first packet's signal
// is decremented from 2 to 1 by CP (or it is already at 1)
// the HSA async handler is single threaded, so we can't block, therefore use a detached thread
hsa_amd_signal_async_handler(signal, HSA_SIGNAL_CONDITION_EQ, 1,
[](hsa_signal_value_t x, void* p) {
std::thread(WaitThenDecrementSignal, static_cast<decltype(t)>(p)).detach();
return false;
}, t);
// create additional marker that blocks on the first one
crit->_av.create_blocking_marker(cf, hc::accelerator_scope);
}
else {
crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope);
}
}
+31 -7
Ver fichero
@@ -389,18 +389,28 @@ const hipStream_t hipStreamNull = 0x0;
/**
* HIP IPC Handle Size
* HIP IPC Mem Handle Size
*/
#define HIP_IPC_RESERVED_SIZE 24
#define HIP_IPC_MEM_RESERVED_SIZE 24
class ihipIpcMemHandle_t {
public:
#if USE_IPC
hsa_amd_ipc_memory_t ipc_handle; ///< ipc memory handle on ROCr
#endif
size_t psize;
char reserved[HIP_IPC_RESERVED_SIZE];
char reserved[HIP_IPC_MEM_RESERVED_SIZE];
};
/**
* HIP IPC Event Handle Size
*/
#define HIP_IPC_EVENT_RESERVED_SIZE 32
class ihipIpcEventHandle_t {
public:
#if USE_IPC
char shmem_name[HIP_IPC_HANDLE_SIZE];
#endif
};
struct ihipModule_t {
std::string fileName;
@@ -686,6 +696,14 @@ enum ihipEventType_t {
hipEventTypeStopCommand,
};
#define IPC_SIGNALS_PER_EVENT 32
typedef struct ihipIpcEventShmem_s {
std::atomic<int> owners;
std::atomic<int> read_index;
std::atomic<int> write_index;
std::atomic<int> signal[IPC_SIGNALS_PER_EVENT];
} ihipIpcEventShmem_t;
struct ihipEventData_t {
ihipEventData_t() {
@@ -693,18 +711,24 @@ struct ihipEventData_t {
_stream = NULL;
_timestamp = 0;
_type = hipEventTypeIndependent;
_ipc_name = "";
_ipc_fd = 0;
_ipc_shmem = NULL;
};
void marker(const hc::completion_future& marker) { _marker = marker; };
void marker(const hc::completion_future& marker) { _marker = marker; }
hc::completion_future& marker() { return _marker; }
uint64_t timestamp() const { return _timestamp; };
ihipEventType_t type() const { return _type; };
uint64_t timestamp() const { return _timestamp; }
ihipEventType_t type() const { return _type; }
ihipEventType_t _type;
hipEventStatus_t _state;
hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual
// stream when recorded
uint64_t _timestamp; // store timestamp, may be set on host or by marker.
std::string _ipc_name;
int _ipc_fd;
ihipIpcEventShmem_t *_ipc_shmem;
private:
hc::completion_future _marker;
};
@@ -716,7 +740,7 @@ template <typename MUTEX_TYPE>
class ihipEventCriticalBase_t : LockedBase<MUTEX_TYPE> {
public:
explicit ihipEventCriticalBase_t(const ihipEvent_t* parentEvent) : _parent(parentEvent) {}
~ihipEventCriticalBase_t(){};
~ihipEventCriticalBase_t() {}
// Keep data in structure so it can be easily copied into snapshots
// (used to reduce lock contention and preserve correct lock order)
+16 -15
Ver fichero
@@ -63,11 +63,11 @@ hipError_t ihipStreamCreate(TlsData *tls, hipStream_t* stream, unsigned int flag
// TODO - se try-catch loop to detect memory exception?
//
// Note this is an execute_any_order queue,
// Note this is an execute_any_order queue,
// CUDA stream behavior is that all kernels submitted will automatically
// wait for prev to complete, this behaviour will be mainatined by
// hipModuleLaunchKernel. execute_any_order will help
// hipExtModuleLaunchKernel , which uses a special flag
// wait for prev to complete, this behaviour will be mainatined by
// hipModuleLaunchKernel. execute_any_order will help
// hipExtModuleLaunchKernel , which uses a special flag
{
// Obtain mutex access to the device critical data, release by destructor
@@ -130,18 +130,19 @@ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPrio
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) {
HIP_INIT_SPECIAL_API(hipStreamWaitEvent, TRACE_SYNC, stream, event, flags);
hipError_t e = hipSuccess;
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
if (event == nullptr) {
e = hipErrorInvalidHandle;
} else {
auto ecd = event->locked_copyCrit();
auto ecd = event->locked_copyCrit();
if (event->_flags & hipEventInterprocess) {
// this is an IPC event
if (ecd._ipc_shmem->read_index >= 0) {
// we have at least one recorded event, so proceed
stream->locked_streamWaitEvent(ecd);
}
}
else {
if ((ecd._state != hipEventStatusUnitialized) && (ecd._state != hipEventStatusCreated)) {
if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) {
// conservative wait on host for the specified event to complete:
// return _stream->locked_eventWaitComplete(this, waitMode);
//
ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
: hc::hcWaitModeActive);
} else {
@@ -150,9 +151,9 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
stream->locked_streamWaitEvent(ecd);
}
}
} // else event not recorded, return immediately and don't create marker.
}
return ihipLogStatus(e);
return ihipLogStatus(hipSuccess);
};
+5
Ver fichero
@@ -71,6 +71,11 @@ inline std::string ToString(hipEvent_t v) {
ss << v;
return ss.str();
};
// hipIpcEventHandle_t specialization. TODO
template <>
inline std::string ToString(hipIpcEventHandle_t v) {
return std::string{};
};
// hipStream_t
template <>
inline std::string ToString(hipStream_t v) {
+112
Ver fichero
@@ -0,0 +1,112 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Test hipEventRecord serialization behavior.
// Through manual inspection of the reported timestamps, can determine if recording a NULL event
// forces synchronization : set
/* HIT_START
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
* TEST: %t --iterations 10
* HIT_END
*/
#include "hip/hip_runtime.h"
#include "test_common.h"
int main(int argc, char* argv[]) {
HipTest::parseStandardArguments(argc, argv, true);
unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
if (blocks > 1024) blocks = 1024;
if (blocks == 0) blocks = 1;
printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N,
((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations);
printf("iterations=%d\n", iterations);
size_t Nbytes = N * sizeof(float);
float *A_h, *B_h, *C_h;
float *A_d, *B_d, *C_d;
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
hipEvent_t start, stop;
// NULL stream check:
HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess));
HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess));
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
for (int i = 0; i < iterations; i++) {
//--- START TIMED REGION
long long hostStart = HipTest::get_time();
// Record the start event
HIPCHECK(hipEventRecord(start, NULL));
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
static_cast<const float*>(A_d), static_cast<const float*>(B_d), C_d, N);
HIPCHECK(hipEventRecord(stop, NULL));
HIPCHECK(hipEventSynchronize(stop));
HIPCHECK(hipEventQuery(stop));
long long hostStop = HipTest::get_time();
//--- STOP TIMED REGION
float eventMs = 1.0f;
// should fail
HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop));
float hostMs = HipTest::elapsed_time(hostStart, hostStop);
printf("host_time (gettimeofday) =%6.3fms\n", hostMs);
printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs);
printf("\n");
}
hipIpcEventHandle_t ipc_handle;
HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start));
hipEvent_t ipc_event;
HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle));
HIPCHECK(hipEventSynchronize(ipc_event));
HIPCHECK(hipEventDestroy(ipc_event));
HIPCHECK(hipEventDestroy(start));
HIPCHECK(hipEventDestroy(stop));
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
printf("check:\n");
HipTest::checkVectorADD(A_h, B_h, C_h, N, true);
passed();
}