add IPC event support (#1996)
Este commit está contenido en:
@@ -97,8 +97,6 @@ typedef int hipDevice_t;
|
||||
|
||||
typedef struct ihipStream_t* hipStream_t;
|
||||
|
||||
// TODO: IPC implementation
|
||||
|
||||
#define hipIpcMemLazyEnablePeerAccess 0
|
||||
|
||||
#define HIP_IPC_HANDLE_SIZE 64
|
||||
@@ -107,12 +105,9 @@ typedef struct hipIpcMemHandle_st {
|
||||
char reserved[HIP_IPC_HANDLE_SIZE];
|
||||
} hipIpcMemHandle_t;
|
||||
|
||||
// TODO: IPC event handle currently unsupported
|
||||
struct ihipIpcEventHandle_t;
|
||||
typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t;
|
||||
|
||||
|
||||
// END TODO
|
||||
typedef struct hipIpcEventHandle_st {
|
||||
char reserved[HIP_IPC_HANDLE_SIZE];
|
||||
} hipIpcEventHandle_t;
|
||||
|
||||
typedef struct ihipModule_t* hipModule_t;
|
||||
|
||||
@@ -3154,10 +3149,8 @@ hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned
|
||||
hipError_t hipIpcCloseMemHandle(void* devPtr);
|
||||
|
||||
|
||||
// hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr);
|
||||
// hipError_t hipIpcCloseMemHandle(void *devPtr);
|
||||
// // hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle);
|
||||
// hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags);
|
||||
hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event);
|
||||
hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle);
|
||||
|
||||
|
||||
/**
|
||||
|
||||
+273
-37
@@ -24,6 +24,74 @@ THE SOFTWARE.
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include <errno.h> // errno, ENOENT
|
||||
#include <fcntl.h> // O_RDWR, O_CREATE
|
||||
#include <sys/mman.h> // shm_open, shm_unlink, mmap, munmap, PROT_READ, PROT_WRITE, MAP_SHARED, MAP_FAILED
|
||||
#include <unistd.h> // ftruncate, close
|
||||
|
||||
namespace {
|
||||
|
||||
inline
|
||||
const char* hsa_to_string(hsa_status_t err) noexcept
|
||||
{
|
||||
const char* r{};
|
||||
|
||||
if (hsa_status_string(err, &r) == HSA_STATUS_SUCCESS) return r;
|
||||
|
||||
return "Unknown.";
|
||||
}
|
||||
|
||||
template<std::size_t m, std::size_t n>
|
||||
inline
|
||||
void throwing_result_check(hsa_status_t res, const char (&file)[m],
|
||||
const char (&function)[n], int line) {
|
||||
if (res == HSA_STATUS_SUCCESS) return;
|
||||
|
||||
throw std::runtime_error{"Failed in file " + (file +
|
||||
(", in function \"" + (function +
|
||||
("\", on line " + std::to_string(line))))) +
|
||||
", with error: " + hsa_to_string(res)};
|
||||
}
|
||||
|
||||
template<std::size_t m, std::size_t n>
|
||||
inline
|
||||
void throwing_retval_check(int good, int retval, const char (&file)[m],
|
||||
const char (&function)[n], int line) {
|
||||
if (retval == good) return;
|
||||
|
||||
throw std::runtime_error{"Failed in file " + (file +
|
||||
(", in function \"" + (function +
|
||||
("\", on line " + std::to_string(line))))) +
|
||||
", with error: " + strerror(retval)};
|
||||
}
|
||||
|
||||
template<std::size_t m, std::size_t n, std::size_t o>
|
||||
inline
|
||||
void throwing_msg_check(bool bad, const char (&msg)[o],
|
||||
const char (&file)[m],
|
||||
const char (&function)[n], int line) {
|
||||
if (!bad) return;
|
||||
|
||||
throw std::runtime_error{"Failed in file " + (file +
|
||||
(", in function \"" + (function +
|
||||
("\", on line " + std::to_string(line))))) +
|
||||
", with error: " + msg};
|
||||
}
|
||||
|
||||
template<std::size_t m, std::size_t n>
|
||||
inline
|
||||
void throwing_errno_check(bool bad, const char (&file)[m],
|
||||
const char (&function)[n], int line) {
|
||||
if (!bad) return;
|
||||
|
||||
throw std::runtime_error{"Failed in file " + (file +
|
||||
(", in function \"" + (function +
|
||||
("\", on line " + std::to_string(line))))) +
|
||||
", with error: " + strerror(errno)};
|
||||
}
|
||||
|
||||
} // Unnamed namespace.
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
// Events
|
||||
@@ -49,6 +117,43 @@ void ihipEvent_t::attachToCompletionFuture(const hc::completion_future* cf, hipS
|
||||
}
|
||||
|
||||
|
||||
static void createIpcEventShmemIfNeeded(ihipEventData_t &ecd) {
|
||||
if (!ecd._ipc_name.empty()) return;
|
||||
|
||||
// create random shmem name
|
||||
char name_template[] = "/tmp/eventXXXXXX";
|
||||
int temp_fd = mkstemp(name_template);
|
||||
throwing_errno_check(-1 == temp_fd, __FILE__, __func__, __LINE__);
|
||||
|
||||
// copy shmem name into event data, reformat to use a single slash
|
||||
ecd._ipc_name = name_template;
|
||||
ecd._ipc_name.replace(0, 5, "/hip_");
|
||||
|
||||
// open shmem
|
||||
ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR | O_CREAT, 0777);
|
||||
throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__);
|
||||
|
||||
// size it
|
||||
throwing_retval_check(0, ftruncate(ecd._ipc_fd, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__);
|
||||
|
||||
// mmap it
|
||||
ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0);
|
||||
throwing_errno_check(NULL == ecd._ipc_shmem, __FILE__, __func__, __LINE__);
|
||||
|
||||
// initialize shared state
|
||||
ecd._ipc_shmem->owners = 1;
|
||||
ecd._ipc_shmem->read_index = -1;
|
||||
ecd._ipc_shmem->write_index = 0;
|
||||
for (int i=0; i < IPC_SIGNALS_PER_EVENT; i++) {
|
||||
ecd._ipc_shmem->signal[i] = 0;
|
||||
}
|
||||
|
||||
// remove temp file
|
||||
throwing_errno_check(-1 == close(temp_fd), __FILE__, __func__, __LINE__);
|
||||
throwing_errno_check(-1 == unlink(name_template), __FILE__, __func__, __LINE__);
|
||||
}
|
||||
|
||||
|
||||
static std::pair<hipEventStatus_t, uint64_t> refreshEventStatus(ihipEventData_t &ecd) {
|
||||
if (ecd._state == hipEventStatusRecording && ecd.marker().is_ready()) {
|
||||
if ((ecd._type == hipEventTypeIndependent) ||
|
||||
@@ -75,9 +180,9 @@ static std::pair<hipEventStatus_t, uint64_t> refreshEventStatus(ihipEventData_t
|
||||
hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) {
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
// TODO-IPC - support hipEventInterprocess.
|
||||
unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming |
|
||||
hipEventReleaseToDevice | hipEventReleaseToSystem;
|
||||
hipEventReleaseToDevice | hipEventReleaseToSystem |
|
||||
hipEventInterprocess;
|
||||
const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem);
|
||||
|
||||
const bool illegalFlags =
|
||||
@@ -105,29 +210,66 @@ hipError_t hipEventCreate(hipEvent_t* event) {
|
||||
return ihipLogStatus(ihipEventCreate(event, 0));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) {
|
||||
HIP_INIT_SPECIAL_API(hipEventRecord, TRACE_SYNC, event, stream);
|
||||
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
LockedAccessor_EventCrit_t eCrit(event->criticalData());
|
||||
if (eCrit->_eventData._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
auto &ecd{eCrit->_eventData};
|
||||
if (ecd._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
if (HIP_SYNC_NULL_STREAM && stream->isDefaultStream()) {
|
||||
// TODO-HIP_SYNC_NULL_STREAM : can remove this code when HIP_SYNC_NULL_STREAM = 0
|
||||
// If default stream , then wait on all queues.
|
||||
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
|
||||
ctx->locked_syncDefaultStream(true, true);
|
||||
eCrit->_eventData.marker(hc::completion_future()); // reset event
|
||||
eCrit->_eventData._stream = stream;
|
||||
eCrit->_eventData._timestamp = hc::get_system_ticks();
|
||||
eCrit->_eventData._state = hipEventStatusComplete;
|
||||
ecd.marker(hc::completion_future()); // reset event
|
||||
ecd._stream = stream;
|
||||
ecd._timestamp = hc::get_system_ticks();
|
||||
ecd._state = hipEventStatusComplete;
|
||||
// TODO handle IPC case?
|
||||
}
|
||||
else {
|
||||
// Record the event in the stream:
|
||||
eCrit->_eventData.marker(stream->locked_recordEvent(event));
|
||||
eCrit->_eventData._stream = stream;
|
||||
eCrit->_eventData._timestamp = 0;
|
||||
eCrit->_eventData._state = hipEventStatusRecording;
|
||||
ecd.marker(stream->locked_recordEvent(event));
|
||||
ecd._stream = stream;
|
||||
ecd._timestamp = 0;
|
||||
ecd._state = hipEventStatusRecording;
|
||||
if (event->_flags & hipEventInterprocess) {
|
||||
createIpcEventShmemIfNeeded(ecd);
|
||||
int write_index = ecd._ipc_shmem->write_index++; // fetch add
|
||||
int offset = write_index % IPC_SIGNALS_PER_EVENT;
|
||||
// While event still valid and still locked, spin.
|
||||
while (ecd._ipc_shmem->signal[offset] != 0) {
|
||||
// TODO backoff
|
||||
}
|
||||
// Lock signal.
|
||||
ecd._ipc_shmem->signal[offset] = 1;
|
||||
// forward signal state from local signal to IPC signal via host callback
|
||||
// create callback that can be passed to hsa_amd_signal_async_handler
|
||||
// this function decrements the IPC signal by 1 to indicate completion
|
||||
std::atomic<int> *signal = &ecd._ipc_shmem->signal[offset];
|
||||
auto t{new std::function<void()>{[=]() {
|
||||
signal->store(0);
|
||||
}}};
|
||||
// register above callback with HSA runtime to be called when local signal
|
||||
// is decremented from 1 to 0 by CP
|
||||
auto local_signal = *reinterpret_cast<hsa_signal_t*>(eCrit->_eventData.marker().get_native_handle());
|
||||
hsa_amd_signal_async_handler(local_signal, HSA_SIGNAL_CONDITION_LT, 1,
|
||||
[](hsa_signal_value_t x, void* p) {
|
||||
(*static_cast<decltype(t)>(p))();
|
||||
delete static_cast<decltype(t)>(p);
|
||||
return false;
|
||||
}, t);
|
||||
// Update read index to indicate new signal.
|
||||
int expected = write_index-1;
|
||||
while (!ecd._ipc_shmem->read_index.compare_exchange_weak(expected, write_index)) {
|
||||
throwing_msg_check(
|
||||
expected >= write_index,
|
||||
"IPC event record update read index failure",
|
||||
__FILE__, __func__, __LINE__);
|
||||
expected = write_index-1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
@@ -137,8 +279,18 @@ hipError_t hipEventDestroy(hipEvent_t event) {
|
||||
HIP_INIT_API(hipEventDestroy, event);
|
||||
|
||||
if (event) {
|
||||
{
|
||||
LockedAccessor_EventCrit_t crit(event->criticalData());
|
||||
auto &ecd{crit->_eventData};
|
||||
if (ecd._ipc_shmem) {
|
||||
int owners = --ecd._ipc_shmem->owners;
|
||||
throwing_errno_check(-1 == munmap(ecd._ipc_shmem, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__);
|
||||
throwing_errno_check(-1 == close(ecd._ipc_fd), __FILE__, __func__, __LINE__);
|
||||
if (0 == owners)
|
||||
throwing_errno_check(-1 == shm_unlink(ecd._ipc_name.c_str()), __FILE__, __func__, __LINE__);
|
||||
}
|
||||
}
|
||||
delete event;
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorInvalidHandle);
|
||||
@@ -148,31 +300,44 @@ hipError_t hipEventDestroy(hipEvent_t event) {
|
||||
hipError_t hipEventSynchronize(hipEvent_t event) {
|
||||
HIP_INIT_SPECIAL_API(hipEventSynchronize, TRACE_SYNC, event);
|
||||
|
||||
if (event){
|
||||
if (!(event->_flags & hipEventReleaseToSystem)) {
|
||||
tprintf(DB_WARN,
|
||||
"hipEventSynchronize on event without system-scope fence ; consider creating with "
|
||||
"hipEventReleaseToSystem\n");
|
||||
}
|
||||
auto ecd = event->locked_copyCrit();
|
||||
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
|
||||
if (ecd._state == hipEventStatusUnitialized) {
|
||||
return ihipLogStatus(hipErrorInvalidHandle);
|
||||
} else if (ecd._state == hipEventStatusCreated) {
|
||||
// Created but not actually recorded on any device:
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) {
|
||||
auto* ctx = ihipGetTlsDefaultCtx();
|
||||
// TODO-HIP_SYNC_NULL_STREAM - can remove this code
|
||||
ctx->locked_syncDefaultStream(true, true);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
|
||||
: hc::hcWaitModeActive);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
if (!(event->_flags & hipEventReleaseToSystem)) {
|
||||
tprintf(DB_WARN,
|
||||
"hipEventSynchronize on event without system-scope fence ; consider creating with "
|
||||
"hipEventReleaseToSystem\n");
|
||||
}
|
||||
|
||||
auto ecd = event->locked_copyCrit();
|
||||
|
||||
if (event->_flags & hipEventInterprocess) {
|
||||
// this is an IPC event
|
||||
int previous_read_index = ecd._ipc_shmem->read_index;
|
||||
if (previous_read_index >= 0) {
|
||||
// we have at least one recorded event, so proceed
|
||||
int offset = previous_read_index % IPC_SIGNALS_PER_EVENT;
|
||||
// While event still valid and still locked, spin.
|
||||
while (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) {
|
||||
// TODO backoff
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
if (ecd._state == hipEventStatusUnitialized) {
|
||||
return ihipLogStatus(hipErrorInvalidHandle);
|
||||
} else if (ecd._state == hipEventStatusCreated) {
|
||||
// Created but not actually recorded on any device:
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) {
|
||||
auto* ctx = ihipGetTlsDefaultCtx();
|
||||
// TODO-HIP_SYNC_NULL_STREAM - can remove this code
|
||||
ctx->locked_syncDefaultStream(true, true);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
|
||||
: hc::hcWaitModeActive);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -239,9 +404,80 @@ hipError_t hipEventQuery(hipEvent_t event) {
|
||||
|
||||
auto ecd = event->locked_copyCrit();
|
||||
|
||||
if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) {
|
||||
return ihipLogStatus(hipErrorNotReady);
|
||||
// this event is either from an ipc handle, or the owner of a local ipc event
|
||||
if (event->_flags & hipEventInterprocess) {
|
||||
if (ecd._ipc_shmem) {
|
||||
int previous_read_index = ecd._ipc_shmem->read_index;
|
||||
int offset = previous_read_index % IPC_SIGNALS_PER_EVENT;
|
||||
if (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) {
|
||||
return ihipLogStatus(hipErrorNotReady);
|
||||
}
|
||||
else {
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
}
|
||||
// normal event
|
||||
else {
|
||||
if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) {
|
||||
return ihipLogStatus(hipErrorNotReady);
|
||||
}
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event)
|
||||
{
|
||||
HIP_INIT_API(hipIpcGetEventHandle, handle, event);
|
||||
|
||||
#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2
|
||||
if (!handle) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
if (!(event->_flags & hipEventInterprocess)) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
if (!(event->_flags & hipEventDisableTiming)) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
|
||||
LockedAccessor_EventCrit_t crit(event->criticalData());
|
||||
|
||||
auto &ecd{crit->_eventData};
|
||||
createIpcEventShmemIfNeeded(ecd);
|
||||
// copy name into handle
|
||||
ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)handle;
|
||||
memset(iHandle->shmem_name, 0, HIP_IPC_HANDLE_SIZE);
|
||||
ecd._ipc_name.copy(iHandle->shmem_name, std::string::npos);
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
#else
|
||||
return ihipLogStatus(hipErrorNotSupported);
|
||||
#endif
|
||||
}
|
||||
|
||||
hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle)
|
||||
{
|
||||
HIP_INIT_API(hipIpcOpenEventHandle, event, &handle);
|
||||
|
||||
#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2
|
||||
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
|
||||
// create a new event with timing disabled, per spec
|
||||
auto hip_status = ihipEventCreate(event, hipEventDisableTiming | hipEventInterprocess);
|
||||
if (hip_status != hipSuccess) return ihipLogStatus(hip_status);
|
||||
|
||||
LockedAccessor_EventCrit_t crit((*event)->criticalData());
|
||||
auto &ecd{crit->_eventData};
|
||||
ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)&handle;
|
||||
ecd._ipc_name = iHandle->shmem_name;
|
||||
// open shmem
|
||||
ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR, 0777);
|
||||
throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__);
|
||||
// mmap it
|
||||
ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0);
|
||||
throwing_errno_check(NULL == ecd._ipc_shmem, __FILE__, __func__, __LINE__);
|
||||
// update shared state
|
||||
ecd._ipc_shmem->owners += 1;
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
#else
|
||||
return ihipLogStatus(hipErrorNotSupported);
|
||||
#endif
|
||||
}
|
||||
|
||||
+44
-1
@@ -332,12 +332,55 @@ void ihipStream_t::locked_wait() {
|
||||
locked_wait(waited);
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
int previous_read_index;
|
||||
ihipIpcEventShmem_t *shmem;
|
||||
hsa_signal_t signal;
|
||||
} callback_data_t;
|
||||
|
||||
static void WaitThenDecrementSignal(callback_data_t *data) {
|
||||
int offset = data->previous_read_index % IPC_SIGNALS_PER_EVENT;
|
||||
// While event valid and locked, spin.
|
||||
while (data->shmem->read_index < data->previous_read_index+IPC_SIGNALS_PER_EVENT && data->shmem->signal[offset] != 0) {
|
||||
}
|
||||
hsa_signal_store_relaxed(data->signal, 0);
|
||||
delete data;
|
||||
}
|
||||
|
||||
// Causes current stream to wait for specified event to complete:
|
||||
// Note this does not provide any kind of host serialization.
|
||||
void ihipStream_t::locked_streamWaitEvent(ihipEventData_t& ecd) {
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope);
|
||||
// if event is an IPC event, it doesn't have a marker
|
||||
// we use a host callback to block stream with a signal wait
|
||||
if (ecd._ipc_shmem) {
|
||||
// create first marker
|
||||
auto cf = crit->_av.create_marker(hc::no_scope);
|
||||
// get its signal
|
||||
auto signal = *reinterpret_cast<hsa_signal_t*>(cf.get_native_handle());
|
||||
// increment its signal value
|
||||
hsa_signal_add_relaxed(signal, 1);
|
||||
|
||||
// create callback that can be passed to hsa_amd_signal_async_handler
|
||||
// this function will host wait on IPC signal, then sets first packet's signal to 0 to indicate completion
|
||||
auto t{new callback_data_t{ecd._ipc_shmem->read_index, ecd._ipc_shmem, signal}};
|
||||
|
||||
// register above callback with HSA runtime to be called when first packet's signal
|
||||
// is decremented from 2 to 1 by CP (or it is already at 1)
|
||||
// the HSA async handler is single threaded, so we can't block, therefore use a detached thread
|
||||
hsa_amd_signal_async_handler(signal, HSA_SIGNAL_CONDITION_EQ, 1,
|
||||
[](hsa_signal_value_t x, void* p) {
|
||||
std::thread(WaitThenDecrementSignal, static_cast<decltype(t)>(p)).detach();
|
||||
return false;
|
||||
}, t);
|
||||
|
||||
// create additional marker that blocks on the first one
|
||||
crit->_av.create_blocking_marker(cf, hc::accelerator_scope);
|
||||
}
|
||||
else {
|
||||
crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -389,18 +389,28 @@ const hipStream_t hipStreamNull = 0x0;
|
||||
|
||||
|
||||
/**
|
||||
* HIP IPC Handle Size
|
||||
* HIP IPC Mem Handle Size
|
||||
*/
|
||||
#define HIP_IPC_RESERVED_SIZE 24
|
||||
#define HIP_IPC_MEM_RESERVED_SIZE 24
|
||||
class ihipIpcMemHandle_t {
|
||||
public:
|
||||
#if USE_IPC
|
||||
hsa_amd_ipc_memory_t ipc_handle; ///< ipc memory handle on ROCr
|
||||
#endif
|
||||
size_t psize;
|
||||
char reserved[HIP_IPC_RESERVED_SIZE];
|
||||
char reserved[HIP_IPC_MEM_RESERVED_SIZE];
|
||||
};
|
||||
|
||||
/**
|
||||
* HIP IPC Event Handle Size
|
||||
*/
|
||||
#define HIP_IPC_EVENT_RESERVED_SIZE 32
|
||||
class ihipIpcEventHandle_t {
|
||||
public:
|
||||
#if USE_IPC
|
||||
char shmem_name[HIP_IPC_HANDLE_SIZE];
|
||||
#endif
|
||||
};
|
||||
|
||||
struct ihipModule_t {
|
||||
std::string fileName;
|
||||
@@ -686,6 +696,14 @@ enum ihipEventType_t {
|
||||
hipEventTypeStopCommand,
|
||||
};
|
||||
|
||||
#define IPC_SIGNALS_PER_EVENT 32
|
||||
typedef struct ihipIpcEventShmem_s {
|
||||
std::atomic<int> owners;
|
||||
std::atomic<int> read_index;
|
||||
std::atomic<int> write_index;
|
||||
std::atomic<int> signal[IPC_SIGNALS_PER_EVENT];
|
||||
} ihipIpcEventShmem_t;
|
||||
|
||||
|
||||
struct ihipEventData_t {
|
||||
ihipEventData_t() {
|
||||
@@ -693,18 +711,24 @@ struct ihipEventData_t {
|
||||
_stream = NULL;
|
||||
_timestamp = 0;
|
||||
_type = hipEventTypeIndependent;
|
||||
_ipc_name = "";
|
||||
_ipc_fd = 0;
|
||||
_ipc_shmem = NULL;
|
||||
};
|
||||
|
||||
void marker(const hc::completion_future& marker) { _marker = marker; };
|
||||
void marker(const hc::completion_future& marker) { _marker = marker; }
|
||||
hc::completion_future& marker() { return _marker; }
|
||||
uint64_t timestamp() const { return _timestamp; };
|
||||
ihipEventType_t type() const { return _type; };
|
||||
uint64_t timestamp() const { return _timestamp; }
|
||||
ihipEventType_t type() const { return _type; }
|
||||
|
||||
ihipEventType_t _type;
|
||||
hipEventStatus_t _state;
|
||||
hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual
|
||||
// stream when recorded
|
||||
uint64_t _timestamp; // store timestamp, may be set on host or by marker.
|
||||
std::string _ipc_name;
|
||||
int _ipc_fd;
|
||||
ihipIpcEventShmem_t *_ipc_shmem;
|
||||
private:
|
||||
hc::completion_future _marker;
|
||||
};
|
||||
@@ -716,7 +740,7 @@ template <typename MUTEX_TYPE>
|
||||
class ihipEventCriticalBase_t : LockedBase<MUTEX_TYPE> {
|
||||
public:
|
||||
explicit ihipEventCriticalBase_t(const ihipEvent_t* parentEvent) : _parent(parentEvent) {}
|
||||
~ihipEventCriticalBase_t(){};
|
||||
~ihipEventCriticalBase_t() {}
|
||||
|
||||
// Keep data in structure so it can be easily copied into snapshots
|
||||
// (used to reduce lock contention and preserve correct lock order)
|
||||
|
||||
+16
-15
@@ -63,11 +63,11 @@ hipError_t ihipStreamCreate(TlsData *tls, hipStream_t* stream, unsigned int flag
|
||||
|
||||
// TODO - se try-catch loop to detect memory exception?
|
||||
//
|
||||
// Note this is an execute_any_order queue,
|
||||
// Note this is an execute_any_order queue,
|
||||
// CUDA stream behavior is that all kernels submitted will automatically
|
||||
// wait for prev to complete, this behaviour will be mainatined by
|
||||
// hipModuleLaunchKernel. execute_any_order will help
|
||||
// hipExtModuleLaunchKernel , which uses a special flag
|
||||
// wait for prev to complete, this behaviour will be mainatined by
|
||||
// hipModuleLaunchKernel. execute_any_order will help
|
||||
// hipExtModuleLaunchKernel , which uses a special flag
|
||||
|
||||
{
|
||||
// Obtain mutex access to the device critical data, release by destructor
|
||||
@@ -130,18 +130,19 @@ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPrio
|
||||
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) {
|
||||
HIP_INIT_SPECIAL_API(hipStreamWaitEvent, TRACE_SYNC, stream, event, flags);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
|
||||
if (event == nullptr) {
|
||||
e = hipErrorInvalidHandle;
|
||||
|
||||
} else {
|
||||
auto ecd = event->locked_copyCrit();
|
||||
auto ecd = event->locked_copyCrit();
|
||||
if (event->_flags & hipEventInterprocess) {
|
||||
// this is an IPC event
|
||||
if (ecd._ipc_shmem->read_index >= 0) {
|
||||
// we have at least one recorded event, so proceed
|
||||
stream->locked_streamWaitEvent(ecd);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if ((ecd._state != hipEventStatusUnitialized) && (ecd._state != hipEventStatusCreated)) {
|
||||
if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) {
|
||||
// conservative wait on host for the specified event to complete:
|
||||
// return _stream->locked_eventWaitComplete(this, waitMode);
|
||||
//
|
||||
ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
|
||||
: hc::hcWaitModeActive);
|
||||
} else {
|
||||
@@ -150,9 +151,9 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
|
||||
stream->locked_streamWaitEvent(ecd);
|
||||
}
|
||||
}
|
||||
} // else event not recorded, return immediately and don't create marker.
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -71,6 +71,11 @@ inline std::string ToString(hipEvent_t v) {
|
||||
ss << v;
|
||||
return ss.str();
|
||||
};
|
||||
// hipIpcEventHandle_t specialization. TODO
|
||||
template <>
|
||||
inline std::string ToString(hipIpcEventHandle_t v) {
|
||||
return std::string{};
|
||||
};
|
||||
// hipStream_t
|
||||
template <>
|
||||
inline std::string ToString(hipStream_t v) {
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
// Test hipEventRecord serialization behavior.
|
||||
// Through manual inspection of the reported timestamps, can determine if recording a NULL event
|
||||
// forces synchronization : set
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t --iterations 10
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
||||
if (blocks > 1024) blocks = 1024;
|
||||
if (blocks == 0) blocks = 1;
|
||||
|
||||
printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N,
|
||||
((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations);
|
||||
printf("iterations=%d\n", iterations);
|
||||
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
float *A_h, *B_h, *C_h;
|
||||
float *A_d, *B_d, *C_d;
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
|
||||
|
||||
hipEvent_t start, stop;
|
||||
|
||||
// NULL stream check:
|
||||
HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess));
|
||||
HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess));
|
||||
|
||||
|
||||
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
//--- START TIMED REGION
|
||||
long long hostStart = HipTest::get_time();
|
||||
// Record the start event
|
||||
HIPCHECK(hipEventRecord(start, NULL));
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
static_cast<const float*>(A_d), static_cast<const float*>(B_d), C_d, N);
|
||||
|
||||
|
||||
HIPCHECK(hipEventRecord(stop, NULL));
|
||||
HIPCHECK(hipEventSynchronize(stop));
|
||||
HIPCHECK(hipEventQuery(stop));
|
||||
long long hostStop = HipTest::get_time();
|
||||
//--- STOP TIMED REGION
|
||||
|
||||
|
||||
float eventMs = 1.0f;
|
||||
// should fail
|
||||
HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop));
|
||||
float hostMs = HipTest::elapsed_time(hostStart, hostStop);
|
||||
|
||||
printf("host_time (gettimeofday) =%6.3fms\n", hostMs);
|
||||
printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs);
|
||||
printf("\n");
|
||||
|
||||
}
|
||||
|
||||
hipIpcEventHandle_t ipc_handle;
|
||||
HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start));
|
||||
|
||||
hipEvent_t ipc_event;
|
||||
HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle));
|
||||
|
||||
HIPCHECK(hipEventSynchronize(ipc_event));
|
||||
|
||||
HIPCHECK(hipEventDestroy(ipc_event));
|
||||
HIPCHECK(hipEventDestroy(start));
|
||||
HIPCHECK(hipEventDestroy(stop));
|
||||
|
||||
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
|
||||
printf("check:\n");
|
||||
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N, true);
|
||||
|
||||
|
||||
passed();
|
||||
}
|
||||
Referencia en una nueva incidencia
Block a user