SWDEV-567852 - Clean-up HIP events (#2708)

* SWDEV-567852 - Clean-up HIP events

Removed unused fields, optimized memory allocation, improved encapsulation, modernized with C++11 auto, added documentation
Cette révision appartient à :
German Andryeyev
2026-01-28 13:34:07 -05:00
révisé par GitHub
Parent 9de4a2ebb1
révision a5ada1e6e3
4 fichiers modifiés avec 323 ajouts et 289 suppressions
+171 -160
Voir le fichier
@@ -32,28 +32,24 @@ namespace hip {
static std::shared_mutex eventSetLock{}; static std::shared_mutex eventSetLock{};
static std::unordered_set<hipEvent_t> eventSet; static std::unordered_set<hipEvent_t> eventSet;
// ================================================================================================
bool Event::ready() { bool Event::ready() {
if (event_->status() != CL_COMPLETE) {
event_->notifyCmdQueue();
}
// Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status // Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status
bool ready = CheckHwEvent(); if (CheckHwEvent() || event_->status() == CL_COMPLETE) {
if (!ready) { return true;
ready = (event_->status() == CL_COMPLETE);
} }
return ready;
event_->notifyCmdQueue();
return false;
} }
// ================================================================================================
bool EventDD::ready() { bool EventDD::ready() {
// Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status // Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status
bool ready = CheckHwEvent(); return CheckHwEvent() || (event_->status() == CL_COMPLETE);
// FIXME: Remove status check entirely
if (!ready) {
ready = (event_->status() == CL_COMPLETE);
}
return ready;
} }
// ================================================================================================
hipError_t Event::query() { hipError_t Event::query() {
amd::ScopedLock lock(lock_); amd::ScopedLock lock(lock_);
@@ -74,12 +70,13 @@ hipError_t Event::synchronize() {
return hipSuccess; return hipSuccess;
} }
auto hip_device = g_devices[deviceId()];
// Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status // Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status
static constexpr bool kWaitCompletion = true; constexpr bool kWaitCompletion = true;
amd::SyncPolicy policy = const amd::SyncPolicy policy =
(flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking : amd::SyncPolicy::Auto; (flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking : amd::SyncPolicy::Auto;
if (!hip_device->devices()[0]->IsHwEventReady(*event_, kWaitCompletion, policy)) {
const amd::Device* device = g_devices[deviceId()]->devices()[0];
if (!device->IsHwEventReady(*event_, kWaitCompletion, policy)) {
event_->awaitCompletion(); event_->awaitCompletion();
} }
return hipSuccess; return hipSuccess;
@@ -88,65 +85,62 @@ hipError_t Event::synchronize() {
// ================================================================================================ // ================================================================================================
bool Event::awaitEventCompletion() { return event_->awaitCompletion(); } bool Event::awaitEventCompletion() { return event_->awaitCompletion(); }
// ================================================================================================
bool EventDD::awaitEventCompletion() { bool EventDD::awaitEventCompletion() {
amd::SyncPolicy policy = constexpr bool kWaitCompletion = true;
const amd::SyncPolicy policy =
(flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking : amd::SyncPolicy::Auto; (flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking : amd::SyncPolicy::Auto;
return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, true, policy); return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, kWaitCompletion, policy);
} }
// ================================================================================================
hipError_t Event::elapsedTime(Event& eStop, float& ms) { hipError_t Event::elapsedTime(Event& eStop, float& ms) {
amd::ScopedLock startLock(lock_); amd::ScopedLock startLock(lock_);
// Handle same event case
if (this == &eStop) { if (this == &eStop) {
if (event_ == nullptr || (flags_ & hipEventDisableTiming)) {
return hipErrorInvalidHandle;
}
ms = 0.f; ms = 0.f;
if (event_ == nullptr) { return ready() ? hipSuccess : hipErrorNotReady;
return hipErrorInvalidHandle;
} }
if (flags_ & hipEventDisableTiming) {
return hipErrorInvalidHandle;
}
if (!ready()) {
return hipErrorNotReady;
}
return hipSuccess;
}
amd::ScopedLock stopLock(eStop.lock()); amd::ScopedLock stopLock(eStop.lock());
// Validate events
if (event_ == nullptr || eStop.event() == nullptr) { if (event_ == nullptr || eStop.event() == nullptr) {
return hipErrorInvalidHandle; return hipErrorInvalidHandle;
} }
if ((flags_ | eStop.flags_) & hipEventDisableTiming) { if ((flags_ | eStop.flags_) & hipEventDisableTiming) {
return hipErrorInvalidHandle; return hipErrorInvalidHandle;
} }
if (!ready() || !eStop.ready()) { if (!ready() || !eStop.ready()) {
return hipErrorNotReady; return hipErrorNotReady;
} }
constexpr float kNsToMs = 1.0f / 1000000.0f;
if (event_ == eStop.event_) { if (event_ == eStop.event_) {
// Events are the same, which indicates the stream is empty and likely // Events are the same, which indicates the stream is empty and likely
// eventRecord is called on another stream. For such cases insert and measure a // eventRecord is called on another stream. For such cases insert and measure a marker.
// marker. auto* command = new amd::Marker(*event_->command().queue(), kMarkerDisableFlush);
amd::Command* command = new amd::Marker(*event_->command().queue(), kMarkerDisableFlush);
command->enqueue(); command->enqueue();
command->awaitCompletion(); command->awaitCompletion();
ms = static_cast<float>(static_cast<int64_t>(command->event().profilingInfo().end_) - ms = static_cast<float>(static_cast<int64_t>(command->event().profilingInfo().end_) -
time(false)) / time(false)) * kNsToMs;
1000000.f;
command->release(); command->release();
} else { } else {
// Note: with direct dispatch eStop.ready() relies on HW event, but CPU status can be delayed. // Note: with direct dispatch eStop.ready() relies on HW event, but CPU status can be delayed.
// Hence for now make sure CPU status is updated by calling awaitCompletion(); // Hence for now make sure CPU status is updated by calling awaitCompletion();
awaitEventCompletion(); awaitEventCompletion();
eStop.awaitEventCompletion(); eStop.awaitEventCompletion();
ms = static_cast<float>(eStop.time(false) - time(false)) / 1000000.f; ms = static_cast<float>(eStop.time(false) - time(false)) * kNsToMs;
} }
return hipSuccess; return hipSuccess;
} }
// ================================================================================================
int64_t Event::time(bool getStartTs) const { int64_t Event::time(bool getStartTs) const {
assert(event_ != nullptr); assert(event_ != nullptr);
if (getStartTs) { if (getStartTs) {
@@ -156,51 +150,51 @@ int64_t Event::time(bool getStartTs) const {
} }
} }
// ================================================================================================
int64_t EventDD::time(bool getStartTs) const { int64_t EventDD::time(bool getStartTs) const {
uint64_t start = 0, end = 0;
assert(event_ != nullptr); assert(event_ != nullptr);
uint64_t start = 0, end = 0;
g_devices[deviceId()]->devices()[0]->getHwEventTime(*event_, &start, &end); g_devices[deviceId()]->devices()[0]->getHwEventTime(*event_, &start, &end);
// FIXME: This is only needed if the command had to wait CL_COMPLETE status
if (start == 0 || end == 0) { // Select the requested timestamp and fallback to CPU profiling if not available
const uint64_t timestamp = getStartTs ? start : end;
if (timestamp == 0) {
return Event::time(getStartTs); return Event::time(getStartTs);
} }
if (getStartTs) { return static_cast<int64_t>(timestamp);
return static_cast<int64_t>(start);
} else {
return static_cast<int64_t>(end);
}
} }
// ================================================================================================ // ================================================================================================
hipError_t Event::streamWaitCommand(amd::Command*& command, hip::Stream* stream) { hipError_t Event::streamWaitCommand(amd::Command*& command, hip::Stream* stream) {
amd::Command::EventWaitList eventWaitList; const amd::Command::EventWaitList eventWaitList =
if (event_ != nullptr) { (event_ != nullptr) ? amd::Command::EventWaitList{event_} : amd::Command::EventWaitList{};
eventWaitList.push_back(event_);
}
command = new amd::Marker(*stream, kMarkerDisableFlush, eventWaitList); command = new amd::Marker(*stream, kMarkerDisableFlush, eventWaitList);
if (command == nullptr) {
return hipErrorOutOfMemory;
}
// Since we only need to have a dependency on an existing event, // Since we only need to have a dependency on an existing event,
// we may not need to flush any caches. // we may not need to flush any caches.
command->setCommandEntryScope(amd::Device::kCacheStateIgnore); command->setCommandEntryScope(amd::Device::kCacheStateIgnore);
if (command == NULL) {
return hipErrorOutOfMemory;
}
return hipSuccess; return hipSuccess;
} }
// ================================================================================================ // ================================================================================================
hipError_t Event::streamWait(hip::Stream* stream, uint flags) { hipError_t Event::streamWait(hip::Stream* stream, uint flags) {
// Access to event_ object must be lock protected
amd::ScopedLock lock(lock_); amd::ScopedLock lock(lock_);
// Early return if event is not recorded, same stream, or already ready
if ((event_ == nullptr) || (event_->command().queue() == stream) || ready()) { if ((event_ == nullptr) || (event_->command().queue() == stream) || ready()) {
return hipSuccess; return hipSuccess;
} }
if (!event_->notifyCmdQueue()) { if (!event_->notifyCmdQueue()) {
return hipErrorLaunchOutOfResources; return hipErrorLaunchOutOfResources;
} }
amd::Command* command; amd::Command* command;
hipError_t status = streamWaitCommand(command, stream); if (const auto status = streamWaitCommand(command, stream); status != hipSuccess) {
if (status != hipSuccess) {
return status; return status;
} }
command->enqueue(); command->enqueue();
command->release(); command->release();
return hipSuccess; return hipSuccess;
@@ -209,33 +203,38 @@ hipError_t Event::streamWait(hip::Stream* stream, uint flags) {
// ================================================================================================ // ================================================================================================
hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* stream, uint32_t ext_flags, hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* stream, uint32_t ext_flags,
bool batch_flush) { bool batch_flush) {
if (command == nullptr) { if (command != nullptr) {
int32_t releaseFlags = return hipSuccess;
((ext_flags == 0) ? flags_ : ext_flags) &
(hipEventReleaseToDevice | hipEventReleaseToSystem | hipEventDisableSystemFence);
if (releaseFlags & hipEventDisableSystemFence) {
releaseFlags = amd::Device::kCacheStateIgnore;
} else {
releaseFlags = amd::Device::kCacheStateInvalid;
} }
// Always submit a EventMarker.
const auto flags = (ext_flags == 0) ? flags_ : ext_flags;
const auto releaseFlags = [&]() {
if (flags & hipEventDisableSystemFence) {
return amd::Device::kCacheStateIgnore;
}
return amd::Device::kCacheStateInvalid;
}();
constexpr bool kMarkerTs = true; constexpr bool kMarkerTs = true;
command = constexpr bool kFlushCache = false;
new hip::EventMarker(*stream, !kMarkerDisableFlush, kMarkerTs, releaseFlags, batch_flush); command = new hip::EventMarker(*stream, kFlushCache, kMarkerTs, releaseFlags, batch_flush);
}
return hipSuccess; return hipSuccess;
} }
// ================================================================================================ // ================================================================================================
hipError_t Event::enqueueRecordCommand(hip::Stream* stream, amd::Command* command) { hipError_t Event::enqueueRecordCommand(hip::Stream* stream, amd::Command* command) {
command->enqueue(); command->enqueue();
if (event_ == &command->event()) {
amd::Event& new_event = command->event();
if (event_ == &new_event) {
return hipSuccess; return hipSuccess;
} }
if (event_ != nullptr) { if (event_ != nullptr) {
event_->release(); event_->release();
} }
event_ = &command->event(); event_ = &new_event;
return hipSuccess; return hipSuccess;
} }
@@ -244,73 +243,69 @@ hipError_t Event::enqueueRecordCommand(hip::Stream* stream, amd::Command* comman
hipError_t Event::addMarker(hip::Stream* hip_stream, amd::Command* command, bool batch_flush) { hipError_t Event::addMarker(hip::Stream* hip_stream, amd::Command* command, bool batch_flush) {
// Keep the lock always at the beginning of this to avoid a race. SWDEV-277847 // Keep the lock always at the beginning of this to avoid a race. SWDEV-277847
amd::ScopedLock lock(lock_); amd::ScopedLock lock(lock_);
hipError_t status = recordCommand(command, hip_stream, 0, batch_flush); if (const auto status = recordCommand(command, hip_stream, 0, batch_flush);
if (status != hipSuccess) { status != hipSuccess) {
return hipSuccess;
}
status = enqueueRecordCommand(hip_stream, command);
return status; return status;
}
return enqueueRecordCommand(hip_stream, command);
} }
// ================================================================================================ // ================================================================================================
bool isValid(hipEvent_t event) { bool isValid(hipEvent_t event) {
// NULL event is always valid
if (event == nullptr) { if (event == nullptr) {
return true; return true;
} }
std::shared_lock lock(eventSetLock); std::shared_lock lock(eventSetLock);
if (eventSet.find(event) == eventSet.end()) { return eventSet.find(event) != eventSet.end();
return false;
}
return true;
} }
// ================================================================================================ // ================================================================================================
hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { hipError_t ihipEventCreateWithFlags(hipEvent_t* event, uint32_t flags) {
unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming | // Define supported event flags
hipEventReleaseToDevice | hipEventReleaseToSystem | constexpr uint32_t kSupportedFlags = hipEventDefault | hipEventBlockingSync |
hipEventInterprocess | hipEventDisableSystemFence; hipEventDisableTiming | hipEventReleaseToDevice |
hipEventReleaseToSystem | hipEventInterprocess |
hipEventDisableSystemFence;
constexpr uint32_t kReleaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem |
hipEventDisableSystemFence);
const unsigned releaseFlags = // Helper to count set bits for validating multiple release flags
(hipEventReleaseToDevice | hipEventReleaseToSystem | hipEventDisableSystemFence); constexpr auto countBits = [](uint32_t num) {
// can't set any unsupported flags. uint32_t count = 0;
// can set only one of the release flags. while (num) {
// if hipEventInterprocess flag is set, then hipEventDisableTiming flag also must be set
const bool illegalFlags = (flags & ~supportedFlags) || ([](unsigned int num) {
unsigned int bitcount;
for (bitcount = 0; num; bitcount++) {
num &= num - 1; num &= num - 1;
++count;
} }
return bitcount; return count;
}(flags & releaseFlags) > 1) || };
((flags & hipEventInterprocess) && !(flags & hipEventDisableTiming));
if (!illegalFlags) { // Validate flags: no unsupported flags, max one release flag,
// interprocess requires disable timing
if ((flags & ~kSupportedFlags) || (countBits(flags & kReleaseFlags) > 1) ||
((flags & hipEventInterprocess) && !(flags & hipEventDisableTiming))) {
return hipErrorInvalidValue;
}
// Create appropriate event type based on flags
hip::Event* e = nullptr; hip::Event* e = nullptr;
if (flags & hipEventInterprocess) { if (flags & hipEventInterprocess) {
e = new hip::IPCEvent(); e = new hip::IPCEvent(flags);
} else { } else if (AMD_DIRECT_DISPATCH) {
if (AMD_DIRECT_DISPATCH) {
e = new hip::EventDD(flags); e = new hip::EventDD(flags);
} else { } else {
e = new hip::Event(flags); e = new hip::Event(flags);
} }
}
// App might have used combination of flags i.e. hipEventInterprocess|hipEventDisableTiming
// However based on hipEventInterprocess flag, IPCEvent creates even with
// JUST hipEventInterprocess and hence, Actual hipEventInterprocess|hipEventDisableTiming
// flag is getting supressed with hipEventInterprocess
e->flags_ = flags;
if (e == nullptr) { if (e == nullptr) {
return hipErrorOutOfMemory; return hipErrorOutOfMemory;
} }
*event = reinterpret_cast<hipEvent_t>(e); *event = reinterpret_cast<hipEvent_t>(e);
// Register event in global set
std::unique_lock lock(hip::eventSetLock); std::unique_lock lock(hip::eventSetLock);
hip::eventSet.insert(*event); hip::eventSet.insert(*event);
} else {
return hipErrorInvalidValue;
}
return hipSuccess; return hipSuccess;
} }
@@ -349,14 +344,13 @@ hipError_t hipEventDestroy(hipEvent_t event) {
return hipErrorContextIsDestroyed; return hipErrorContextIsDestroyed;
} }
hip::Event* e = reinterpret_cast<hip::Event*>(event); auto* e = reinterpret_cast<hip::Event*>(event);
// There is a possibility that stream destroy be called first // Handle capture stream cleanup (stream might be destroyed first)
hipStream_t s = e->GetCaptureStream(); hipStream_t stream = e->GetCaptureStream();
if (hip::isValid(s)) { if (hip::isValid(stream) && stream != nullptr && stream != hipStreamLegacy) {
if (s != nullptr && s != hipStreamLegacy) { reinterpret_cast<hip::Stream*>(stream)->EraseCaptureEvent(event);
reinterpret_cast<hip::Stream*>(e->GetCaptureStream())->EraseCaptureEvent(event);
}
} }
delete e; delete e;
HIP_RETURN(hipSuccess); HIP_RETURN(hipSuccess);
} }
@@ -365,16 +359,16 @@ hipError_t hipEventDestroy(hipEvent_t event) {
hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) { hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
HIP_INIT_API(hipEventElapsedTime, ms, start, stop); HIP_INIT_API(hipEventElapsedTime, ms, start, stop);
// Validate parameters
if (ms == nullptr) { if (ms == nullptr) {
HIP_RETURN(hipErrorInvalidValue); HIP_RETURN(hipErrorInvalidValue);
} }
if (start == nullptr || stop == nullptr) { if (start == nullptr || stop == nullptr) {
HIP_RETURN(hipErrorInvalidHandle); HIP_RETURN(hipErrorInvalidHandle);
} }
hip::Event* eStart = reinterpret_cast<hip::Event*>(start); auto* const eStart = reinterpret_cast<hip::Event*>(start);
hip::Event* eStop = reinterpret_cast<hip::Event*>(stop); auto* const eStop = reinterpret_cast<hip::Event*>(stop);
if (eStart->deviceId() != eStop->deviceId()) { if (eStart->deviceId() != eStop->deviceId()) {
HIP_RETURN(hipErrorInvalidResourceHandle); HIP_RETURN(hipErrorInvalidResourceHandle);
@@ -384,52 +378,58 @@ hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
} }
// ================================================================================================ // ================================================================================================
hipError_t hipEventRecord_common(hipEvent_t event, hipStream_t stream, unsigned int flags) { hipError_t hipEventRecord_common(hipEvent_t event, hipStream_t stream, uint32_t flags) {
if (!(flags == hipEventRecordDefault || flags == hipEventRecordExternal)) { // Validate flags and event
if (flags != hipEventRecordDefault && flags != hipEventRecordExternal) {
return hipErrorInvalidValue; return hipErrorInvalidValue;
} }
hipError_t status = hipSuccess;
if (event == nullptr) { if (event == nullptr) {
return hipErrorInvalidHandle; return hipErrorInvalidHandle;
} }
getStreamPerThread(stream); getStreamPerThread(stream);
hip::Event* e = reinterpret_cast<hip::Event*>(event); auto* const e = reinterpret_cast<hip::Event*>(event);
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream); auto* const hip_stream = hip::getStream(stream);
hip::Stream* hip_stream = hip::getStream(stream); if (hip_stream == nullptr) {
if (hipStream_t lastCaptureStream = e->GetCaptureStream()) { return hipErrorInvalidValue;
if (hip::isValid(lastCaptureStream)) {
if ((lastCaptureStream != nullptr) && (lastCaptureStream != hipStreamLegacy)) {
reinterpret_cast<hip::Stream*>(e->GetCaptureStream())->EraseCaptureEvent(event);
}
} }
// Clean up previous capture stream association
hipStream_t lastCaptureStream = e->GetCaptureStream();
if (hip::isValid(lastCaptureStream) && lastCaptureStream != nullptr &&
lastCaptureStream != hipStreamLegacy) {
reinterpret_cast<hip::Stream*>(lastCaptureStream)->EraseCaptureEvent(event);
} }
e->SetCaptureStream(stream); e->SetCaptureStream(stream);
if ((stream != nullptr && stream != hipStreamLegacy) &&
(s->GetCaptureStatus() == hipStreamCaptureStatusActive)) { // Handle stream capture mode
if (stream != nullptr && stream != hipStreamLegacy &&
hip_stream->GetCaptureStatus() == hipStreamCaptureStatusActive) {
ClPrint(amd::LOG_INFO, amd::LOG_CODE, ClPrint(amd::LOG_INFO, amd::LOG_CODE,
"[hipGraph] Current capture node EventRecord on stream : %p, Event %p", stream, event); "[hipGraph] Current capture node EventRecord on stream : %p, Event %p", stream, event);
s->SetCaptureEvent(event); hip_stream->SetCaptureEvent(event);
std::vector<hip::GraphNode*> lastCapturedNodes = s->GetLastCapturedNodes(); const auto& lastCapturedNodes = hip_stream->GetLastCapturedNodes();
e->SetNodesPrevToRecorded(lastCapturedNodes); e->SetNodesPrevToRecorded(lastCapturedNodes);
if (flags == hipEventRecordExternal) { if (flags == hipEventRecordExternal) {
hip::GraphNode* node = new hip::GraphEventRecordNode(reinterpret_cast<hipEvent_t>(e)); auto* const node = new hip::GraphEventRecordNode(event);
hipError_t status = hip::ihipGraphAddNode( const auto status = hip::ihipGraphAddNode(node, hip_stream->GetCaptureGraph(),
node, reinterpret_cast<hip::Graph*>(s->GetCaptureGraph()), lastCapturedNodes.data(),
reinterpret_cast<hip::GraphNode* const*>(s->GetLastCapturedNodes().data()), lastCapturedNodes.size(), false);
s->GetLastCapturedNodes().size(), false);
if (status != hipSuccess) { if (status != hipSuccess) {
ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "hipEventRecord add external event node failed"); ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "hipEventRecord add external event node failed");
return status; return status;
} }
s->SetLastCapturedNode(node); hip_stream->SetLastCapturedNode(node);
} }
} else { return hipSuccess;
}
// Normal event recording
if (e->deviceId() != hip_stream->DeviceId()) { if (e->deviceId() != hip_stream->DeviceId()) {
return hipErrorInvalidResourceHandle; return hipErrorInvalidResourceHandle;
} }
status = e->addMarker(hip_stream, nullptr, !hip::Event::kBatchFlush); return e->addMarker(hip_stream, nullptr, !hip::Event::kBatchFlush);
}
return status;
} }
// ================================================================================================ // ================================================================================================
@@ -446,7 +446,7 @@ hipError_t hipEventRecord_spt(hipEvent_t event, hipStream_t stream) {
} }
// ================================================================================================ // ================================================================================================
hipError_t hipEventRecordWithFlags(hipEvent_t event, hipStream_t stream, unsigned int flags) { hipError_t hipEventRecordWithFlags(hipEvent_t event, hipStream_t stream, uint32_t flags) {
HIP_INIT_API(hipEventRecordWithFlags, event, stream, flags); HIP_INIT_API(hipEventRecordWithFlags, event, stream, flags);
HIP_RETURN(hipEventRecord_common(event, stream, flags)); HIP_RETURN(hipEventRecord_common(event, stream, flags));
} }
@@ -458,19 +458,24 @@ hipError_t hipEventSynchronize(hipEvent_t event) {
if (event == nullptr) { if (event == nullptr) {
HIP_RETURN(hipErrorInvalidHandle); HIP_RETURN(hipErrorInvalidHandle);
} }
hip::Event* e = reinterpret_cast<hip::Event*>(event);
auto hip_stream = e->GetCaptureStream(); auto* e = reinterpret_cast<hip::Event*>(event);
hip::Stream* s = reinterpret_cast<hip::Stream*>(hip_stream); const auto hip_stream = e->GetCaptureStream();
if ((hip_stream != nullptr && hip_stream != hipStreamLegacy) &&
(s->GetCaptureStatus() == hipStreamCaptureStatusActive)) { // Check for active capture
if (hip_stream != nullptr && hip_stream != hipStreamLegacy) {
auto* s = reinterpret_cast<hip::Stream*>(hip_stream);
if (s->GetCaptureStatus() == hipStreamCaptureStatusActive) {
s->SetCaptureStatus(hipStreamCaptureStatusInvalidated); s->SetCaptureStatus(hipStreamCaptureStatusInvalidated);
HIP_RETURN(hipErrorCapturedEvent); HIP_RETURN(hipErrorCapturedEvent);
} }
if (hip::Stream::StreamCaptureOngoing(hip_stream) == true) { }
if (hip::Stream::StreamCaptureOngoing(hip_stream)) {
HIP_RETURN(hipErrorStreamCaptureUnsupported); HIP_RETURN(hipErrorStreamCaptureUnsupported);
} }
hipError_t status = e->synchronize(); const auto status = e->synchronize();
// Release freed memory for all memory pools on the device // Release freed memory for all memory pools on the device
g_devices[e->deviceId()]->ReleaseFreedMemory(); g_devices[e->deviceId()]->ReleaseFreedMemory();
HIP_RETURN(status); HIP_RETURN(status);
@@ -482,20 +487,26 @@ hipError_t ihipEventQuery(hipEvent_t event) {
return hipErrorInvalidHandle; return hipErrorInvalidHandle;
} }
hip::Event* e = reinterpret_cast<hip::Event*>(event); auto* e = reinterpret_cast<hip::Event*>(event);
auto hip_stream = e->GetCaptureStream(); const auto hip_stream = e->GetCaptureStream();
hip::Stream* s = reinterpret_cast<hip::Stream*>(hip_stream);
if ((hip_stream != nullptr && hip_stream != hipStreamLegacy) && // Check for active capture
(s->GetCaptureStatus() == hipStreamCaptureStatusActive)) { if (hip_stream != nullptr && hip_stream != hipStreamLegacy) {
auto* s = reinterpret_cast<hip::Stream*>(hip_stream);
if (s->GetCaptureStatus() == hipStreamCaptureStatusActive) {
s->SetCaptureStatus(hipStreamCaptureStatusInvalidated); s->SetCaptureStatus(hipStreamCaptureStatusInvalidated);
HIP_RETURN(hipErrorCapturedEvent); HIP_RETURN(hipErrorCapturedEvent);
} }
if (hip::Stream::StreamCaptureOngoing(e->GetCaptureStream())) { }
if (hip::Stream::StreamCaptureOngoing(hip_stream)) {
HIP_RETURN(hipErrorStreamCaptureUnsupported); HIP_RETURN(hipErrorStreamCaptureUnsupported);
} }
return e->query(); return e->query();
} }
// ================================================================================================
hipError_t hipEventQuery(hipEvent_t event) { hipError_t hipEventQuery(hipEvent_t event) {
HIP_INIT_API(hipEventQuery, event); HIP_INIT_API(hipEventQuery, event);
HIP_RETURN(ihipEventQuery(event)); HIP_RETURN(ihipEventQuery(event));
+58 -48
Voir le fichier
@@ -32,55 +32,58 @@
namespace hip { namespace hip {
class StreamCallback { class StreamCallback {
protected: protected:
void* userData_; void* userData_; //!< User data passed to callback function
public: public:
StreamCallback(void* userData) : userData_(userData) {} explicit StreamCallback(void* userData) : userData_(userData) {}
virtual void CL_CALLBACK callback() = 0; virtual void CL_CALLBACK callback() = 0;
virtual ~StreamCallback() {}; virtual ~StreamCallback() = default;
}; };
class StreamAddCallback : public StreamCallback { class StreamAddCallback : public StreamCallback {
hipStreamCallback_t callBack_; hipStreamCallback_t callBack_; //!< Stream callback function pointer
hipStream_t stream_; hipStream_t stream_; //!< Stream associated with the callback
public: public:
StreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData) StreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData)
: StreamCallback(userData) { : StreamCallback(userData), stream_(stream), callBack_(callback) {}
stream_ = stream;
callBack_ = callback;
}
void CL_CALLBACK callback() { void CL_CALLBACK callback() override {
hipError_t status = hipSuccess; hipError_t status = hipSuccess;
callBack_(stream_, status, userData_); callBack_(stream_, status, userData_);
} }
}; };
class LaunchHostFuncCallback : public StreamCallback { class LaunchHostFuncCallback : public StreamCallback {
hipHostFn_t callBack_; hipHostFn_t callBack_; //!< Host function callback pointer
public: public:
LaunchHostFuncCallback(hipHostFn_t callback, void* userData) : StreamCallback(userData) { LaunchHostFuncCallback(hipHostFn_t callback, void* userData)
callBack_ = callback; : StreamCallback(userData), callBack_(callback) {}
}
void CL_CALLBACK callback() { callBack_(userData_); } void CL_CALLBACK callback() override { callBack_(userData_); }
}; };
void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data); void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data);
#define IPC_SIGNALS_PER_EVENT 32 #define IPC_SIGNALS_PER_EVENT 32
// Optimized IPC event shared memory structure
// Note: All atomics use relaxed memory ordering where safe for performance
typedef struct ihipIpcEventShmem_s { typedef struct ihipIpcEventShmem_s {
// Reference counting for shared memory lifecycle
std::atomic<int> owners; std::atomic<int> owners;
// Metadata: only written once during initialization, relaxed ordering safe
std::atomic<int> owners_device_id; std::atomic<int> owners_device_id;
std::atomic<int> owners_process_id; std::atomic<int> owners_process_id;
// Ring buffer indices: requires acquire-release ordering for synchronization
std::atomic<int> read_index; std::atomic<int> read_index;
std::atomic<int> write_index; std::atomic<int> write_index;
uint32_t signal[IPC_SIGNALS_PER_EVENT]; // Signal array: GPU-accessible memory for event signaling
// Using uint32_t for GPU compatibility
alignas(64) uint32_t signal[IPC_SIGNALS_PER_EVENT];
} ihipIpcEventShmem_t; } ihipIpcEventShmem_t;
class EventMarker : public amd::Marker { class EventMarker : public amd::Marker {
@@ -97,24 +100,25 @@ class EventMarker : public amd::Marker {
}; };
class Event { class Event {
/// capture stream where event is recorded /// Capture stream where event is recorded
hipStream_t captureStream_ = nullptr; hipStream_t captureStream_ = nullptr;
/// Previous captured nodes before event record /// Previous captured nodes before event record
std::vector<hip::GraphNode*> nodesPrevToRecorded_; std::vector<hip::GraphNode*> nodesPrevToRecorded_;
protected: protected:
bool CheckHwEvent() { bool CheckHwEvent() {
amd::SyncPolicy policy = const amd::SyncPolicy policy =
(flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking : amd::SyncPolicy::Auto; (flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking : amd::SyncPolicy::Auto;
return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, false, policy); return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, false, policy);
} }
public: public:
constexpr static bool kBatchFlush = true; //!< Flushes CPU command batch in direct dispatch mode // Flushes CPU command batch in direct dispatch mode
static constexpr bool kBatchFlush = true;
Event(uint32_t flags) explicit Event(uint32_t flags)
: flags_(flags), lock_(true) /* hipEvent_t lock*/, event_(nullptr), stream_(nullptr) { : flags_(flags), lock_(true), event_(nullptr) {
device_id_ = hip::getCurrentDevice()->deviceId(); // Created in current device ctx device_id_ = hip::getCurrentDevice()->deviceId();
} }
virtual ~Event() { virtual ~Event() {
@@ -122,7 +126,6 @@ class Event {
event_->release(); event_->release();
} }
} }
uint32_t flags_; //!< flags associated with the event
virtual hipError_t query(); virtual hipError_t query();
virtual hipError_t synchronize(); virtual hipError_t synchronize();
@@ -136,6 +139,8 @@ class Event {
virtual hipError_t enqueueRecordCommand(hip::Stream* stream, amd::Command* command); virtual hipError_t enqueueRecordCommand(hip::Stream* stream, amd::Command* command);
hipError_t addMarker(hip::Stream* stream, amd::Command* command, bool batch_flush = true); hipError_t addMarker(hip::Stream* stream, amd::Command* command, bool batch_flush = true);
uint32_t flags() const { return flags_; }
void BindCommand(amd::Command& command) { void BindCommand(amd::Command& command) {
amd::ScopedLock lock(lock_); amd::ScopedLock lock(lock_);
if (event_ != nullptr) { if (event_ != nullptr) {
@@ -146,17 +151,20 @@ class Event {
} }
amd::Monitor& lock() { return lock_; } amd::Monitor& lock() { return lock_; }
const int deviceId() const { return device_id_; } int deviceId() const { return device_id_; }
void setDeviceId(int id) { device_id_ = id; } void setDeviceId(int id) { device_id_ = id; }
amd::Event* event() { return event_; } amd::Event* event() { return event_; }
/// Get capture stream where event is recorded /// Get capture stream where event is recorded
hipStream_t GetCaptureStream() const { return captureStream_; } hipStream_t GetCaptureStream() const { return captureStream_; }
/// Set capture stream where event is recorded /// Set capture stream where event is recorded
void SetCaptureStream(hipStream_t stream) { captureStream_ = stream; } void SetCaptureStream(hipStream_t stream) { captureStream_ = stream; }
/// Returns previous captured nodes before event record /// Returns previous captured nodes before event record
std::vector<hip::GraphNode*> GetNodesPrevToRecorded() const { return nodesPrevToRecorded_; } const std::vector<hip::GraphNode*>& GetNodesPrevToRecorded() const {
return nodesPrevToRecorded_;
}
/// Set last captured graph node before event record /// Set last captured graph node before event record
void SetNodesPrevToRecorded(std::vector<hip::GraphNode*>& graphNode) { void SetNodesPrevToRecorded(const std::vector<hip::GraphNode*>& graphNode) {
nodesPrevToRecorded_ = graphNode; nodesPrevToRecorded_ = graphNode;
} }
virtual hipError_t GetHandle(ihipIpcEventHandle_t* handle) { virtual hipError_t GetHandle(ihipIpcEventHandle_t* handle) {
@@ -170,35 +178,38 @@ class Event {
virtual int64_t time(bool getStartTs) const; virtual int64_t time(bool getStartTs) const;
protected: protected:
amd::Monitor lock_; uint32_t flags_; //!< Flags associated with the event
hip::Stream* stream_; amd::Monitor lock_; //!< Mutex for thread-safe access to event state
amd::Event* event_; amd::Event* event_; //!< Underlying ROCclr event object for GPU synchronization
int device_id_; int device_id_; //!< Device ID where this event was created
}; };
class EventDD : public Event { class EventDD : public Event {
public: public:
EventDD(unsigned int flags) : Event(flags) {} explicit EventDD(uint32_t flags) : Event(flags) {}
virtual ~EventDD() {} ~EventDD() override = default;
virtual bool awaitEventCompletion(); bool awaitEventCompletion() override;
virtual bool ready(); bool ready() override;
virtual int64_t time(bool getStartTs) const; int64_t time(bool getStartTs) const override;
}; };
class IPCEvent : public Event { class IPCEvent : public Event {
// IPC Events /// IPC event metadata structure
struct ihipIpcEvent_t { struct ihipIpcEvent_t {
std::string ipc_name_; std::string ipc_name_; //!< Name of the shared memory object for IPC
int ipc_fd_; ihipIpcEventShmem_t* ipc_shmem_; //!< Pointer to mapped IPC shared memory structure
ihipIpcEventShmem_t* ipc_shmem_;
ihipIpcEvent_t() : ipc_name_("dummy"), ipc_fd_(0), ipc_shmem_(nullptr) {} ihipIpcEvent_t() : ipc_shmem_(nullptr) {
void setipcname(const char* name) { ipc_name_ = std::string(name); } ipc_name_.reserve(32); // Reserve space for typical IPC name "/hip_<pid>_<counter>"
}
void setipcname(const char* name) { ipc_name_ = name; }
}; };
ihipIpcEvent_t ipc_evt_; ihipIpcEvent_t ipc_evt_;
public: public:
~IPCEvent() { explicit IPCEvent(uint32_t flags = hipEventInterprocess) : Event(flags) {}
~IPCEvent() override {
if (ipc_evt_.ipc_shmem_) { if (ipc_evt_.ipc_shmem_) {
int owners = --ipc_evt_.ipc_shmem_->owners; int owners = --ipc_evt_.ipc_shmem_->owners;
// Make sure event is synchronized // Make sure event is synchronized
@@ -213,12 +224,11 @@ class IPCEvent : public Event {
} }
#if !defined(_MSC_VER) #if !defined(_MSC_VER)
// Clean up the POSIX shared memory object // Clean up the POSIX shared memory object
if (!ipc_evt_.ipc_name_.empty() && ipc_evt_.ipc_name_ != "dummy") { if (!ipc_evt_.ipc_name_.empty()) {
shm_unlink(ipc_evt_.ipc_name_.c_str()); shm_unlink(ipc_evt_.ipc_name_.c_str());
} }
#endif #endif
} }
IPCEvent() : Event(hipEventInterprocess) {}
bool createIpcEventShmemIfNeeded(); bool createIpcEventShmemIfNeeded();
hipError_t GetHandle(ihipIpcEventHandle_t* handle) override; hipError_t GetHandle(ihipIpcEventHandle_t* handle) override;
hipError_t OpenHandle(ihipIpcEventHandle_t* handle) override; hipError_t OpenHandle(ihipIpcEventHandle_t* handle) override;
@@ -232,10 +242,10 @@ class IPCEvent : public Event {
hipError_t enqueueRecordCommand(hip::Stream* stream, amd::Command* command) override; hipError_t enqueueRecordCommand(hip::Stream* stream, amd::Command* command) override;
}; };
/// Callback data for IPC event stream wait operations
struct CallbackData { struct CallbackData {
int previous_read_index; const int previous_read_index; //!< Snapshot of read index for synchronization
hip::ihipIpcEventShmem_t* shmem; hip::ihipIpcEventShmem_t* const shmem; //!< IPC shared memory for event signaling
}; };
} // namespace hip } // namespace hip
+71 -58
Voir le fichier
@@ -32,12 +32,14 @@ namespace hip {
hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags); hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags);
// ================================================================================================
bool IPCEvent::createIpcEventShmemIfNeeded() { bool IPCEvent::createIpcEventShmemIfNeeded() {
// Early return if shared memory already exists
if (ipc_evt_.ipc_shmem_) { if (ipc_evt_.ipc_shmem_) {
// ipc_shmem_ already created, no need to create it again
return true; return true;
} }
// Generate unique IPC name
#if !defined(_MSC_VER) #if !defined(_MSC_VER)
static std::atomic<int> counter{0}; static std::atomic<int> counter{0};
ipc_evt_.ipc_name_ = "/hip_" + std::to_string(getpid()) + "_" + std::to_string(counter++); ipc_evt_.ipc_name_ = "/hip_" + std::to_string(getpid()) + "_" + std::to_string(counter++);
@@ -48,34 +50,33 @@ bool IPCEvent::createIpcEventShmemIfNeeded() {
ipc_evt_.ipc_name_.replace(0, 5, "/hip_"); ipc_evt_.ipc_name_.replace(0, 5, "/hip_");
#endif #endif
if (!amd::Os::MemoryMapFileTruncated( // Create memory-mapped file for shared memory
ipc_evt_.ipc_name_.c_str(), auto** shmem_ptr = reinterpret_cast<void**>(&ipc_evt_.ipc_shmem_);
const_cast<const void**>(reinterpret_cast<void**>(&(ipc_evt_.ipc_shmem_))), if (!amd::Os::MemoryMapFileTruncated(ipc_evt_.ipc_name_.c_str(),
const_cast<const void**>(shmem_ptr),
sizeof(hip::ihipIpcEventShmem_t))) { sizeof(hip::ihipIpcEventShmem_t))) {
return false; return false;
} }
ipc_evt_.ipc_shmem_->owners = 1; // Initialize shared memory fields
ipc_evt_.ipc_shmem_->read_index = -1; auto* const shmem = ipc_evt_.ipc_shmem_;
ipc_evt_.ipc_shmem_->write_index = 0; shmem->owners = 1;
for (uint32_t sig_idx = 0; sig_idx < IPC_SIGNALS_PER_EVENT; ++sig_idx) { shmem->read_index = -1;
ipc_evt_.ipc_shmem_->signal[sig_idx] = 0; shmem->write_index = 0;
} std::fill_n(shmem->signal, IPC_SIGNALS_PER_EVENT, 0);
// device sets 0 to this ptr when the ipc event is completed // Register signal array with device
hipError_t status = constexpr size_t kSignalArraySize = sizeof(uint32_t) * IPC_SIGNALS_PER_EVENT;
ihipHostRegister(&ipc_evt_.ipc_shmem_->signal, sizeof(uint32_t) * IPC_SIGNALS_PER_EVENT, 0); const auto status = ihipHostRegister(&shmem->signal, kSignalArraySize, 0);
if (status != hipSuccess) { return status == hipSuccess;
return false;
}
return true;
} }
// ================================================================================================ // ================================================================================================
hipError_t IPCEvent::query() { hipError_t IPCEvent::query() {
if (ipc_evt_.ipc_shmem_) { if (ipc_evt_.ipc_shmem_) {
int prev_read_idx = ipc_evt_.ipc_shmem_->read_index; const int prev_read_idx = ipc_evt_.ipc_shmem_->read_index;
int offset = (prev_read_idx % IPC_SIGNALS_PER_EVENT); const int offset = prev_read_idx % IPC_SIGNALS_PER_EVENT;
if (ipc_evt_.ipc_shmem_->read_index < prev_read_idx + IPC_SIGNALS_PER_EVENT && if (ipc_evt_.ipc_shmem_->read_index < prev_read_idx + IPC_SIGNALS_PER_EVENT &&
ipc_evt_.ipc_shmem_->signal[offset] != 0) { ipc_evt_.ipc_shmem_->signal[offset] != 0) {
return hipErrorNotReady; return hipErrorNotReady;
@@ -101,11 +102,12 @@ hipError_t IPCEvent::synchronize() {
// ================================================================================================ // ================================================================================================
hipError_t IPCEvent::streamWait(hip::Stream* stream, uint flags) { hipError_t IPCEvent::streamWait(hip::Stream* stream, uint flags) {
int offset = ipc_evt_.ipc_shmem_->read_index; const int offset = ipc_evt_.ipc_shmem_->read_index;
hipError_t status = return ihipStreamOperation(
ihipStreamOperation(reinterpret_cast<hipStream_t>(stream), ROCCLR_COMMAND_STREAM_WAIT_VALUE, reinterpret_cast<hipStream_t>(stream),
&(ipc_evt_.ipc_shmem_->signal[offset]), 0, 1, 1, sizeof(uint32_t)); ROCCLR_COMMAND_STREAM_WAIT_VALUE,
return status; &(ipc_evt_.ipc_shmem_->signal[offset]), 0, 1, 1,
sizeof(uint32_t));
} }
// ================================================================================================ // ================================================================================================
@@ -117,37 +119,41 @@ hipError_t IPCEvent::recordCommand(amd::Command*& command, amd::HostQueue* strea
// ================================================================================================ // ================================================================================================
hipError_t IPCEvent::enqueueRecordCommand(hip::Stream* stream, amd::Command* command) { hipError_t IPCEvent::enqueueRecordCommand(hip::Stream* stream, amd::Command* command) {
amd::Event& tEvent = command->event();
createIpcEventShmemIfNeeded(); createIpcEventShmemIfNeeded();
int write_index = ipc_evt_.ipc_shmem_->write_index++;
int offset = write_index % IPC_SIGNALS_PER_EVENT; // Allocate signal slot for this event
while (ipc_evt_.ipc_shmem_->signal[offset] != 0) { auto* const shmem = ipc_evt_.ipc_shmem_;
const int write_index = shmem->write_index++;
const int offset = write_index % IPC_SIGNALS_PER_EVENT;
auto& signal = shmem->signal[offset];
// Wait for signal slot to become available
while (signal != 0) {
amd::Os::sleep(1); amd::Os::sleep(1);
} }
// Lock signal.
ipc_evt_.ipc_shmem_->signal[offset] = 1; // Lock signal and set device ID
ipc_evt_.ipc_shmem_->owners_device_id = deviceId(); signal = 1;
shmem->owners_device_id = deviceId();
command->enqueue(); command->enqueue();
// Set event_ in order to release marked command when event is destroyed // Set event_ to release marked command when event is destroyed
if (event_ != nullptr) { if (event_ != nullptr) {
event_->release(); event_->release();
} }
event_ = &command->event(); event_ = &command->event();
// device writes 0 to signal after the hipEventRecord command is completed // Device writes 0 to signal after hipEventRecord command completes
// the signal value is checked by WaitThenDecrementSignal cb const auto status = ihipStreamOperation(reinterpret_cast<hipStream_t>(stream),
hipError_t status = ROCCLR_COMMAND_STREAM_WRITE_VALUE, &signal, 0, 0, 0,
ihipStreamOperation(reinterpret_cast<hipStream_t>(stream), ROCCLR_COMMAND_STREAM_WRITE_VALUE, sizeof(uint32_t));
&(ipc_evt_.ipc_shmem_->signal[offset]), 0, 0, 0, sizeof(uint32_t));
if (status != hipSuccess) { if (status != hipSuccess) {
return status; return status;
} }
// Update read index to indicate new signal. // Update read index to indicate new signal
int expected = write_index - 1; int expected = write_index - 1;
while (!ipc_evt_.ipc_shmem_->read_index.compare_exchange_weak(expected, write_index)) { while (!shmem->read_index.compare_exchange_weak(expected, write_index)) {
amd::Os::sleep(1); amd::Os::sleep(1);
} }
@@ -169,23 +175,28 @@ hipError_t IPCEvent::GetHandle(ihipIpcEventHandle_t* handle) {
// ================================================================================================ // ================================================================================================
hipError_t IPCEvent::OpenHandle(ihipIpcEventHandle_t* handle) { hipError_t IPCEvent::OpenHandle(ihipIpcEventHandle_t* handle) {
ipc_evt_.ipc_name_ = handle->shmem_name; ipc_evt_.ipc_name_ = handle->shmem_name;
// Map shared memory from IPC handle
auto** shmem_ptr = reinterpret_cast<void**>(&ipc_evt_.ipc_shmem_);
if (!amd::Os::MemoryMapFileTruncated(ipc_evt_.ipc_name_.c_str(), if (!amd::Os::MemoryMapFileTruncated(ipc_evt_.ipc_name_.c_str(),
(const void**)&(ipc_evt_.ipc_shmem_), const_cast<const void**>(shmem_ptr),
sizeof(ihipIpcEventShmem_t))) { sizeof(ihipIpcEventShmem_t))) {
return hipErrorInvalidValue; return hipErrorInvalidValue;
} }
if (amd::Os::getProcessId() == ipc_evt_.ipc_shmem_->owners_process_id.load()) { auto* const shmem = ipc_evt_.ipc_shmem_;
// If this is in the same process, return error.
// Prevent opening in the same process
const auto current_process_id = amd::Os::getProcessId();
if (current_process_id == shmem->owners_process_id.load()) {
return hipErrorInvalidContext; return hipErrorInvalidContext;
} }
ipc_evt_.ipc_shmem_->owners += 1; shmem->owners += 1;
// device sets 0 to this ptr when the ipc event is completed
hipError_t status = hipSuccess; // Register signal array with device
status = constexpr size_t kSignalArraySize = sizeof(uint32_t) * IPC_SIGNALS_PER_EVENT;
ihipHostRegister(&ipc_evt_.ipc_shmem_->signal, sizeof(uint32_t) * IPC_SIGNALS_PER_EVENT, 0); return ihipHostRegister(&shmem->signal, kSignalArraySize, 0);
return status;
} }
// ================================================================================================ // ================================================================================================
@@ -195,31 +206,33 @@ hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) {
if (handle == nullptr || event == nullptr) { if (handle == nullptr || event == nullptr) {
HIP_RETURN(hipErrorInvalidValue); HIP_RETURN(hipErrorInvalidValue);
} }
hip::Event* e = reinterpret_cast<hip::Event*>(event);
auto e = reinterpret_cast<hip::Event*>(event);
HIP_RETURN(e->GetHandle(reinterpret_cast<ihipIpcEventHandle_t*>(handle))); HIP_RETURN(e->GetHandle(reinterpret_cast<ihipIpcEventHandle_t*>(handle)));
} }
// ================================================================================================
hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) { hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) {
HIP_INIT_API(hipIpcOpenEventHandle, event, handle); HIP_INIT_API(hipIpcOpenEventHandle, event, handle);
hipError_t status = hipSuccess;
if (event == nullptr) { if (event == nullptr) {
HIP_RETURN(hipErrorInvalidValue); HIP_RETURN(hipErrorInvalidValue);
} }
status = ihipEventCreateWithFlags(event, hipEventDisableTiming | hipEventInterprocess); // Create IPC event with timing disabled
constexpr uint32_t kIpcEventFlags = hipEventDisableTiming | hipEventInterprocess;
auto status = ihipEventCreateWithFlags(event, kIpcEventFlags);
if (status != hipSuccess) { if (status != hipSuccess) {
HIP_RETURN(status); HIP_RETURN(status);
} }
hip::Event* e = reinterpret_cast<hip::Event*>(*event); auto* const e = reinterpret_cast<hip::Event*>(*event);
ihipIpcEventHandle_t* iHandle = reinterpret_cast<ihipIpcEventHandle_t*>(&handle); auto* const iHandle = reinterpret_cast<ihipIpcEventHandle_t*>(&handle);
status = e->OpenHandle(iHandle); const auto open_status = e->OpenHandle(iHandle);
// Free the event in case of failure if (open_status != hipSuccess) {
if (status != hipSuccess) {
delete e; delete e;
} }
HIP_RETURN(status); HIP_RETURN(open_status);
} }
} // namespace hip } // namespace hip
+1 -1
Voir le fichier
@@ -509,7 +509,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, amd::LaunchParams& launch_par
if (stopEvent != nullptr) { if (stopEvent != nullptr) {
hip::Event* eStop = reinterpret_cast<hip::Event*>(stopEvent); hip::Event* eStop = reinterpret_cast<hip::Event*>(stopEvent);
if (eStop->flags_ & hipEventDisableSystemFence) { if (eStop->flags() & hipEventDisableSystemFence) {
command->setCommandEntryScope(amd::Device::kCacheStateIgnore); command->setCommandEntryScope(amd::Device::kCacheStateIgnore);
} else { } else {
command->setCommandEntryScope(amd::Device::kCacheStateSystem); command->setCommandEntryScope(amd::Device::kCacheStateSystem);