SWDEV-472840 SWDEV-461980 - Fix null stream sync performance

=> If the null stream has not been created by the time of the sync, skip null stream creation
=> Do a CPU wait on the blocking streams and on the null stream, if it exists

Change-Id: I90d6ced6a2dd1782ba58f3fed4e3608fc0efa55a


[ROCm/clr commit: 17e7b7c2ef]
Этот коммит содержится в:
Anusha GodavarthySurya
2024-10-11 17:10:12 +00:00
коммит произвёл Anusha Godavarthy Surya
родитель 0cb25faf88
Коммит bfc89974e0
3 изменённых файлов: 36 добавлений и 11 удалений
+19 -4
Просмотреть файл
@@ -257,15 +257,30 @@ void Device::destroyAllStreams() {
}
// ================================================================================================
void Device::SyncAllStreams( bool cpu_wait) {
void Device::SyncAllStreams(bool cpu_wait, bool wait_blocking_streams_only) {
// Make a local copy to avoid stalls for GPU finish with multiple threads
std::vector<hip::Stream*> streams;
streams.reserve(streamSet.size());
{
amd::ScopedLock lock(streamSetLock);
for (auto it : streamSet) {
streams.push_back(it);
it->retain();
if (wait_blocking_streams_only) {
auto null_stream = GetNullStream();
for (auto it : streamSet) {
if (it != null_stream && (it->Flags() & hipStreamNonBlocking) == 0) {
streams.push_back(it);
it->retain();
}
}
// Add null stream to the end of the list so that wait happens after all blocking streams.
if (null_stream != nullptr) {
streams.push_back(null_stream);
null_stream->retain();
}
} else {
for (auto it : streamSet) {
streams.push_back(it);
it->retain();
}
}
}
for (auto it : streams) {
+1 -1
Просмотреть файл
@@ -595,7 +595,7 @@ public:
void destroyAllStreams();
void SyncAllStreams( bool cpu_wait = true);
void SyncAllStreams( bool cpu_wait = true, bool wait_blocking_streams_only = false);
bool StreamCaptureBlocking();
+16 -6
Просмотреть файл
@@ -357,13 +357,23 @@ hipError_t hipStreamSynchronize_common(hipStream_t stream) {
HIP_RETURN(hipErrorStreamCaptureUnsupported);
}
}
bool wait = (stream == nullptr || stream == hipStreamLegacy) ? true : false;
auto hip_stream = hip::getStream(stream, wait);
// Wait for the current host queue
hip_stream->finish();
// Release freed memory for all memory pools on the device
hip_stream->GetDevice()->ReleaseFreedMemory();
if (stream == nullptr) {
// Do cpu wait on null stream and only on blocking streams
constexpr bool WaitblockingStreamOnly = true;
getCurrentDevice()->SyncAllStreams(true, WaitblockingStreamOnly);
// Release freed memory for all memory pools on the device
getCurrentDevice()->ReleaseFreedMemory();
} else {
constexpr bool wait = false;
auto hip_stream = hip::getStream(stream, wait);
// Wait for the current host queue
hip_stream->finish();
// Release freed memory for all memory pools on the device
hip_stream->GetDevice()->ReleaseFreedMemory();
}
return hipSuccess;
}