SWDEV-231579 - [hipclang-vdi-rocm][perf]
- HIPPerfDispatchSpeed disparity between HIP/HCC vs HIP/VDI
Insert a wait marker command in the default stream only when
HIP has pending operations on other async streams
Change-Id: I68660a54867fab7571ba57eb1df5feb1bca1c61a
[ROCm/hip commit: db70fc66b7]
Tá an tiomantas seo le fáil i:
@@ -80,13 +80,36 @@ void setCurrentDevice(unsigned int index) {
|
||||
|
||||
amd::HostQueue* getQueue(hipStream_t stream) {
|
||||
if (stream == nullptr) {
|
||||
syncStreams();
|
||||
return getNullStream();
|
||||
} else {
|
||||
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
|
||||
// Wait for null stream
|
||||
if ((s->flags & hipStreamNonBlocking) == 0) {
|
||||
getNullStream()->finish();
|
||||
amd::HostQueue* nullStream = getNullStream();
|
||||
amd::Command::EventWaitList eventWaitList;
|
||||
|
||||
amd::Command* command = nullStream->getLastQueuedCommand(true);
|
||||
if ((command != nullptr) &&
|
||||
// Check the current active status
|
||||
(command->status() != CL_COMPLETE)) {
|
||||
eventWaitList.push_back(command);
|
||||
}
|
||||
|
||||
// Check if we have to wait anything
|
||||
if (eventWaitList.size() > 0) {
|
||||
amd::Command* command = new amd::Marker(*s->asHostQueue(), false, eventWaitList);
|
||||
if (command != nullptr) {
|
||||
command->enqueue();
|
||||
command->release();
|
||||
}
|
||||
}
|
||||
|
||||
// Release all active commands. It's safe after the marker was enqueued
|
||||
for (const auto& it : eventWaitList) {
|
||||
it->release();
|
||||
}
|
||||
}
|
||||
|
||||
return s->asHostQueue();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,8 @@ amd::HostQueue* Device::defaultStream() {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
// Wait for all active streams before executing commands on the default
|
||||
iHipWaitActiveStreams(defaultStream_);
|
||||
return defaultStream_;
|
||||
}
|
||||
|
||||
|
||||
@@ -432,8 +432,6 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) {
|
||||
hipError_t hipDeviceSynchronize ( void ) {
|
||||
HIP_INIT_API(hipDeviceSynchronize);
|
||||
|
||||
hip::syncStreams();
|
||||
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
|
||||
if (!queue) {
|
||||
|
||||
@@ -222,8 +222,7 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) {
|
||||
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
|
||||
amd::Command* command = (s != nullptr && (s->flags & hipStreamNonBlocking)) ?
|
||||
queue->getLastQueuedCommand(true) : nullptr;
|
||||
amd::Command* command = queue->getLastQueuedCommand(true);
|
||||
|
||||
if (command == nullptr) {
|
||||
command = new amd::Marker(*queue, false);
|
||||
|
||||
@@ -143,11 +143,6 @@ namespace hip {
|
||||
extern amd::HostQueue* getNullStream(amd::Context&);
|
||||
/// Get default stream of the thread
|
||||
extern amd::HostQueue* getNullStream();
|
||||
/// Sync Blocking streams on the current device
|
||||
extern void syncStreams();
|
||||
/// Sync blocking streams on the given device
|
||||
extern void syncStreams(int devId);
|
||||
|
||||
|
||||
struct Function {
|
||||
amd::Kernel* function_;
|
||||
@@ -289,9 +284,12 @@ public:
|
||||
void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream);
|
||||
|
||||
void popExec(ihipExec_t& exec);
|
||||
|
||||
};
|
||||
|
||||
/// Wait all active streams on the blocking queue. The method enqueues a wait command and
|
||||
/// doesn't stall the current thread
|
||||
extern void iHipWaitActiveStreams(amd::HostQueue* blocking_queue);
|
||||
|
||||
extern std::vector<hip::Device*> g_devices;
|
||||
extern hipError_t ihipDeviceGetCount(int* count);
|
||||
extern int ihipGetDevice();
|
||||
|
||||
@@ -52,7 +52,6 @@ hipError_t ihipFree(void *ptr)
|
||||
if (queue != nullptr) {
|
||||
queue->finish();
|
||||
}
|
||||
hip::syncStreams(dev->deviceId());
|
||||
}
|
||||
amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr);
|
||||
return hipSuccess;
|
||||
@@ -240,7 +239,6 @@ hipError_t hipFree(void* ptr) {
|
||||
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) {
|
||||
HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind);
|
||||
|
||||
hip::syncStreams();
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue));
|
||||
}
|
||||
@@ -289,7 +287,6 @@ hipError_t ihipArrayDestroy(hipArray* array) {
|
||||
if (queue != nullptr) {
|
||||
queue->finish();
|
||||
}
|
||||
hip::syncStreams(dev->deviceId());
|
||||
}
|
||||
as_amd(memObj)->release();
|
||||
|
||||
@@ -691,7 +688,6 @@ hipError_t hipHostUnregister(void* hostPtr) {
|
||||
if (queue != nullptr) {
|
||||
queue->finish();
|
||||
}
|
||||
hip::syncStreams(dev->deviceId());
|
||||
}
|
||||
|
||||
if (amd::SvmBuffer::malloced(hostPtr)) {
|
||||
@@ -1917,7 +1913,6 @@ hipError_t hipIpcCloseMemHandle(void* dev_ptr) {
|
||||
amd::Device* device = nullptr;
|
||||
amd::Memory* amd_mem_obj = nullptr;
|
||||
|
||||
hip::syncStreams();
|
||||
hip::getNullStream()->finish();
|
||||
|
||||
if (dev_ptr == nullptr) {
|
||||
|
||||
@@ -42,20 +42,16 @@ class StreamCallback {
|
||||
|
||||
namespace hip {
|
||||
|
||||
void syncStreams(int devId) {
|
||||
void syncStreams() {
|
||||
amd::ScopedLock lock(streamSetLock);
|
||||
|
||||
for (const auto& it : streamSet) {
|
||||
if (it->device->deviceId() == devId) {
|
||||
if (it->device->deviceId() == getCurrentDevice()->deviceId()) {
|
||||
it->finish();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void syncStreams() {
|
||||
syncStreams(getCurrentDevice()->deviceId());
|
||||
}
|
||||
|
||||
Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, unsigned int f) :
|
||||
queue(nullptr), lock("Stream Callback lock"), device(dev), priority(p), flags(f) {}
|
||||
|
||||
@@ -89,6 +85,44 @@ void Stream::finish() {
|
||||
|
||||
};
|
||||
|
||||
void iHipWaitActiveStreams(amd::HostQueue* blocking_queue) {
|
||||
amd::Command::EventWaitList eventWaitList;
|
||||
{
|
||||
amd::ScopedLock lock(streamSetLock);
|
||||
|
||||
for (const auto& it : streamSet) {
|
||||
// If it's the current device
|
||||
if ((it->queue != nullptr) && (&it->queue->device() == &blocking_queue->device()) &&
|
||||
// and it's a blocking streamclan
|
||||
((it->flags & hipStreamNonBlocking) == 0) &&
|
||||
// and it's not the current stream
|
||||
(it->asHostQueue() != blocking_queue)) {
|
||||
// Get the last valid so command
|
||||
amd::Command* command = it->asHostQueue()->getLastQueuedCommand(true);
|
||||
if ((command != nullptr) &&
|
||||
// Check the current active status
|
||||
(command->status() != CL_COMPLETE)) {
|
||||
eventWaitList.push_back(command);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we have to wait anything
|
||||
if (eventWaitList.size() > 0) {
|
||||
amd::Command* command = new amd::Marker(*blocking_queue, false, eventWaitList);
|
||||
if (command != nullptr) {
|
||||
command->enqueue();
|
||||
command->release();
|
||||
}
|
||||
}
|
||||
|
||||
// Release all active commands. It's safe after the marker was enqueued
|
||||
for (const auto& it : eventWaitList) {
|
||||
it->release();
|
||||
}
|
||||
}
|
||||
|
||||
void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) {
|
||||
hipError_t status = hipSuccess;
|
||||
StreamCallback* cbo = reinterpret_cast<StreamCallback*>(user_data);
|
||||
@@ -270,5 +304,3 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir