SWDEV-231579 - [hipclang-vdi-rocm][perf]

- HIPPerfDispatchSpeed disparity between HIP/HCC vs HIP/VDI
Insert a wait marker command in the default stream only when
HIP has pending operations on other async streams

Change-Id: I68660a54867fab7571ba57eb1df5feb1bca1c61a


[ROCm/hip commit: db70fc66b7]
Tá an tiomantas seo le fáil i:
German Andryeyev
2020-04-17 10:42:46 -04:00
tuismitheoir 45f8e9444a
tiomantas 45306fcdf6
D'athraigh 7 comhad le 72 breiseanna agus 25 scriosta
+25 -2
Féach ar an gComhad
@@ -80,13 +80,36 @@ void setCurrentDevice(unsigned int index) {
amd::HostQueue* getQueue(hipStream_t stream) {
if (stream == nullptr) {
syncStreams();
return getNullStream();
} else {
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
// Wait for null stream
if ((s->flags & hipStreamNonBlocking) == 0) {
getNullStream()->finish();
amd::HostQueue* nullStream = getNullStream();
amd::Command::EventWaitList eventWaitList;
amd::Command* command = nullStream->getLastQueuedCommand(true);
if ((command != nullptr) &&
// Check the current active status
(command->status() != CL_COMPLETE)) {
eventWaitList.push_back(command);
}
// Check if we have to wait anything
if (eventWaitList.size() > 0) {
amd::Command* command = new amd::Marker(*s->asHostQueue(), false, eventWaitList);
if (command != nullptr) {
command->enqueue();
command->release();
}
}
// Release all active commands. It's safe after the marker was enqueued
for (const auto& it : eventWaitList) {
it->release();
}
}
return s->asHostQueue();
}
}
+2
Féach ar an gComhad
@@ -35,6 +35,8 @@ amd::HostQueue* Device::defaultStream() {
return nullptr;
}
}
// Wait for all active streams before executing commands on the default
iHipWaitActiveStreams(defaultStream_);
return defaultStream_;
}
-2
Féach ar an gComhad
@@ -432,8 +432,6 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) {
hipError_t hipDeviceSynchronize ( void ) {
HIP_INIT_API(hipDeviceSynchronize);
hip::syncStreams();
amd::HostQueue* queue = hip::getNullStream();
if (!queue) {
+1 -2
Féach ar an gComhad
@@ -222,8 +222,7 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) {
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
amd::HostQueue* queue = hip::getQueue(stream);
amd::Command* command = (s != nullptr && (s->flags & hipStreamNonBlocking)) ?
queue->getLastQueuedCommand(true) : nullptr;
amd::Command* command = queue->getLastQueuedCommand(true);
if (command == nullptr) {
command = new amd::Marker(*queue, false);
+4 -6
Féach ar an gComhad
@@ -143,11 +143,6 @@ namespace hip {
extern amd::HostQueue* getNullStream(amd::Context&);
/// Get default stream of the thread
extern amd::HostQueue* getNullStream();
/// Sync Blocking streams on the current device
extern void syncStreams();
/// Sync blocking streams on the given device
extern void syncStreams(int devId);
struct Function {
amd::Kernel* function_;
@@ -289,9 +284,12 @@ public:
void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream);
void popExec(ihipExec_t& exec);
};
/// Wait all active streams on the blocking queue. The method enqueues a wait command and
/// doesn't stall the current thread
extern void iHipWaitActiveStreams(amd::HostQueue* blocking_queue);
extern std::vector<hip::Device*> g_devices;
extern hipError_t ihipDeviceGetCount(int* count);
extern int ihipGetDevice();
-5
Féach ar an gComhad
@@ -52,7 +52,6 @@ hipError_t ihipFree(void *ptr)
if (queue != nullptr) {
queue->finish();
}
hip::syncStreams(dev->deviceId());
}
amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr);
return hipSuccess;
@@ -240,7 +239,6 @@ hipError_t hipFree(void* ptr) {
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) {
HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind);
hip::syncStreams();
amd::HostQueue* queue = hip::getNullStream();
HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue));
}
@@ -289,7 +287,6 @@ hipError_t ihipArrayDestroy(hipArray* array) {
if (queue != nullptr) {
queue->finish();
}
hip::syncStreams(dev->deviceId());
}
as_amd(memObj)->release();
@@ -691,7 +688,6 @@ hipError_t hipHostUnregister(void* hostPtr) {
if (queue != nullptr) {
queue->finish();
}
hip::syncStreams(dev->deviceId());
}
if (amd::SvmBuffer::malloced(hostPtr)) {
@@ -1917,7 +1913,6 @@ hipError_t hipIpcCloseMemHandle(void* dev_ptr) {
amd::Device* device = nullptr;
amd::Memory* amd_mem_obj = nullptr;
hip::syncStreams();
hip::getNullStream()->finish();
if (dev_ptr == nullptr) {
+40 -8
Féach ar an gComhad
@@ -42,20 +42,16 @@ class StreamCallback {
namespace hip {
void syncStreams(int devId) {
void syncStreams() {
amd::ScopedLock lock(streamSetLock);
for (const auto& it : streamSet) {
if (it->device->deviceId() == devId) {
if (it->device->deviceId() == getCurrentDevice()->deviceId()) {
it->finish();
}
}
}
void syncStreams() {
syncStreams(getCurrentDevice()->deviceId());
}
Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, unsigned int f) :
queue(nullptr), lock("Stream Callback lock"), device(dev), priority(p), flags(f) {}
@@ -89,6 +85,44 @@ void Stream::finish() {
};
void iHipWaitActiveStreams(amd::HostQueue* blocking_queue) {
amd::Command::EventWaitList eventWaitList;
{
amd::ScopedLock lock(streamSetLock);
for (const auto& it : streamSet) {
// If it's the current device
if ((it->queue != nullptr) && (&it->queue->device() == &blocking_queue->device()) &&
// and it's a blocking streamclan
((it->flags & hipStreamNonBlocking) == 0) &&
// and it's not the current stream
(it->asHostQueue() != blocking_queue)) {
// Get the last valid so command
amd::Command* command = it->asHostQueue()->getLastQueuedCommand(true);
if ((command != nullptr) &&
// Check the current active status
(command->status() != CL_COMPLETE)) {
eventWaitList.push_back(command);
}
}
}
}
// Check if we have to wait anything
if (eventWaitList.size() > 0) {
amd::Command* command = new amd::Marker(*blocking_queue, false, eventWaitList);
if (command != nullptr) {
command->enqueue();
command->release();
}
}
// Release all active commands. It's safe after the marker was enqueued
for (const auto& it : eventWaitList) {
it->release();
}
}
void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) {
hipError_t status = hipSuccess;
StreamCallback* cbo = reinterpret_cast<StreamCallback*>(user_data);
@@ -270,5 +304,3 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback
HIP_RETURN(hipSuccess);
}