From 977f314094e90f7889ff4831b747c7b503ddd9e7 Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 8 Jul 2014 15:30:04 -0400
Subject: [PATCH] P4 to Git Change 1053268 by gandryey@gera-dev-w7 on
2014/07/08 15:08:51
ECR #304775 - Device enqueuing
- Add printing of the waiting events
- Add early exit in the scheduler if nothing to launch
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuschedcl.cpp#19 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#321 edit
---
rocclr/runtime/device/gpu/gpuschedcl.cpp | 15 ++++++---------
rocclr/runtime/device/gpu/gpuvirtual.cpp | 14 +++++++++++++-
2 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/rocclr/runtime/device/gpu/gpuschedcl.cpp b/rocclr/runtime/device/gpu/gpuschedcl.cpp
index 7503532e01..0708bf975e 100644
--- a/rocclr/runtime/device/gpu/gpuschedcl.cpp
+++ b/rocclr/runtime/device/gpu/gpuschedcl.cpp
@@ -388,15 +388,12 @@ scheduler(
continue;
}
- // Check if the command has any the wait events
- if (disp->wait_num != 0) {
- // Check if the wait list is COMPLETE
- launch = checkWaitEvents(
- (__global AmdEvent**)(disp->wait_list), disp->wait_num);
- }
- else {
- launch = 1;
- }
+ // Check if the wait list is COMPLETE
+ launch = checkWaitEvents(
+ (__global AmdEvent**)(disp->wait_list), disp->wait_num);
+
+ if (launch == 0) continue;
+
uint tmp = 0;
if (atomic_compare_exchange_strong_explicit(
(__global atomic_uint*)¶m->launch, &tmp, launch,
diff --git a/rocclr/runtime/device/gpu/gpuvirtual.cpp b/rocclr/runtime/device/gpu/gpuvirtual.cpp
index 0e6b55cd65..9a4497be2c 100644
--- a/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -1734,6 +1734,7 @@ VirtualGPU::submitKernelInternalHSA(
uint p = 0;
for (uint i = 0; i < gpuDefQueue->vqHeader_->aql_slot_num; ++i) {
if (wraps[i].state != 0) {
+ uint j;
if (p == GPU_PRINT_CHILD_KERNEL) {
break;
}
@@ -1748,6 +1749,18 @@ VirtualGPU::submitKernelInternalHSA(
print << "\tparent_wrap: " << wraps[i].parent_wrap << "\n";
print << "\twait_list: " << wraps[i].wait_list << "\n";
print << "\twait_num: " << wraps[i].wait_num << "\n";
+ uint offsEvents = wraps[i].wait_list -
+ gpuDefQueue->virtualQueue_->vmAddress();
+ size_t* events = reinterpret_cast(
+ gpuDefQueue->virtualQueue_->data() + offsEvents);
+ for (j = 0; j < wraps[i].wait_num; ++j) {
+ uint offs = static_cast(events[j]) -
+ gpuDefQueue->virtualQueue_->vmAddress();
+ AmdEvent* eventD = (AmdEvent*)(gpuDefQueue->virtualQueue_->data() + offs);
+ print << "Wait Event#: " << j << "\n";
+ print << "\tState: " << eventD->state <<
+ "; Counter: " << eventD->counter << "\n";
+ }
print << "WorkGroupSize[ " << wraps[i].aql.workgroup_size[0] << ", ";
print << wraps[i].aql.workgroup_size[1] << ", ";
print << wraps[i].aql.workgroup_size[2] << "]\n";
@@ -1757,7 +1770,6 @@ VirtualGPU::submitKernelInternalHSA(
uint64_t* kernels = (uint64_t*)(
const_cast(hsaKernel.prog().kernelTable())->map(this));
- uint j;
for (j = 0; j < hsaKernel.prog().kernels().size(); ++j) {
if (kernels[j] == wraps[i].aql.kernel_object_address) {
break;