From 977f314094e90f7889ff4831b747c7b503ddd9e7 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 8 Jul 2014 15:30:04 -0400 Subject: [PATCH] P4 to Git Change 1053268 by gandryey@gera-dev-w7 on 2014/07/08 15:08:51 ECR #304775 - Device enqueuing - Add printing of the waiting events - Add early exit in the scheduler if nothing to launch Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuschedcl.cpp#19 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#321 edit --- rocclr/runtime/device/gpu/gpuschedcl.cpp | 15 ++++++--------- rocclr/runtime/device/gpu/gpuvirtual.cpp | 14 +++++++++++++- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/rocclr/runtime/device/gpu/gpuschedcl.cpp b/rocclr/runtime/device/gpu/gpuschedcl.cpp index 7503532e01..0708bf975e 100644 --- a/rocclr/runtime/device/gpu/gpuschedcl.cpp +++ b/rocclr/runtime/device/gpu/gpuschedcl.cpp @@ -388,15 +388,12 @@ scheduler( continue; } - // Check if the command has any the wait events - if (disp->wait_num != 0) { - // Check if the wait list is COMPLETE - launch = checkWaitEvents( - (__global AmdEvent**)(disp->wait_list), disp->wait_num); - } - else { - launch = 1; - } + // Check if the wait list is COMPLETE + launch = checkWaitEvents( + (__global AmdEvent**)(disp->wait_list), disp->wait_num); + + if (launch == 0) continue; + uint tmp = 0; if (atomic_compare_exchange_strong_explicit( (__global atomic_uint*)¶m->launch, &tmp, launch, diff --git a/rocclr/runtime/device/gpu/gpuvirtual.cpp b/rocclr/runtime/device/gpu/gpuvirtual.cpp index 0e6b55cd65..9a4497be2c 100644 --- a/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -1734,6 +1734,7 @@ VirtualGPU::submitKernelInternalHSA( uint p = 0; for (uint i = 0; i < gpuDefQueue->vqHeader_->aql_slot_num; ++i) { if (wraps[i].state != 0) { + uint j; if (p == GPU_PRINT_CHILD_KERNEL) { break; } @@ -1748,6 +1749,18 @@ VirtualGPU::submitKernelInternalHSA( print << "\tparent_wrap: " << wraps[i].parent_wrap << "\n"; print << "\twait_list: " << wraps[i].wait_list << "\n"; print << "\twait_num: " << wraps[i].wait_num << "\n"; + uint offsEvents = wraps[i].wait_list - + gpuDefQueue->virtualQueue_->vmAddress(); + size_t* events = reinterpret_cast( + gpuDefQueue->virtualQueue_->data() + offsEvents); + for (j = 0; j < wraps[i].wait_num; ++j) { + uint offs = static_cast(events[j]) - + gpuDefQueue->virtualQueue_->vmAddress(); + AmdEvent* eventD = (AmdEvent*)(gpuDefQueue->virtualQueue_->data() + offs); + print << "Wait Event#: " << j << "\n"; + print << "\tState: " << eventD->state << + "; Counter: " << eventD->counter << "\n"; + } print << "WorkGroupSize[ " << wraps[i].aql.workgroup_size[0] << ", "; print << wraps[i].aql.workgroup_size[1] << ", "; print << wraps[i].aql.workgroup_size[2] << "]\n"; @@ -1757,7 +1770,6 @@ VirtualGPU::submitKernelInternalHSA( uint64_t* kernels = (uint64_t*)( const_cast(hsaKernel.prog().kernelTable())->map(this)); - uint j; for (j = 0; j < hsaKernel.prog().kernels().size(); ++j) { if (kernels[j] == wraps[i].aql.kernel_object_address) { break;