Don't create a marker for the start event in hipModuleLaunchKernel
Also, don't special-case start == stop when computing the elapsed time,
since the command behind the event can be an NDRange (kernel) command.
The HIP directed test will need to be updated for this change.
Change-Id: I64fadd6ab8ab1a490e7a2b7165a591df5a5cf3a2
[ROCm/hip commit: 9692ac6b5f]
Bu işleme şunda yer alıyor:
@@ -86,12 +86,8 @@ hipError_t Event::elapsedTime(Event& eStop, float& ms) {
|
||||
return hipErrorNotReady;
|
||||
}
|
||||
|
||||
if (event_ != eStop.event_) {
|
||||
ms = static_cast<float>(static_cast<int64_t>(eStop.event_->profilingInfo().end_ -
|
||||
event_->profilingInfo().start_))/1000000.f;
|
||||
} else {
|
||||
ms = 0.f;
|
||||
}
|
||||
ms = static_cast<float>(static_cast<int64_t>(eStop.event_->profilingInfo().end_ -
|
||||
event_->profilingInfo().start_))/1000000.f;
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
@@ -324,12 +324,6 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
}
|
||||
}
|
||||
|
||||
if(startEvent != nullptr) {
|
||||
amd::Command* startCommand = new hip::TimerMarker(*queue);
|
||||
startCommand->enqueue();
|
||||
eStart->addMarker(queue, startCommand);
|
||||
}
|
||||
|
||||
amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(
|
||||
*queue, waitList, *kernel, ndrange, sharedMemBytes,
|
||||
params, gridId, numGrids, prevGridSum, allGridSum, firstDevice);
|
||||
@@ -345,11 +339,14 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
|
||||
command->enqueue();
|
||||
|
||||
if(startEvent != nullptr) {
|
||||
eStart->addMarker(queue, command);
|
||||
command->retain();
|
||||
}
|
||||
if(stopEvent != nullptr) {
|
||||
eStop->addMarker(queue, command);
|
||||
command->retain();
|
||||
}
|
||||
|
||||
command->release();
|
||||
|
||||
return hipSuccess;
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle