Enable queue profiling only if a profiler is attached
Submit an explicit profile marker for hipEventRecord to record timestamps. Enable explicit signal profiling if the API specifies start and stop events. Toggle this with the env var HIP_FORCE_QUEUE_PROFILING=0. Change-Id: Iae449a63ec3ebf6c2880e65d7b1dd1031a29018f
This commit is contained in:
@@ -140,6 +140,19 @@ hipError_t Event::streamWait(amd::HostQueue* hostQueue, uint flags) {
|
||||
void Event::addMarker(amd::HostQueue* queue, amd::Command* command, bool record) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
if (queue->properties().test(CL_QUEUE_PROFILING_ENABLE)) {
|
||||
if (command == nullptr) {
|
||||
command = queue->getLastQueuedCommand(true);
|
||||
if (command == nullptr) {
|
||||
command = new amd::Marker(*queue, kMarkerDisableFlush);
|
||||
command->enqueue();
|
||||
}
|
||||
}
|
||||
} else if (command == nullptr) {
|
||||
command = new hip::ProfileMarker(*queue, false);
|
||||
command->enqueue();
|
||||
}
|
||||
|
||||
if (event_ == &command->event()) return;
|
||||
|
||||
if (event_ != nullptr) {
|
||||
@@ -239,16 +252,9 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) {
|
||||
}
|
||||
|
||||
hip::Event* e = reinterpret_cast<hip::Event*>(event);
|
||||
amd::ScopedLock lock(e->lock());
|
||||
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
amd::Command* command = queue->getLastQueuedCommand(true);
|
||||
if (command == nullptr) {
|
||||
command = new amd::Marker(*queue, kMarkerDisableFlush);
|
||||
command->enqueue();
|
||||
}
|
||||
|
||||
e->addMarker(queue, command, true);
|
||||
e->addMarker(queue, nullptr, true);
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
@@ -26,12 +26,13 @@
|
||||
|
||||
namespace hip {
|
||||
|
||||
class TimerMarker: public amd::Marker {
|
||||
class ProfileMarker: public amd::Marker {
|
||||
public:
|
||||
TimerMarker(amd::HostQueue& queue) : amd::Marker(queue, false) {
|
||||
ProfileMarker(amd::HostQueue& queue, bool disableFlush)
|
||||
: amd::Marker(queue, disableFlush) {
|
||||
profilingInfo_.enabled_ = true;
|
||||
profilingInfo_.callback_ = nullptr;
|
||||
profilingInfo_.start_ = profilingInfo_.end_ = 0;
|
||||
profilingInfo_.clear();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -249,7 +249,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ };
|
||||
amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize);
|
||||
amd::Command::EventWaitList waitList;
|
||||
|
||||
bool profileNDRange = false;
|
||||
address kernargs = nullptr;
|
||||
|
||||
// 'extra' is a struct that contains the following info: {
|
||||
@@ -273,13 +273,16 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
desc.type_ == T_POINTER/*svmBound*/);
|
||||
} else {
|
||||
assert(extra == nullptr);
|
||||
kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER/*svmBound*/);
|
||||
kernel->parameters().set(i, desc.size_, kernelParams[i],
|
||||
desc.type_ == T_POINTER/*svmBound*/);
|
||||
}
|
||||
}
|
||||
|
||||
profileNDRange = (startEvent != nullptr && stopEvent != nullptr);
|
||||
|
||||
amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(
|
||||
*queue, waitList, *kernel, ndrange, sharedMemBytes,
|
||||
params, gridId, numGrids, prevGridSum, allGridSum, firstDevice);
|
||||
params, gridId, numGrids, prevGridSum, allGridSum, firstDevice, profileNDRange);
|
||||
if (!command) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
@@ -472,7 +475,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
uint64_t prevGridSize = 0;
|
||||
uint32_t firstDevice = 0;
|
||||
|
||||
// Sync the execution streams on all devices
|
||||
// Sync the execution streams on all devices
|
||||
if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) {
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
amd::HostQueue* queue =
|
||||
@@ -520,7 +523,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
prevGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z;
|
||||
}
|
||||
|
||||
// Sync the execution streams on all devices
|
||||
// Sync the execution streams on all devices
|
||||
if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) {
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
amd::HostQueue* queue =
|
||||
|
||||
@@ -22,6 +22,9 @@
|
||||
#include "hip_internal.hpp"
|
||||
#include "hip_event.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
#include "hip_prof_api.h"
|
||||
|
||||
extern api_callbacks_table_t callbacks_table;
|
||||
|
||||
static amd::Monitor streamSetLock{"Guards global stream set"};
|
||||
static std::unordered_set<hip::Stream*> streamSet;
|
||||
@@ -50,7 +53,12 @@ Stream::Stream(hip::Device* dev, Priority p,
|
||||
|
||||
// ================================================================================================
|
||||
bool Stream::Create() {
|
||||
cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
|
||||
// Enable queue profiling if a profiler is attached which sets the callback_table flag
|
||||
// or if we force it with env var. This would enable time stamp collection for every
|
||||
// command submitted to the stream(queue).
|
||||
cl_command_queue_properties properties = (callbacks_table.is_enabled() ||
|
||||
HIP_FORCE_QUEUE_PROFILING) ?
|
||||
CL_QUEUE_PROFILING_ENABLE : 0;
|
||||
amd::CommandQueue::Priority p;
|
||||
switch (priority_) {
|
||||
case Priority::High:
|
||||
@@ -64,8 +72,9 @@ bool Stream::Create() {
|
||||
p = amd::CommandQueue::Priority::Normal;
|
||||
break;
|
||||
}
|
||||
amd::HostQueue* queue = new amd::HostQueue(*device_->asContext(), *device_->devices()[0], properties,
|
||||
amd::CommandQueue::RealTimeDisabled, p, cuMask_);
|
||||
amd::HostQueue* queue = new amd::HostQueue(*device_->asContext(), *device_->devices()[0],
|
||||
properties, amd::CommandQueue::RealTimeDisabled,
|
||||
p, cuMask_);
|
||||
|
||||
// Create a host queue
|
||||
bool result = (queue != nullptr) ? queue->create() : false;
|
||||
|
||||
Reference in New Issue
Block a user