543cc3223a
SWDEV-79278 - [OCL][PAL] use HwPipeBottom to query the start time of an event. The reason is that if there are many dispatches run in a loop with profiling enabled, then it is possible that the HwPipeStart of an event can occur before the HwPipeBottom of the previous dispatch and this will affect the timing reported by profiling. ReviewBoardURL = http://ocltc.amd.com/reviews/r/13242/diff/1/ Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.cpp#3 edit
102 righe
3.1 KiB
C++
102 righe
3.1 KiB
C++
//
|
|
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
#include "os/os.hpp"
|
|
#include "platform/perfctr.hpp"
|
|
#include "device/pal/paldefs.hpp"
|
|
#include "device/pal/paltimestamp.hpp"
|
|
#include "device/pal/palvirtual.hpp"
|
|
#include "device/pal/palcounters.hpp"
|
|
|
|
namespace pal {
|
|
|
|
TimeStamp::TimeStamp(const VirtualGPU& gpu, Pal::IGpuMemory* iMem, uint memOffset, address cpuAddr)
|
|
: gpu_(gpu), iMem_(iMem), memOffset_(memOffset) {
|
|
values_ = reinterpret_cast<volatile uint64_t*>(cpuAddr + memOffset);
|
|
}
|
|
|
|
TimeStamp::~TimeStamp() {}
|
|
|
|
void TimeStamp::begin(bool sdma) {
|
|
if (!flags_.beginIssued_) {
|
|
gpu().iCmd()->CmdWriteTimestamp(Pal::HwPipePoint::HwPipeBottom, *iMem_,
|
|
memOffset_ + CommandStartTime * sizeof(uint64_t));
|
|
flags_.beginIssued_ = true;
|
|
}
|
|
}
|
|
|
|
void TimeStamp::end(bool sdma) {
|
|
CondLog(!flags_.beginIssued_, "We didn't issue a begin operation!");
|
|
gpu().iCmd()->CmdWriteTimestamp(Pal::HwPipePoint::HwPipeBottom, *iMem_,
|
|
memOffset_ + CommandEndTime * sizeof(uint64_t));
|
|
flags_.endIssued_ = true;
|
|
flags_.sdma_ = sdma;
|
|
}
|
|
|
|
inline void SetValue(uint64_t* time, uint64_t val, double nanos) {
|
|
*time = static_cast<uint64_t>(static_cast<double>(val) * nanos);
|
|
}
|
|
|
|
void TimeStamp::value(uint64_t* startTime, uint64_t* endTime) {
|
|
CondLog(!flags_.endIssued_, "We didn't send the counter end operation!");
|
|
//! @todo optimize!
|
|
const double NanoSecondsPerTick = 1000000000.0 / (gpu_.dev().properties().timestampFrequency);
|
|
|
|
SetValue(startTime, values_[CommandStartTime], NanoSecondsPerTick);
|
|
SetValue(endTime, values_[CommandEndTime], NanoSecondsPerTick);
|
|
}
|
|
|
|
TimeStampCache::~TimeStampCache() {
|
|
// Release all time stamp objects from the cache
|
|
for (uint i = 0; i < freedTS_.size(); ++i) {
|
|
delete freedTS_[i];
|
|
}
|
|
freedTS_.clear();
|
|
|
|
// Release all memory objects
|
|
for (uint i = 0; i < tsBuf_.size(); ++i) {
|
|
tsBuf_[i]->unmap(&gpu_);
|
|
gpu_.queue(MainEngine).removeMemRef(tsBuf_[i]->iMem());
|
|
gpu_.queue(SdmaEngine).removeMemRef(tsBuf_[i]->iMem());
|
|
delete tsBuf_[i];
|
|
}
|
|
tsBuf_.clear();
|
|
}
|
|
|
|
TimeStamp* TimeStampCache::allocTimeStamp() {
|
|
TimeStamp* ts = nullptr;
|
|
if (0 != freedTS_.size()) {
|
|
ts = freedTS_.back();
|
|
freedTS_.pop_back();
|
|
}
|
|
|
|
if (nullptr == ts) {
|
|
if ((tsBufCpu_ == nullptr) || ((tsOffset_ + TimerSlotSize) > TimerBufSize)) {
|
|
Memory* buf = new Memory(gpu_.dev(), TimerBufSize);
|
|
if (buf == nullptr || !buf->create(Resource::Remote)) {
|
|
return nullptr;
|
|
}
|
|
gpu_.queue(MainEngine).addMemRef(buf->iMem());
|
|
gpu_.queue(SdmaEngine).addMemRef(buf->iMem());
|
|
tsBufCpu_ = reinterpret_cast<address>(buf->map(&gpu_));
|
|
memset(tsBufCpu_, 0, TimerBufSize);
|
|
tsOffset_ = 0;
|
|
tsBuf_.push_back(buf);
|
|
}
|
|
// Allocate a TimeStamp object
|
|
ts = new TimeStamp(gpu_, tsBuf_[(tsBuf_.size() - 1)]->iMem(), tsOffset_, tsBufCpu_);
|
|
// Create a timestamp
|
|
if (ts == nullptr) {
|
|
return nullptr;
|
|
}
|
|
tsOffset_ += TimerSlotSize;
|
|
}
|
|
|
|
// Set this timestamp into DRM profile mode if it was requested
|
|
ts->clearStates();
|
|
|
|
return ts;
|
|
}
|
|
|
|
} // namespace pal
|