Files
rocm-systems/rocclr/runtime/device/pal/paltimestamp.cpp
T
foreman 543cc3223a P4 to Git Change 1443821 by asalmanp@asalmanp-ocl-stg on 2017/08/04 16:24:19
SWDEV-79278 - [OCL][PAL] use HwPipeBottom to query the start time of an event.
	The reason is that if there are many dispatches run in a loop with profiling enabled, then it is possible that the HwPipeStart of an event can occur before the HwPipeBottom of the previous dispatch and this will affect the timing reported by profiling.

	ReviewBoardURL = http://ocltc.amd.com/reviews/r/13242/diff/1/

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.cpp#3 edit
2017-08-04 16:48:42 -04:00

102 righe
3.1 KiB
C++

//
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
//
#include "os/os.hpp"
#include "platform/perfctr.hpp"
#include "device/pal/paldefs.hpp"
#include "device/pal/paltimestamp.hpp"
#include "device/pal/palvirtual.hpp"
#include "device/pal/palcounters.hpp"
namespace pal {
TimeStamp::TimeStamp(const VirtualGPU& gpu, Pal::IGpuMemory* iMem, uint memOffset, address cpuAddr)
: gpu_(gpu), iMem_(iMem), memOffset_(memOffset) {
values_ = reinterpret_cast<volatile uint64_t*>(cpuAddr + memOffset);
}
TimeStamp::~TimeStamp() {}
void TimeStamp::begin(bool sdma) {
if (!flags_.beginIssued_) {
gpu().iCmd()->CmdWriteTimestamp(Pal::HwPipePoint::HwPipeBottom, *iMem_,
memOffset_ + CommandStartTime * sizeof(uint64_t));
flags_.beginIssued_ = true;
}
}
void TimeStamp::end(bool sdma) {
CondLog(!flags_.beginIssued_, "We didn't issue a begin operation!");
gpu().iCmd()->CmdWriteTimestamp(Pal::HwPipePoint::HwPipeBottom, *iMem_,
memOffset_ + CommandEndTime * sizeof(uint64_t));
flags_.endIssued_ = true;
flags_.sdma_ = sdma;
}
inline void SetValue(uint64_t* time, uint64_t val, double nanos) {
*time = static_cast<uint64_t>(static_cast<double>(val) * nanos);
}
void TimeStamp::value(uint64_t* startTime, uint64_t* endTime) {
CondLog(!flags_.endIssued_, "We didn't send the counter end operation!");
//! @todo optimize!
const double NanoSecondsPerTick = 1000000000.0 / (gpu_.dev().properties().timestampFrequency);
SetValue(startTime, values_[CommandStartTime], NanoSecondsPerTick);
SetValue(endTime, values_[CommandEndTime], NanoSecondsPerTick);
}
TimeStampCache::~TimeStampCache() {
// Release all time stamp objects from the cache
for (uint i = 0; i < freedTS_.size(); ++i) {
delete freedTS_[i];
}
freedTS_.clear();
// Release all memory objects
for (uint i = 0; i < tsBuf_.size(); ++i) {
tsBuf_[i]->unmap(&gpu_);
gpu_.queue(MainEngine).removeMemRef(tsBuf_[i]->iMem());
gpu_.queue(SdmaEngine).removeMemRef(tsBuf_[i]->iMem());
delete tsBuf_[i];
}
tsBuf_.clear();
}
TimeStamp* TimeStampCache::allocTimeStamp() {
TimeStamp* ts = nullptr;
if (0 != freedTS_.size()) {
ts = freedTS_.back();
freedTS_.pop_back();
}
if (nullptr == ts) {
if ((tsBufCpu_ == nullptr) || ((tsOffset_ + TimerSlotSize) > TimerBufSize)) {
Memory* buf = new Memory(gpu_.dev(), TimerBufSize);
if (buf == nullptr || !buf->create(Resource::Remote)) {
return nullptr;
}
gpu_.queue(MainEngine).addMemRef(buf->iMem());
gpu_.queue(SdmaEngine).addMemRef(buf->iMem());
tsBufCpu_ = reinterpret_cast<address>(buf->map(&gpu_));
memset(tsBufCpu_, 0, TimerBufSize);
tsOffset_ = 0;
tsBuf_.push_back(buf);
}
// Allocate a TimeStamp object
ts = new TimeStamp(gpu_, tsBuf_[(tsBuf_.size() - 1)]->iMem(), tsOffset_, tsBufCpu_);
// Create a timestamp
if (ts == nullptr) {
return nullptr;
}
tsOffset_ += TimerSlotSize;
}
// Set this timestamp into DRM profile mode if it was requested
ts->clearStates();
return ts;
}
} // namespace pal