Files
rocm-systems/rocclr/runtime/device/pal/palcounters.cpp
T
foreman d8e5ee9889 P4 to Git Change 1290360 by jatang@jatang-opencl-hsa-stg2 on 2016/07/12 10:20:47
SWDEV-95919 - OCL PerfCounter on PAL.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#17 edit
2016-07-12 10:29:41 -04:00

189 строки
5.2 KiB
C++

//
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
//
#include "device/pal/palcounters.hpp"
#include "device/pal/palvirtual.hpp"
namespace pal {
PalCounterReference*
PalCounterReference::Create(VirtualGPU& gpu)
{
Pal::Result result;
// Create performance experiment
Pal::PerfExperimentCreateInfo createInfo = {};
createInfo.optionFlags.sampleInternalOperations = 1;
createInfo.optionFlags.cacheFlushOnCounterCollection = 1;
createInfo.optionFlags.sqShaderMask = 1;
createInfo.optionValues.sampleInternalOperations = true;
createInfo.optionValues.cacheFlushOnCounterCollection = true;
createInfo.optionValues.sqShaderMask = Pal::PerfShaderMaskCs;
size_t palExperSize = gpu.dev().iDev()->GetPerfExperimentSize(
createInfo, &result);
if (result != Pal::Result::Success) {
return nullptr;
}
PalCounterReference* memRef = new (palExperSize) PalCounterReference(gpu);
if (memRef != nullptr) {
result = gpu.dev().iDev()->CreatePerfExperiment(createInfo,
&memRef[1], &memRef->perfExp_);
if (result != Pal::Result::Success) {
memRef->release();
return nullptr;
}
}
return memRef;
}
PalCounterReference::~PalCounterReference()
{
// The counter object is always associated with a particular queue,
// so we have to lock just this queue
amd::ScopedLock lock(gpu_.execution());
if (layout_ != nullptr) {
delete layout_;
}
if (memory_ != nullptr) {
delete memory_;
}
if (nullptr != iPerf()) {
iPerf()->Destroy();
}
}
uint64_t PalCounterReference::result(uint index)
{
if (layout_ != nullptr) {
assert(index <= layout_->sampleCount && "index not in range");
const Pal::GlobalSampleLayout& sample = layout_->samples[index];
if (sample.dataType == Pal::PerfCounterDataType::Uint32) {
uint32_t beginVal = *reinterpret_cast<uint32_t*>(reinterpret_cast<char*>(cpuAddr_) + sample.beginValueOffset);
uint32_t endVal = *reinterpret_cast<uint32_t*>(reinterpret_cast<char*>(cpuAddr_) + sample.endValueOffset);
return (endVal - beginVal);
}
else if (sample.dataType == Pal::PerfCounterDataType::Uint64) {
uint64_t beginVal = *reinterpret_cast<uint64_t*>(reinterpret_cast<char*>(cpuAddr_) + sample.beginValueOffset);
uint64_t endVal = *reinterpret_cast<uint64_t*>(reinterpret_cast<char*>(cpuAddr_) + sample.endValueOffset);
return (endVal - beginVal);
}
else {
assert(0 && "dataType should be either Uint32 or Uint64");
return 0;
}
}
return 0;
}
bool PalCounterReference::finalize()
{
Pal::Result result;
iPerf()->Finalize();
// Acquire GPU memory for the query from the pool and bind it.
Pal::GpuMemoryRequirements gpuMemReqs = {};
iPerf()->GetGpuMemoryRequirements(&gpuMemReqs);
memory_ = new Memory(gpu().dev(), amd::alignUp(gpuMemReqs.size, gpuMemReqs.alignment));
if (nullptr == memory_) {
return false;
}
if (!memory_->create(Resource::Remote)) {
return false;
}
cpuAddr_ = memory_->cpuMap(gpu_);
if (nullptr == cpuAddr_) {
return false;
}
gpu_.queue(gpu_.engineID_).addMemRef(memory_->iMem());
result = iPerf()->BindGpuMemory(memory_->iMem(), 0);
if (result == Pal::Result::Success) {
Pal::GlobalCounterLayout layout = {};
iPerf()->GetGlobalCounterLayout(&layout);
size_t size = sizeof(Pal::GlobalCounterLayout) + (sizeof(Pal::GlobalSampleLayout) * (layout.sampleCount - 1));
layout_ = reinterpret_cast<Pal::GlobalCounterLayout*>(new char[size]);
if (layout_ != nullptr) {
layout_->sampleCount = layout.sampleCount;
iPerf()->GetGlobalCounterLayout(layout_);
}
return true;
}
else {
return false;
}
}
PerfCounter::~PerfCounter()
{
if (palRef_ == nullptr) {
return;
}
// Release the counter reference object
palRef_->release();
}
bool
PerfCounter::create()
{
index_ = palRef_->retain() - 2;
// Initialize the counter
Pal::PerfCounterInfo counterInfo = {};
counterInfo.counterType = Pal::PerfCounterType::Global;
counterInfo.block = static_cast<Pal::GpuBlock>(info_.blockIndex_);
counterInfo.instance = info_.counterIndex_;
counterInfo.eventId = info_.eventIndex_;
Pal::Result result = iPerf()->AddCounter(counterInfo);
if (result != Pal::Result::Success) {
return false;
}
return true;
}
uint64_t
PerfCounter::getInfo(uint64_t infoType) const
{
switch (infoType) {
case CL_PERFCOUNTER_GPU_BLOCK_INDEX: {
// Return the GPU block index
return info()->blockIndex_;
}
case CL_PERFCOUNTER_GPU_COUNTER_INDEX: {
// Return the GPU counter index
return info()->counterIndex_;
}
case CL_PERFCOUNTER_GPU_EVENT_INDEX: {
// Return the GPU event index
return info()->eventIndex_;
}
case CL_PERFCOUNTER_DATA: {
return palRef_->result(index_);
}
default:
LogError("Wrong PerfCounter::getInfo parameter");
}
return 0;
}
} // namespace pal