5fc5006853
SWDEV-193973 - Update perfcounter info to accommodate PAL interface changes. Gfx103 added perf counters for three new blocks - GeDist, GeSe and Df. 1. Update the blockIdToIndexSelect array to reflect these changes. ReviewBoardURL = http://ocltc.amd.com/reviews/r/18063/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#25 edit
798 строки
26 KiB
C++
798 строки
26 KiB
C++
//
|
|
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#include "device/pal/palcounters.hpp"
|
|
#include "device/pal/palvirtual.hpp"
|
|
#include <array>
|
|
|
|
namespace pal {
|
|
|
|
// Builds a PalCounterReference for the given virtual GPU and creates the PAL
// performance experiment it wraps.
//
// The size PAL reports for the experiment is handed to the class's placement
// operator new, and the experiment itself is created at the address right
// behind the PalCounterReference object (&counterRef[1]).
//
// Returns nullptr when PAL cannot report the experiment size, when the
// allocation yields nullptr, or when the experiment cannot be created (in the
// latter case the partially built reference is released first).
PalCounterReference* PalCounterReference::Create(VirtualGPU& gpu) {
  // Describe the experiment: sample internal operations, flush caches when
  // counters are collected, and restrict SQ counters to compute shaders.
  Pal::PerfExperimentCreateInfo createInfo = {};
  createInfo.optionFlags.sampleInternalOperations = 1;
  createInfo.optionValues.sampleInternalOperations = true;
  createInfo.optionFlags.cacheFlushOnCounterCollection = 1;
  createInfo.optionValues.cacheFlushOnCounterCollection = true;
  createInfo.optionFlags.sqShaderMask = 1;
  createInfo.optionValues.sqShaderMask = Pal::PerfShaderMaskCs;

  Pal::Result status;
  const size_t experimentSize = gpu.dev().iDev()->GetPerfExperimentSize(createInfo, &status);
  if (status != Pal::Result::Success) {
    return nullptr;
  }

  PalCounterReference* counterRef = new (experimentSize) PalCounterReference(gpu);
  if (counterRef == nullptr) {
    return nullptr;
  }

  status = gpu.dev().iDev()->CreatePerfExperiment(createInfo, &counterRef[1],
                                                  &counterRef->perfExp_);
  if (status != Pal::Result::Success) {
    counterRef->release();
    return nullptr;
  }

  return counterRef;
}
|
|
|
|
// Releases the counter layout copy, the result memory and the PAL performance
// experiment owned by this reference.
PalCounterReference::~PalCounterReference() {
  // The counter object is always associated with a particular queue,
  // so we have to lock just this queue
  amd::ScopedLock lock(gpu_.execution());

  // finalize() allocates the layout as a raw char array (new char[size]) and
  // only reinterprets it as a GlobalCounterLayout, so it has to be released
  // with the matching array delete on the char pointer. A scalar delete
  // through the GlobalCounterLayout pointer would be undefined behaviour.
  // Deleting a null pointer is a no-op, so no explicit null checks are needed.
  delete[] reinterpret_cast<char*>(layout_);

  delete memory_;

  if (nullptr != iPerf()) {
    iPerf()->Destroy();
  }
}
|
|
|
|
uint64_t PalCounterReference::result(const std::vector<int>& index) {
|
|
if (index.size() == 0) {
|
|
// These are counters that have no corresponding PalSample created
|
|
return 0;
|
|
}
|
|
|
|
if (layout_ == nullptr) {
|
|
return 0;
|
|
}
|
|
|
|
uint64_t result = 0;
|
|
for (auto const& i : index) {
|
|
assert(i <= static_cast<int>(layout_->sampleCount) && "index not in range");
|
|
const Pal::GlobalSampleLayout& sample = layout_->samples[i];
|
|
if (sample.dataType == Pal::PerfCounterDataType::Uint32) {
|
|
uint32_t beginVal =
|
|
*reinterpret_cast<uint32_t*>(reinterpret_cast<char*>(cpuAddr_) + sample.beginValueOffset);
|
|
uint32_t endVal =
|
|
*reinterpret_cast<uint32_t*>(reinterpret_cast<char*>(cpuAddr_) + sample.endValueOffset);
|
|
result += (endVal - beginVal);
|
|
} else if (sample.dataType == Pal::PerfCounterDataType::Uint64) {
|
|
uint64_t beginVal =
|
|
*reinterpret_cast<uint64_t*>(reinterpret_cast<char*>(cpuAddr_) + sample.beginValueOffset);
|
|
uint64_t endVal =
|
|
*reinterpret_cast<uint64_t*>(reinterpret_cast<char*>(cpuAddr_) + sample.endValueOffset);
|
|
result += (endVal - beginVal);
|
|
} else {
|
|
assert(0 && "dataType should be either Uint32 or Uint64");
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool PalCounterReference::finalize() {
|
|
Pal::Result result;
|
|
|
|
iPerf()->Finalize();
|
|
|
|
// Acquire GPU memory for the query from the pool and bind it.
|
|
Pal::GpuMemoryRequirements gpuMemReqs = {};
|
|
iPerf()->GetGpuMemoryRequirements(&gpuMemReqs);
|
|
|
|
memory_ = new Memory(gpu().dev(), amd::alignUp(gpuMemReqs.size, gpuMemReqs.alignment));
|
|
|
|
if (nullptr == memory_) {
|
|
return false;
|
|
}
|
|
|
|
if (!memory_->create(Resource::Remote)) {
|
|
return false;
|
|
}
|
|
|
|
cpuAddr_ = memory_->cpuMap(gpu_);
|
|
|
|
if (nullptr == cpuAddr_) {
|
|
return false;
|
|
}
|
|
|
|
gpu_.queue(gpu_.engineID_).addMemRef(memory_->iMem());
|
|
|
|
result = iPerf()->BindGpuMemory(memory_->iMem(), 0);
|
|
|
|
if (result == Pal::Result::Success) {
|
|
Pal::GlobalCounterLayout layout = {};
|
|
iPerf()->GetGlobalCounterLayout(&layout);
|
|
|
|
assert(layout.sampleCount == numExpCounters_);
|
|
size_t size = sizeof(Pal::GlobalCounterLayout) +
|
|
(sizeof(Pal::GlobalSampleLayout) * (layout.sampleCount - 1));
|
|
layout_ = reinterpret_cast<Pal::GlobalCounterLayout*>(new char[size]);
|
|
if (layout_ != nullptr) {
|
|
layout_->sampleCount = layout.sampleCount;
|
|
iPerf()->GetGlobalCounterLayout(layout_);
|
|
}
|
|
return true;
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
#if !IS_MAINLINE && (PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 543)
|
|
static const std::array<PCIndexSelect, 49> blockIdToIndexSelect = {{
|
|
#else
|
|
static const std::array<PCIndexSelect, 46> blockIdToIndexSelect = {{
|
|
#endif
|
|
PCIndexSelect::None, // CPF
|
|
PCIndexSelect::ShaderEngine, // IA
|
|
PCIndexSelect::ShaderEngine, // VGT
|
|
PCIndexSelect::ShaderArray, // PA
|
|
PCIndexSelect::ShaderArray, // SC
|
|
PCIndexSelect::ShaderEngine, // SPI
|
|
PCIndexSelect::ShaderEngine, // SQ
|
|
PCIndexSelect::ShaderArray, // SX
|
|
PCIndexSelect::ComputeUnit, // TA
|
|
PCIndexSelect::ComputeUnit, // TD
|
|
PCIndexSelect::ComputeUnit, // TCP
|
|
PCIndexSelect::Instance, // TCC
|
|
PCIndexSelect::Instance, // TCA
|
|
PCIndexSelect::ShaderArray, // DB
|
|
PCIndexSelect::ShaderArray, // CB
|
|
PCIndexSelect::None, // GDS
|
|
PCIndexSelect::None, // SRBM
|
|
PCIndexSelect::None, // GRBM
|
|
PCIndexSelect::ShaderEngine, // GRBMSE
|
|
PCIndexSelect::None, // RLC
|
|
PCIndexSelect::Instance, // DMA
|
|
PCIndexSelect::None, // MC
|
|
PCIndexSelect::None, // CPG
|
|
PCIndexSelect::None, // CPC
|
|
PCIndexSelect::None, // WD
|
|
PCIndexSelect::None, // TCS
|
|
PCIndexSelect::None, // ATC
|
|
PCIndexSelect::None, // ATCL2
|
|
PCIndexSelect::None, // MCVML2
|
|
PCIndexSelect::Instance, // EA
|
|
PCIndexSelect::None, // RPB
|
|
PCIndexSelect::ShaderArray, // RMI
|
|
PCIndexSelect::Instance, // UMCCH
|
|
PCIndexSelect::Instance, // GE
|
|
PCIndexSelect::ShaderArray, // GL1A
|
|
PCIndexSelect::ShaderArray, // GL1C
|
|
PCIndexSelect::ShaderArray, // GL1CG
|
|
PCIndexSelect::Instance, // GL2A
|
|
PCIndexSelect::Instance, // GL2C
|
|
PCIndexSelect::None, // CHA
|
|
PCIndexSelect::Instance, // CHC
|
|
PCIndexSelect::None, // CHCG
|
|
PCIndexSelect::None, // GUS
|
|
PCIndexSelect::None, // GCR
|
|
PCIndexSelect::None, // PH
|
|
PCIndexSelect::ShaderArray, // UTCL1
|
|
#if !IS_MAINLINE && PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 543
|
|
PCIndexSelect::None, // GeDist
|
|
PCIndexSelect::ShaderEngine, // GeSe
|
|
PCIndexSelect::None, // Df
|
|
#endif
|
|
}};
|
|
|
|
static_assert(blockIdToIndexSelect.size() == static_cast<size_t>(Pal::GpuBlock::Count), "size of blockIdToIndexSelect does not match GpuBlock::Count");
|
|
|
|
// Converting from ORCA cmndefs.h to PAL palPerfExperiment.h.
// Used by PerfCounter::convertInfo() for GfxIp7: the position of an entry is
// the incoming block index, the entry is the {PAL block id, instance} pair
// that replaces it.
static const std::array<std::pair<int, int>, 83> ciBlockIdOrcaToPal = {{
    // 0-3: CB0..CB3
    {0x0E, 0}, {0x0E, 1}, {0x0E, 2}, {0x0E, 3},
    {0x00, 0},  // 4: CPF
    // 5-8: DB0..DB3
    {0x0D, 0}, {0x0D, 1}, {0x0D, 2}, {0x0D, 3},
    {0x11, 0},  // 9: GRBM
    {0x12, 0},  // 10: GRBMSE
    {0x03, 0},  // 11: PA_SU
    {0x04, 0},  // 12: PA_SC
    {0x05, 0},  // 13: SPI
    // 14-21: SQ, SQ_ES, SQ_GS, SQ_VS, SQ_PS, SQ_LS, SQ_HS, SQ_CS
    {0x06, 0}, {0x06, 0}, {0x06, 0}, {0x06, 0},
    {0x06, 0}, {0x06, 0}, {0x06, 0}, {0x06, 0},
    {0x07, 0},  // 22: SX
    // 23-33: TA0..TA10
    {0x08, 0}, {0x08, 1}, {0x08, 2}, {0x08, 3}, {0x08, 4}, {0x08, 5},
    {0x08, 6}, {0x08, 7}, {0x08, 8}, {0x08, 9}, {0x08, 10},
    // 34-35: TCA0..TCA1
    {0x0C, 0}, {0x0C, 1},
    // 36-51: TCC0..TCC15
    {0x0B, 0}, {0x0B, 1}, {0x0B, 2}, {0x0B, 3}, {0x0B, 4}, {0x0B, 5}, {0x0B, 6}, {0x0B, 7},
    {0x0B, 8}, {0x0B, 9}, {0x0B, 10}, {0x0B, 11}, {0x0B, 12}, {0x0B, 13}, {0x0B, 14}, {0x0B, 15},
    // 52-62: TD0..TD10
    {0x09, 0}, {0x09, 1}, {0x09, 2}, {0x09, 3}, {0x09, 4}, {0x09, 5},
    {0x09, 6}, {0x09, 7}, {0x09, 8}, {0x09, 9}, {0x09, 10},
    // 63-73: TCP0..TCP10
    {0x0A, 0}, {0x0A, 1}, {0x0A, 2}, {0x0A, 3}, {0x0A, 4}, {0x0A, 5},
    {0x0A, 6}, {0x0A, 7}, {0x0A, 8}, {0x0A, 9}, {0x0A, 10},
    {0x0F, 0},  // 74: GDS
    {0x02, 0},  // 75: VGT
    {0x01, 0},  // 76: IA
    {0x15, 0},  // 77: MC
    {0x10, 0},  // 78: SRBM
    {0x19, 0},  // 79: TCS
    {0x18, 0},  // 80: WD
    {0x16, 0},  // 81: CPG
    {0x17, 0},  // 82: CPC
}};
|
|
|
|
// Block index translation used by PerfCounter::convertInfo() for GfxIp8: the
// position of an entry is the incoming block index, the entry is the
// {PAL block id, instance} pair that replaces it.
static const std::array<std::pair<int, int>, 97> viBlockIdOrcaToPal = {{
    // 0-3: CB0..CB3
    {0x0E, 0}, {0x0E, 1}, {0x0E, 2}, {0x0E, 3},
    {0x00, 0},  // 4: CPF
    // 5-8: DB0..DB3
    {0x0D, 0}, {0x0D, 1}, {0x0D, 2}, {0x0D, 3},
    {0x11, 0},  // 9: GRBM
    {0x12, 0},  // 10: GRBMSE
    {0x03, 0},  // 11: PA_SU
    {0x04, 0},  // 12: PA_SC
    {0x05, 0},  // 13: SPI
    // 14-21: SQ, SQ_ES, SQ_GS, SQ_VS, SQ_PS, SQ_LS, SQ_HS, SQ_CS
    {0x06, 0}, {0x06, 0}, {0x06, 0}, {0x06, 0},
    {0x06, 0}, {0x06, 0}, {0x06, 0}, {0x06, 0},
    {0x07, 0},  // 22: SX
    // 23-38: TA0..TA15
    {0x08, 0}, {0x08, 1}, {0x08, 2}, {0x08, 3}, {0x08, 4}, {0x08, 5}, {0x08, 6}, {0x08, 7},
    {0x08, 8}, {0x08, 9}, {0x08, 10}, {0x08, 11}, {0x08, 12}, {0x08, 13}, {0x08, 14}, {0x08, 15},
    // 39-40: TCA0..TCA1
    {0x0C, 0}, {0x0C, 1},
    // 41-56: TCC0..TCC15
    {0x0B, 0}, {0x0B, 1}, {0x0B, 2}, {0x0B, 3}, {0x0B, 4}, {0x0B, 5}, {0x0B, 6}, {0x0B, 7},
    {0x0B, 8}, {0x0B, 9}, {0x0B, 10}, {0x0B, 11}, {0x0B, 12}, {0x0B, 13}, {0x0B, 14}, {0x0B, 15},
    // 57-72: TD0..TD15
    {0x09, 0}, {0x09, 1}, {0x09, 2}, {0x09, 3}, {0x09, 4}, {0x09, 5}, {0x09, 6}, {0x09, 7},
    {0x09, 8}, {0x09, 9}, {0x09, 10}, {0x09, 11}, {0x09, 12}, {0x09, 13}, {0x09, 14}, {0x09, 15},
    // 73-88: TCP0..TCP15
    {0x0A, 0}, {0x0A, 1}, {0x0A, 2}, {0x0A, 3}, {0x0A, 4}, {0x0A, 5}, {0x0A, 6}, {0x0A, 7},
    {0x0A, 8}, {0x0A, 9}, {0x0A, 10}, {0x0A, 11}, {0x0A, 12}, {0x0A, 13}, {0x0A, 14}, {0x0A, 15},
    {0x0F, 0},  // 89: GDS
    {0x02, 0},  // 90: VGT
    {0x01, 0},  // 91: IA
    {0x15, 0},  // 92: MC
    {0x10, 0},  // 93: SRBM
    {0x18, 0},  // 94: WD
    {0x16, 0},  // 95: CPG
    {0x17, 0},  // 96: CPC
}};
|
|
|
|
// The number of counters per block has been increased for gfx9, but this
// table may not reflect all of them, as compute may not use all of them.
// Used by PerfCounter::convertInfo() for GfxIp9: the position of an entry is
// the incoming block index, the entry is the {PAL block id, instance} pair
// that replaces it.
static const std::array<std::pair<int, int>, 123> gfx9BlockIdPal = {{
    // 0-3: CB0..CB3
    {0x0E, 0}, {0x0E, 1}, {0x0E, 2}, {0x0E, 3},
    {0x00, 0},  // 4: CPF
    // 5-8: DB0..DB3
    {0x0D, 0}, {0x0D, 1}, {0x0D, 2}, {0x0D, 3},
    {0x11, 0},  // 9: GRBM
    {0x12, 0},  // 10: GRBMSE
    {0x03, 0},  // 11: PA_SU
    {0x04, 0},  // 12: PA_SC
    {0x05, 0},  // 13: SPI
    // 14-21: SQ, SQ_ES, SQ_GS, SQ_VS, SQ_PS, SQ_LS, SQ_HS, SQ_CS
    {0x06, 0}, {0x06, 0}, {0x06, 0}, {0x06, 0},
    {0x06, 0}, {0x06, 0}, {0x06, 0}, {0x06, 0},
    {0x07, 0},  // 22: SX
    // 23-38: TA0..TA15
    {0x08, 0}, {0x08, 1}, {0x08, 2}, {0x08, 3}, {0x08, 4}, {0x08, 5}, {0x08, 6}, {0x08, 7},
    {0x08, 8}, {0x08, 9}, {0x08, 10}, {0x08, 11}, {0x08, 12}, {0x08, 13}, {0x08, 14}, {0x08, 15},
    // 39-40: TCA0..TCA1
    {0x0C, 0}, {0x0C, 1},
    // 41-56: TCC0..TCC15
    {0x0B, 0}, {0x0B, 1}, {0x0B, 2}, {0x0B, 3}, {0x0B, 4}, {0x0B, 5}, {0x0B, 6}, {0x0B, 7},
    {0x0B, 8}, {0x0B, 9}, {0x0B, 10}, {0x0B, 11}, {0x0B, 12}, {0x0B, 13}, {0x0B, 14}, {0x0B, 15},
    // 57-72: TD0..TD15
    {0x09, 0}, {0x09, 1}, {0x09, 2}, {0x09, 3}, {0x09, 4}, {0x09, 5}, {0x09, 6}, {0x09, 7},
    {0x09, 8}, {0x09, 9}, {0x09, 10}, {0x09, 11}, {0x09, 12}, {0x09, 13}, {0x09, 14}, {0x09, 15},
    // 73-88: TCP0..TCP15
    {0x0A, 0}, {0x0A, 1}, {0x0A, 2}, {0x0A, 3}, {0x0A, 4}, {0x0A, 5}, {0x0A, 6}, {0x0A, 7},
    {0x0A, 8}, {0x0A, 9}, {0x0A, 10}, {0x0A, 11}, {0x0A, 12}, {0x0A, 13}, {0x0A, 14}, {0x0A, 15},
    {0x0F, 0},  // 89: GDS
    {0x02, 0},  // 90: VGT
    {0x01, 0},  // 91: IA
    {0x18, 0},  // 92: WD
    {0x16, 0},  // 93: CPG
    {0x17, 0},  // 94: CPC
    {0x1A, 0},  // 95: ATC
    {0x1B, 0},  // 96: ATCL2
    {0x1C, 0},  // 97: MCVML2
    // 98-113: EA0..EA15
    {0x1D, 0}, {0x1D, 1}, {0x1D, 2}, {0x1D, 3}, {0x1D, 4}, {0x1D, 5}, {0x1D, 6}, {0x1D, 7},
    {0x1D, 8}, {0x1D, 9}, {0x1D, 10}, {0x1D, 11}, {0x1D, 12}, {0x1D, 13}, {0x1D, 14}, {0x1D, 15},
    {0x1E, 0},  // 114: RPB
    // 115-122: RMI0..RMI7
    {0x1F, 0}, {0x1F, 1}, {0x1F, 2}, {0x1F, 3}, {0x1F, 4}, {0x1F, 5}, {0x1F, 6}, {0x1F, 7},
}};
|
|
|
|
// Block index translation used by PerfCounter::convertInfo() for GfxIp10_1:
// the position of an entry is the incoming block index, the entry is the
// {PAL block id, instance} pair that replaces it.
static const std::array<std::pair<int, int>, 139> gfx10BlockIdPal = {{
    // 0-3: CB0..CB3
    {0x0E, 0}, {0x0E, 1}, {0x0E, 2}, {0x0E, 3},
    {0x00, 0},  // 4: CPF
    // 5-8: DB0..DB3
    {0x0D, 0}, {0x0D, 1}, {0x0D, 2}, {0x0D, 3},
    {0x11, 0},  // 9: GRBM
    {0x12, 0},  // 10: GRBMSE
    {0x03, 0},  // 11: PA_SU
    {0x04, 0},  // 12: PA_SC0
    {0x04, 1},  // 13: PA_SC1
    {0x05, 0},  // 14: SPI
    // 15-22: SQ, SQ_ES, SQ_GS, SQ_VS, SQ_PS, SQ_LS, SQ_HS, SQ_CS
    {0x06, 0}, {0x06, 0}, {0x06, 0}, {0x06, 0},
    {0x06, 0}, {0x06, 0}, {0x06, 0}, {0x06, 0},
    {0x07, 0},  // 23: SX
    // 24-39: TA0..TA15
    {0x08, 0}, {0x08, 1}, {0x08, 2}, {0x08, 3}, {0x08, 4}, {0x08, 5}, {0x08, 6}, {0x08, 7},
    {0x08, 8}, {0x08, 9}, {0x08, 10}, {0x08, 11}, {0x08, 12}, {0x08, 13}, {0x08, 14}, {0x08, 15},
    // 40-55: TD0..TD15
    {0x09, 0}, {0x09, 1}, {0x09, 2}, {0x09, 3}, {0x09, 4}, {0x09, 5}, {0x09, 6}, {0x09, 7},
    {0x09, 8}, {0x09, 9}, {0x09, 10}, {0x09, 11}, {0x09, 12}, {0x09, 13}, {0x09, 14}, {0x09, 15},
    // 56-71: TCP0..TCP15
    {0x0A, 0}, {0x0A, 1}, {0x0A, 2}, {0x0A, 3}, {0x0A, 4}, {0x0A, 5}, {0x0A, 6}, {0x0A, 7},
    {0x0A, 8}, {0x0A, 9}, {0x0A, 10}, {0x0A, 11}, {0x0A, 12}, {0x0A, 13}, {0x0A, 14}, {0x0A, 15},
    {0x0F, 0},  // 72: GDS
    {0x16, 0},  // 73: CPG
    {0x17, 0},  // 74: CPC
    {0x1A, 0},  // 75: ATC
    {0x1B, 0},  // 76: ATCL2
    {0x1C, 0},  // 77: MCVML2
    // 78-93: EA0..EA15
    {0x1D, 0}, {0x1D, 1}, {0x1D, 2}, {0x1D, 3}, {0x1D, 4}, {0x1D, 5}, {0x1D, 6}, {0x1D, 7},
    {0x1D, 8}, {0x1D, 9}, {0x1D, 10}, {0x1D, 11}, {0x1D, 12}, {0x1D, 13}, {0x1D, 14}, {0x1D, 15},
    {0x1E, 0},  // 94: RPB
    // 95-96: RMI0..RMI1
    {0x1F, 0}, {0x1F, 1},
    {0x21, 0},  // 97: GE
    {0x22, 0},  // 98: GL1A
    {0x23, 0},  // 99: GL1C
    // 100-103: GL1CG0..GL1CG3
    {0x24, 0}, {0x24, 1}, {0x24, 2}, {0x24, 3},
    // 104-107: GL2A0..GL2A3
    {0x25, 0}, {0x25, 1}, {0x25, 2}, {0x25, 3},
    // 108-131: GL2C0..GL2C23
    {0x26, 0}, {0x26, 1}, {0x26, 2}, {0x26, 3}, {0x26, 4}, {0x26, 5}, {0x26, 6}, {0x26, 7},
    {0x26, 8}, {0x26, 9}, {0x26, 10}, {0x26, 11}, {0x26, 12}, {0x26, 13}, {0x26, 14}, {0x26, 15},
    {0x26, 16}, {0x26, 17}, {0x26, 18}, {0x26, 19}, {0x26, 20}, {0x26, 21}, {0x26, 22}, {0x26, 23},
    {0x27, 0},  // 132: CHA
    {0x28, 0},  // 133: CHC
    {0x29, 0},  // 134: CHCG
    {0x2A, 0},  // 135: GUS
    {0x2B, 0},  // 136: GCR
    {0x2C, 0},  // 137: PH
    // UTCL1 is block id 0x2D (the entry after PH in blockIdToIndexSelect);
    // it previously reused PH's id 0x2C, so UTCL1 requests were sent to PH.
    {0x2D, 0},  // 138: UTCL1
}};
|
|
|
|
void PerfCounter::convertInfo() {
|
|
switch (dev().ipLevel()) {
|
|
case Pal::GfxIpLevel::GfxIp7:
|
|
if (info_.blockIndex_ < ciBlockIdOrcaToPal.size()) {
|
|
auto p = ciBlockIdOrcaToPal[info_.blockIndex_];
|
|
info_.blockIndex_ = std::get<0>(p);
|
|
info_.counterIndex_ = std::get<1>(p);
|
|
}
|
|
break;
|
|
case Pal::GfxIpLevel::GfxIp8:
|
|
if (info_.blockIndex_ < viBlockIdOrcaToPal.size()) {
|
|
auto p = viBlockIdOrcaToPal[info_.blockIndex_];
|
|
info_.blockIndex_ = std::get<0>(p);
|
|
info_.counterIndex_ = std::get<1>(p);
|
|
}
|
|
break;
|
|
case Pal::GfxIpLevel::GfxIp9:
|
|
if (info_.blockIndex_ < gfx9BlockIdPal.size()) {
|
|
auto p = gfx9BlockIdPal[info_.blockIndex_];
|
|
info_.blockIndex_ = std::get<0>(p);
|
|
info_.counterIndex_ = std::get<1>(p);
|
|
}
|
|
break;
|
|
case Pal::GfxIpLevel::GfxIp10_1:
|
|
if (info_.blockIndex_ < gfx10BlockIdPal.size()) {
|
|
auto p = gfx10BlockIdPal[info_.blockIndex_];
|
|
info_.blockIndex_ = std::get<0>(p);
|
|
info_.counterIndex_ = std::get<1>(p);
|
|
}
|
|
break;
|
|
default:
|
|
Unimplemented();
|
|
break;
|
|
}
|
|
|
|
assert(info_.blockIndex_ < blockIdToIndexSelect.size());
|
|
info_.indexSelect_ = blockIdToIndexSelect.at(info_.blockIndex_);
|
|
}
|
|
|
|
// Drops this counter's reference on the shared PAL counter reference object,
// if one is attached.
PerfCounter::~PerfCounter() {
  if (palRef_ != nullptr) {
    palRef_->release();
  }
}
|
|
|
|
bool PerfCounter::create() {
|
|
palRef_->retain();
|
|
|
|
// Initialize the counter
|
|
Pal::PerfCounterInfo counterInfo = {};
|
|
counterInfo.counterType = Pal::PerfCounterType::Global;
|
|
counterInfo.block = static_cast<Pal::GpuBlock>(info_.blockIndex_);
|
|
counterInfo.eventId = info_.eventIndex_;
|
|
|
|
Pal::PerfExperimentProperties perfExpProps;
|
|
Pal::Result result;
|
|
result = dev().iDev()->GetPerfExperimentProperties(&perfExpProps);
|
|
if (result != Pal::Result::Success) {
|
|
return false;
|
|
}
|
|
|
|
const auto& blockProps = perfExpProps.blocks[static_cast<uint32_t>(counterInfo.block)];
|
|
uint32_t counter_start, counter_step;
|
|
|
|
switch (info_.indexSelect_) {
|
|
case PCIndexSelect::ShaderEngine:
|
|
case PCIndexSelect::None:
|
|
counter_start = 0;
|
|
counter_step = 1;
|
|
break;
|
|
|
|
case PCIndexSelect::ShaderArray:
|
|
if (info_.counterIndex_ >=
|
|
(dev().properties().gfxipProperties.shaderCore.numShaderArrays * dev().properties().gfxipProperties.shaderCore.numShaderEngines)) {
|
|
return true;
|
|
}
|
|
counter_start = info_.counterIndex_;
|
|
counter_step = dev().properties().gfxipProperties.shaderCore.numShaderArrays * dev().properties().gfxipProperties.shaderCore.numShaderEngines;
|
|
break;
|
|
|
|
case PCIndexSelect::ComputeUnit:
|
|
if (info_.counterIndex_ >=
|
|
dev().properties().gfxipProperties.shaderCore.maxCusPerShaderArray) {
|
|
return true;
|
|
}
|
|
counter_start = info_.counterIndex_;
|
|
counter_step = dev().properties().gfxipProperties.shaderCore.maxCusPerShaderArray;
|
|
break;
|
|
|
|
case PCIndexSelect::Instance:
|
|
counter_start = info_.counterIndex_;
|
|
counter_step = blockProps.instanceCount;
|
|
break;
|
|
|
|
default:
|
|
assert(0 && "Unknown indexSelect_");
|
|
return true;
|
|
}
|
|
|
|
for (uint32_t i = counter_start; i < blockProps.instanceCount; i += counter_step) {
|
|
counterInfo.instance = i;
|
|
result = iPerf()->AddCounter(counterInfo);
|
|
if (result == Pal::Result::Success) {
|
|
index_.push_back(palRef_->getPalCounterIndex());
|
|
} else {
|
|
// Get here when there's no HW PerfCounter matching the counterInfo
|
|
assert(0 && "AddCounter() failed");
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
uint64_t PerfCounter::getInfo(uint64_t infoType) const {
|
|
switch (infoType) {
|
|
case CL_PERFCOUNTER_GPU_BLOCK_INDEX: {
|
|
// Return the GPU block index
|
|
return info()->blockIndex_;
|
|
}
|
|
case CL_PERFCOUNTER_GPU_COUNTER_INDEX: {
|
|
// Return the GPU counter index
|
|
return info()->counterIndex_;
|
|
}
|
|
case CL_PERFCOUNTER_GPU_EVENT_INDEX: {
|
|
// Return the GPU event index
|
|
return info()->eventIndex_;
|
|
}
|
|
case CL_PERFCOUNTER_DATA: {
|
|
return palRef_->result(index_);
|
|
}
|
|
default:
|
|
LogError("Wrong PerfCounter::getInfo parameter");
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
} // namespace pal
|