/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "device/pal/palcounters.hpp" #include "device/pal/palvirtual.hpp" #include namespace amd::pal { static constexpr std::array blockIdToIndexSelect = {{ PCIndexSelect::None, // CPF PCIndexSelect::ShaderEngine, // IA PCIndexSelect::ShaderEngine, // VGT PCIndexSelect::ShaderArray, // PA PCIndexSelect::ShaderArray, // SC PCIndexSelect::ShaderEngine, // SPI PCIndexSelect::ShaderEngine, // SQ PCIndexSelect::ShaderArray, // SX PCIndexSelect::ComputeUnit, // TA PCIndexSelect::ComputeUnit, // TD PCIndexSelect::ComputeUnit, // TCP PCIndexSelect::Instance, // TCC PCIndexSelect::Instance, // TCA PCIndexSelect::ShaderArray, // DB PCIndexSelect::ShaderArray, // CB PCIndexSelect::None, // GDS PCIndexSelect::None, // SRBM PCIndexSelect::None, // GRBM PCIndexSelect::ShaderEngine, // GRBMSE PCIndexSelect::None, // RLC PCIndexSelect::Instance, // DMA PCIndexSelect::None, // MC PCIndexSelect::None, // CPG PCIndexSelect::None, // CPC PCIndexSelect::None, // WD PCIndexSelect::None, // TCS PCIndexSelect::None, // ATC PCIndexSelect::None, // ATCL2 PCIndexSelect::None, // MCVML2 PCIndexSelect::Instance, // EA PCIndexSelect::None, // RPB PCIndexSelect::ShaderArray, // RMI PCIndexSelect::Instance, // UMCCH PCIndexSelect::Instance, // GE PCIndexSelect::ShaderArray, // GL1A PCIndexSelect::ShaderArray, // GL1C PCIndexSelect::ShaderArray, // GL1CG PCIndexSelect::Instance, // GL2A PCIndexSelect::Instance, // GL2C PCIndexSelect::None, // CHA PCIndexSelect::Instance, // CHC PCIndexSelect::None, // CHCG PCIndexSelect::None, // GUS PCIndexSelect::None, // GCR PCIndexSelect::None, // PH PCIndexSelect::ShaderArray, // UTCL1 PCIndexSelect::None, // GeDist PCIndexSelect::ShaderEngine, // GeSe PCIndexSelect::None, // Df PCIndexSelect::ComputeUnit, // SqWgp }}; PalCounterReference* PalCounterReference::Create(VirtualGPU& gpu) { Pal::Result result; if (blockIdToIndexSelect.size() != static_cast(Pal::GpuBlock::Count)) { LogError("Size of blockIdToIndexSelect does not match GpuBlock::Count"); } // Create performance experiment Pal::PerfExperimentCreateInfo createInfo = {}; createInfo.optionFlags.sampleInternalOperations = 1; createInfo.optionFlags.cacheFlushOnCounterCollection = 1; createInfo.optionFlags.sqShaderMask = 1; createInfo.optionValues.sampleInternalOperations = true; createInfo.optionValues.cacheFlushOnCounterCollection = true; createInfo.optionValues.sqShaderMask = Pal::PerfShaderMaskCs; size_t palExperSize = gpu.dev().iDev()->GetPerfExperimentSize(createInfo, &result); if (result != Pal::Result::Success) { return nullptr; } PalCounterReference* memRef = new (palExperSize) PalCounterReference(gpu); if (memRef != nullptr) { result = gpu.dev().iDev()->CreatePerfExperiment(createInfo, &memRef[1], &memRef->perfExp_); if (result != Pal::Result::Success) { memRef->release(); return nullptr; } } return memRef; } PalCounterReference::~PalCounterReference() { // The counter object is always associated with a particular queue, // so we have to lock just this queue amd::ScopedLock lock(gpu_.execution()); delete layout_; delete memory_; if (nullptr != iPerf()) { iPerf()->Destroy(); } } uint64_t PalCounterReference::result(const std::vector& index) { if (index.size() == 0) { // These are counters that have no corresponding PalSample created return 0; } if (layout_ == nullptr) { return 0; } uint64_t result = 0; for (auto const& i : index) { assert(i <= static_cast(layout_->sampleCount) && "index not in range"); const Pal::GlobalSampleLayout& sample = layout_->samples[i]; if (sample.dataType == Pal::PerfCounterDataType::Uint32) { uint32_t beginVal = *reinterpret_cast(reinterpret_cast(cpuAddr_) + sample.beginValueOffset); uint32_t endVal = *reinterpret_cast(reinterpret_cast(cpuAddr_) + sample.endValueOffset); result += (endVal - beginVal); } else if (sample.dataType == Pal::PerfCounterDataType::Uint64) { uint64_t beginVal = *reinterpret_cast(reinterpret_cast(cpuAddr_) + sample.beginValueOffset); uint64_t endVal = *reinterpret_cast(reinterpret_cast(cpuAddr_) + sample.endValueOffset); result += (endVal - beginVal); } else { assert(0 && "dataType should be either Uint32 or Uint64"); return 0; } } return result; } bool PalCounterReference::finalize() { Pal::Result result; iPerf()->Finalize(); // Acquire GPU memory for the query from the pool and bind it. Pal::GpuMemoryRequirements gpuMemReqs = {}; iPerf()->GetGpuMemoryRequirements(&gpuMemReqs); memory_ = new Memory(gpu().dev(), amd::alignUp(gpuMemReqs.size, gpuMemReqs.alignment)); if (nullptr == memory_) { return false; } if (!memory_->create(Resource::Remote)) { return false; } cpuAddr_ = memory_->cpuMap(gpu_); if (nullptr == cpuAddr_) { return false; } gpu_.queue(gpu_.engineID_).addMemRef(memory_->iMem()); result = iPerf()->BindGpuMemory(memory_->iMem(), 0); if (result == Pal::Result::Success) { Pal::GlobalCounterLayout layout = {}; iPerf()->GetGlobalCounterLayout(&layout); assert(layout.sampleCount == numExpCounters_); size_t size = sizeof(Pal::GlobalCounterLayout) + (sizeof(Pal::GlobalSampleLayout) * (layout.sampleCount - 1)); layout_ = reinterpret_cast(new char[size]); if (layout_ != nullptr) { layout_->sampleCount = layout.sampleCount; iPerf()->GetGlobalCounterLayout(layout_); } return true; } else { return false; } } // Converting from ORCA cmndefs.h to PAL palPerfExperiment.h static constexpr std::array, 83> ciBlockIdOrcaToPal = {{ {0x0E, 0}, // CB0 {0x0E, 1}, // CB1 {0x0E, 2}, // CB2 {0x0E, 3}, // CB3 {0x00, 0}, // CPF {0x0D, 0}, // DB0 {0x0D, 1}, // DB1 {0x0D, 2}, // DB2 {0x0D, 3}, // DB3 {0x11, 0}, // GRBM {0x12, 0}, // GRBMSE {0x03, 0}, // PA_SU {0x04, 0}, // PA_SC {0x05, 0}, // SPI {0x06, 0}, // SQ {0x06, 0}, // SQ_ES {0x06, 0}, // SQ_GS {0x06, 0}, // SQ_VS {0x06, 0}, // SQ_PS {0x06, 0}, // SQ_LS {0x06, 0}, // SQ_HS {0x06, 0}, // SQ_CS {0x07, 0}, // SX {0x08, 0}, // TA0 {0x08, 1}, // TA1 {0x08, 2}, // TA2 {0x08, 3}, // TA3 {0x08, 4}, // TA4 {0x08, 5}, // TA5 {0x08, 6}, // TA6 {0x08, 7}, // TA7 {0x08, 8}, // TA8 {0x08, 9}, // TA9 {0x08, 0x0a}, // TA10 {0x0C, 0}, // TCA0 {0x0C, 1}, // TCA1 {0x0B, 0}, // TCC0 {0x0B, 1}, // TCC1 {0x0B, 2}, // TCC2 {0x0B, 3}, // TCC3 {0x0B, 4}, // TCC4 {0x0B, 5}, // TCC5 {0x0B, 6}, // TCC6 {0x0B, 7}, // TCC7 {0x0B, 8}, // TCC8 {0x0B, 9}, // TCC9 {0x0B, 0x0a}, // TCC10 {0x0B, 0x0b}, // TCC11 {0x0B, 0x0c}, // TCC12 {0x0B, 0x0d}, // TCC13 {0x0B, 0x0e}, // TCC14 {0x0B, 0x0f}, // TCC15 {0x09, 0}, // TD0 {0x09, 1}, // TD1 {0x09, 2}, // TD2 {0x09, 3}, // TD3 {0x09, 4}, // TD4 {0x09, 5}, // TD5 {0x09, 6}, // TD6 {0x09, 7}, // TD7 {0x09, 8}, // TD8 {0x09, 9}, // TD9 {0x09, 0x0a}, // TD10 {0x0A, 0}, // TCP0 {0x0A, 1}, // TCP1 {0x0A, 2}, // TCP2 {0x0A, 3}, // TCP3 {0x0A, 4}, // TCP4 {0x0A, 5}, // TCP5 {0x0A, 6}, // TCP6 {0x0A, 7}, // TCP7 {0x0A, 8}, // TCP8 {0x0A, 9}, // TCP9 {0x0A, 0x0a}, // TCP10 {0x0F, 0}, // GDS {0x02, 0}, // VGT {0x01, 0}, // IA {0x15, 0}, // MC {0x10, 0}, // SRBM {0x19, 0}, // TCS {0x18, 0}, // WD {0x16, 0}, // CPG {0x17, 0}, // CPC }}; static constexpr std::array, 97> viBlockIdOrcaToPal = {{ {0x0E, 0}, // CB0 {0x0E, 1}, // CB1 {0x0E, 2}, // CB2 {0x0E, 3}, // CB3 {0x00, 0}, // CPF {0x0D, 0}, // DB0 {0x0D, 1}, // DB1 {0x0D, 2}, // DB2 {0x0D, 3}, // DB3 {0x11, 0}, // GRBM {0x12, 0}, // GRBMSE {0x03, 0}, // PA_SU {0x04, 0}, // PA_SC {0x05, 0}, // SPI {0x06, 0}, // SQ {0x06, 0}, // SQ_ES {0x06, 0}, // SQ_GS {0x06, 0}, // SQ_VS {0x06, 0}, // SQ_PS {0x06, 0}, // SQ_LS {0x06, 0}, // SQ_HS {0x06, 0}, // SQ_CS {0x07, 0}, // SX {0x08, 0}, // TA0 {0x08, 1}, // TA1 {0x08, 2}, // TA2 {0x08, 3}, // TA3 {0x08, 4}, // TA4 {0x08, 5}, // TA5 {0x08, 6}, // TA6 {0x08, 7}, // TA7 {0x08, 8}, // TA8 {0x08, 9}, // TA9 {0x08, 0x0a}, // TA10 {0x08, 0x0b}, // TA11 {0x08, 0x0c}, // TA12 {0x08, 0x0d}, // TA13 {0x08, 0x0e}, // TA14 {0x08, 0x0f}, // TA15 {0x0C, 0}, // TCA0 {0x0C, 1}, // TCA1 {0x0B, 0}, // TCC0 {0x0B, 1}, // TCC1 {0x0B, 2}, // TCC2 {0x0B, 3}, // TCC3 {0x0B, 4}, // TCC4 {0x0B, 5}, // TCC5 {0x0B, 6}, // TCC6 {0x0B, 7}, // TCC7 {0x0B, 8}, // TCC8 {0x0B, 9}, // TCC9 {0x0B, 0x0a}, // TCC10 {0x0B, 0x0b}, // TCC11 {0x0B, 0x0c}, // TCC12 {0x0B, 0x0d}, // TCC13 {0x0B, 0x0e}, // TCC14 {0x0B, 0x0f}, // TCC15 {0x09, 0}, // TD0 {0x09, 1}, // TD1 {0x09, 2}, // TD2 {0x09, 3}, // TD3 {0x09, 4}, // TD4 {0x09, 5}, // TD5 {0x09, 6}, // TD6 {0x09, 7}, // TD7 {0x09, 8}, // TD8 {0x09, 9}, // TD9 {0x09, 0x0a}, // TD10 {0x09, 0x0b}, // TD11 {0x09, 0x0c}, // TD12 {0x09, 0x0d}, // TD13 {0x09, 0x0e}, // TD14 {0x09, 0x0f}, // TD15 {0x0A, 0}, // TCP0 {0x0A, 1}, // TCP1 {0x0A, 2}, // TCP2 {0x0A, 3}, // TCP3 {0x0A, 4}, // TCP4 {0x0A, 5}, // TCP5 {0x0A, 6}, // TCP6 {0x0A, 7}, // TCP7 {0x0A, 8}, // TCP8 {0x0A, 9}, // TCP9 {0x0A, 0x0a}, // TCP10 {0x0A, 0x0b}, // TCP11 {0x0A, 0x0c}, // TCP12 {0x0A, 0x0d}, // TCP13 {0x0A, 0x0e}, // TCP14 {0x0A, 0x0f}, // TCP15 {0x0F, 0}, // GDS {0x02, 0}, // VGT {0x01, 0}, // IA {0x15, 0}, // MC {0x10, 0}, // SRBM {0x18, 0}, // WD {0x16, 0}, // CPG {0x17, 0}, // CPC }}; // The number of counters per block has been increased for gfx9 but this table may not reflect all // of them // as compute may not use all of them. static constexpr std::array, 123> gfx9BlockIdPal = {{ {0x0E, 0}, // CB0 - 0 {0x0E, 1}, // CB1 - 1 {0x0E, 2}, // CB2 - 2 {0x0E, 3}, // CB3 - 3 {0x00, 0}, // CPF - 4 {0x0D, 0}, // DB0 - 5 {0x0D, 1}, // DB1 - 6 {0x0D, 2}, // DB2 - 7 {0x0D, 3}, // DB3 - 8 {0x11, 0}, // GRBM - 9 {0x12, 0}, // GRBMSE - 10 {0x03, 0}, // PA_SU - 11 {0x04, 0}, // PA_SC - 12 {0x05, 0}, // SPI - 13 {0x06, 0}, // SQ - 14 {0x06, 0}, // SQ_ES - 15 {0x06, 0}, // SQ_GS - 16 {0x06, 0}, // SQ_VS - 17 {0x06, 0}, // SQ_PS - 18 {0x06, 0}, // SQ_LS - 19 {0x06, 0}, // SQ_HS - 20 {0x06, 0}, // SQ_CS - 21 {0x07, 0}, // SX - 22 {0x08, 0}, // TA0 - 23 {0x08, 1}, // TA1 - 24 {0x08, 2}, // TA2 - 25 {0x08, 3}, // TA3 - 26 {0x08, 4}, // TA4 - 27 {0x08, 5}, // TA5 - 28 {0x08, 6}, // TA6 - 29 {0x08, 7}, // TA7 - 30 {0x08, 8}, // TA8 - 31 {0x08, 9}, // TA9 - 32 {0x08, 0x0a}, // TA10 - 33 {0x08, 0x0b}, // TA11 - 34 {0x08, 0x0c}, // TA12 - 35 {0x08, 0x0d}, // TA13 - 36 {0x08, 0x0e}, // TA14 - 37 {0x08, 0x0f}, // TA15 - 38 {0x0C, 0}, // TCA0 - 39 {0x0C, 1}, // TCA1 - 40 {0x0B, 0}, // TCC0 - 41 {0x0B, 1}, // TCC1 - 42 {0x0B, 2}, // TCC2 - 43 {0x0B, 3}, // TCC3 - 44 {0x0B, 4}, // TCC4 - 45 {0x0B, 5}, // TCC5 - 46 {0x0B, 6}, // TCC6 - 47 {0x0B, 7}, // TCC7 - 48 {0x0B, 8}, // TCC8 - 49 {0x0B, 9}, // TCC9 - 50 {0x0B, 0x0a}, // TCC10 - 51 {0x0B, 0x0b}, // TCC11 - 52 {0x0B, 0x0c}, // TCC12 - 53 {0x0B, 0x0d}, // TCC13 - 54 {0x0B, 0x0e}, // TCC14 - 55 {0x0B, 0x0f}, // TCC15 - 56 {0x09, 0}, // TD0 - 57 {0x09, 1}, // TD1 - 58 {0x09, 2}, // TD2 - 59 {0x09, 3}, // TD3 - 60 {0x09, 4}, // TD4 - 61 {0x09, 5}, // TD5 - 62 {0x09, 6}, // TD6 - 63 {0x09, 7}, // TD7 - 64 {0x09, 8}, // TD8 - 65 {0x09, 9}, // TD9 - 66 {0x09, 0x0a}, // TD10 - 67 {0x09, 0x0b}, // TD11 - 68 {0x09, 0x0c}, // TD12 - 69 {0x09, 0x0d}, // TD13 - 70 {0x09, 0x0e}, // TD14 - 71 {0x09, 0x0f}, // TD15 - 72 {0x0A, 0}, // TCP0 - 73 {0x0A, 1}, // TCP1 - 74 {0x0A, 2}, // TCP2 - 75 {0x0A, 3}, // TCP3 - 76 {0x0A, 4}, // TCP4 - 77 {0x0A, 5}, // TCP5 - 78 {0x0A, 6}, // TCP6 - 79 {0x0A, 7}, // TCP7 - 80 {0x0A, 8}, // TCP8 - 81 {0x0A, 9}, // TCP9 - 82 {0x0A, 0x0a}, // TCP10 - 83 {0x0A, 0x0b}, // TCP11 - 84 {0x0A, 0x0c}, // TCP12 - 85 {0x0A, 0x0d}, // TCP13 - 86 {0x0A, 0x0e}, // TCP14 - 87 {0x0A, 0x0f}, // TCP15 - 88 {0x0F, 0}, // GDS - 89 {0x02, 0}, // VGT - 90 {0x01, 0}, // IA - 91 {0x18, 0}, // WD - 92 {0x16, 0}, // CPG - 93 {0x17, 0}, // CPC - 94 {0x1A, 0}, // ATC - 95 {0x1B, 0}, // ATCL2 - 96 {0x1C, 0}, // MCVML2 - 97 {0x1D, 0}, // EA0 - 98 {0x1D, 1}, // EA1 - 99 {0x1D, 2}, // EA2 - 100 {0x1D, 3}, // EA3 - 101 {0x1D, 4}, // EA4 - 102 {0x1D, 5}, // EA5 - 103 {0x1D, 6}, // EA6 - 104 {0x1D, 7}, // EA7 - 105 {0x1D, 8}, // EA8 - 106 {0x1D, 9}, // EA9 - 107 {0x1D, 0x0a}, // EA10 - 108 {0x1D, 0x0b}, // EA11 - 109 {0x1D, 0x0c}, // EA12 - 110 {0x1D, 0x0d}, // EA13 - 111 {0x1D, 0x0e}, // EA14 - 112 {0x1D, 0x0f}, // EA15 - 113 {0x1E, 0}, // RPB - 114 {0x1F, 0}, // RMI0 - 115 {0x1F, 1}, // RMI1 - 116 {0x1F, 2}, // RMI2 - 117 {0x1F, 3}, // RMI3 - 118 {0x1F, 4}, // RMI4 - 119 {0x1F, 5}, // RMI5 - 120 {0x1F, 6}, // RMI6 - 121 {0x1F, 7}, // RMI7 - 122 }}; static constexpr std::array, 140> gfx10BlockIdPal = {{ {0x0E, 0}, // CB0 - 0 {0x0E, 1}, // CB1 - 1 {0x0E, 2}, // CB2 - 2 {0x0E, 3}, // CB3 - 3 {0x00, 0}, // CPF - 4 {0x0D, 0}, // DB0 - 5 {0x0D, 1}, // DB1 - 6 {0x0D, 2}, // DB2 - 7 {0x0D, 3}, // DB3 - 8 {0x11, 0}, // GRBM - 9 {0x12, 0}, // GRBMSE - 10 {0x03, 0}, // PA_SU - 11 {0x04, 0}, // PA_SC0 - 12 {0x04, 1}, // PA_SC1 - 13 {0x05, 0}, // SPI - 14 {0x06, 0}, // SQ - 15 {0x06, 0}, // SQ_ES - 16 {0x06, 0}, // SQ_GS - 17 {0x06, 0}, // SQ_VS - 18 {0x06, 0}, // SQ_PS - 19 {0x06, 0}, // SQ_LS - 20 {0x06, 0}, // SQ_HS - 21 {0x06, 0}, // SQ_CS - 22 {0x07, 0}, // SX - 23 {0x08, 0}, // TA0 - 24 {0x08, 1}, // TA1 - 25 {0x08, 2}, // TA2 - 26 {0x08, 3}, // TA3 - 27 {0x08, 4}, // TA4 - 28 {0x08, 5}, // TA5 - 29 {0x08, 6}, // TA6 - 30 {0x08, 7}, // TA7 - 31 {0x08, 8}, // TA8 - 32 {0x08, 9}, // TA9 - 33 {0x08, 0x0a}, // TA10 - 34 {0x08, 0x0b}, // TA11 - 35 {0x08, 0x0c}, // TA12 - 36 {0x08, 0x0d}, // TA13 - 37 {0x08, 0x0e}, // TA14 - 38 {0x08, 0x0f}, // TA15 - 39 {0x09, 0}, // TD0 - 40 {0x09, 1}, // TD1 - 41 {0x09, 2}, // TD2 - 42 {0x09, 3}, // TD3 - 43 {0x09, 4}, // TD4 - 44 {0x09, 5}, // TD5 - 45 {0x09, 6}, // TD6 - 46 {0x09, 7}, // TD7 - 47 {0x09, 8}, // TD8 - 48 {0x09, 9}, // TD9 - 49 {0x09, 0x0a}, // TD10 - 50 {0x09, 0x0b}, // TD11 - 51 {0x09, 0x0c}, // TD12 - 52 {0x09, 0x0d}, // TD13 - 53 {0x09, 0x0e}, // TD14 - 54 {0x09, 0x0f}, // TD15 - 55 {0x0A, 0}, // TCP0 - 56 {0x0A, 1}, // TCP1 - 57 {0x0A, 2}, // TCP2 - 58 {0x0A, 3}, // TCP3 - 59 {0x0A, 4}, // TCP4 - 60 {0x0A, 5}, // TCP5 - 61 {0x0A, 6}, // TCP6 - 62 {0x0A, 7}, // TCP7 - 63 {0x0A, 8}, // TCP8 - 64 {0x0A, 9}, // TCP9 - 65 {0x0A, 0x0a}, // TCP10 - 66 {0x0A, 0x0b}, // TCP11 - 67 {0x0A, 0x0c}, // TCP12 - 68 {0x0A, 0x0d}, // TCP13 - 69 {0x0A, 0x0e}, // TCP14 - 70 {0x0A, 0x0f}, // TCP15 - 71 {0x0F, 0}, // GDS - 72 {0x16, 0}, // CPG - 73 {0x17, 0}, // CPC - 74 {0x1A, 0}, // ATC - 75 {0x1B, 0}, // ATCL2 - 76 {0x1C, 0}, // MCVML2 - 77 {0x1D, 0}, // EA0 - 78 {0x1D, 1}, // EA1 - 79 {0x1D, 2}, // EA2 - 80 {0x1D, 3}, // EA3 - 81 {0x1D, 4}, // EA4 - 82 {0x1D, 5}, // EA5 - 83 {0x1D, 6}, // EA6 - 84 {0x1D, 7}, // EA7 - 85 {0x1D, 8}, // EA8 - 86 {0x1D, 9}, // EA9 - 87 {0x1D, 0x0a}, // EA10 - 88 {0x1D, 0x0b}, // EA11 - 89 {0x1D, 0x0c}, // EA12 - 90 {0x1D, 0x0d}, // EA13 - 91 {0x1D, 0x0e}, // EA14 - 92 {0x1D, 0x0f}, // EA15 - 93 {0x1E, 0}, // RPB - 94 {0x1F, 0}, // RMI0 - 95 {0x1F, 1}, // RMI1 - 96 {0x21, 0}, // GE - 97 {0x22, 0}, // GL1A - 98 {0x23, 0}, // GL1C - 99 {0x24, 0}, // GL1CG0 - 100 {0x24, 1}, // GL1CG1 - 101 {0x24, 2}, // GL1CG2 - 102 {0x24, 3}, // GL1CG3 - 103 {0x25, 0}, // GL2A0 - 104 {0x25, 1}, // GL2A1 - 105 {0x25, 2}, // GL2A2 - 106 {0x25, 3}, // GL2A3 - 107 {0x26, 0}, // GL2C0 - 108 {0x26, 1}, // GL2C1 - 109 {0x26, 2}, // GL2C2 - 110 {0x26, 3}, // GL2C3 - 111 {0x26, 4}, // GL2C4 - 112 {0x26, 5}, // GL2C5 - 113 {0x26, 6}, // GL2C6 - 114 {0x26, 7}, // GL2C7 - 115 {0x26, 8}, // GL2C8 - 116 {0x26, 9}, // GL2C9 - 117 {0x26, 0x0a}, // GL2C10 - 118 {0x26, 0x0b}, // GL2C11 - 119 {0x26, 0x0c}, // GL2C12 - 120 {0x26, 0x0d}, // GL2C13 - 121 {0x26, 0x0e}, // GL2C14 - 122 {0x26, 0x0f}, // GL2C15 - 123 {0x26, 0x10}, // GL2C16 - 124 {0x26, 0x11}, // GL2C17 - 125 {0x26, 0x12}, // GL2C18 - 126 {0x26, 0x13}, // GL2C19 - 127 {0x26, 0x14}, // GL2C20 - 128 {0x26, 0x15}, // GL2C21 - 129 {0x26, 0x16}, // GL2C22 - 130 {0x26, 0x17}, // GL2C23 - 131 {0x27, 0}, // CHA - 132 {0x28, 0}, // CHC - 133 {0x29, 0}, // CHCG - 134 {0x2A, 0}, // GUS - 135 {0x2B, 0}, // GCR - 136 {0x2C, 0}, // PH - 137 {0x2D, 0}, // UTCL1 - 138 {0x31, 0}, // SqWgp - 139 }}; void PerfCounter::convertInfo() { switch (dev().ipLevel()) { case Pal::GfxIpLevel::GfxIp10_1: case Pal::GfxIpLevel::GfxIp10_3: case Pal::GfxIpLevel::GfxIp11_0: case Pal::GfxIpLevel::GfxIp11_5: if (info_.blockIndex_ < gfx10BlockIdPal.size()) { auto p = gfx10BlockIdPal[info_.blockIndex_]; info_.blockIndex_ = std::get<0>(p); info_.counterIndex_ = std::get<1>(p); } break; default: Unimplemented(); break; } assert(info_.blockIndex_ < blockIdToIndexSelect.size()); info_.indexSelect_ = blockIdToIndexSelect.at(info_.blockIndex_); } PerfCounter::~PerfCounter() { if (palRef_ == nullptr) { return; } // Release the counter reference object palRef_->release(); } bool PerfCounter::create() { palRef_->retain(); // Initialize the counter Pal::PerfCounterInfo counterInfo = {}; counterInfo.counterType = Pal::PerfCounterType::Global; counterInfo.block = static_cast(info_.blockIndex_); counterInfo.eventId = info_.eventIndex_; Pal::PerfExperimentProperties perfExpProps; Pal::Result result; result = dev().iDev()->GetPerfExperimentProperties(&perfExpProps); if (result != Pal::Result::Success) { return false; } const auto& blockProps = perfExpProps.blocks[static_cast(counterInfo.block)]; uint32_t counter_start, counter_step; switch (info_.indexSelect_) { case PCIndexSelect::ShaderEngine: case PCIndexSelect::None: counter_start = 0; counter_step = 1; break; case PCIndexSelect::ShaderArray: if (info_.counterIndex_ >= (dev().properties().gfxipProperties.shaderCore.numShaderArrays * dev().properties().gfxipProperties.shaderCore.numShaderEngines)) { return true; } counter_start = info_.counterIndex_; counter_step = dev().properties().gfxipProperties.shaderCore.numShaderArrays * dev().properties().gfxipProperties.shaderCore.numShaderEngines; break; case PCIndexSelect::ComputeUnit: if (info_.counterIndex_ >= dev().properties().gfxipProperties.shaderCore.maxCusPerShaderArray) { return true; } counter_start = info_.counterIndex_; counter_step = dev().properties().gfxipProperties.shaderCore.maxCusPerShaderArray; break; case PCIndexSelect::Instance: counter_start = info_.counterIndex_; counter_step = blockProps.instanceCount; break; default: assert(0 && "Unknown indexSelect_"); return true; } for (uint32_t i = counter_start; i < blockProps.instanceCount; i += counter_step) { counterInfo.instance = i; result = iPerf()->AddCounter(counterInfo); if (result == Pal::Result::Success) { index_.push_back(palRef_->getPalCounterIndex()); } else { // Get here when there's no HW PerfCounter matching the counterInfo assert(0 && "AddCounter() failed"); } } return true; } uint64_t PerfCounter::getInfo(uint64_t infoType) const { switch (infoType) { case CL_PERFCOUNTER_GPU_BLOCK_INDEX: { // Return the GPU block index return info()->blockIndex_; } case CL_PERFCOUNTER_GPU_COUNTER_INDEX: { // Return the GPU counter index return info()->counterIndex_; } case CL_PERFCOUNTER_GPU_EVENT_INDEX: { // Return the GPU event index return info()->eventIndex_; } case CL_PERFCOUNTER_DATA: { return palRef_->result(index_); } default: LogError("Wrong PerfCounter::getInfo parameter"); } return 0; } } // namespace amd::pal