/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "device/rocm/roccounters.hpp" #include "device/rocm/rocvirtual.hpp" #include hsa_status_t PerfCounterCallback( hsa_ven_amd_aqlprofile_info_type_t info_type, hsa_ven_amd_aqlprofile_info_data_t* info_data, void* callback_data) { typedef std::vector passed_data_t; if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) { reinterpret_cast(callback_data)->push_back(*info_data); } return HSA_STATUS_SUCCESS; } namespace roc { /* Converting from ORCA cmndefs.h to ROCR hsa_ven_amd_aqlprofile.h Note that some blocks are not defined in cmndefs.h */ static constexpr std::array, 97> viBlockIdOrcaToRocr = {{ {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CB0 - 0 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // CB1 - 1 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // CB2 - 2 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // CB3 - 3 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0}, // CPF - 4 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // DB0 - 5 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // DB1 - 6 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // DB2 - 7 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // DB3 - 8 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0}, // GRBM - 9 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE, 0}, // GRBMSE - 10 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SU - 11 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SC - 12 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0}, // SPI - 13 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0}, // SQ - 14 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_ES - 15 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_GS - 16 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_VS - 17 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_PS - 18 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_LS - 19 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_HS - 20 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS, 0}, // SQ_CS - 21 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX, 0}, // SX - 22 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0}, // TA0 - 23 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 1}, // TA1 - 24 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 2}, // TA2 - 25 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 3}, // TA3 - 26 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 4}, // TA4 - 27 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 5}, // TA5 - 28 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 6}, // TA6 - 29 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 7}, // TA7 - 30 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 8}, // TA8 - 31 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 9}, // TA9 - 32 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0a}, // TA10 - 33 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0b}, // TA11 - 34 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0c}, // TA12 - 35 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0d}, // TA13 - 36 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0e}, // TA14 - 37 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0f}, // TA15 - 38 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA, 0}, // TCA0 - 39 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA, 1}, // TCA1 - 40 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0}, // TCC0 - 41 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 1}, // TCC1 - 42 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2}, // TCC2 - 43 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 3}, // TCC3 - 44 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 4}, // TCC4 - 45 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 5}, // TCC5 - 46 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 6}, // TCC6 - 47 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 7}, // TCC7 - 48 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 8}, // TCC8 - 49 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 9}, // TCC9 - 50 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0a}, // TCC10 - 51 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0b}, // TCC11 - 52 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0c}, // TCC12 - 53 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0d}, // TCC13 - 54 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0e}, // TCC14 - 55 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0f}, // TCC15 - 56 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0}, // TD0 - 57 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 1}, // TD1 - 58 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 2}, // TD2 - 59 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 3}, // TD3 - 60 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 4}, // TD4 - 61 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 5}, // TD5 - 62 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 6}, // TD6 - 63 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 7}, // TD7 - 64 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 8}, // TD8 - 65 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 9}, // TD9 - 66 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0a}, // TD10 - 67 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0b}, // TD11 - 68 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0c}, // TD12 - 69 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0d}, // TD13 - 70 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0e}, // TD14 - 71 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0f}, // TD15 - 72 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0}, // TCP0 - 73 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1}, // TCP1 - 74 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 2}, // TCP2 - 75 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 3}, // TCP3 - 76 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 4}, // TCP4 - 77 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 5}, // TCP5 - 78 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 6}, // TCP6 - 79 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 7}, // TCP7 - 80 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 8}, // TCP8 - 81 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 9}, // TCP9 - 82 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0a}, // TCP10 - 83 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0b}, // TCP11 - 84 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0c}, // TCP12 - 85 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0d}, // TCP13 - 86 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0e}, // TCP14 - 87 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0f}, // TCP15 - 88 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS, 0}, // GDS - 89 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // VGT - 90 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // IA - 91 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ, 0}, // MC - 92 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM, 0}, // SRBM - 93 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // WD - 94 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CPG - 95 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0}, // CPC - 96 }}; // The number of counters per block has been increased for gfx9 but this table may not reflect all // of them // as compute may not use all of them. static constexpr std::array, 125> gfx9BlockIdOrcaToRocr = {{ {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CB0 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // CB1 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // CB2 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // CB3 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0}, // CPF {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // DB0 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // DB1 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // DB2 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // DB3 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0}, // GRBM {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE, 0}, // GRBMSE {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SU {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SC {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0}, // SPI {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0}, // SQ {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_ES {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_GS {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_VS {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_PS {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_LS {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_HS {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS, 0}, // SQ_CS {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX, 0}, // SX {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0}, // TA0 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 1}, // TA1 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 2}, // TA2 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 3}, // TA3 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 4}, // TA4 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 5}, // TA5 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 6}, // TA6 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 7}, // TA7 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 8}, // TA8 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 9}, // TA9 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0a}, // TA10 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0b}, // TA11 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0c}, // TA12 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0d}, // TA13 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0e}, // TA14 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0f}, // TA15 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA, 0}, // TCA0 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA, 1}, // TCA1 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0}, // TCC0 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 1}, // TCC1 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2}, // TCC2 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 3}, // TCC3 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 4}, // TCC4 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 5}, // TCC5 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 6}, // TCC6 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 7}, // TCC7 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 8}, // TCC8 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 9}, // TCC9 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0a}, // TCC10 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0b}, // TCC11 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0c}, // TCC12 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0d}, // TCC13 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0e}, // TCC14 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0f}, // TCC15 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0}, // TD0 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 1}, // TD1 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 2}, // TD2 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 3}, // TD3 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 4}, // TD4 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 5}, // TD5 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 6}, // TD6 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 7}, // TD7 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 8}, // TD8 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 9}, // TD9 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0a}, // TD10 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0b}, // TD11 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0c}, // TD12 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0d}, // TD13 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0e}, // TD14 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0f}, // TD15 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0}, // TCP0 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1}, // TCP1 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 2}, // TCP2 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 3}, // TCP3 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 4}, // TCP4 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 5}, // TCP5 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 6}, // TCP6 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 7}, // TCP7 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 8}, // TCP8 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 9}, // TCP9 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0a}, // TCP10 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0b}, // TCP11 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0c}, // TCP12 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0d}, // TCP13 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0e}, // TCP14 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0f}, // TCP15 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS, 0}, // GDS - 89 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // VGT - 90 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // IA - 91 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // WD - 92 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CPG - 93 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0}, // CPC - 94 // blocks that are not defined in GSL {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC, 0}, // ATC - 97 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2, 0}, // ATCL2 - 98 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2, 0}, // MCVML2 - 99 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // EA - 100 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // EA - 101 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // EA - 102 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // EA - 103 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 4}, // EA - 104 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 5}, // EA - 105 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 6}, // EA - 106 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 7}, // EA - 107 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 8}, // EA - 108 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 9}, // EA - 109 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0a}, // EA - 110 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0b}, // EA - 111 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0c}, // EA - 112 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0d}, // EA - 113 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0e}, // EA - 114 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0f}, // EA - 115 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // RPB - 116 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // RMI - 117 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // RMI - 118 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // RMI - 119 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // RMI - 120 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 4}, // RMI - 121 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 5}, // RMI - 122 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 6}, // RMI - 123 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 7}, // RMI - 124 }}; static constexpr std::array, 139> gfx10BlockIdOrcaToRocr = {{ {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CB0 - 0 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // CB1 - 1 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // CB2 - 2 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // CB3 - 3 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0}, // CPF - 4 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // DB0 - 5 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // DB1 - 6 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // DB2 - 7 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // DB3 - 8 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0}, // GRBM - 9 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE, 0}, // GRBMSE - 10 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SU - 11 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SC0 - 12 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SC1 - 13 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0}, // SPI - 14 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0}, // SQ - 15 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_ES - 16 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_GS - 17 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_VS - 18 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_PS - 19 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_LS - 20 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_HS - 21 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS, 0}, // SQ_CS - 22 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX, 0}, // SX - 23 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0}, // TA0 - 24 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 1}, // TA1 - 25 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 2}, // TA2 - 26 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 3}, // TA3 - 27 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 4}, // TA4 - 28 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 5}, // TA5 - 29 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 6}, // TA6 - 30 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 7}, // TA7 - 31 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 8}, // TA8 - 32 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 9}, // TA9 - 33 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0a}, // TA10 - 34 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0b}, // TA11 - 35 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0c}, // TA12 - 36 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0d}, // TA13 - 37 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0e}, // TA14 - 38 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0f}, // TA15 - 39 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0}, // TD0 - 40 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 1}, // TD1 - 41 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 2}, // TD2 - 42 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 3}, // TD3 - 43 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 4}, // TD4 - 44 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 5}, // TD5 - 45 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 6}, // TD6 - 46 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 7}, // TD7 - 47 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 8}, // TD8 - 48 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 9}, // TD9 - 49 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0a}, // TD10 - 50 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0b}, // TD11 - 51 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0c}, // TD12 - 52 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0d}, // TD13 - 53 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0e}, // TD14 - 54 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0f}, // TD15 - 55 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0}, // TCP0 - 56 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1}, // TCP1 - 57 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 2}, // TCP2 - 58 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 3}, // TCP3 - 59 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 4}, // TCP4 - 60 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 5}, // TCP5 - 61 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 6}, // TCP6 - 62 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 7}, // TCP7 - 63 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 8}, // TCP8 - 64 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 9}, // TCP9 - 65 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0a}, // TCP10 - 66 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0b}, // TCP11 - 67 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0c}, // TCP12 - 68 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0d}, // TCP13 - 69 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0e}, // TCP14 - 70 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0f}, // TCP15 - 71 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS, 0}, // GDS - 72 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CPG - 73 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0}, // CPC - 74 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC, 0}, // ATC - 75 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2, 0}, // ATCL2 - 76 {HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2, 0}, // MCVML2 - 77 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // EA - 78 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // EA - 79 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // EA - 80 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // EA - 81 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 4}, // EA - 82 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 5}, // EA - 83 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 6}, // EA - 84 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 7}, // EA - 85 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 8}, // EA - 86 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 9}, // EA - 87 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0a}, // EA - 88 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0b}, // EA - 89 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0c}, // EA - 90 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0d}, // EA - 91 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0e}, // EA - 92 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0f}, // EA - 93 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // RPB - 94 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // RMI0 - 95 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // RMI1 - 96 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GE - 97 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GL1A - 98 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GL1C - 99 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GL1CG0 - 100 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // GL1CG1 - 101 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // GL1CG2 - 102 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // GL1CG3 - 103 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GL2A0 - 104 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // GL2A1 - 105 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // GL2A2 - 106 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // GL2A3 - 107 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GL2C0 - 108 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // GL2C1 - 109 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // GL2C2 - 110 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // GL2C3 - 111 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 4}, // GL2C4 - 112 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 5}, // GL2C5 - 113 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 6}, // GL2C6 - 114 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 7}, // GL2C7 - 115 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 8}, // GL2C8 - 116 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 9}, // GL2C9 - 117 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0a}, // GL2C10 - 118 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0b}, // GL2C11 - 119 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0c}, // GL2C12 - 120 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0d}, // GL2C13 - 121 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0e}, // GL2C14 - 122 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0f}, // GL2C15 - 123 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x10}, // GL2C16 - 124 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x11}, // GL2C17 - 125 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x12}, // GL2C18 - 126 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x13}, // GL2C19 - 127 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x14}, // GL2C20 - 128 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x15}, // GL2C21 - 129 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x16}, // GL2C22 - 130 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x17}, // GL2C23 - 131 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CHA - 132 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CHC - 133 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CHCG - 134 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GUS - 135 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GCR - 136 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PH - 137 {HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // UTCL1 - 138 }}; //! Constructor for the ROC PerfCounter object PerfCounter::PerfCounter(const Device& device, //!< A ROC device object uint32_t blockIndex, //!< HW block index uint32_t counterIndex, //!< Counter index (Counter register) within the block uint32_t eventIndex) //!< Event index (Counter selection) for profiling : roc_device_(device), profileRef_(nullptr) { info_.blockIndex_ = blockIndex; // Block name + block index info_.counterIndex_ = counterIndex; // Ignored as not being used in PPT library info_.eventIndex_ = eventIndex; // Counter Event Selection (counter_id) // these block indices are valid for the SI (Gfx8) & Gfx9 devices switch (roc_device_.isa().versionMajor()) { case (8): gfxVersion_ = ROC_GFX8; if (blockIndex < viBlockIdOrcaToRocr.size()) { auto p = viBlockIdOrcaToRocr[blockIndex]; event_.block_name = std::get<0>(p); event_.block_index = std::get<1>(p); } break; case (9): gfxVersion_ = ROC_GFX9; if (blockIndex < gfx9BlockIdOrcaToRocr.size()) { auto p = gfx9BlockIdOrcaToRocr[blockIndex]; event_.block_name = std::get<0>(p); event_.block_index = std::get<1>(p); } break; case (10): gfxVersion_ = ROC_GFX10; if (blockIndex < gfx10BlockIdOrcaToRocr.size()) { auto p = gfx10BlockIdOrcaToRocr[blockIndex]; event_.block_name = std::get<0>(p); event_.block_index = std::get<1>(p); } break; default: gfxVersion_ = ROC_UNSUPPORTED; event_.block_name = HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; event_.block_index = 0; break; } event_.counter_id = eventIndex; } void PerfCounter::setProfile(PerfCounterProfile* profileRef) { profileRef->perfCounters().push_back(this); profileRef->addEvent(event_); if (profileRef_ != nullptr) { profileRef_->release(); } profileRef_ = profileRef; profileRef->retain(); } uint64_t PerfCounter::getInfo(uint64_t infoType) const { switch (infoType) { case CL_PERFCOUNTER_GPU_BLOCK_INDEX: { // Return the GPU block index return info()->blockIndex_; } case CL_PERFCOUNTER_GPU_COUNTER_INDEX: { // Return the GPU counter index return info()->counterIndex_; } case CL_PERFCOUNTER_GPU_EVENT_INDEX: { // Return the GPU event index return info()->eventIndex_; } case CL_PERFCOUNTER_DATA: { const hsa_ven_amd_aqlprofile_profile_t* profile = profileRef_->profile(); std::vector data; profileRef_->api()->hsa_ven_amd_aqlprofile_iterate_data(profile, PerfCounterCallback, &data); uint64_t result = 0; for (const auto& it : data) { if (it.pmc_data.event.block_name == event_.block_name && it.pmc_data.event.block_index == event_.block_index && it.pmc_data.event.counter_id == event_.counter_id) { result += it.pmc_data.result; } } return result; } default: LogError("Wrong PerfCounter::getInfo parameter"); } return 0; } PerfCounter::~PerfCounter() { if (profileRef_ != nullptr) { profileRef_->release(); profileRef_ = nullptr; } } bool PerfCounterProfile::initialize() { // save the current command and output buffer information hsa_ven_amd_aqlprofile_descriptor_t cmd_buf = profile_.command_buffer; hsa_ven_amd_aqlprofile_descriptor_t out_buf = profile_.output_buffer; // determine the required buffer sizes for the profiling events profile_.events = &events_[0]; profile_.event_count = events_.size(); profile_.command_buffer = {nullptr, 0}; profile_.output_buffer = {nullptr, 0}; if (api_.hsa_ven_amd_aqlprofile_start(&profile_, nullptr) != HSA_STATUS_SUCCESS) { LogError("Start hsa aql profile counter failed"); return false; } const uint32_t alignment = amd::Os::pageSize(); // use page alignment if (cmd_buf.ptr != nullptr && cmd_buf.size != profile_.command_buffer.size) { roc_device_.memFree(cmd_buf.ptr, cmd_buf.size); cmd_buf.ptr = nullptr; } if (cmd_buf.ptr == nullptr) { void *buf_ptr = roc_device_.hostAlloc(profile_.command_buffer.size, alignment, Device::MemorySegment::kAtomics); if (buf_ptr != nullptr) { profile_.command_buffer.ptr = buf_ptr; } else { LogError("Failed to allocate profile counter command buffer"); return false; } } if (out_buf.ptr != nullptr && out_buf.size != profile_.output_buffer.size) { roc_device_.memFree(out_buf.ptr, out_buf.size); out_buf.ptr = nullptr; } if (out_buf.ptr == nullptr) { void *buf_ptr = roc_device_.hostAlloc(profile_.output_buffer.size, alignment, Device::MemorySegment::kAtomics); if (buf_ptr != nullptr) { profile_.output_buffer.ptr = buf_ptr; } else { roc_device_.hostFree(profile_.command_buffer.ptr, profile_.command_buffer.size); LogError("Failed to allocate profile counter output buffer"); return false; } } // create the completion signal if (hsa_signal_create(1, 0, nullptr, &completionSignal_) != HSA_STATUS_SUCCESS) { LogError("Failed to create signal for profile counter"); return false; } return true; } hsa_ext_amd_aql_pm4_packet_t* PerfCounterProfile::createStartPacket() { profile_.events = &events_[0]; profile_.event_count = events_.size(); // set up the profile aql packets for capturing performance counter if (api_.hsa_ven_amd_aqlprofile_start(&profile_, &prePacket_) != HSA_STATUS_SUCCESS) { DevLogError("Cannot Start AQL Profile \n"); return nullptr; } return &prePacket_; } hsa_ext_amd_aql_pm4_packet_t* PerfCounterProfile::createStopPacket() { profile_.events = &events_[0]; profile_.event_count = events_.size(); // set up the profile aql packets for post-capturing performance counter // and create the completion signal if (api_.hsa_ven_amd_aqlprofile_stop(&profile_, &postPacket_) != HSA_STATUS_SUCCESS) { DevLogError("Cannot Stop AQL Profile \n"); return nullptr; } postPacket_.completion_signal = completionSignal_; return &postPacket_; } PerfCounterProfile::~PerfCounterProfile() { if (completionSignal_.handle != 0) { hsa_signal_destroy(completionSignal_); } if (profile_.command_buffer.ptr) { roc_device_.memFree(profile_.command_buffer.ptr, profile_.command_buffer.size); } if (profile_.output_buffer.ptr) { roc_device_.memFree(profile_.output_buffer.ptr, profile_.output_buffer.size); } } } // namespace roc