Files
rocm-systems/rocclr/device/rocm/roccounters.cpp
T
Ioannis Assiouras d7f352dbed SWDEV-453301 - Remove the option to write multiple packets in dispatchGenericAqlPacket
Dispatching multiple packets with ring the doorbell once is not supported by the lower layers

Change-Id: I7665a2dcdd4ef9e47dadfe410180fed64c5a4ee0
2024-04-05 05:28:10 -04:00

629 строки
32 KiB
C++

/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "device/rocm/roccounters.hpp"
#include "device/rocm/rocvirtual.hpp"
#include <array>
hsa_status_t PerfCounterCallback(
hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data,
void* callback_data)
{
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> passed_data_t;
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) {
reinterpret_cast<passed_data_t*>(callback_data)->push_back(*info_data);
}
return HSA_STATUS_SUCCESS;
}
namespace roc {
/*
Converting from ORCA cmndefs.h to ROCR hsa_ven_amd_aqlprofile.h
Note that some blocks are not defined in cmndefs.h
*/
static constexpr std::array<std::pair<hsa_ven_amd_aqlprofile_block_name_t, int>, 97> viBlockIdOrcaToRocr = {{
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CB0 - 0
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // CB1 - 1
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // CB2 - 2
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // CB3 - 3
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0}, // CPF - 4
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // DB0 - 5
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // DB1 - 6
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // DB2 - 7
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // DB3 - 8
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0}, // GRBM - 9
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE, 0}, // GRBMSE - 10
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SU - 11
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SC - 12
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0}, // SPI - 13
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0}, // SQ - 14
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_ES - 15
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_GS - 16
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_VS - 17
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_PS - 18
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_LS - 19
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_HS - 20
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS, 0}, // SQ_CS - 21
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX, 0}, // SX - 22
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0}, // TA0 - 23
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 1}, // TA1 - 24
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 2}, // TA2 - 25
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 3}, // TA3 - 26
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 4}, // TA4 - 27
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 5}, // TA5 - 28
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 6}, // TA6 - 29
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 7}, // TA7 - 30
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 8}, // TA8 - 31
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 9}, // TA9 - 32
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0a}, // TA10 - 33
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0b}, // TA11 - 34
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0c}, // TA12 - 35
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0d}, // TA13 - 36
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0e}, // TA14 - 37
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0f}, // TA15 - 38
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA, 0}, // TCA0 - 39
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA, 1}, // TCA1 - 40
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0}, // TCC0 - 41
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 1}, // TCC1 - 42
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2}, // TCC2 - 43
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 3}, // TCC3 - 44
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 4}, // TCC4 - 45
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 5}, // TCC5 - 46
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 6}, // TCC6 - 47
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 7}, // TCC7 - 48
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 8}, // TCC8 - 49
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 9}, // TCC9 - 50
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0a}, // TCC10 - 51
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0b}, // TCC11 - 52
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0c}, // TCC12 - 53
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0d}, // TCC13 - 54
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0e}, // TCC14 - 55
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0f}, // TCC15 - 56
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0}, // TD0 - 57
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 1}, // TD1 - 58
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 2}, // TD2 - 59
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 3}, // TD3 - 60
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 4}, // TD4 - 61
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 5}, // TD5 - 62
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 6}, // TD6 - 63
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 7}, // TD7 - 64
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 8}, // TD8 - 65
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 9}, // TD9 - 66
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0a}, // TD10 - 67
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0b}, // TD11 - 68
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0c}, // TD12 - 69
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0d}, // TD13 - 70
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0e}, // TD14 - 71
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0f}, // TD15 - 72
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0}, // TCP0 - 73
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1}, // TCP1 - 74
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 2}, // TCP2 - 75
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 3}, // TCP3 - 76
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 4}, // TCP4 - 77
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 5}, // TCP5 - 78
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 6}, // TCP6 - 79
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 7}, // TCP7 - 80
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 8}, // TCP8 - 81
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 9}, // TCP9 - 82
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0a}, // TCP10 - 83
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0b}, // TCP11 - 84
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0c}, // TCP12 - 85
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0d}, // TCP13 - 86
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0e}, // TCP14 - 87
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0f}, // TCP15 - 88
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS, 0}, // GDS - 89
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // VGT - 90
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // IA - 91
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ, 0}, // MC - 92
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM, 0}, // SRBM - 93
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // WD - 94
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CPG - 95
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0}, // CPC - 96
}};
// The number of counters per block has been increased for gfx9 but this table may not reflect all
// of them
// as compute may not use all of them.
static constexpr std::array<std::pair<hsa_ven_amd_aqlprofile_block_name_t, int>, 125> gfx9BlockIdOrcaToRocr = {{
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CB0
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // CB1
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // CB2
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // CB3
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0}, // CPF
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // DB0
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // DB1
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // DB2
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // DB3
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0}, // GRBM
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE, 0}, // GRBMSE
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SU
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SC
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0}, // SPI
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0}, // SQ
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_ES
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_GS
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_VS
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_PS
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_LS
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_HS
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS, 0}, // SQ_CS
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX, 0}, // SX
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0}, // TA0
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 1}, // TA1
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 2}, // TA2
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 3}, // TA3
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 4}, // TA4
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 5}, // TA5
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 6}, // TA6
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 7}, // TA7
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 8}, // TA8
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 9}, // TA9
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0a}, // TA10
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0b}, // TA11
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0c}, // TA12
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0d}, // TA13
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0e}, // TA14
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0f}, // TA15
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA, 0}, // TCA0
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA, 1}, // TCA1
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0}, // TCC0
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 1}, // TCC1
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2}, // TCC2
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 3}, // TCC3
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 4}, // TCC4
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 5}, // TCC5
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 6}, // TCC6
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 7}, // TCC7
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 8}, // TCC8
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 9}, // TCC9
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0a}, // TCC10
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0b}, // TCC11
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0c}, // TCC12
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0d}, // TCC13
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0e}, // TCC14
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 0x0f}, // TCC15
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0}, // TD0
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 1}, // TD1
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 2}, // TD2
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 3}, // TD3
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 4}, // TD4
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 5}, // TD5
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 6}, // TD6
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 7}, // TD7
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 8}, // TD8
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 9}, // TD9
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0a}, // TD10
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0b}, // TD11
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0c}, // TD12
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0d}, // TD13
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0e}, // TD14
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0f}, // TD15
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0}, // TCP0
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1}, // TCP1
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 2}, // TCP2
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 3}, // TCP3
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 4}, // TCP4
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 5}, // TCP5
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 6}, // TCP6
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 7}, // TCP7
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 8}, // TCP8
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 9}, // TCP9
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0a}, // TCP10
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0b}, // TCP11
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0c}, // TCP12
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0d}, // TCP13
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0e}, // TCP14
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0f}, // TCP15
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS, 0}, // GDS - 89
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // VGT - 90
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // IA - 91
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // WD - 92
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CPG - 93
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0}, // CPC - 94
// blocks that are not defined in GSL
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC, 0}, // ATC - 97
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2, 0}, // ATCL2 - 98
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2, 0}, // MCVML2 - 99
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // EA - 100
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // EA - 101
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // EA - 102
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // EA - 103
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 4}, // EA - 104
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 5}, // EA - 105
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 6}, // EA - 106
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 7}, // EA - 107
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 8}, // EA - 108
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 9}, // EA - 109
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0a}, // EA - 110
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0b}, // EA - 111
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0c}, // EA - 112
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0d}, // EA - 113
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0e}, // EA - 114
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0f}, // EA - 115
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // RPB - 116
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // RMI - 117
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // RMI - 118
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // RMI - 119
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // RMI - 120
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 4}, // RMI - 121
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 5}, // RMI - 122
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 6}, // RMI - 123
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 7}, // RMI - 124
}};
static constexpr std::array<std::pair<hsa_ven_amd_aqlprofile_block_name_t, int>, 139> gfx10BlockIdOrcaToRocr = {{
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CB0 - 0
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // CB1 - 1
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // CB2 - 2
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // CB3 - 3
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF, 0}, // CPF - 4
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // DB0 - 5
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // DB1 - 6
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // DB2 - 7
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // DB3 - 8
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM, 0}, // GRBM - 9
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE, 0}, // GRBMSE - 10
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SU - 11
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SC0 - 12
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PA_SC1 - 13
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI, 0}, // SPI - 14
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0}, // SQ - 15
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_ES - 16
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_GS - 17
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_VS - 18
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_PS - 19
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_LS - 20
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // SQ_HS - 21
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS, 0}, // SQ_CS - 22
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX, 0}, // SX - 23
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0}, // TA0 - 24
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 1}, // TA1 - 25
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 2}, // TA2 - 26
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 3}, // TA3 - 27
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 4}, // TA4 - 28
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 5}, // TA5 - 29
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 6}, // TA6 - 30
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 7}, // TA7 - 31
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 8}, // TA8 - 32
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 9}, // TA9 - 33
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0a}, // TA10 - 34
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0b}, // TA11 - 35
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0c}, // TA12 - 36
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0d}, // TA13 - 37
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0e}, // TA14 - 38
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA, 0x0f}, // TA15 - 39
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0}, // TD0 - 40
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 1}, // TD1 - 41
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 2}, // TD2 - 42
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 3}, // TD3 - 43
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 4}, // TD4 - 44
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 5}, // TD5 - 45
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 6}, // TD6 - 46
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 7}, // TD7 - 47
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 8}, // TD8 - 48
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 9}, // TD9 - 49
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0a}, // TD10 - 50
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0b}, // TD11 - 51
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0c}, // TD12 - 52
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0d}, // TD13 - 53
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0e}, // TD14 - 54
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD, 0x0f}, // TD15 - 55
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0}, // TCP0 - 56
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 1}, // TCP1 - 57
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 2}, // TCP2 - 58
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 3}, // TCP3 - 59
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 4}, // TCP4 - 60
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 5}, // TCP5 - 61
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 6}, // TCP6 - 62
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 7}, // TCP7 - 63
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 8}, // TCP8 - 64
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 9}, // TCP9 - 65
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0a}, // TCP10 - 66
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0b}, // TCP11 - 67
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0c}, // TCP12 - 68
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0d}, // TCP13 - 69
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0e}, // TCP14 - 70
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP, 0x0f}, // TCP15 - 71
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS, 0}, // GDS - 72
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CPG - 73
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0}, // CPC - 74
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC, 0}, // ATC - 75
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2, 0}, // ATCL2 - 76
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2, 0}, // MCVML2 - 77
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // EA - 78
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // EA - 79
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // EA - 80
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // EA - 81
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 4}, // EA - 82
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 5}, // EA - 83
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 6}, // EA - 84
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 7}, // EA - 85
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 8}, // EA - 86
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 9}, // EA - 87
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0a}, // EA - 88
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0b}, // EA - 89
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0c}, // EA - 90
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0d}, // EA - 91
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0e}, // EA - 92
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0f}, // EA - 93
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // RPB - 94
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // RMI0 - 95
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // RMI1 - 96
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GE - 97
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GL1A - 98
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GL1C - 99
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GL1CG0 - 100
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // GL1CG1 - 101
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // GL1CG2 - 102
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // GL1CG3 - 103
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GL2A0 - 104
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // GL2A1 - 105
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // GL2A2 - 106
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // GL2A3 - 107
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GL2C0 - 108
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 1}, // GL2C1 - 109
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 2}, // GL2C2 - 110
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 3}, // GL2C3 - 111
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 4}, // GL2C4 - 112
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 5}, // GL2C5 - 113
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 6}, // GL2C6 - 114
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 7}, // GL2C7 - 115
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 8}, // GL2C8 - 116
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 9}, // GL2C9 - 117
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0a}, // GL2C10 - 118
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0b}, // GL2C11 - 119
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0c}, // GL2C12 - 120
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0d}, // GL2C13 - 121
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0e}, // GL2C14 - 122
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x0f}, // GL2C15 - 123
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x10}, // GL2C16 - 124
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x11}, // GL2C17 - 125
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x12}, // GL2C18 - 126
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x13}, // GL2C19 - 127
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x14}, // GL2C20 - 128
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x15}, // GL2C21 - 129
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x16}, // GL2C22 - 130
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0x17}, // GL2C23 - 131
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CHA - 132
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CHC - 133
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // CHCG - 134
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GUS - 135
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // GCR - 136
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // PH - 137
{HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER, 0}, // UTCL1 - 138
}};
//! Constructor for the ROC PerfCounter object
PerfCounter::PerfCounter(const Device& device, //!< A ROC device object
uint32_t blockIndex, //!< HW block index
uint32_t counterIndex, //!< Counter index (Counter register) within the block
uint32_t eventIndex) //!< Event index (Counter selection) for profiling
: roc_device_(device),
profileRef_(nullptr) {
info_.blockIndex_ = blockIndex; // Block name + block index
info_.counterIndex_ = counterIndex; // Ignored as not being used in PPT library
info_.eventIndex_ = eventIndex; // Counter Event Selection (counter_id)
// these block indices are valid for the SI (Gfx8) & Gfx9 devices
switch (roc_device_.isa().versionMajor()) {
case (9):
gfxVersion_ = ROC_GFX9;
if (blockIndex < gfx9BlockIdOrcaToRocr.size()) {
auto p = gfx9BlockIdOrcaToRocr[blockIndex];
event_.block_name = std::get<0>(p);
event_.block_index = std::get<1>(p);
}
break;
case (10):
gfxVersion_ = ROC_GFX10;
if (blockIndex < gfx10BlockIdOrcaToRocr.size()) {
auto p = gfx10BlockIdOrcaToRocr[blockIndex];
event_.block_name = std::get<0>(p);
event_.block_index = std::get<1>(p);
}
break;
default:
gfxVersion_ = ROC_UNSUPPORTED;
event_.block_name = HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER;
event_.block_index = 0;
break;
}
event_.counter_id = eventIndex;
}
void PerfCounter::setProfile(PerfCounterProfile* profileRef) {
profileRef->perfCounters().push_back(this);
profileRef->addEvent(event_);
if (profileRef_ != nullptr) {
profileRef_->release();
}
profileRef_ = profileRef;
profileRef->retain();
}
uint64_t PerfCounter::getInfo(uint64_t infoType) const {
switch (infoType) {
case CL_PERFCOUNTER_GPU_BLOCK_INDEX: {
// Return the GPU block index
return info()->blockIndex_;
}
case CL_PERFCOUNTER_GPU_COUNTER_INDEX: {
// Return the GPU counter index
return info()->counterIndex_;
}
case CL_PERFCOUNTER_GPU_EVENT_INDEX: {
// Return the GPU event index
return info()->eventIndex_;
}
case CL_PERFCOUNTER_DATA: {
const hsa_ven_amd_aqlprofile_profile_t* profile = profileRef_->profile();
std::vector<hsa_ven_amd_aqlprofile_info_data_t> data;
profileRef_->api()->hsa_ven_amd_aqlprofile_iterate_data(profile,
PerfCounterCallback,
&data);
uint64_t result = 0;
for (const auto& it : data) {
if (it.pmc_data.event.block_name == event_.block_name &&
it.pmc_data.event.block_index == event_.block_index &&
it.pmc_data.event.counter_id == event_.counter_id) {
result += it.pmc_data.result;
}
}
return result;
}
default:
LogError("Wrong PerfCounter::getInfo parameter");
}
return 0;
}
PerfCounter::~PerfCounter() {
if (profileRef_ != nullptr) {
profileRef_->release();
profileRef_ = nullptr;
}
}
bool PerfCounterProfile::initialize() {
// save the current command and output buffer information
hsa_ven_amd_aqlprofile_descriptor_t cmd_buf = profile_.command_buffer;
hsa_ven_amd_aqlprofile_descriptor_t out_buf = profile_.output_buffer;
// determine the required buffer sizes for the profiling events
profile_.events = &events_[0];
profile_.event_count = events_.size();
profile_.command_buffer = {nullptr, 0};
profile_.output_buffer = {nullptr, 0};
if (api_.hsa_ven_amd_aqlprofile_start(&profile_, nullptr) != HSA_STATUS_SUCCESS) {
LogError("Start hsa aql profile counter failed");
return false;
}
const uint32_t alignment = amd::Os::pageSize(); // use page alignment
if (cmd_buf.ptr != nullptr && cmd_buf.size != profile_.command_buffer.size) {
roc_device_.memFree(cmd_buf.ptr, cmd_buf.size);
cmd_buf.ptr = nullptr;
}
if (cmd_buf.ptr == nullptr) {
void *buf_ptr = roc_device_.hostAlloc(profile_.command_buffer.size, alignment,
Device::MemorySegment::kAtomics);
if (buf_ptr != nullptr) {
profile_.command_buffer.ptr = buf_ptr;
}
else {
LogError("Failed to allocate profile counter command buffer");
return false;
}
}
if (out_buf.ptr != nullptr && out_buf.size != profile_.output_buffer.size) {
roc_device_.memFree(out_buf.ptr, out_buf.size);
out_buf.ptr = nullptr;
}
if (out_buf.ptr == nullptr) {
void *buf_ptr = roc_device_.hostAlloc(profile_.output_buffer.size, alignment,
Device::MemorySegment::kAtomics);
if (buf_ptr != nullptr) {
profile_.output_buffer.ptr = buf_ptr;
}
else {
roc_device_.hostFree(profile_.command_buffer.ptr, profile_.command_buffer.size);
LogError("Failed to allocate profile counter output buffer");
return false;
}
}
// create the completion signal
if (hsa_signal_create(1, 0, nullptr, &completionSignal_) != HSA_STATUS_SUCCESS) {
LogError("Failed to create signal for profile counter");
return false;
}
return true;
}
hsa_ext_amd_aql_pm4_packet_t* PerfCounterProfile::createStartPacket() {
profile_.events = &events_[0];
profile_.event_count = events_.size();
// set up the profile aql packets for capturing performance counter
if (api_.hsa_ven_amd_aqlprofile_start(&profile_, &prePacket_) != HSA_STATUS_SUCCESS) {
DevLogError("Cannot Start AQL Profile \n");
return nullptr;
}
return &prePacket_;
}
hsa_ext_amd_aql_pm4_packet_t* PerfCounterProfile::createStopPacket() {
profile_.events = &events_[0];
profile_.event_count = events_.size();
// set up the profile aql packets for post-capturing performance counter
// and create the completion signal
if (api_.hsa_ven_amd_aqlprofile_stop(&profile_, &postPacket_) != HSA_STATUS_SUCCESS) {
DevLogError("Cannot Stop AQL Profile \n");
return nullptr;
}
postPacket_.completion_signal = completionSignal_;
return &postPacket_;
}
PerfCounterProfile::~PerfCounterProfile() {
if (completionSignal_.handle != 0) {
hsa_signal_destroy(completionSignal_);
}
if (profile_.command_buffer.ptr) {
roc_device_.memFree(profile_.command_buffer.ptr, profile_.command_buffer.size);
}
if (profile_.output_buffer.ptr) {
roc_device_.memFree(profile_.output_buffer.ptr, profile_.output_buffer.size);
}
}
} // namespace roc