Files
rocm-systems/source/lib/rocprofiler-sdk/pc_sampling/parser/translation.hpp
T
Rawat, Swati 97b7a6315d update copyright date to 2025 (#102)
* Update LICENSE

* Update conf.py

* Update copyright year

* [fix] Update copyright year

* Update copyright year "ROCm Developer Tools"

* Add license headers to c++ files

* Add license to *.py

* Update licenses in rocdecode sources

---------

Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>
Co-authored-by: Mythreya <mythreya.kuricheti@amd.com>
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
2025-01-22 19:11:20 -06:00

312 строки
11 KiB
C++

// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <array>
#include <cstdint>
#include <cstring>
#include "lib/rocprofiler-sdk/pc_sampling/parser/gfx11.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/parser/gfx9.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/parser/parser_types.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/parser/rocr.h"
#include "lib/rocprofiler-sdk/pc_sampling/parser/stochastic_records.h"
// TODO: refactor the commented-out code below to support stochastic sampling
// template <typename gfx>
// inline rocprofiler_pc_sampling_record_t
// copyStochasticSample(const perf_sample_snapshot_v1& sample);
// template <>
// inline rocprofiler_pc_sampling_record_t
// copyStochasticSample<GFX9>(const perf_sample_snapshot_v1& sample)
// {
// rocprofiler_pc_sampling_record_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
// ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 26) & 0x1;
// // Check wave_id matches snapshot_wave_id
// ret.flags.has_wave_cnt = true;
// ret.flags.has_stall_reason = true;
// ret.wave_count = sample.perf_snapshot_data1 & 0x3F;
// ret.wave_issued = sample.perf_snapshot_data >> 1;
// ret.snapshot.dual_issue_valu = sample.perf_snapshot_data >> 2;
// ret.snapshot.inst_type = sample.perf_snapshot_data >> 3;
// ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 7) & 0x7;
// ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 10) & 0xFF;
// ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 18) & 0xFF;
// ret.reserved = 0;
// return ret;
// }
// template <>
// inline rocprofiler_pc_sampling_record_t
// copyStochasticSample<GFX11>(const perf_sample_snapshot_v1& sample)
// {
// rocprofiler_pc_sampling_record_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
// ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 23) & 0x1;
// // Check wave_id matches snapshot_wave_id
// ret.flags.has_stall_reason = true;
// ret.wave_issued = sample.perf_snapshot_data >> 1;
// ret.snapshot.inst_type = sample.perf_snapshot_data >> 2;
// ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 6) & 0x7;
// ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 9) & 0x7F;
// ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 16) & 0x7F;
// ret.snapshot.dual_issue_valu = false;
// ret.reserved = 0;
// return ret;
// }
// #define BITSHIFT(sname) out |= ((in >> GFX::sname) & 1) << PCSAMPLE::sname
// template <typename GFX>
// inline int
// translate_arb(int in)
// {
// size_t out = 0;
// BITSHIFT(ISSUE_VALU);
// BITSHIFT(ISSUE_MATRIX);
// BITSHIFT(ISSUE_LDS);
// BITSHIFT(ISSUE_LDS_DIRECT);
// BITSHIFT(ISSUE_SCALAR);
// BITSHIFT(ISSUE_VMEM_TEX);
// BITSHIFT(ISSUE_FLAT);
// BITSHIFT(ISSUE_EXP);
// BITSHIFT(ISSUE_MISC);
// BITSHIFT(ISSUE_BRMSG);
// return out & 0x3FF;
// }
// #undef BITSHIFT
// #define LUTOVERLOAD(sname) this->operator[](GFX::sname) = PCSAMPLE::sname
// template <typename GFX>
// class GFX_REASON_LUT : public std::array<int, 32>
// {
// public:
// GFX_REASON_LUT()
// {
// std::memset(data(), 0, size() * sizeof(int));
// LUTOVERLOAD(REASON_NOT_AVAILABLE);
// LUTOVERLOAD(REASON_ALU);
// LUTOVERLOAD(REASON_WAITCNT);
// LUTOVERLOAD(REASON_INTERNAL);
// LUTOVERLOAD(REASON_BARRIER);
// LUTOVERLOAD(REASON_ARBITER);
// LUTOVERLOAD(REASON_EX_STALL);
// LUTOVERLOAD(REASON_OTHER_WAIT);
// LUTOVERLOAD(REASON_SLEEP);
// }
// };
// template <typename GFX>
// class GFX_INST_LUT : public std::array<int, 32>
// {
// public:
// GFX_INST_LUT()
// {
// std::memset(data(), 0, size() * sizeof(int));
// LUTOVERLOAD(TYPE_VALU);
// LUTOVERLOAD(TYPE_MATRIX);
// LUTOVERLOAD(TYPE_SCALAR);
// LUTOVERLOAD(TYPE_TEX);
// LUTOVERLOAD(TYPE_LDS);
// LUTOVERLOAD(TYPE_LDS_DIRECT);
// LUTOVERLOAD(TYPE_FLAT);
// LUTOVERLOAD(TYPE_EXP);
// LUTOVERLOAD(TYPE_MESSAGE);
// LUTOVERLOAD(TYPE_BARRIER);
// LUTOVERLOAD(TYPE_BRANCH_NOT_TAKEN);
// LUTOVERLOAD(TYPE_BRANCH_TAKEN);
// LUTOVERLOAD(TYPE_JUMP);
// LUTOVERLOAD(TYPE_OTHER);
// LUTOVERLOAD(TYPE_NO_INST);
// LUTOVERLOAD(TYPE_DUAL_VALU);
// }
// };
// template <typename GFX>
// inline int
// translate_reason(int in)
// {
// static GFX_REASON_LUT<GFX> lut;
// return lut[in & 0x1F];
// }
// template <typename GFX>
// inline int
// translate_inst(int in)
// {
// static GFX_INST_LUT<GFX> lut;
// return lut[in & 0x1F];
// }
// #undef LUTOVERLOAD
// template <bool HostTrap, typename GFX>
// inline rocprofiler_pc_sampling_record_t
// copySample(const void* sample)
// {
// if(HostTrap) return copyHostTrapSample(*(const perf_sample_host_trap_v1*) sample);
// rocprofiler_pc_sampling_record_t ret =
// copyStochasticSample<GFX>(*(const perf_sample_snapshot_v1*) sample);
// ret.snapshot.inst_type = translate_inst<GFX>(ret.snapshot.inst_type);
// ret.snapshot.arb_state_issue = translate_arb<GFX>(ret.snapshot.arb_state_issue);
// ret.snapshot.arb_state_stall = translate_arb<GFX>(ret.snapshot.arb_state_stall);
// ret.snapshot.reason_not_issued = translate_reason<GFX>(ret.snapshot.reason_not_issued);
// return ret;
// }
/**
 * @brief Extract the inclusive bit range [bit_end:bit_start] of @p val,
 *        right-aligned.
 *
 * Every argument and the whole expansion are parenthesized so that the macro
 * composes correctly with neighbouring operators (for example
 * `EXTRACT_BITS(...) == x`) and with compound argument expressions
 * (for example `a | b`).
 *
 * @note The mask is built from a 32-bit `1U`, so the field width
 *       (bit_end - bit_start + 1) must be smaller than 32.
 */
#define EXTRACT_BITS(val, bit_end, bit_start)                                                      \
    (((val) >> (bit_start)) & ((1U << ((bit_end) - (bit_start) + 1)) - 1))
/**
 * @brief Store the chiplet index of a raw sample into the output record.
 *
 * The value written is `sample.chiplet_and_wave_id` with its low 8 bits
 * shifted out.
 *
 * @tparam GFX               Architecture tag (not used by the body).
 * @tparam PcSamplingRecordT Record type exposing `hw_id.chiplet`.
 * @tparam SType             Raw sample type exposing `chiplet_and_wave_id`.
 * @param record Record whose `hw_id.chiplet` is overwritten.
 * @param sample Raw sample to read from.
 */
template <typename GFX, typename PcSamplingRecordT, typename SType>
inline void
copyChipletId(PcSamplingRecordT& record, const SType& sample)
{
    // The chiplet index sits above the low 8 bits of the combined field.
    constexpr int chiplet_shift = 8;

    auto& hw_id   = record.hw_id;
    hw_id.chiplet = sample.chiplet_and_wave_id >> chiplet_shift;
}
/**
 * @brief Decode a raw hardware-ID register value into the fields of @p hw_id.
 *
 * Only declared here; behaviour is supplied by explicit specializations keyed
 * on the architecture tag (first template argument) and the hw-id record type.
 */
template <typename GFX9, typename HwIdT>
inline void
copyHwId(HwIdT& hw_id, const uint32_t hsa_hw_id);

/**
 * @brief GFX9 decoding of the 32-bit hardware-ID register into a V0 hw-id.
 *
 * Each destination field receives one contiguous bit range of @p hw_id_reg;
 * bits 29:27 (state_id) are not copied.
 *
 * @param hw_id     Destination hw-id structure; the listed fields are overwritten.
 * @param hw_id_reg Raw register value to decode.
 */
template <>
inline void
copyHwId<GFX9, rocprofiler_pc_sampling_hw_id_v0_t>(rocprofiler_pc_sampling_hw_id_v0_t& hw_id,
                                                   const uint32_t hw_id_reg)
{
    // Right-aligned value of the inclusive bit range [msb:lsb] of the register.
    const auto field = [hw_id_reg](unsigned msb, unsigned lsb) -> uint32_t {
        return (hw_id_reg >> lsb) & ((1U << (msb - lsb + 1)) - 1);
    };

    hw_id.wave_id          = field(3, 0);    // [3:0]   wave_id
    hw_id.simd_id          = field(5, 4);    // [5:4]   simd_id
    hw_id.pipe_id          = field(7, 6);    // [7:6]   pipe_id
    hw_id.cu_or_wgp_id     = field(11, 8);   // [11:8]  cu_id
    hw_id.shader_array_id  = field(12, 12);  // [12]    sa_id
    hw_id.shader_engine_id = field(15, 13);  // [15:13] se_id
    hw_id.workgroup_id     = field(19, 16);  // [19:16] tg_id
    hw_id.vm_id            = field(23, 20);  // [23:20] vm_id
    hw_id.queue_id         = field(26, 24);  // [26:24] queue_id
    // [29:27] state_id is intentionally skipped.
    hw_id.microengine_id   = field(31, 30);  // [31:30] me_id
}
#undef EXTRACT_BITS
/**
 * @brief Build a zeroed record and fill in the fields shared by every sample kind.
 *
 * The whole record is first cleared with `std::memset`, then `size`,
 * `wave_in_group`, `exec_mask`, `workgroup_id` and `timestamp` are populated
 * from @p sample. All remaining fields stay zero for the caller to fill in.
 *
 * @tparam PcSamplingRecordT Output record type.
 * @tparam SType             Raw sample type.
 * @param sample Raw sample to read the common fields from.
 * @return The partially populated record, by value.
 */
template <typename PcSamplingRecordT, typename SType>
inline PcSamplingRecordT
copySampleHeader(const SType& sample)
{
    // wave_in_group is the low 6 bits of the combined chiplet/wave field.
    constexpr int wave_in_group_mask = 0x3F;

    PcSamplingRecordT header;
    // Clear every byte of the record before any field is written.
    std::memset(&header, 0, sizeof(header));

    header.size          = sizeof(PcSamplingRecordT);
    header.timestamp     = sample.timestamp;
    header.exec_mask     = sample.exec_mask;
    header.wave_in_group = sample.chiplet_and_wave_id & wave_in_group_mask;

    auto& wg_id = header.workgroup_id;
    wg_id.x     = sample.workgroup_id_x;
    wg_id.y     = sample.workgroup_id_y;
    wg_id.z     = sample.workgroup_id_z;

    return header;
}
/**
 * @brief Convert one raw sample into a record of type PcSamplingRecordT.
 *
 * Only declared here; the explicit specializations in this file pick the raw
 * sample layout to read from the (GFX, PcSamplingRecordT) pair.
 *
 * @tparam GFX               Architecture tag.
 * @tparam PcSamplingRecordT Record type to produce.
 * @param sample Untyped pointer to the raw sample; each specialization casts
 *               it to the raw layout it expects.
 * @return The decoded record, by value.
 */
template <typename GFX, typename PcSamplingRecordT>
inline PcSamplingRecordT
copySample(const void* sample);
/**
 * @brief Decode a raw GFX9 host-trap sample into a host-trap V0 record.
 *
 * @param sample Pointer that is cast to `const perf_sample_host_trap_v1*`.
 * @return Record with the common header, chiplet id and hardware id filled in.
 */
template <>
inline rocprofiler_pc_sampling_record_host_trap_v0_t
copySample<GFX9, rocprofiler_pc_sampling_record_host_trap_v0_t>(const void* sample)
{
    using record_t = rocprofiler_pc_sampling_record_host_trap_v0_t;

    const auto* raw    = static_cast<const perf_sample_host_trap_v1*>(sample);
    record_t    record = copySampleHeader<record_t>(*raw);

    // Architecture-specific location information.
    copyChipletId<GFX9>(record, *raw);
    copyHwId<GFX9>(record.hw_id, raw->hw_id);
    return record;
}
/**
 * @brief Decode a raw GFX9 stochastic sample into a stochastic V0 record.
 *
 * @param sample Pointer that is cast to `const perf_sample_snapshot_v1*`.
 * @return Record with the common header, chiplet id, hardware id and
 *         wave count filled in.
 */
template <>
inline rocprofiler_pc_sampling_record_stochastic_v0_t
copySample<GFX9, rocprofiler_pc_sampling_record_stochastic_v0_t>(const void* sample)
{
    using record_t = rocprofiler_pc_sampling_record_stochastic_v0_t;

    // wave_count is the low 6 bits of perf_snapshot_data1.
    constexpr int wave_count_mask = 0x3F;

    const auto* raw    = static_cast<const perf_sample_snapshot_v1*>(sample);
    record_t    record = copySampleHeader<record_t>(*raw);

    copyChipletId<GFX9>(record, *raw);
    copyHwId<GFX9>(record.hw_id, raw->hw_id);
    record.wave_count = raw->perf_snapshot_data1 & wave_count_mask;
    // TODO: implement logic for manipulating stochastic related fields
    return record;
}
/**
 * @brief Decode a raw GFX11 host-trap sample into a host-trap V0 record.
 *
 * Only the common header fields are populated at present.
 *
 * @param sample Pointer that is cast to `const perf_sample_host_trap_v1*`.
 * @return Record carrying the common header fields; everything else is zero.
 */
template <>
inline rocprofiler_pc_sampling_record_host_trap_v0_t
copySample<GFX11, rocprofiler_pc_sampling_record_host_trap_v0_t>(const void* sample)
{
    using record_t = rocprofiler_pc_sampling_record_host_trap_v0_t;

    const auto* raw    = static_cast<const perf_sample_host_trap_v1*>(sample);
    record_t    record = copySampleHeader<record_t>(*raw);
    // TODO: decode other fields.
    return record;
}
/**
 * @brief Decode a raw GFX11 stochastic sample into a stochastic V0 record.
 *
 * Only the common header fields are populated at present.
 *
 * @param sample Pointer that is cast to `const perf_sample_snapshot_v1*`.
 * @return Record carrying the common header fields; everything else is zero.
 */
// TODO: implement stochastic for GFX11
template <>
inline rocprofiler_pc_sampling_record_stochastic_v0_t
copySample<GFX11, rocprofiler_pc_sampling_record_stochastic_v0_t>(const void* sample)
{
    using record_t = rocprofiler_pc_sampling_record_stochastic_v0_t;

    const auto* raw    = static_cast<const perf_sample_snapshot_v1*>(sample);
    record_t    record = copySampleHeader<record_t>(*raw);
    // TODO: decode other fields
    // TODO: implement logic for manipulating stochastic related fields
    // record.wave_count = raw->perf_snapshot_data1 & 0x3F;
    return record;
}