007285272b
* [SWDEV-518071] Return HSA not loaded status (device counter collection) This is a state that a caller would want to know about to understand if they got no counters because of a failure or if they were trying to collect counters too early (as is the case in the sample, which can attempt to collect counters before HSA is inited). * Minor edit * format * [SWDEV-518081] Simplify Metric Loading (#243) * [SWDEV-518071] Return HSA not loaded status (device counter collection) This is a state that a caller would want to know about to understand if they got no counters because of a failure or if they were trying to collect counters too early (as is the case in the sample, which can attempt to collect counters before HSA is inited). * [SWDEV-518324] Add AST update support Allows the ability for ASTs to be updated (instead of an unchangable static value). Adds a shared pointer return type to protect against static destructors/modifications from invalidating potentially in use AST definitions. No functionality/use changes in this PR. * [SWDEV-518593] Add updatable dimension cache + fix string issues (#252) * [SWDEV-518593] Add updatable dimension cache + fix string issues Updates dimension cache to use the same design pattern as AST/Metrics. Fixes the string scoping issue seen in ASTs, which appears here as well. * Add rocprofiler_create_counter Creates derived counters based on input from the API. This PR does three things: 1. Adds the API + test case 2. Validates that an AST can be constructed from the counter supplied. 3. Updates metrics, ast, and dimension caches to include the new metric. Metric should be available for use immediately after the call completes. Due to the regeneration of ASTs, this call should not be performed in performance sensitive code. * Suggestion fixes --------- Co-authored-by: Benjamin Welton <bewelton@amd.com> * Minor tweak --------- Co-authored-by: Benjamin Welton <bewelton@amd.com> Co-authored-by: Venkateshwar Reddy Kandula <vkandula@amd.com> --------- Co-authored-by: Benjamin Welton <bewelton@amd.com> Co-authored-by: Venkateshwar Reddy Kandula <vkandula@amd.com> * Fixes for comments --------- Co-authored-by: Benjamin Welton <bewelton@amd.com> Co-authored-by: Kandula, Venkateshwar reddy <Venkateshwarreddy.Kandula@amd.com> Co-authored-by: Venkateshwar Reddy Kandula <vkandula@amd.com> --------- Co-authored-by: Benjamin Welton <bewelton@amd.com> Co-authored-by: Kandula, Venkateshwar reddy <Venkateshwarreddy.Kandula@amd.com> Co-authored-by: Venkateshwar Reddy Kandula <vkandula@amd.com>
920 строки
34 KiB
C++
920 строки
34 KiB
C++
// MIT License
|
|
//
|
|
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
// copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
|
|
#include "lib/rocprofiler-sdk/counters/evaluate_ast.hpp"
|
|
#include "lib/common/static_object.hpp"
|
|
#include "lib/common/synchronized.hpp"
|
|
|
|
#include <algorithm>
|
|
#include <cstdint>
|
|
#include <exception>
|
|
#include <numeric>
|
|
#include <optional>
|
|
#include <stdexcept>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#include <fmt/core.h>
|
|
#include <fmt/ranges.h>
|
|
#include <rocprofiler-sdk/rocprofiler.h>
|
|
|
|
#include "lib/common/utility.hpp"
|
|
#include "lib/rocprofiler-sdk/counters/dimensions.hpp"
|
|
#include "lib/rocprofiler-sdk/counters/id_decode.hpp"
|
|
#include "lib/rocprofiler-sdk/counters/parser/reader.hpp"
|
|
#include "rocprofiler-sdk/fwd.h"
|
|
|
|
namespace rocprofiler
|
|
{
|
|
namespace counters
|
|
{
|
|
namespace
|
|
{
|
|
ReduceOperation
|
|
get_reduce_op_type_from_string(const std::string& op)
|
|
{
|
|
static const std::unordered_map<std::string, ReduceOperation> reduce_op_string_to_type = {
|
|
{"min", REDUCE_MIN}, {"max", REDUCE_MAX}, {"sum", REDUCE_SUM}, {"avr", REDUCE_AVG}};
|
|
|
|
ReduceOperation type = REDUCE_NONE;
|
|
if(op.empty()) return REDUCE_NONE;
|
|
const auto* reduce_op_type = rocprofiler::common::get_val(reduce_op_string_to_type, op);
|
|
if(reduce_op_type) type = *reduce_op_type;
|
|
return type;
|
|
}
|
|
|
|
void
|
|
perform_reduction_to_single_instance(ReduceOperation reduce_op,
|
|
std::vector<rocprofiler_record_counter_t>* input_array,
|
|
rocprofiler_record_counter_t* result)
|
|
{
|
|
switch(reduce_op)
|
|
{
|
|
case REDUCE_NONE: break;
|
|
case REDUCE_MIN:
|
|
{
|
|
*result =
|
|
*std::min_element(input_array->begin(), input_array->end(), [](auto& a, auto& b) {
|
|
return a.counter_value < b.counter_value;
|
|
});
|
|
break;
|
|
}
|
|
case REDUCE_MAX:
|
|
{
|
|
*result =
|
|
*std::max_element(input_array->begin(), input_array->end(), [](auto& a, auto& b) {
|
|
return a.counter_value < b.counter_value;
|
|
});
|
|
break;
|
|
}
|
|
case REDUCE_SUM: [[fallthrough]];
|
|
case REDUCE_AVG:
|
|
{
|
|
*result = std::accumulate(
|
|
input_array->begin(),
|
|
input_array->end(),
|
|
rocprofiler_record_counter_t{.id = input_array->begin()->id,
|
|
.counter_value = 0,
|
|
.dispatch_id = input_array->begin()->dispatch_id,
|
|
.user_data = input_array->begin()->user_data,
|
|
.agent_id = input_array->begin()->agent_id},
|
|
[](auto& a, auto& b) {
|
|
return rocprofiler_record_counter_t{
|
|
.id = a.id,
|
|
.counter_value = a.counter_value + b.counter_value,
|
|
.dispatch_id = a.dispatch_id,
|
|
.user_data = a.user_data,
|
|
.agent_id = a.agent_id};
|
|
});
|
|
if(reduce_op == REDUCE_AVG)
|
|
{
|
|
(*result).counter_value /= input_array->size();
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
std::vector<rocprofiler_record_counter_t>*
|
|
perform_reduction(
|
|
ReduceOperation reduce_op,
|
|
std::vector<rocprofiler_record_counter_t>* input_array,
|
|
const std::unordered_set<rocprofiler_profile_counter_instance_types>& _reduce_dimension_set)
|
|
{
|
|
if(input_array->empty()) return input_array;
|
|
if(_reduce_dimension_set.empty() ||
|
|
_reduce_dimension_set.size() == ROCPROFILER_DIMENSION_LAST - 1)
|
|
{
|
|
rocprofiler_record_counter_t result{.id = 0,
|
|
.counter_value = 0,
|
|
.dispatch_id = 0,
|
|
.user_data = {.value = 0},
|
|
.agent_id = input_array->begin()->agent_id};
|
|
perform_reduction_to_single_instance(reduce_op, input_array, &result);
|
|
input_array->clear();
|
|
input_array->push_back(result);
|
|
set_dim_in_rec(input_array->begin()->id, ROCPROFILER_DIMENSION_NONE, 0);
|
|
return input_array;
|
|
}
|
|
|
|
std::unordered_map<int64_t, std::vector<rocprofiler_record_counter_t>> rec_groups;
|
|
size_t bit_length = DIM_BIT_LENGTH / ROCPROFILER_DIMENSION_LAST;
|
|
|
|
for(auto& rec : *input_array)
|
|
{
|
|
for(auto dim : _reduce_dimension_set)
|
|
{
|
|
int64_t mask_dim = (MAX_64 >> (64 - bit_length)) << ((dim - 1) * bit_length);
|
|
|
|
rec.id = rec.id | mask_dim;
|
|
rec.id = rec.id ^ mask_dim;
|
|
}
|
|
rec_groups[rec.id].push_back(rec);
|
|
}
|
|
|
|
input_array->clear();
|
|
for(auto& rec_pair : rec_groups)
|
|
{
|
|
rocprofiler_record_counter_t result{.id = 0,
|
|
.counter_value = 0,
|
|
.dispatch_id = 0,
|
|
.user_data = {.value = 0},
|
|
.agent_id = {.handle = 0}};
|
|
|
|
perform_reduction_to_single_instance(reduce_op, &rec_pair.second, &result);
|
|
input_array->push_back(result);
|
|
}
|
|
if(input_array->size() == 1)
|
|
{
|
|
set_dim_in_rec(input_array->begin()->id, ROCPROFILER_DIMENSION_NONE, 0);
|
|
}
|
|
return input_array;
|
|
}
|
|
|
|
int64_t
|
|
get_int_encoded_dimensions_from_string(const std::string& rangeStr)
|
|
{
|
|
int64_t result = 0;
|
|
std::istringstream iss(rangeStr);
|
|
std::string token;
|
|
size_t bit_length = DIM_BIT_LENGTH / ROCPROFILER_DIMENSION_LAST;
|
|
|
|
while(std::getline(iss, token, ','))
|
|
{
|
|
token.erase(std::remove_if(token.begin(), token.end(), ::isspace), token.end());
|
|
|
|
size_t dash_pos = token.find(':');
|
|
if(dash_pos != std::string::npos)
|
|
{
|
|
throw std::runtime_error(
|
|
fmt::format("Range based selection not supported by Dimension API. only select "
|
|
"single value for each dimension."));
|
|
int start = std::stoi(token.substr(0, dash_pos));
|
|
int end = std::stoi(token.substr(dash_pos + 1));
|
|
result |= (1LL << std::min(64, end + 1)) - (1LL << std::max(start, 0));
|
|
}
|
|
else
|
|
{
|
|
int num = std::stoi(token);
|
|
if(num < (1 << bit_length))
|
|
{
|
|
result |= (1LL << num);
|
|
}
|
|
else
|
|
{
|
|
throw std::runtime_error(fmt::format("Dimension value exceeds max allowed."));
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
std::vector<rocprofiler_record_counter_t>*
|
|
perform_selection(std::map<rocprofiler_profile_counter_instance_types, std::string>& dimension_map,
|
|
std::vector<rocprofiler_record_counter_t>* input_array)
|
|
{
|
|
if(input_array->empty()) return input_array;
|
|
for(auto& dim_pair : dimension_map)
|
|
{
|
|
int64_t encoded_dim_values = get_int_encoded_dimensions_from_string(dim_pair.second);
|
|
size_t bit_length = DIM_BIT_LENGTH / ROCPROFILER_DIMENSION_LAST;
|
|
int64_t mask = (MAX_64 >> (64 - bit_length)) << ((dim_pair.first - 1) * bit_length);
|
|
|
|
input_array->erase(std::remove_if(input_array->begin(),
|
|
input_array->end(),
|
|
[&](rocprofiler_record_counter_t& rec) {
|
|
bool should_remove =
|
|
(encoded_dim_values &
|
|
(1 << rocprofiler::counters::rec_to_dim_pos(
|
|
rec.id, dim_pair.first))) == 0;
|
|
if(!should_remove)
|
|
{
|
|
rec.id = rec.id | mask;
|
|
rec.id = rec.id ^ mask;
|
|
}
|
|
return should_remove;
|
|
}),
|
|
input_array->end());
|
|
}
|
|
|
|
return input_array;
|
|
}
|
|
|
|
const ASTs
|
|
load_asts()
|
|
{
|
|
std::unordered_map<std::string, EvaluateASTMap> data;
|
|
|
|
auto mets = counters::loadMetrics(true);
|
|
const auto& metric_map = mets->arch_to_metric;
|
|
for(const auto& [gfx, metrics] : metric_map)
|
|
{
|
|
// TODO: Remove global XML from derived counters...
|
|
if(gfx == "global") continue;
|
|
|
|
std::unordered_map<std::string, Metric> by_name;
|
|
for(const auto& metric : metrics)
|
|
{
|
|
by_name.emplace(metric.name(), metric);
|
|
}
|
|
|
|
auto& eval_map = data.emplace(gfx, EvaluateASTMap{}).first->second;
|
|
for(auto& [_, metric] : by_name)
|
|
{
|
|
RawAST* ast = nullptr;
|
|
auto* buf = yy_scan_string(metric.expression().empty() ? metric.name().c_str()
|
|
: metric.expression().c_str());
|
|
yyparse(&ast);
|
|
if(!ast)
|
|
{
|
|
ROCP_ERROR << fmt::format("Unable to parse metric {}", metric);
|
|
throw std::runtime_error(fmt::format("Unable to parse metric {}", metric));
|
|
}
|
|
try
|
|
{
|
|
auto& evaluate_ast_node =
|
|
eval_map
|
|
.emplace(metric.name(),
|
|
EvaluateAST({.handle = metric.id()}, by_name, *ast, gfx))
|
|
.first->second;
|
|
evaluate_ast_node.validate_raw_ast(
|
|
by_name); // TODO: refactor and consolidate internal post-construction
|
|
// logic as a Finish() method
|
|
} catch(std::exception& e)
|
|
{
|
|
ROCP_ERROR << e.what();
|
|
throw std::runtime_error(
|
|
fmt::format("AST was not generated for {}:{}", gfx, metric.name()));
|
|
}
|
|
yy_delete_buffer(buf);
|
|
delete ast;
|
|
}
|
|
|
|
for(auto& [name, ast] : eval_map)
|
|
{
|
|
ast.expand_derived(eval_map);
|
|
}
|
|
}
|
|
|
|
return {.arch_to_counter_asts = data};
|
|
}
|
|
|
|
} // namespace
|
|
|
|
rocprofiler_status_t
|
|
check_ast_generation(std::string_view arch, Metric metric)
|
|
{
|
|
auto metrics = counters::loadMetrics();
|
|
const auto* metric_list =
|
|
rocprofiler::common::get_val(metrics->arch_to_metric, std::string(arch));
|
|
if(!metric_list) return ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND;
|
|
|
|
RawAST* ast = nullptr;
|
|
auto* buf = yy_scan_string(metric.expression().empty() ? metric.name().c_str()
|
|
: metric.expression().c_str());
|
|
|
|
auto delete_ast = [&]() {
|
|
yy_delete_buffer(buf);
|
|
delete ast;
|
|
};
|
|
|
|
yyparse(&ast);
|
|
if(!ast)
|
|
{
|
|
if(buf) yy_delete_buffer(buf);
|
|
ROCP_ERROR << fmt::format("Unable to parse metric {}", metric);
|
|
return ROCPROFILER_STATUS_ERROR_AST_GENERATION_FAILED;
|
|
}
|
|
|
|
std::unordered_map<std::string, Metric> by_name;
|
|
for(const auto& existing_metric : *metric_list)
|
|
{
|
|
by_name.emplace(existing_metric.name(), existing_metric);
|
|
}
|
|
|
|
if(!by_name.emplace(metric.name(), metric).second)
|
|
{
|
|
delete_ast();
|
|
return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
try
|
|
{
|
|
auto evaluate_ast_node =
|
|
EvaluateAST({.handle = metric.id()}, by_name, *ast, std::string(arch));
|
|
evaluate_ast_node.validate_raw_ast(by_name);
|
|
} catch(std::exception& e)
|
|
{
|
|
ROCP_ERROR << fmt::format("Unable to generate AST for {} error: {}", metric, e.what());
|
|
delete_ast();
|
|
return ROCPROFILER_STATUS_ERROR_AST_GENERATION_FAILED;
|
|
}
|
|
|
|
delete_ast();
|
|
return ROCPROFILER_STATUS_SUCCESS;
|
|
}
|
|
|
|
std::shared_ptr<const ASTs>
|
|
get_ast_map(bool reload)
|
|
{
|
|
using ASTSync = common::Synchronized<std::shared_ptr<const ASTs>>;
|
|
static ASTSync*& ast_data = common::static_object<ASTSync>::construct(
|
|
[&]() { return std::make_shared<const ASTs>(load_asts()); }());
|
|
|
|
if(!reload)
|
|
{
|
|
return ast_data->rlock([](const auto& data) {
|
|
CHECK(data);
|
|
return data;
|
|
});
|
|
}
|
|
|
|
return ast_data->wlock([&](auto& data) {
|
|
data = std::make_shared<const ASTs>(load_asts());
|
|
CHECK(data);
|
|
return data;
|
|
});
|
|
}
|
|
|
|
std::optional<std::set<Metric>>
|
|
get_required_hardware_counters(const std::unordered_map<std::string, EvaluateASTMap>& asts,
|
|
const std::string& agent,
|
|
const Metric& metric)
|
|
{
|
|
const auto* agent_map = rocprofiler::common::get_val(asts, agent);
|
|
if(!agent_map) return std::nullopt;
|
|
const auto* counter_ast = rocprofiler::common::get_val(*agent_map, metric.name());
|
|
if(!counter_ast) return std::nullopt;
|
|
|
|
std::set<Metric> required_counters;
|
|
counter_ast->get_required_counters(*agent_map, required_counters);
|
|
return required_counters;
|
|
}
|
|
|
|
EvaluateAST::EvaluateAST(rocprofiler_counter_id_t out_id,
|
|
const std::unordered_map<std::string, Metric>& metrics,
|
|
const RawAST& ast,
|
|
std::string agent)
|
|
: _type(ast.type)
|
|
, _reduce_op(get_reduce_op_type_from_string(ast.reduce_op))
|
|
, _agent(std::move(agent))
|
|
, _reduce_dimension_set(ast.reduce_dimension_set)
|
|
, _select_dimension_map(ast.select_dimension_map)
|
|
, _out_id(out_id)
|
|
{
|
|
if(_type == NodeType::REFERENCE_NODE || _type == NodeType::ACCUMULATE_NODE)
|
|
{
|
|
try
|
|
{
|
|
_metric = metrics.at(std::get<std::string>(ast.value));
|
|
if(_type == NodeType::ACCUMULATE_NODE)
|
|
{
|
|
_metric.setflags(static_cast<int>(ast.accumulate_op));
|
|
}
|
|
} catch(std::exception& e)
|
|
{
|
|
throw std::runtime_error(
|
|
fmt::format("Unable to lookup metric {}", std::get<std::string>(ast.value)));
|
|
}
|
|
}
|
|
|
|
if(_type == NodeType::NUMBER_NODE)
|
|
{
|
|
_raw_value = std::get<int64_t>(ast.value);
|
|
_static_value.push_back({.id = 0,
|
|
.counter_value = static_cast<double>(std::get<int64_t>(ast.value)),
|
|
.dispatch_id = 0,
|
|
.user_data = {.value = 0},
|
|
.agent_id = {.handle = 0}});
|
|
}
|
|
|
|
for(const auto& nextAst : ast.counter_set)
|
|
{
|
|
_children.emplace_back(_out_id, metrics, *nextAst, _agent);
|
|
}
|
|
}
|
|
|
|
std::vector<MetricDimension>
|
|
EvaluateAST::set_dimensions()
|
|
{
|
|
if(!_dimension_types.empty())
|
|
{
|
|
return _dimension_types;
|
|
}
|
|
|
|
auto get_dim_types = [&](auto& metric) { return getBlockDimensions(_agent, metric); };
|
|
|
|
switch(_type)
|
|
{
|
|
case NONE:
|
|
case RANGE_NODE:
|
|
case CONSTANT_NODE:
|
|
case NUMBER_NODE:
|
|
{
|
|
_dimension_types =
|
|
std::vector<MetricDimension>{{dimension_map().at(ROCPROFILER_DIMENSION_INSTANCE),
|
|
1,
|
|
ROCPROFILER_DIMENSION_INSTANCE}};
|
|
}
|
|
break;
|
|
case ADDITION_NODE:
|
|
case SUBTRACTION_NODE:
|
|
case MULTIPLY_NODE:
|
|
case DIVIDE_NODE:
|
|
{
|
|
auto first = _children[0].set_dimensions();
|
|
auto second = _children[1].set_dimensions();
|
|
// - first.size() > 1 && second.size() > 1
|
|
// This is an explicit compatibility change to allow existing integer * COUNTER
|
|
// derived counters to function
|
|
if(first != second && first.size() > 1 && second.size() > 1)
|
|
throw std::runtime_error(
|
|
fmt::format("Dimension mis-mismatch: {} (dims: {}) and {} (dims: {})",
|
|
_children[0].metric(),
|
|
fmt::join(_children[0].set_dimensions(), ","),
|
|
_children[1].metric(),
|
|
fmt::join(_children[1].set_dimensions(), ",")));
|
|
_dimension_types = first.size() > second.size() ? first : second;
|
|
}
|
|
break;
|
|
case ACCUMULATE_NODE:
|
|
case REFERENCE_NODE:
|
|
{
|
|
_dimension_types = get_dim_types(_metric);
|
|
}
|
|
break;
|
|
case REDUCE_NODE:
|
|
{
|
|
if(_reduce_dimension_set.empty())
|
|
{
|
|
_dimension_types = std::vector<MetricDimension>{
|
|
{dimension_map().at(ROCPROFILER_DIMENSION_INSTANCE),
|
|
1,
|
|
ROCPROFILER_DIMENSION_INSTANCE}};
|
|
}
|
|
|
|
else
|
|
{
|
|
_dimension_types = std::vector<MetricDimension>{
|
|
{dimension_map().at(ROCPROFILER_DIMENSION_INSTANCE),
|
|
1,
|
|
ROCPROFILER_DIMENSION_INSTANCE}};
|
|
auto first = _children[0].set_dimensions();
|
|
first.erase(std::remove_if(first.begin(),
|
|
first.end(),
|
|
[&](const MetricDimension& dim) {
|
|
return _reduce_dimension_set.find(dim.type()) !=
|
|
_reduce_dimension_set.end();
|
|
}),
|
|
first.end());
|
|
if(!first.empty()) _dimension_types = first;
|
|
}
|
|
}
|
|
break;
|
|
case SELECT_NODE:
|
|
{
|
|
auto first = _children[0].set_dimensions();
|
|
first.erase(std::remove_if(first.begin(),
|
|
first.end(),
|
|
[&](const MetricDimension& dim) {
|
|
return _select_dimension_map.find(dim.type()) !=
|
|
_select_dimension_map.end();
|
|
}),
|
|
first.end());
|
|
if(first.empty())
|
|
{
|
|
_dimension_types = std::vector<MetricDimension>{
|
|
{dimension_map().at(ROCPROFILER_DIMENSION_INSTANCE),
|
|
1,
|
|
ROCPROFILER_DIMENSION_INSTANCE}};
|
|
}
|
|
else
|
|
{
|
|
_dimension_types = first;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
return _dimension_types;
|
|
}
|
|
|
|
void
|
|
EvaluateAST::get_required_counters(const std::unordered_map<std::string, EvaluateAST>& asts,
|
|
std::set<Metric>& counters) const
|
|
{
|
|
if(!_metric.empty() && children().empty() && _type != NodeType::NUMBER_NODE)
|
|
{
|
|
// Base counter
|
|
if(_metric.expression().empty())
|
|
{
|
|
counters.insert(_metric);
|
|
return;
|
|
}
|
|
|
|
// Derrived Counter
|
|
const auto* expr_ptr = rocprofiler::common::get_val(asts, _metric.name());
|
|
if(!expr_ptr) throw std::runtime_error("could not find derived counter");
|
|
expr_ptr->get_required_counters(asts, counters);
|
|
// TODO: Add guards against infinite recursion
|
|
return;
|
|
}
|
|
|
|
for(const auto& child : children())
|
|
{
|
|
child.get_required_counters(asts, counters);
|
|
}
|
|
}
|
|
|
|
bool
|
|
EvaluateAST::validate_raw_ast(const std::unordered_map<std::string, Metric>& metrics)
|
|
{
|
|
bool ret = true;
|
|
|
|
try
|
|
{
|
|
switch(_type)
|
|
{
|
|
case NONE:
|
|
case RANGE_NODE:
|
|
case CONSTANT_NODE:
|
|
case NUMBER_NODE: break;
|
|
case ADDITION_NODE:
|
|
case SUBTRACTION_NODE:
|
|
case MULTIPLY_NODE:
|
|
case DIVIDE_NODE:
|
|
{
|
|
// For arithmetic operations '+' '-' '*' '/' check if
|
|
// dimensions of both operands are matching. (handled in set_dimensions())
|
|
for(auto& child : _children)
|
|
{
|
|
child.validate_raw_ast(metrics);
|
|
}
|
|
}
|
|
break;
|
|
case REFERENCE_NODE:
|
|
{
|
|
// handled in constructor
|
|
}
|
|
break;
|
|
case REDUCE_NODE:
|
|
{
|
|
// Future TODO
|
|
// Check #1 : Should be applied on a base metric. Derived metric support will be
|
|
// added later. Check #2 : Operation should be a supported operation. Check #3 :
|
|
// Dimensions specified should be valid for this metric and GPU
|
|
|
|
// validate the members of RawAST, not the members of this class
|
|
}
|
|
break;
|
|
case SELECT_NODE:
|
|
{
|
|
// Future TODO
|
|
// Check #1 : Should be applied on a base metric. Derived metric support will be
|
|
// added later. Check #2 : Operation should be a supported operation. Check #3 :
|
|
// Dimensions specified should be valid for this metric and GPU. Check #4 :
|
|
// Dimensionindex values should be within limits for this metric and GPU.
|
|
}
|
|
break;
|
|
case ACCUMULATE_NODE:
|
|
{
|
|
// Future todo only to be applied on sq metric
|
|
}
|
|
break;
|
|
}
|
|
} catch(std::exception& e)
|
|
{
|
|
throw;
|
|
}
|
|
|
|
// Future TODO:
|
|
// check if there are cycles in the graph
|
|
|
|
return ret;
|
|
}
|
|
|
|
namespace
|
|
{
|
|
using property_function_t = int64_t (*)(const rocprofiler_agent_t&);
|
|
#define GEN_MAP_ENTRY(name, value) \
|
|
{ \
|
|
name, property_function_t([](const rocprofiler_agent_t& agent_info) { \
|
|
return static_cast<int64_t>(value); \
|
|
}) \
|
|
}
|
|
} // namespace
|
|
|
|
int64_t
|
|
get_agent_property(std::string_view property, const rocprofiler_agent_t& agent)
|
|
{
|
|
using map_t = std::unordered_map<std::string_view, property_function_t>;
|
|
|
|
static auto*& _props = common::static_object<common::Synchronized<map_t>>::construct(map_t{
|
|
GEN_MAP_ENTRY("cpu_cores_count", agent_info.cpu_cores_count),
|
|
GEN_MAP_ENTRY("simd_count", agent_info.simd_count),
|
|
GEN_MAP_ENTRY("mem_banks_count", agent_info.mem_banks_count),
|
|
GEN_MAP_ENTRY("caches_count", agent_info.caches_count),
|
|
GEN_MAP_ENTRY("io_links_count", agent_info.io_links_count),
|
|
GEN_MAP_ENTRY("cpu_core_id_base", agent_info.cpu_core_id_base),
|
|
GEN_MAP_ENTRY("simd_id_base", agent_info.simd_id_base),
|
|
GEN_MAP_ENTRY("max_waves_per_simd", agent_info.max_waves_per_simd),
|
|
GEN_MAP_ENTRY("lds_size_in_kb", agent_info.lds_size_in_kb),
|
|
GEN_MAP_ENTRY("gds_size_in_kb", agent_info.gds_size_in_kb),
|
|
GEN_MAP_ENTRY("num_gws", agent_info.num_gws),
|
|
GEN_MAP_ENTRY("wave_front_size", agent_info.wave_front_size),
|
|
GEN_MAP_ENTRY("array_count", agent_info.array_count),
|
|
GEN_MAP_ENTRY("simd_arrays_per_engine", agent_info.simd_arrays_per_engine),
|
|
GEN_MAP_ENTRY("cu_per_simd_array", agent_info.cu_per_simd_array),
|
|
GEN_MAP_ENTRY("simd_per_cu", agent_info.simd_per_cu),
|
|
GEN_MAP_ENTRY("max_slots_scratch_cu", agent_info.max_slots_scratch_cu),
|
|
GEN_MAP_ENTRY("gfx_target_version", agent_info.gfx_target_version),
|
|
GEN_MAP_ENTRY("vendor_id", agent_info.vendor_id),
|
|
GEN_MAP_ENTRY("device_id", agent_info.device_id),
|
|
GEN_MAP_ENTRY("location_id", agent_info.location_id),
|
|
GEN_MAP_ENTRY("domain", agent_info.domain),
|
|
GEN_MAP_ENTRY("drm_render_minor", agent_info.drm_render_minor),
|
|
GEN_MAP_ENTRY("hive_id", agent_info.hive_id),
|
|
GEN_MAP_ENTRY("num_sdma_engines", agent_info.num_sdma_engines),
|
|
GEN_MAP_ENTRY("num_sdma_xgmi_engines", agent_info.num_sdma_xgmi_engines),
|
|
GEN_MAP_ENTRY("num_sdma_queues_per_engine", agent_info.num_sdma_queues_per_engine),
|
|
GEN_MAP_ENTRY("num_cp_queues", agent_info.num_cp_queues),
|
|
GEN_MAP_ENTRY("max_engine_clk_ccompute", agent_info.max_engine_clk_ccompute),
|
|
});
|
|
|
|
return CHECK_NOTNULL(_props)->wlock([&property, &agent](map_t& props) -> int64_t {
|
|
if(const auto* func = rocprofiler::common::get_val(props, property))
|
|
{
|
|
return (*func)(agent);
|
|
}
|
|
return 0;
|
|
});
|
|
}
|
|
|
|
void
|
|
EvaluateAST::read_special_counters(
|
|
const rocprofiler_agent_t& agent,
|
|
const std::set<counters::Metric>& required_special_counters,
|
|
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>>& out_map)
|
|
{
|
|
for(const auto& metric : required_special_counters)
|
|
{
|
|
if(!out_map[metric.id()].empty()) out_map[metric.id()].clear();
|
|
auto& record = out_map[metric.id()].emplace_back();
|
|
set_counter_in_rec(record.id, {.handle = metric.id()});
|
|
set_dim_in_rec(record.id, ROCPROFILER_DIMENSION_NONE, 0);
|
|
|
|
record.counter_value = get_agent_property(metric.name(), agent);
|
|
}
|
|
}
|
|
|
|
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>>
|
|
EvaluateAST::read_pkt(const aql::CounterPacketConstruct* pkt_gen, hsa::AQLPacket& pkt)
|
|
{
|
|
struct it_data
|
|
{
|
|
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>>* data;
|
|
const aql::CounterPacketConstruct* pkt_gen;
|
|
aqlprofile_agent_handle_t agent;
|
|
};
|
|
|
|
auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(pkt_gen->agent()));
|
|
|
|
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>> ret;
|
|
if(pkt.empty) return ret;
|
|
it_data aql_data{.data = &ret, .pkt_gen = pkt_gen, .agent = aql_agent};
|
|
|
|
hsa_status_t status = aqlprofile_pmc_iterate_data(
|
|
pkt.handle,
|
|
[](aqlprofile_pmc_event_t event, uint64_t counter_id, uint64_t counter_value, void* data) {
|
|
CHECK(data);
|
|
auto& it = *static_cast<it_data*>(data);
|
|
const auto* metric = it.pkt_gen->event_to_metric(event);
|
|
|
|
if(!metric) return HSA_STATUS_SUCCESS;
|
|
|
|
auto& vec = it.data->emplace(metric->id(), std::vector<rocprofiler_record_counter_t>{})
|
|
.first->second;
|
|
auto& next_rec = vec.emplace_back();
|
|
set_counter_in_rec(next_rec.id, {.handle = metric->id()});
|
|
// Actual dimension info needs to be used here in the future
|
|
auto aql_status = aql::set_dim_id_from_sample(next_rec.id, it.agent, event, counter_id);
|
|
CHECK_EQ(aql_status, ROCPROFILER_STATUS_SUCCESS)
|
|
<< rocprofiler_get_status_string(aql_status);
|
|
|
|
// set_dim_in_rec(next_rec.id, ROCPROFILER_DIMENSION_NONE, vec.size() - 1);
|
|
// Note: in the near future we need to use hw_counter here instead
|
|
next_rec.counter_value = counter_value;
|
|
return HSA_STATUS_SUCCESS;
|
|
},
|
|
&aql_data);
|
|
|
|
if(status != HSA_STATUS_SUCCESS)
|
|
{
|
|
ROCP_ERROR << "AqlProfile could not decode packet";
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
void
|
|
EvaluateAST::set_out_id(std::vector<rocprofiler_record_counter_t>& results) const
|
|
{
|
|
for(auto& record : results)
|
|
{
|
|
set_counter_in_rec(record.id, _out_id);
|
|
}
|
|
}
|
|
|
|
void
|
|
EvaluateAST::expand_derived(std::unordered_map<std::string, EvaluateAST>& asts)
|
|
{
|
|
if(_expanded) return;
|
|
_expanded = true;
|
|
for(auto& child : _children)
|
|
{
|
|
if(child._type == NodeType::ACCUMULATE_NODE) continue;
|
|
if(auto* ptr = rocprofiler::common::get_val(asts, child.metric().name()))
|
|
{
|
|
ptr->expand_derived(asts);
|
|
child = *ptr;
|
|
}
|
|
else
|
|
{
|
|
child.expand_derived(asts);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* This covers cases where a derived metric is not a child at all. I.e.
|
|
* <metric name="MemWrites32B" expr=WRITE_REQ_32B>. This will expand
|
|
* WRITE_REQ_32B to its proper expression.
|
|
*/
|
|
if(!_metric.expression().empty())
|
|
{
|
|
if(auto* ptr = rocprofiler::common::get_val(asts, _metric.name()))
|
|
{
|
|
ptr->expand_derived(asts);
|
|
_children = ptr->children();
|
|
_type = ptr->type();
|
|
_reduce_op = ptr->reduce_op();
|
|
}
|
|
}
|
|
}
|
|
|
|
// convert to buffer at some point
|
|
std::vector<rocprofiler_record_counter_t>*
|
|
EvaluateAST::evaluate(
|
|
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>>& results_map,
|
|
std::vector<std::unique_ptr<std::vector<rocprofiler_record_counter_t>>>& cache)
|
|
{
|
|
auto perform_op = [&](auto&& op) {
|
|
auto* r1 = _children.at(0).evaluate(results_map, cache);
|
|
auto* r2 = _children.at(1).evaluate(results_map, cache);
|
|
|
|
if(r1->size() < r2->size()) swap(r1, r2);
|
|
|
|
CHECK(!r1->empty() && !r2->empty());
|
|
|
|
if(r2->size() == 1)
|
|
{
|
|
// Special operation on either a number node
|
|
// or special node. This is typically a multiple/divide
|
|
// or some other type of constant op.
|
|
for(auto& val : *r1)
|
|
{
|
|
val = op(val, *r2->begin());
|
|
}
|
|
}
|
|
else if(r2->size() == r1->size())
|
|
{
|
|
// Normal combination
|
|
std::transform(r1->begin(), r1->end(), r2->begin(), r1->begin(), op);
|
|
}
|
|
else
|
|
{
|
|
throw std::runtime_error(
|
|
fmt::format("Mismatched Sizes {}, {}", r1->size(), r2->size()));
|
|
}
|
|
return r1;
|
|
};
|
|
|
|
switch(_type)
|
|
{
|
|
case NONE:
|
|
case CONSTANT_NODE:
|
|
case RANGE_NODE: break;
|
|
case NUMBER_NODE:
|
|
{
|
|
cache.emplace_back(std::make_unique<std::vector<rocprofiler_record_counter_t>>());
|
|
*cache.back() = _static_value;
|
|
return cache.back().get();
|
|
}
|
|
case ADDITION_NODE:
|
|
return perform_op([](auto& a, auto& b) {
|
|
return rocprofiler_record_counter_t{
|
|
.id = a.id,
|
|
.counter_value = a.counter_value + b.counter_value,
|
|
.dispatch_id = a.dispatch_id,
|
|
.user_data = {.value = 0},
|
|
.agent_id = {.handle = 0}};
|
|
});
|
|
case SUBTRACTION_NODE:
|
|
return perform_op([](auto& a, auto& b) {
|
|
return rocprofiler_record_counter_t{
|
|
.id = a.id,
|
|
.counter_value = a.counter_value - b.counter_value,
|
|
.dispatch_id = a.dispatch_id,
|
|
.user_data = {.value = 0},
|
|
.agent_id = {.handle = 0}};
|
|
});
|
|
case MULTIPLY_NODE:
|
|
return perform_op([](auto& a, auto& b) {
|
|
return rocprofiler_record_counter_t{
|
|
.id = a.id,
|
|
.counter_value = a.counter_value * b.counter_value,
|
|
.dispatch_id = a.dispatch_id,
|
|
.user_data = {.value = 0},
|
|
.agent_id = {.handle = 0}};
|
|
});
|
|
case DIVIDE_NODE:
|
|
return perform_op([](auto& a, auto& b) {
|
|
return rocprofiler_record_counter_t{
|
|
.id = a.id,
|
|
.counter_value = (b.counter_value == 0 ? 0 : a.counter_value / b.counter_value),
|
|
.dispatch_id = a.dispatch_id,
|
|
.user_data = {.value = 0},
|
|
.agent_id = {.handle = 0}};
|
|
});
|
|
case ACCUMULATE_NODE:
|
|
// todo update how to read the hybrid metric
|
|
case REFERENCE_NODE:
|
|
{
|
|
auto* result = rocprofiler::common::get_val(results_map, _metric.id());
|
|
if(!result)
|
|
throw std::runtime_error(
|
|
fmt::format("Unable to lookup results for metric {}", _metric.name()));
|
|
|
|
cache.emplace_back(std::make_unique<std::vector<rocprofiler_record_counter_t>>());
|
|
*cache.back() = *result;
|
|
result = cache.back().get();
|
|
return result;
|
|
}
|
|
break;
|
|
case REDUCE_NODE:
|
|
{
|
|
auto* result = _children.at(0).evaluate(results_map, cache);
|
|
if(_reduce_op == REDUCE_NONE)
|
|
throw std::runtime_error(fmt::format("Invalid Second argument to reduce(): {}",
|
|
static_cast<int>(_reduce_op)));
|
|
return perform_reduction(_reduce_op, result, _reduce_dimension_set);
|
|
}
|
|
case SELECT_NODE:
|
|
{
|
|
auto* result = _children.at(0).evaluate(results_map, cache);
|
|
return perform_selection(_select_dimension_map, result);
|
|
}
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
} // namespace counters
|
|
} // namespace rocprofiler
|