Dimension support for reduce operator (#1147)
* cache reference nodes
* evaluation based on dim args
* format
* add dimensions for reduce operator
* add dimensions for reduce operator
* add dimensions for reduce operator docs
* add dimensions for reduce operator.
* refactor switch cases
* Update CHANGELOG.md
* updated doc with data example
* updated doc with data example for reduce operation.
* added fallthrough in switch case sum.
* changelog.md
* format
* fix bug in construct_test_data()
[ROCm/rocprofiler-sdk commit: 472907a576]
Этот коммит содержится в:
@@ -142,6 +142,8 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
|
||||
### Added
|
||||
|
||||
- Added support for select() operation in counter expression.
|
||||
- Added support for dimension-wise reduction in the reduce() operation of counter expressions.
|
||||
|
||||
### Changed
|
||||
|
||||
### Resolved issues
|
||||
|
||||
+44
-7
@@ -289,6 +289,50 @@ Expression: 100*reduce(GL2C_HIT,sum)/(reduce(GL2C_HIT,sum)+reduce(GL2C_MISS,sum)
|
||||
|
||||
The reduce function reduces counter values across all dimensions such as shader engine, SIMD, and so on, to produce a single output value. This helps to collect and compare values across the entire device.
|
||||
Here are the common reduction operations:
|
||||
- `sum`: Sums to create a single output. For example, `reduce(GL2C_HIT,sum)` sums all `GL2C_HIT` hardware register values.
|
||||
- `avr`: Calculates the average across all dimensions.
|
||||
- `min`: Selects the minimum value across all dimensions.
|
||||
- `max`: Selects the maximum value across all dimensions.
|
||||
|
||||
```yaml
|
||||
expression: reduce(X,sum,[DIMENSION_XCC])
|
||||
```
|
||||
`reduce()` also supports dimension-wise reduction when a list of dimensions is passed as the third parameter. In the expression above, if `X` has three dimensions, `DIMENSION_XCC`, `DIMENSION_SHADER_ARRAY`, and `DIMENSION_WGP`, the reduction is applied across `DIMENSION_XCC`: counter values whose `DIMENSION_SHADER_ARRAY` and `DIMENSION_WGP` indices are the same are combined, as shown below.
|
||||
|
||||
Assume the sizes of the XCC, SHADER_ARRAY (SH), and WGP dimensions are 2, 4, and 4, respectively.
|
||||
|
||||
Raw Counter Data in 3D space:
|
||||
|
||||
#### XCC[0]:
|
||||
| |WGP[0]|WGP[1]|WGP[2]|WGP[3]|
|
||||
|-------|------|------|------|------|
|
||||
| SH[0] | 1 | 2 | 3 | 4 |
|
||||
| SH[1] | 5 | 6 | 7 | 8 |
|
||||
| SH[2] | 9 | 10 | 11 | 12 |
|
||||
| SH[3] | 13 | 14 | 15 | 16 |
|
||||
|
||||
#### XCC[1]:
|
||||
| |WGP[0]|WGP[1]|WGP[2]|WGP[3]|
|
||||
|-------|------|------|------|------|
|
||||
| SH[0] | 1 | 2 | 3 | 4 |
|
||||
| SH[1] | 5 | 6 | 7 | 8 |
|
||||
| SH[2] | 9 | 10 | 11 | 12 |
|
||||
| SH[3] | 13 | 14 | 15 | 16 |
|
||||
|
||||
Reducing the XCC dimension with `sum` results in a 2D space that contains only the WGP and SH dimensions.
|
||||
|
||||
| |WGP[0]|WGP[1]|WGP[2]|WGP[3]|
|
||||
|-------|------|------|------|------|
|
||||
| SH[0] | 2 | 4 | 6 | 8 |
|
||||
| SH[1] | 10 | 12 | 14 | 16 |
|
||||
| SH[2] | 18 | 20 | 22 | 24 |
|
||||
| SH[3] | 26 | 28 | 30 | 32 |
|
||||
|
||||
Similarly, `reduce(X,sum,[DIMENSION_XCC,DIMENSION_SHADER_ARRAY])` reduces both the XCC and SH dimensions, leaving only the WGP dimension.
|
||||
|
||||
| |WGP[0]|WGP[1]|WGP[2]|WGP[3]|
|
||||
|-------|------|------|------|------|
|
||||
| | 56 | 64 | 72 | 80 |
|
||||
|
||||
### Select Function
|
||||
|
||||
@@ -333,13 +377,6 @@ similarly, for `select(Y, [DIMENSION_XCC=[0],DIMENSION_SHADER_ENGINE=[2]])` resu
|
||||
|-------|------|------|------|------|
|
||||
| | 9 | 10 | 11 | 12 |
|
||||
|
||||
### Accumulate Function
|
||||
|
||||
- `sum`: Sums to create a single output. For example, `reduce(GL2C_HIT,sum)` sums all `GL2C_HIT` hardware register values.
|
||||
- `avr`: Calculates the average across all dimensions.
|
||||
- `min`: Selects minimum value across all dimensions.
|
||||
- `max`: Selects the maximum value across all dimensions.
|
||||
|
||||
### Accumulate function
|
||||
|
||||
```yaml
|
||||
|
||||
+108
-57
@@ -60,21 +60,17 @@ get_reduce_op_type_from_string(const std::string& op)
|
||||
return type;
|
||||
}
|
||||
|
||||
std::vector<rocprofiler_record_counter_t>*
|
||||
perform_reduction(ReduceOperation reduce_op, std::vector<rocprofiler_record_counter_t>* input_array)
|
||||
void
|
||||
perform_reduction_to_single_instance(ReduceOperation reduce_op,
|
||||
std::vector<rocprofiler_record_counter_t>* input_array,
|
||||
rocprofiler_record_counter_t* result)
|
||||
{
|
||||
rocprofiler_record_counter_t result{.id = 0,
|
||||
.counter_value = 0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0},
|
||||
.agent_id = {.handle = 0}};
|
||||
if(input_array->empty()) return input_array;
|
||||
switch(reduce_op)
|
||||
{
|
||||
case REDUCE_NONE: break;
|
||||
case REDUCE_MIN:
|
||||
{
|
||||
result =
|
||||
*result =
|
||||
*std::min_element(input_array->begin(), input_array->end(), [](auto& a, auto& b) {
|
||||
return a.counter_value < b.counter_value;
|
||||
});
|
||||
@@ -82,55 +78,92 @@ perform_reduction(ReduceOperation reduce_op, std::vector<rocprofiler_record_coun
|
||||
}
|
||||
case REDUCE_MAX:
|
||||
{
|
||||
result =
|
||||
*result =
|
||||
*std::max_element(input_array->begin(), input_array->end(), [](auto& a, auto& b) {
|
||||
return a.counter_value < b.counter_value;
|
||||
});
|
||||
break;
|
||||
}
|
||||
case REDUCE_SUM:
|
||||
{
|
||||
result = std::accumulate(input_array->begin(),
|
||||
input_array->end(),
|
||||
rocprofiler_record_counter_t{.id = 0,
|
||||
.counter_value = 0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0},
|
||||
.agent_id = {.handle = 0}},
|
||||
[](auto& a, auto& b) {
|
||||
return rocprofiler_record_counter_t{
|
||||
.id = a.id,
|
||||
.counter_value = a.counter_value + b.counter_value,
|
||||
.dispatch_id = a.dispatch_id,
|
||||
.user_data = {.value = 0},
|
||||
.agent_id = {.handle = 0}};
|
||||
});
|
||||
break;
|
||||
}
|
||||
case REDUCE_SUM: [[fallthrough]];
|
||||
case REDUCE_AVG:
|
||||
{
|
||||
result = std::accumulate(input_array->begin(),
|
||||
input_array->end(),
|
||||
rocprofiler_record_counter_t{.id = 0,
|
||||
.counter_value = 0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0},
|
||||
.agent_id = {.handle = 0}},
|
||||
[](auto& a, auto& b) {
|
||||
return rocprofiler_record_counter_t{
|
||||
.id = a.id,
|
||||
.counter_value = a.counter_value + b.counter_value,
|
||||
.dispatch_id = a.dispatch_id,
|
||||
.user_data = {.value = 0},
|
||||
.agent_id = {.handle = 0}};
|
||||
});
|
||||
result.counter_value /= input_array->size();
|
||||
*result = std::accumulate(input_array->begin(),
|
||||
input_array->end(),
|
||||
rocprofiler_record_counter_t{.id = 0,
|
||||
.counter_value = 0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0},
|
||||
.agent_id = {.handle = 0}},
|
||||
[](auto& a, auto& b) {
|
||||
return rocprofiler_record_counter_t{
|
||||
.id = a.id,
|
||||
.counter_value = a.counter_value + b.counter_value,
|
||||
.dispatch_id = a.dispatch_id,
|
||||
.user_data = {.value = 0},
|
||||
.agent_id = {.handle = 0}};
|
||||
});
|
||||
if(reduce_op == REDUCE_AVG)
|
||||
{
|
||||
(*result).counter_value /= input_array->size();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<rocprofiler_record_counter_t>*
|
||||
perform_reduction(
|
||||
ReduceOperation reduce_op,
|
||||
std::vector<rocprofiler_record_counter_t>* input_array,
|
||||
const std::unordered_set<rocprofiler_profile_counter_instance_types>& _reduce_dimension_set)
|
||||
{
|
||||
if(input_array->empty()) return input_array;
|
||||
if(_reduce_dimension_set.empty() ||
|
||||
_reduce_dimension_set.size() == ROCPROFILER_DIMENSION_LAST - 1)
|
||||
{
|
||||
rocprofiler_record_counter_t result{.id = 0,
|
||||
.counter_value = 0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0},
|
||||
.agent_id = {.handle = 0}};
|
||||
perform_reduction_to_single_instance(reduce_op, input_array, &result);
|
||||
input_array->clear();
|
||||
input_array->push_back(result);
|
||||
set_dim_in_rec(input_array->begin()->id, ROCPROFILER_DIMENSION_NONE, 0);
|
||||
return input_array;
|
||||
}
|
||||
|
||||
std::unordered_map<int64_t, std::vector<rocprofiler_record_counter_t>> rec_groups;
|
||||
size_t bit_length = DIM_BIT_LENGTH / ROCPROFILER_DIMENSION_LAST;
|
||||
|
||||
for(auto& rec : *input_array)
|
||||
{
|
||||
for(auto dim : _reduce_dimension_set)
|
||||
{
|
||||
int64_t mask_dim = (MAX_64 >> (64 - bit_length)) << ((dim - 1) * bit_length);
|
||||
|
||||
rec.id = rec.id | mask_dim;
|
||||
rec.id = rec.id ^ mask_dim;
|
||||
}
|
||||
rec_groups[rec.id].push_back(rec);
|
||||
}
|
||||
|
||||
input_array->clear();
|
||||
input_array->push_back(result);
|
||||
set_dim_in_rec(input_array->begin()->id, ROCPROFILER_DIMENSION_NONE, 0);
|
||||
for(auto& rec_pair : rec_groups)
|
||||
{
|
||||
rocprofiler_record_counter_t result{.id = 0,
|
||||
.counter_value = 0,
|
||||
.dispatch_id = 0,
|
||||
.user_data = {.value = 0},
|
||||
.agent_id = {.handle = 0}};
|
||||
|
||||
perform_reduction_to_single_instance(reduce_op, &rec_pair.second, &result);
|
||||
input_array->push_back(result);
|
||||
}
|
||||
if(input_array->size() == 1)
|
||||
{
|
||||
set_dim_in_rec(input_array->begin()->id, ROCPROFILER_DIMENSION_NONE, 0);
|
||||
}
|
||||
return input_array;
|
||||
}
|
||||
|
||||
@@ -375,11 +408,30 @@ EvaluateAST::set_dimensions()
|
||||
break;
|
||||
case REDUCE_NODE:
|
||||
{
|
||||
// Reduction down to a single instance supported for now.
|
||||
_dimension_types =
|
||||
std::vector<MetricDimension>{{dimension_map().at(ROCPROFILER_DIMENSION_INSTANCE),
|
||||
1,
|
||||
ROCPROFILER_DIMENSION_INSTANCE}};
|
||||
if(_reduce_dimension_set.empty())
|
||||
{
|
||||
_dimension_types = std::vector<MetricDimension>{
|
||||
{dimension_map().at(ROCPROFILER_DIMENSION_INSTANCE),
|
||||
1,
|
||||
ROCPROFILER_DIMENSION_INSTANCE}};
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
_dimension_types = std::vector<MetricDimension>{
|
||||
{dimension_map().at(ROCPROFILER_DIMENSION_INSTANCE),
|
||||
1,
|
||||
ROCPROFILER_DIMENSION_INSTANCE}};
|
||||
auto first = _children[0].set_dimensions();
|
||||
first.erase(std::remove_if(first.begin(),
|
||||
first.end(),
|
||||
[&](const MetricDimension& dim) {
|
||||
return _reduce_dimension_set.find(dim.type()) !=
|
||||
_reduce_dimension_set.end();
|
||||
}),
|
||||
first.end());
|
||||
if(!first.empty()) _dimension_types = first;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case SELECT_NODE:
|
||||
@@ -678,10 +730,6 @@ EvaluateAST::evaluate(
|
||||
|
||||
if(r1->size() < r2->size()) swap(r1, r2);
|
||||
|
||||
cache.emplace_back(std::make_unique<std::vector<rocprofiler_record_counter_t>>());
|
||||
*cache.back() = *r1;
|
||||
r1 = cache.back().get();
|
||||
|
||||
CHECK(!r1->empty() && !r2->empty());
|
||||
|
||||
if(r2->size() == 1)
|
||||
@@ -758,6 +806,9 @@ EvaluateAST::evaluate(
|
||||
throw std::runtime_error(
|
||||
fmt::format("Unable to lookup results for metric {}", _metric.name()));
|
||||
|
||||
cache.emplace_back(std::make_unique<std::vector<rocprofiler_record_counter_t>>());
|
||||
*cache.back() = *result;
|
||||
result = cache.back().get();
|
||||
return result;
|
||||
}
|
||||
break;
|
||||
@@ -767,7 +818,7 @@ EvaluateAST::evaluate(
|
||||
if(_reduce_op == REDUCE_NONE)
|
||||
throw std::runtime_error(fmt::format("Invalid Second argument to reduce(): {}",
|
||||
static_cast<int>(_reduce_op)));
|
||||
return perform_reduction(_reduce_op, result);
|
||||
return perform_reduction(_reduce_op, result, _reduce_dimension_set);
|
||||
}
|
||||
case SELECT_NODE:
|
||||
{
|
||||
|
||||
+140
@@ -33,7 +33,9 @@
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/evaluate_ast.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/id_decode.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/parser/reader.hpp"
|
||||
#include "rocprofiler-sdk/fwd.h"
|
||||
|
||||
namespace
|
||||
{
|
||||
@@ -1366,3 +1368,141 @@ TEST(evatuate_ast, evaluate_select)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks reduce() with an explicit dimension list (third argument).
// Two derived metrics are parsed and evaluated over two base counters:
//   - max_BATES: reduce(VOORHEES+KRUEGER,max,[DIMENSION_XCC])
//   - sum_BATES: reduce(VOORHEES+KRUEGER,sum,[DIMENSION_XCC, DIMENSION_AID])
// Expected records are computed independently by the local lambdas max_dim / sum_dim and
// compared against the evaluator output record by record (id and counter_value).
TEST(evaluate_ast, counter_reduction_dimension)
|
||||
{
|
||||
using namespace rocprofiler::counters;
|
||||
|
||||
// Number of id bits covered by the mask built in max_dim below.
// NOTE(review): presumably the width of one dimension field in the record id — confirm
// against id_decode.hpp.
size_t bit_length = DIM_BIT_LENGTH / ROCPROFILER_DIMENSION_LAST;
|
||||
|
||||
// Builds a record id that starts at 0 and has only the counter handle set.
auto get_base_rec_id = [](uint64_t counter_id) {
|
||||
rocprofiler_counter_instance_id_t base_id = 0;
|
||||
set_counter_in_rec(base_id, {.handle = counter_id});
|
||||
return base_id;
|
||||
};
|
||||
|
||||
// Reference implementation of a "max" reduction over one dimension: the bit field at
// shift 0 is cleared in every record id, records are grouped by the resulting id, and the
// largest counter_value of each group is kept.
// NOTE(review): shift 0 presumably corresponds to DIMENSION_XCC (the field reduced by
// max_BATES) — confirm against the dimension enum values.
auto max_dim = [&](auto&& a) -> auto
|
||||
{
|
||||
std::unordered_map<int64_t, rocprofiler_record_counter_t> groups_dim;
|
||||
std::vector<rocprofiler_record_counter_t> result;
|
||||
// rec is taken by value on purpose: its id is modified before grouping.
for(auto rec : a)
|
||||
{
|
||||
int64_t mask_dim = (MAX_64 >> (64 - bit_length)) << (bit_length * 0);
|
||||
|
||||
// OR followed by XOR with the same mask zeroes exactly the masked bits.
rec.id = rec.id | mask_dim;
|
||||
rec.id = rec.id ^ mask_dim;
|
||||
if(groups_dim.find(rec.id) == groups_dim.end())
|
||||
{
|
||||
groups_dim[rec.id] = rec;
|
||||
}
|
||||
else
|
||||
{
|
||||
groups_dim[rec.id].counter_value =
|
||||
std::max(groups_dim[rec.id].counter_value, rec.counter_value);
|
||||
}
|
||||
}
|
||||
for(auto& rec_pair : groups_dim)
|
||||
{
|
||||
result.push_back(rec_pair.second);
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
// Reference implementation of a "sum" reduction down to a single record: copies the first
// input record, calls set_dim_in_rec on its id with ROCPROFILER_DIMENSION_NONE and 0, and
// stores the sum of all input counter_values in it.
auto sum_dim = [&](auto&& a) -> auto
|
||||
{
|
||||
std::vector<rocprofiler_record_counter_t> result;
|
||||
double counter_value = 0;
|
||||
result.push_back(a[0]);
|
||||
set_dim_in_rec(result.begin()->id, ROCPROFILER_DIMENSION_NONE, 0);
|
||||
for(auto& rec : a)
|
||||
{
|
||||
counter_value += rec.counter_value;
|
||||
}
|
||||
result.begin()->counter_value = counter_value;
|
||||
return result;
|
||||
};
|
||||
|
||||
// Metric table: two base metrics with an empty expression and two derived metrics whose
// expression is a reduce() call with a dimension list.
std::unordered_map<std::string, Metric> metrics = {
|
||||
{"VOORHEES", Metric("gfx9", "VOORHEES", "a", "a", "a", "", "", 0)},
|
||||
{"KRUEGER", Metric("gfx9", "KRUEGER", "a", "a", "a", "", "", 1)},
|
||||
{"max_BATES",
|
||||
Metric("gfx9",
|
||||
"max_BATES",
|
||||
"C",
|
||||
"C",
|
||||
"C",
|
||||
"reduce(VOORHEES+KRUEGER,max, [DIMENSION_XCC])",
|
||||
"",
|
||||
2)},
|
||||
{"sum_BATES",
|
||||
Metric("gfx9",
|
||||
"sum_BATES",
|
||||
"C",
|
||||
"C",
|
||||
"C",
|
||||
"reduce(VOORHEES+KRUEGER,sum, [DIMENSION_XCC, DIMENSION_AID])",
|
||||
"",
|
||||
3)}};
|
||||
|
||||
// Input records for the two base counters, produced by construct_test_data_dim (defined
// outside this block).
// NOTE(review): presumably generates records spanning the XCC and AID dimensions with the
// given size (8) — confirm against the helper.
std::unordered_map<std::string, std::vector<rocprofiler_record_counter_t>> base_counter_data = {
|
||||
{"VOORHEES",
|
||||
construct_test_data_dim(
|
||||
get_base_rec_id(0), {ROCPROFILER_DIMENSION_XCC, ROCPROFILER_DIMENSION_AID}, 8)},
|
||||
{"KRUEGER",
|
||||
construct_test_data_dim(
|
||||
get_base_rec_id(1), {ROCPROFILER_DIMENSION_XCC, ROCPROFILER_DIMENSION_AID}, 8)},
|
||||
};
|
||||
|
||||
// Parse every metric (its expression, or its own name when the expression is empty) and
// store the resulting EvaluateAST under the "gfx9" key.
std::unordered_map<std::string, std::unordered_map<std::string, EvaluateAST>> asts;
|
||||
for(const auto& [val, metric] : metrics)
|
||||
{
|
||||
RawAST* ast = nullptr;
|
||||
auto buf = yy_scan_string(metric.expression().empty() ? metric.name().c_str()
|
||||
: metric.expression().c_str());
|
||||
yyparse(&ast);
|
||||
ASSERT_TRUE(ast) << metric.expression() << " " << metric.name();
|
||||
asts.emplace("gfx9", std::unordered_map<std::string, EvaluateAST>{})
|
||||
.first->second.emplace(val,
|
||||
EvaluateAST({.handle = metric.id()}, metrics, *ast, "gfx9"));
|
||||
// The scan buffer and the raw AST are released here, after EvaluateAST has been built.
yy_delete_buffer(buf);
|
||||
delete ast;
|
||||
}
|
||||
|
||||
// Each entry: derived metric name, expected output records, and the expected number of
// required hardware counters (checked against eval_counters->size() below).
// NOTE(review): plus_vec is defined outside this block; presumably an element-wise sum of
// the two record vectors — confirm.
std::vector<std::tuple<std::string, std::vector<rocprofiler_record_counter_t>, int64_t>>
|
||||
derived_counters = {
|
||||
{"max_BATES",
|
||||
max_dim(plus_vec(base_counter_data["VOORHEES"], base_counter_data["KRUEGER"])),
|
||||
2},
|
||||
{"sum_BATES",
|
||||
sum_dim(plus_vec(base_counter_data["VOORHEES"], base_counter_data["KRUEGER"])),
|
||||
2},
|
||||
};
|
||||
|
||||
// Re-key the base counter data by metric id, the form passed to evaluate().
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>> base_counter_decode;
|
||||
for(const auto& [name, base_counter_v] : base_counter_data)
|
||||
{
|
||||
base_counter_decode[metrics[name].id()] = base_counter_v;
|
||||
}
|
||||
|
||||
// Evaluate each derived metric and compare the output with the expected records.
for(auto& [name, expected, eval_count] : derived_counters)
|
||||
{
|
||||
ROCP_INFO << name;
|
||||
auto eval_counters =
|
||||
rocprofiler::counters::get_required_hardware_counters(asts, "gfx9", metrics[name]);
|
||||
ASSERT_TRUE(eval_counters);
|
||||
ASSERT_EQ(eval_counters->size(), eval_count);
|
||||
std::vector<std::unique_ptr<std::vector<rocprofiler_record_counter_t>>> cache;
|
||||
asts.at("gfx9").at(name).expand_derived(asts.at("gfx9"));
|
||||
auto ret = asts.at("gfx9").at(name).evaluate(base_counter_decode, cache);
|
||||
EXPECT_EQ(ret->size(), expected.size());
|
||||
int pos = 0;
|
||||
asts.at("gfx9").at(name).set_out_id(*ret);
|
||||
// NOTE(review): records are compared by position, and expected[pos] is indexed even if the
// size check above failed. For max_BATES the expected order comes from iterating an
// unordered_map in max_dim, so this relies on the evaluator emitting groups in the same
// order — verify this is stable.
for(const auto& v : *ret)
|
||||
{
|
||||
// Stamp the derived metric's id into the expected record before comparing ids.
set_counter_in_rec(expected[pos].id, {.handle = metrics[name].id()});
|
||||
EXPECT_EQ(v.id, expected[pos].id);
|
||||
EXPECT_FLOAT_EQ(v.counter_value, expected[pos].counter_value);
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user